[FFmpeg-devel] [PATCH 1/2] aacenc: add a faster version of twoloop as the "fast" coder

Sat Aug 13 19:04:40 EEST 2016

On 7 August 2016 at 00:51, Rostislav Pehlivanov <atomnuker at gmail.com> wrote:

> Does nothing fancy but still sounds very decent at 128 kbps.
> There is still room to improve by bringing in the low-pass and PNS
> management from the main big twoloop coder, which should improve its
> quality without sacrificing much speed.
>
> Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>
> ---
>  libavcodec/aaccoder.c | 154 +++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 134 insertions(+), 20 deletions(-)
>
> diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
> index bca1f59..edf29f4 100644
> --- a/libavcodec/aaccoder.c
> +++ b/libavcodec/aaccoder.c
> @@ -396,34 +396,148 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
>                                         SingleChannelElement *sce,
>                                         const float lambda)
>  {
> -    int i, w, w2, g;
> -    int minq = 255;
> -
> -    memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
> +    int start = 0, i, w, w2, g;
> +    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate /
> avctx->channels * (lambda / 120.f);
> +    float dists[128] = { 0 }, uplims[128] = { 0 };
> +    float maxvals[128];
> +    int fflag, minscaler;
> +    int its  = 0;
> +    int allz = 0;
> +    float minthr = INFINITY;
> +
> +    // for values above this the decoder might end up in an endless loop
> +    // due to always having more bits than what can be encoded.
> +    destbits = FFMIN(destbits, 5800);
> +    //XXX: some heuristic to determine initial quantizers will reduce search time
> +    //determine zero bands and upper limits
>      for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
> -        for (g = 0; g < sce->ics.num_swb; g++) {
> +        start = 0;
> +        for (g = 0;  g < sce->ics.num_swb; g++) {
> +            int nz = 0;
> +            float uplim = 0.0f, energy = 0.0f;
>              for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
>                  FFPsyBand *band = &s->psy.ch[s->cur_channel].
> psy_bands[(w+w2)*16+g];
> -                if (band->energy <= band->threshold) {
> -                    sce->sf_idx[(w+w2)*16+g] = 218;
> +                uplim += band->threshold;
> +                energy += band->energy;
> +                if (band->energy <= band->threshold || band->threshold ==
> 0.0f) {
>                      sce->zeroes[(w+w2)*16+g] = 1;
> -                } else {
> -                    sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS -
> SCALE_DIV_512 + log2f(band->threshold), 80, 218);
> -                    sce->zeroes[(w+w2)*16+g] = 0;
> +                    continue;
>                  }
> -                minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
> +                nz = 1;
>              }
> +            uplims[w*16+g] = uplim *512;
> +            sce->band_type[w*16+g] = 0;
> +            sce->zeroes[w*16+g] = !nz;
> +            if (nz)
> +                minthr = FFMIN(minthr, uplim);
> +            allz |= nz;
> +            start += sce->ics.swb_sizes[g];
>          }
>      }
> -    for (i = 0; i < 128; i++) {
> -        sce->sf_idx[i] = 140;
> -        //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
> +    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
> +        for (g = 0;  g < sce->ics.num_swb; g++) {
> +            if (sce->zeroes[w*16+g]) {
> +                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
> +                continue;
> +            }
> +            sce->sf_idx[w*16+g] = SCALE_ONE_POS +
> FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
> +        }
>      }
> -    //set the same quantizers inside window groups
> -    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
> -        for (g = 0;  g < sce->ics.num_swb; g++)
> -            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
> -                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
> +
> +    if (!allz)
> +        return;
> +    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
> +    ff_quantize_band_cost_cache_init(s);
> +
> +    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
> +        start = w*128;
> +        for (g = 0;  g < sce->ics.num_swb; g++) {
> +            const float *scaled = s->scoefs + start;
> +            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w],
> sce->ics.swb_sizes[g], scaled);
> +            start += sce->ics.swb_sizes[g];
> +        }
> +    }
> +
> +    //perform two-loop search
> +    //outer loop - improve quality
> +    do {
> +        int tbits, qstep;
> +        minscaler = sce->sf_idx[0];
> +        //inner loop - quantize spectrum to fit into given number of bits
> +        qstep = its ? 1 : 32;
> +        do {
> +            int prev = -1;
> +            tbits = 0;
> +            for (w = 0; w < sce->ics.num_windows; w +=
> sce->ics.group_len[w]) {
> +                start = w*128;
> +                for (g = 0;  g < sce->ics.num_swb; g++) {
> +                    const float *coefs = sce->coeffs + start;
> +                    const float *scaled = s->scoefs + start;
> +                    int bits = 0;
> +                    int cb;
> +                    float dist = 0.0f;
> +
> +                    if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >=
> 218) {
> +                        start += sce->ics.swb_sizes[g];
> +                        continue;
> +                    }
> +                    minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
> +                    cb = find_min_book(maxvals[w*16+g],
> sce->sf_idx[w*16+g]);
> +                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
> +                        int b;
> +                        dist += quantize_band_cost_cached(s, w + w2, g,
> +                                                          coefs + w2*128,
> +                                                          scaled + w2*128,
> +
> sce->ics.swb_sizes[g],
> +
> sce->sf_idx[w*16+g],
> +                                                          cb, 1.0f,
> INFINITY,
> +                                                          &b, NULL, 0);
> +                        bits += b;
> +                    }
> +                    dists[w*16+g] = dist - bits;
> +                    if (prev != -1) {
> +                        bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g]
> - prev + SCALE_DIFF_ZERO];
> +                    }
> +                    tbits += bits;
> +                    start += sce->ics.swb_sizes[g];
> +                    prev = sce->sf_idx[w*16+g];
> +                }
> +            }
> +            if (tbits > destbits) {
> +                for (i = 0; i < 128; i++)
> +                    if (sce->sf_idx[i] < 218 - qstep)
> +                        sce->sf_idx[i] += qstep;
> +            } else {
> +                for (i = 0; i < 128; i++)
> +                    if (sce->sf_idx[i] > 60 - qstep)
> +                        sce->sf_idx[i] -= qstep;
> +            }
> +            qstep >>= 1;
> +            if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
> +                qstep = 1;
> +        } while (qstep);
> +
> +        fflag = 0;
> +        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
> +
> +        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
> {
> +            for (g = 0; g < sce->ics.num_swb; g++) {
> +                int prevsc = sce->sf_idx[w*16+g];
> +                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g]
> > 60) {
> +                    if (find_min_book(maxvals[w*16+g],
> sce->sf_idx[w*16+g]-1))
> +                        sce->sf_idx[w*16+g]--;
> +                    else //Try to make sure there is some energy in every
> band
> +                        sce->sf_idx[w*16+g]-=2;
> +                }
> +                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g],
> minscaler, minscaler + SCALE_MAX_DIFF);
> +                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
> +                if (sce->sf_idx[w*16+g] != prevsc)
> +                    fflag = 1;
> +                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g],
> sce->sf_idx[w*16+g]);
> +            }
> +        }
> +        its++;
> +    } while (fflag && its < 10);
>  }
>
>  static void search_for_pns(AACEncContext *s, AVCodecContext *avctx,
> SingleChannelElement *sce)
> @@ -828,7 +942,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
>      },
>      [AAC_CODER_FAST] = {
>          search_for_quantizers_fast,
> -        encode_window_bands_info,
> +        codebook_trellis_rate,
>          quantize_and_encode_band,
>          ff_aac_encode_tns_info,
>          ff_aac_encode_ltp_info,
> --
> 2.8.1.369.geae769a
>
>

Pushed, thanks for the reviews.