[FFmpeg-devel] [PATCH 1/2] aacenc: add a faster version of twoloop as the "fast" coder
Rostislav Pehlivanov
atomnuker at gmail.com
Sat Aug 13 19:04:40 EEST 2016
On 7 August 2016 at 00:51, Rostislav Pehlivanov <atomnuker at gmail.com> wrote:
> Does nothing fancy, but still sounds very decent at 128 kbps.
> There is still room to improve it by bringing in the low-pass and PNS
> management from the main twoloop coder, which should improve its quality
> without sacrificing much speed.
>
> Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>
> ---
> libavcodec/aaccoder.c | 154 ++++++++++++++++++++++++++++++
> +++++++++++++-------
> 1 file changed, 134 insertions(+), 20 deletions(-)
>
> diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
> index bca1f59..edf29f4 100644
> --- a/libavcodec/aaccoder.c
> +++ b/libavcodec/aaccoder.c
> @@ -396,34 +396,148 @@ static void search_for_quantizers_fast(AVCodecContext
> *avctx, AACEncContext *s,
> SingleChannelElement *sce,
> const float lambda)
> {
> - int i, w, w2, g;
> - int minq = 255;
> -
> - memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
> + int start = 0, i, w, w2, g;
> + int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate /
> avctx->channels * (lambda / 120.f);
> + float dists[128] = { 0 }, uplims[128] = { 0 };
> + float maxvals[128];
> + int fflag, minscaler;
> + int its = 0;
> + int allz = 0;
> + float minthr = INFINITY;
> +
> + // for values above this the decoder might end up in an endless loop
> + // due to always having more bits than what can be encoded.
> + destbits = FFMIN(destbits, 5800);
> + //XXX: some heuristic to determine initial quantizers will reduce
> search time
> + //determine zero bands and upper limits
> for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
> - for (g = 0; g < sce->ics.num_swb; g++) {
> + start = 0;
> + for (g = 0; g < sce->ics.num_swb; g++) {
> + int nz = 0;
> + float uplim = 0.0f, energy = 0.0f;
> for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
> FFPsyBand *band = &s->psy.ch[s->cur_channel].
> psy_bands[(w+w2)*16+g];
> - if (band->energy <= band->threshold) {
> - sce->sf_idx[(w+w2)*16+g] = 218;
> + uplim += band->threshold;
> + energy += band->energy;
> + if (band->energy <= band->threshold || band->threshold ==
> 0.0f) {
> sce->zeroes[(w+w2)*16+g] = 1;
> - } else {
> - sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS -
> SCALE_DIV_512 + log2f(band->threshold), 80, 218);
> - sce->zeroes[(w+w2)*16+g] = 0;
> + continue;
> }
> - minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
> + nz = 1;
> }
> + uplims[w*16+g] = uplim *512;
> + sce->band_type[w*16+g] = 0;
> + sce->zeroes[w*16+g] = !nz;
> + if (nz)
> + minthr = FFMIN(minthr, uplim);
> + allz |= nz;
> + start += sce->ics.swb_sizes[g];
> }
> }
> - for (i = 0; i < 128; i++) {
> - sce->sf_idx[i] = 140;
> - //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
> + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
> + for (g = 0; g < sce->ics.num_swb; g++) {
> + if (sce->zeroes[w*16+g]) {
> + sce->sf_idx[w*16+g] = SCALE_ONE_POS;
> + continue;
> + }
> + sce->sf_idx[w*16+g] = SCALE_ONE_POS +
> FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
> + }
> }
> - //set the same quantizers inside window groups
> - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
> - for (g = 0; g < sce->ics.num_swb; g++)
> - for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
> - sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
> +
> + if (!allz)
> + return;
> + abs_pow34_v(s->scoefs, sce->coeffs, 1024);
> + ff_quantize_band_cost_cache_init(s);
> +
> + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
> + start = w*128;
> + for (g = 0; g < sce->ics.num_swb; g++) {
> + const float *scaled = s->scoefs + start;
> + maxvals[w*16+g] = find_max_val(sce->ics.group_len[w],
> sce->ics.swb_sizes[g], scaled);
> + start += sce->ics.swb_sizes[g];
> + }
> + }
> +
> + //perform two-loop search
> + //outer loop - improve quality
> + do {
> + int tbits, qstep;
> + minscaler = sce->sf_idx[0];
> + //inner loop - quantize spectrum to fit into given number of bits
> + qstep = its ? 1 : 32;
> + do {
> + int prev = -1;
> + tbits = 0;
> + for (w = 0; w < sce->ics.num_windows; w +=
> sce->ics.group_len[w]) {
> + start = w*128;
> + for (g = 0; g < sce->ics.num_swb; g++) {
> + const float *coefs = sce->coeffs + start;
> + const float *scaled = s->scoefs + start;
> + int bits = 0;
> + int cb;
> + float dist = 0.0f;
> +
> + if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >=
> 218) {
> + start += sce->ics.swb_sizes[g];
> + continue;
> + }
> + minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
> + cb = find_min_book(maxvals[w*16+g],
> sce->sf_idx[w*16+g]);
> + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
> + int b;
> + dist += quantize_band_cost_cached(s, w + w2, g,
> + coefs + w2*128,
> + scaled + w2*128,
> +
> sce->ics.swb_sizes[g],
> +
> sce->sf_idx[w*16+g],
> + cb, 1.0f,
> INFINITY,
> + &b, NULL, 0);
> + bits += b;
> + }
> + dists[w*16+g] = dist - bits;
> + if (prev != -1) {
> + bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g]
> - prev + SCALE_DIFF_ZERO];
> + }
> + tbits += bits;
> + start += sce->ics.swb_sizes[g];
> + prev = sce->sf_idx[w*16+g];
> + }
> + }
> + if (tbits > destbits) {
> + for (i = 0; i < 128; i++)
> + if (sce->sf_idx[i] < 218 - qstep)
> + sce->sf_idx[i] += qstep;
> + } else {
> + for (i = 0; i < 128; i++)
> + if (sce->sf_idx[i] > 60 - qstep)
> + sce->sf_idx[i] -= qstep;
> + }
> + qstep >>= 1;
> + if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
> + qstep = 1;
> + } while (qstep);
> +
> + fflag = 0;
> + minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
> +
> + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
> {
> + for (g = 0; g < sce->ics.num_swb; g++) {
> + int prevsc = sce->sf_idx[w*16+g];
> + if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g]
> > 60) {
> + if (find_min_book(maxvals[w*16+g],
> sce->sf_idx[w*16+g]-1))
> + sce->sf_idx[w*16+g]--;
> + else //Try to make sure there is some energy in every
> band
> + sce->sf_idx[w*16+g]-=2;
> + }
> + sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g],
> minscaler, minscaler + SCALE_MAX_DIFF);
> + sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
> + if (sce->sf_idx[w*16+g] != prevsc)
> + fflag = 1;
> + sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g],
> sce->sf_idx[w*16+g]);
> + }
> + }
> + its++;
> + } while (fflag && its < 10);
> }
>
> static void search_for_pns(AACEncContext *s, AVCodecContext *avctx,
> SingleChannelElement *sce)
> @@ -828,7 +942,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
> },
> [AAC_CODER_FAST] = {
> search_for_quantizers_fast,
> - encode_window_bands_info,
> + codebook_trellis_rate,
> quantize_and_encode_band,
> ff_aac_encode_tns_info,
> ff_aac_encode_ltp_info,
> --
> 2.8.1.369.geae769a
>
>
Pushed, thanks for the reviews.
More information about the ffmpeg-devel
mailing list