[FFmpeg-devel] [PATCH] aacenc: optimize cost cache.
Rostislav Pehlivanov
atomnuker at gmail.com
Wed Mar 2 20:19:08 CET 2016
On 1 March 2016 at 23:34, Reimar Döffinger <Reimar.Doeffinger at gmx.de> wrote:
> Avoids trashing the CPU cache each time the
> cost cache is cleared.
> ---
> libavcodec/aaccoder_twoloop.h | 20 ++++----------------
> libavcodec/aacenc.c | 7 +------
> libavcodec/aacenc.h | 4 +---
> libavcodec/aacenc_quantization_misc.h | 17 +++++++----------
> 4 files changed, 13 insertions(+), 35 deletions(-)
>
> diff --git a/libavcodec/aaccoder_twoloop.h b/libavcodec/aaccoder_twoloop.h
> index 397a4db..73bd082 100644
> --- a/libavcodec/aaccoder_twoloop.h
> +++ b/libavcodec/aaccoder_twoloop.h
> @@ -391,10 +391,7 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
> sce->ics.swb_sizes[g],
> sce->sf_idx[w*16+g],
> cb,
> - 1.0f,
> - INFINITY,
> - &b, &sqenergy,
> - 0);
> + &b, &sqenergy);
> bits += b;
> qenergy += sqenergy;
> }
> @@ -472,10 +469,7 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
> sce->ics.swb_sizes[g],
> sce->sf_idx[w*16+g],
> cb,
> - 1.0f,
> - INFINITY,
> - &b, &sqenergy,
> - 0);
> + &b, &sqenergy);
> bits += b;
> qenergy += sqenergy;
> }
> @@ -628,10 +622,7 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
>
> sce->ics.swb_sizes[g],
>
> sce->sf_idx[w*16+g]-1,
> cb,
> - 1.0f,
> - INFINITY,
> - &b, &sqenergy,
> - 0);
> + &b, &sqenergy);
> bits += b;
> qenergy += sqenergy;
> }
> @@ -665,10 +656,7 @@ static void
> search_for_quantizers_twoloop(AVCodecContext *avctx,
>
> sce->ics.swb_sizes[g],
>
> sce->sf_idx[w*16+g]+1,
> cb,
> - 1.0f,
> - INFINITY,
> - &b, &sqenergy,
> - 0);
> + &b,
> &sqenergy);
> bits += b;
> qenergy += sqenergy;
> }
> diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
> index 5a70da1..e60bbfe 100644
> --- a/libavcodec/aacenc.c
> +++ b/libavcodec/aacenc.c
> @@ -78,12 +78,7 @@ static void put_audio_specific_config(AVCodecContext
> *avctx)
>
> void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
> {
> - int sf, g;
> - for (sf = 0; sf < 256; sf++) {
> - for (g = 0; g < 128; g++) {
> - s->quantize_band_cost_cache[sf][g].bits = -1;
> - }
> - }
> + memset(s->quantize_band_cost_cache_state, 0xff,
> sizeof(s->quantize_band_cost_cache_state));
> }
>
> #define WINDOW_FUNC(type) \
> diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
> index 2252e29..d937d17 100644
> --- a/libavcodec/aacenc.h
> +++ b/libavcodec/aacenc.h
> @@ -85,9 +85,6 @@ typedef struct AACQuantizeBandCostCacheEntry {
> float rd;
> float energy;
> int bits; ///< -1 means uninitialized entry
> - char cb;
> - char rtz;
> - char padding[2]; ///< Keeps the entry size a multiple of 32 bits
> } AACQuantizeBandCostCacheEntry;
>
> /**
> @@ -126,6 +123,7 @@ typedef struct AACEncContext {
> DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized
> coefficients
> DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
>
> + uint8_t quantize_band_cost_cache_state[256][128];
> AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128];
> ///< memoization area for quantize_band_cost
>
> struct {
> diff --git a/libavcodec/aacenc_quantization_misc.h
> b/libavcodec/aacenc_quantization_misc.h
> index eaa71c9..29bb986 100644
> --- a/libavcodec/aacenc_quantization_misc.h
> +++ b/libavcodec/aacenc_quantization_misc.h
> @@ -30,22 +30,19 @@
>
> static inline float quantize_band_cost_cached(struct AACEncContext *s,
> int w, int g, const float *in,
> const float *scaled, int size, int
> scale_idx,
> - int cb, const float lambda, const float
> uplim,
> - int *bits, float *energy, int rtz)
> + int cb, int *bits, float *energy)
> {
> + uint8_t *cache_state =
> &s->quantize_band_cost_cache_state[scale_idx][w*16+g];
> AACQuantizeBandCostCacheEntry *entry;
> av_assert1(scale_idx >= 0 && scale_idx < 256);
> entry = &s->quantize_band_cost_cache[scale_idx][w*16+g];
> - if (entry->bits < 0 || entry->cb != cb || entry->rtz != rtz) {
> + if (*cache_state != cb) {
> entry->rd = quantize_band_cost(s, in, scaled, size, scale_idx,
> - cb, lambda, uplim, &entry->bits,
> &entry->energy, rtz);
> - entry->cb = cb;
> - entry->rtz = rtz;
> + cb, 1.0f, INFINITY, &entry->bits,
> &entry->energy, 0);
> + *cache_state = cb;
> }
> - if (bits)
> - *bits = entry->bits;
> - if (energy)
> - *energy = entry->energy;
> + *bits = entry->bits & 0xffffff;
> + *energy = entry->energy;
> return entry->rd;
> }
>
>
The whole point of the function was to be a universal caching system for
every single scalefactor band which could be used in other parts of the
code (e.g. intensity stereo, mid/side, prediction, pns, etc.). Granted,
caching hasn't been implemented there yet. But reducing the number of
arguments and assuming everything uses the same value for lambda and uplim
removes any possibility of the caching system being used anywhere else.
Can you avoid modifying the function signature and allow alternative
values for lambda and uplim?
More information about the ffmpeg-devel
mailing list