[FFmpeg-devel] [PATCH 03/11] aaccoder: add intensity stereo coding support for the trellis quantizer
Rostislav Pehlivanov
atomnuker at gmail.com
Fri Jun 26 22:16:32 CEST 2015
This commit extends the trellis quantizer (used by the default twoloop coder) to accept and correctly encode codebooks needed for intensity stereo and perceptual noise substitution.
---
libavcodec/aaccoder.c | 81 +++++++++++++++++++++++++++++----------------------
1 file changed, 46 insertions(+), 35 deletions(-)
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index a6e4cc4..f069a3b 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -43,8 +43,23 @@
/** Frequency in Hz for lower limit of noise substitution **/
#define NOISE_LOW_LIMIT 4000
+/* Energy spread threshold value below which no PNS is used, this corresponds to
+ * typically around 17Khz, after which PNS usage decays ending at 19Khz */
+#define NOISE_SPREAD_THRESHOLD 152234544.0f
+
+/* Above ~1.26*threshold all normally-zeroed values are PNS'd. Lambda divides
+ * the defined value below as to try to get a ~1.26 multiplier so that there is
+ * a balance between noise and zero bands leaving more bits for actual signal */
+#define NOISE_LAMBDA_NUMERATOR 252.1f
+
+/** Frequency in Hz for lower limit of intensity stereo **/
+#define INT_STEREO_LOW_LIMIT 6000
+
/** Total number of usable codebooks **/
-#define CB_TOT 13
+#define CB_TOT 12
+
+/** Total number of codebooks, including special ones **/
+#define CB_TOT_ALL 15
/** bits needed to code codebook run value for long windows */
static const uint8_t run_value_bits_long[64] = {
@@ -64,9 +79,9 @@ static const uint8_t * const run_value_bits[2] = {
};
/** Map to convert values from BandCodingPath index to a codebook index **/
-static const uint8_t aac_cb_out_map[CB_TOT] = {0,1,2,3,4,5,6,7,8,9,10,11,13};
+static const uint8_t aac_cb_out_map[CB_TOT_ALL] = {0,1,2,3,4,5,6,7,8,9,10,11,13,14,15};
/** Inverse map to convert from codebooks to BandCodingPath indices **/
-static const uint8_t aac_cb_in_map[CB_TOT+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12};
+static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12,13,14};
/**
* Quantize one coefficient.
@@ -118,7 +133,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
int *bits, int BT_ZERO, int BT_UNSIGNED,
- int BT_PAIR, int BT_ESC, int BT_NOISE)
+ int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO)
{
const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
const float Q = ff_aac_pow2sf_tab [q_idx];
@@ -131,14 +146,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
int resbits = 0;
int off;
- if (BT_ZERO) {
- for (i = 0; i < size; i++)
- cost += in[i]*in[i];
- if (bits)
- *bits = 0;
- return cost * lambda;
- }
- if (BT_NOISE) {
+ if (BT_ZERO || BT_NOISE || BT_STEREO) {
for (i = 0; i < size; i++)
cost += in[i]*in[i];
if (bits)
@@ -231,26 +239,27 @@ static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitC
return 0.0f;
}
-#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE) \
-static float quantize_and_encode_band_cost_ ## NAME( \
- struct AACEncContext *s, \
- PutBitContext *pb, const float *in, \
- const float *scaled, int size, int scale_idx, \
- int cb, const float lambda, const float uplim, \
- int *bits) { \
- return quantize_and_encode_band_cost_template( \
- s, pb, in, scaled, size, scale_idx, \
- BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \
- BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE); \
+#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO) \
+static float quantize_and_encode_band_cost_ ## NAME( \
+ struct AACEncContext *s, \
+ PutBitContext *pb, const float *in, \
+ const float *scaled, int size, int scale_idx, \
+ int cb, const float lambda, const float uplim, \
+ int *bits) { \
+ return quantize_and_encode_band_cost_template( \
+ s, pb, in, scaled, size, scale_idx, \
+ BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \
+ BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO); \
}
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1, 0, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0)
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1)
static float (*const quantize_and_encode_band_cost_arr[])(
struct AACEncContext *s,
@@ -272,6 +281,8 @@ static float (*const quantize_and_encode_band_cost_arr[])(
quantize_and_encode_band_cost_ESC,
quantize_and_encode_band_cost_NONE, /* CB 12 doesn't exist */
quantize_and_encode_band_cost_NOISE,
+ quantize_and_encode_band_cost_STEREO,
+ quantize_and_encode_band_cost_STEREO,
};
#define quantize_and_encode_band_cost( \
@@ -454,7 +465,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
abs_pow34_v(s->scoefs, sce->coeffs, 1024);
start = win*128;
- for (cb = 0; cb < CB_TOT; cb++) {
+ for (cb = 0; cb < CB_TOT_ALL; cb++) {
path[0][cb].cost = run_bits+4;
path[0][cb].prev_idx = -1;
path[0][cb].run = 0;
@@ -478,7 +489,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
}
next_minbits = path[swb+1][0].cost;
next_mincb = 0;
- for (cb = 1; cb < CB_TOT; cb++) {
+ for (cb = 1; cb < CB_TOT_ALL; cb++) {
path[swb+1][cb].cost = 61450;
path[swb+1][cb].prev_idx = -1;
path[swb+1][cb].run = 0;
@@ -495,10 +506,10 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
path[swb+1][cb].prev_idx = -1;
path[swb+1][cb].run = 0;
}
- for (cb = startcb; cb < CB_TOT; cb++) {
+ for (cb = startcb; cb < CB_TOT_ALL; cb++) {
float cost_stay_here, cost_get_here;
float bits = 0.0f;
- if (cb == 12 && sce->band_type[win*16+swb] != NOISE_BT) {
+ if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) {
path[swb+1][cb].cost = 61450;
path[swb+1][cb].prev_idx = -1;
path[swb+1][cb].run = 0;
@@ -537,7 +548,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
//convert resulting path from backward-linked list
stack_len = 0;
idx = 0;
- for (cb = 1; cb < CB_TOT; cb++)
+ for (cb = 1; cb < CB_TOT_ALL; cb++)
if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
idx = cb;
ppos = max_sfb;
--
2.1.4
More information about the ffmpeg-devel
mailing list