[FFmpeg-devel] [PATCH 2/2] lavc/aacsbr: sbr_dequant optimization
Andreas Cadhalpun
andreas.cadhalpun at googlemail.com
Sat Dec 19 13:21:45 CET 2015
On 18.12.2015 16:52, Ganesh Ajjanagadde wrote:
> On Tue, Dec 15, 2015 at 9:53 PM, Ganesh Ajjanagadde
> <gajjanagadde at gmail.com> wrote:
>> This uses ff_exp2fi to get a speedup (~ 6x).
>>
>> sample benchmark (Haswell, GNU/Linux):
>> old:
>> 19102 decicycles in sbr_dequant, 1023 runs, 1 skips
>> 19002 decicycles in sbr_dequant, 2045 runs, 3 skips
>> 17638 decicycles in sbr_dequant, 4093 runs, 3 skips
>> 15825 decicycles in sbr_dequant, 8189 runs, 3 skips
>> 16404 decicycles in sbr_dequant, 16379 runs, 5 skips
>>
>> new:
>> 3063 decicycles in sbr_dequant, 1024 runs, 0 skips
>> 3049 decicycles in sbr_dequant, 2048 runs, 0 skips
>> 2968 decicycles in sbr_dequant, 4096 runs, 0 skips
>> 2818 decicycles in sbr_dequant, 8191 runs, 1 skips
>> 2853 decicycles in sbr_dequant, 16383 runs, 1 skips
>>
>> Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>
>> ---
>> libavcodec/aacsbr.c | 34 ++++++++++++++++++++++------------
>> 1 file changed, 22 insertions(+), 12 deletions(-)
>>
>> diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
>> index d1e3a91..15956e3 100644
>> --- a/libavcodec/aacsbr.c
>> +++ b/libavcodec/aacsbr.c
>> @@ -33,6 +33,7 @@
>> #include "aacsbrdata.h"
>> #include "aacsbr_tablegen.h"
>> #include "fft.h"
>> +#include "internal.h"
>> #include "aacps.h"
>> #include "sbrdsp.h"
>> #include "libavutil/internal.h"
>> @@ -73,15 +74,22 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
>> {
>> int k, e;
>> int ch;
>> -
>> + static const double exp2_tab[2] = {1, M_SQRT2};
>> if (id_aac == TYPE_CPE && sbr->bs_coupling) {
>> - float alpha = sbr->data[0].bs_amp_res ? 1.0f : 0.5f;
>> - float pan_offset = sbr->data[0].bs_amp_res ? 12.0f : 24.0f;
>> + int pan_offset = sbr->data[0].bs_amp_res ? 12 : 24;
>> for (e = 1; e <= sbr->data[0].bs_num_env; e++) {
>> for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
>> - float temp1 = exp2f(sbr->data[0].env_facs_q[e][k] * alpha + 7.0f);
>> - float temp2 = exp2f((pan_offset - sbr->data[1].env_facs_q[e][k]) * alpha);
>> - float fac;
>> + float temp1, temp2, fac;
>> + if (sbr->data[0].bs_amp_res) {
>> + temp1 = ff_exp2fi(sbr->data[0].env_facs_q[e][k] + 7);
>> + temp2 = ff_exp2fi(pan_offset - sbr->data[1].env_facs_q[e][k]);
>> + }
>> + else {
>> + temp1 = ff_exp2fi((sbr->data[0].env_facs_q[e][k]>>1) + 7) *
>> + exp2_tab[sbr->data[0].env_facs_q[e][k] & 1];
>> + temp2 = ff_exp2fi((pan_offset - sbr->data[1].env_facs_q[e][k])>>1) *
>> + exp2_tab[(pan_offset - sbr->data[1].env_facs_q[e][k]) & 1];
>> + }
>> if (temp1 > 1E20) {
>> av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
>> temp1 = 1;
>> @@ -93,8 +101,8 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
>> }
>> for (e = 1; e <= sbr->data[0].bs_num_noise; e++) {
>> for (k = 0; k < sbr->n_q; k++) {
>> - float temp1 = exp2f(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs_q[e][k] + 1);
>> - float temp2 = exp2f(12 - sbr->data[1].noise_facs_q[e][k]);
>> + float temp1 = ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs_q[e][k] + 1);
>> + float temp2 = ff_exp2fi(12 - sbr->data[1].noise_facs_q[e][k]);
>> float fac;
>> av_assert0(temp1 <= 1E20);
>> fac = temp1 / (1.0f + temp2);
>> @@ -104,11 +112,13 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
>> }
>> } else { // SCE or one non-coupled CPE
>> for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
>> - float alpha = sbr->data[ch].bs_amp_res ? 1.0f : 0.5f;
>> for (e = 1; e <= sbr->data[ch].bs_num_env; e++)
>> for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++){
>> - sbr->data[ch].env_facs[e][k] =
>> - exp2f(alpha * sbr->data[ch].env_facs_q[e][k] + 6.0f);
>> + if (sbr->data[ch].bs_amp_res)
>> + sbr->data[ch].env_facs[e][k] = ff_exp2fi(sbr->data[ch].env_facs_q[e][k] + 6);
>> + else
>> + sbr->data[ch].env_facs[e][k] = ff_exp2fi((sbr->data[ch].env_facs_q[e][k]>>1) + 6)
>> + * exp2_tab[sbr->data[ch].env_facs_q[e][k] & 1];
>> if (sbr->data[ch].env_facs[e][k] > 1E20) {
>> av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
>> sbr->data[ch].env_facs[e][k] = 1;
>> @@ -118,7 +128,7 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
>> for (e = 1; e <= sbr->data[ch].bs_num_noise; e++)
>> for (k = 0; k < sbr->n_q; k++)
>> sbr->data[ch].noise_facs[e][k] =
>> - exp2f(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs_q[e][k]);
>> + ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs_q[e][k]);
>> }
>> }
>> }
>> --
>> 2.6.4
>>
>
> ping for aac maintainers: Rostislav? thanks.
I'm not an AAC maintainer, but the change looks correct and the speedup is nice, so this
patch should be OK.
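One caveat is that NOISE_FLOOR_OFFSET is defined as FIXR(6.0f) and is thus a float here,
so the arguments passed to ff_exp2fi in the noise floor code are still float expressions.
Since this patch makes the float definition unnecessary, it's probably a good idea to
change NOISE_FLOOR_OFFSET to simply 6.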
Best regards,
Andreas
More information about the ffmpeg-devel
mailing list