[FFmpeg-devel] [PATCH v2 11/16] avcodec/dca: add DSP implementation
foo86
foobaz86 at gmail.com
Thu Jan 21 19:48:28 CET 2016
---
libavcodec/dcadsp.c | 413 ++++++++++++++++++++++++++++++++++++++++++++++++++++
libavcodec/dcadsp.h | 91 ++++++++++++
2 files changed, 504 insertions(+)
create mode 100644 libavcodec/dcadsp.c
create mode 100644 libavcodec/dcadsp.h
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
new file mode 100644
index 0000000..cee3d60
--- /dev/null
+++ b/libavcodec/dcadsp.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+
+#include "dcadsp.h"
+#include "dcamath.h"
+
+/**
+ * Decode vector-quantised high-frequency subbands.
+ *
+ * For every subband sb in [sb_start, sb_end) the codebook row
+ * hf_vq[vq_index[sb]] is multiplied by scale_factors[sb][0], rounded,
+ * shifted right by 4 bits and passed through clip23() (dcamath.h;
+ * presumably saturation to the signed 24-bit range).
+ *
+ * @param dst           per-subband output buffers;
+ *                      dst[sb][ofs .. ofs + len - 1] is written
+ * @param vq_index      codebook row to use for each subband
+ * @param hf_vq         codebook; the first len entries of a row are read
+ * @param scale_factors per-subband scale; only element [sb][0] is used
+ * @param sb_start      first subband to decode
+ * @param sb_end        one past the last subband to decode
+ * @param ofs           offset of the first output sample in each subband
+ * @param len           number of samples to produce per subband
+ */
+static void decode_hf_c(int32_t **dst,
+                        const int32_t *vq_index,
+                        const int8_t hf_vq[1024][32],
+                        int32_t scale_factors[32][2],
+                        intptr_t sb_start, intptr_t sb_end,
+                        intptr_t ofs, intptr_t len)
+{
+    int band, n;
+
+    for (band = sb_start; band < sb_end; band++) {
+        const int8_t *codebook = hf_vq[vq_index[band]];
+        const int32_t gain     = scale_factors[band][0];
+
+        for (n = 0; n < len; n++) {
+            // (1 << 3) is half of the 4 bits dropped below: round to nearest
+            int32_t scaled = (codebook[n] * gain + (1 << 3)) >> 4;
+
+            dst[band][n + ofs] = clip23(scaled);
+        }
+    }
+}
+
+/**
+ * Derive subband samples of one channel from those of another channel.
+ *
+ * For every subband sb in [sb_start, sb_end) and every sample n in
+ * [ofs, ofs + len):
+ *     dst[sb][n] = clip23(mul17(src[sb][n], scale_factors[sb]))
+ * mul17() and clip23() come from dcamath.h (presumably a fixed-point
+ * multiply with 17 fractional bits and a 24-bit saturation).
+ *
+ * @param dst           per-subband output buffers
+ * @param src           per-subband input buffers, same layout as dst
+ * @param scale_factors one scale factor per subband
+ * @param sb_start      first subband to process
+ * @param sb_end        one past the last subband to process
+ * @param ofs           offset of the first sample in each subband
+ * @param len           number of samples per subband
+ */
+static void decode_joint_c(int32_t **dst, int32_t **src,
+                           const int32_t *scale_factors,
+                           intptr_t sb_start, intptr_t sb_end,
+                           intptr_t ofs, intptr_t len)
+{
+    int band, n;
+
+    for (band = sb_start; band < sb_end; band++) {
+        const int32_t gain = scale_factors[band];
+
+        for (n = 0; n < len; n++) {
+            const int32_t sample = src[band][n + ofs];
+
+            dst[band][n + ofs] = clip23(mul17(sample, gain));
+        }
+    }
+}
+
+/**
+ * Interpolate decimated LFE samples with a 256-entry FIR table (float).
+ *
+ * dec_select chooses the layout: 0 gives 64 output samples per LFE sample
+ * using 8 taps each, 1 gives 128 output samples using 4 taps each.
+ * The first half of every output block walks filter_coeff forwards, the
+ * second half walks it backwards starting from the last entry (index 255).
+ *
+ * @param pcm_samples  output buffer
+ * @param lfe_samples  first LFE sample to process; older samples are read
+ *                     at negative offsets (down to 1 - number of taps)
+ * @param filter_coeff interpolation filter table
+ * @param npcmblocks   npcmblocks >> (dec_select + 1) LFE samples are
+ *                     processed
+ * @param dec_select   0 or 1, see above
+ */
+static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
+                            const float *filter_coeff, intptr_t npcmblocks,
+                            int dec_select)
+{
+    const int factor   = 64 << dec_select;
+    const int half     = factor / 2;
+    const int taps     = 8 >> dec_select;
+    const int nsamples = npcmblocks >> (dec_select + 1);
+    int n, phase, t;
+
+    for (n = 0; n < nsamples; n++, lfe_samples++, pcm_samples += factor) {
+        for (phase = 0; phase < half; phase++) {
+            const float *fwd = filter_coeff + phase * taps;
+            const float *rev = filter_coeff + 255 - phase * taps;
+            float lo = 0;
+            float hi = 0;
+
+            // Newest sample first; both sums share the same input history
+            for (t = 0; t < taps; t++) {
+                lo += fwd[ t] * lfe_samples[-t];
+                hi += rev[-t] * lfe_samples[-t];
+            }
+
+            pcm_samples[       phase] = lo;
+            pcm_samples[half + phase] = hi;
+        }
+    }
+}
+
+/**
+ * LFE interpolation with dec_select = 0: 64 output samples per LFE sample,
+ * 8 filter taps per output sample. Thin wrapper around lfe_fir_float_c().
+ */
+static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
+                             const float *filter_coeff, intptr_t npcmblocks)
+{
+    const int dec_select = 0;
+
+    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
+                    dec_select);
+}
+
+/**
+ * LFE interpolation with dec_select = 1: 128 output samples per LFE sample,
+ * 4 filter taps per output sample. Thin wrapper around lfe_fir_float_c().
+ */
+static void lfe_fir2_float_c(float *pcm_samples, int32_t *lfe_samples,
+                             const float *filter_coeff, intptr_t npcmblocks)
+{
+    const int dec_select = 1;
+
+    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
+                    dec_select);
+}
+
+/**
+ * Double the length of a float signal by linear interpolation.
+ *
+ * Each input sample yields two outputs placed between the previous and
+ * the current input sample, weighted 0.75/0.25 and 0.25/0.75.
+ *
+ * @param dst  output, 2 * len samples
+ * @param src  input, len samples
+ * @param hist in: last input sample of the previous call;
+ *             out: last input sample of this call
+ * @param len  number of input samples
+ */
+static void lfe_x96_float_c(float *dst, const float *src,
+                            float *hist, intptr_t len)
+{
+    float last = *hist;
+    int n;
+
+    for (n = 0; n < len; n++, dst += 2) {
+        const float cur = src[n];
+
+        dst[0] = 0.25f * cur + 0.75f * last;
+        dst[1] = 0.75f * cur + 0.25f * last;
+        last   = cur;
+    }
+
+    *hist = last;
+}
+
+/**
+ * 32-band subband synthesis, floating point output.
+ *
+ * For each of the npcmblocks time slots one sample is gathered from every
+ * subband, converted to float (with the sign flipped for subbands 0 and 3
+ * modulo 4) and handed to synth->synth_filter_float(), which is given 32
+ * output slots per call.
+ *
+ * @param synth              provides the synth_filter_float() callback
+ * @param imdct              passed through to the callback
+ * @param pcm_samples        output, 32 * npcmblocks samples
+ * @param subband_samples_lo 32 subband buffers, indexed [subband][slot]
+ * @param subband_samples_hi unused by this variant
+ * @param hist1, offset, hist2, filter_coeff, scale
+ *                           passed through to the callback unchanged
+ * @param npcmblocks         number of time slots to synthesise
+ */
+static void sub_qmf32_float_c(SynthFilterContext *synth,
+                              FFTContext *imdct,
+                              float *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,
+                              float *hist1, int *offset, float *hist2,
+                              const float *filter_coeff, intptr_t npcmblocks,
+                              float scale)
+{
+    LOCAL_ALIGNED(32, float, input, [32]);
+    int blk, sb;
+
+    for (blk = 0; blk < npcmblocks; blk++, pcm_samples += 32) {
+        // Gather one sample per subband, negating bands 0, 3, 4, 7, ...
+        for (sb = 0; sb < 32; sb++) {
+            const int32_t sample = subband_samples_lo[sb][blk];
+
+            input[sb] = ((sb - 1) & 2) ? -sample : sample;
+        }
+
+        // 32 subband inputs in, 32 PCM samples out
+        synth->synth_filter_float(imdct, hist1, offset,
+                                  hist2, filter_coeff,
+                                  pcm_samples, input, scale);
+    }
+}
+
+/**
+ * 64-band subband synthesis, floating point output.
+ *
+ * For each of the npcmblocks time slots one sample per subband is
+ * gathered, converted to float (with the sign flipped for subbands 0 and
+ * 3 modulo 4) and handed to synth->synth_filter_float_64(), which is
+ * given 64 output slots per call.
+ *
+ * @param synth              provides the synth_filter_float_64() callback
+ * @param imdct              passed through to the callback
+ * @param pcm_samples        output, 64 * npcmblocks samples
+ * @param subband_samples_lo 32 subband buffers, indexed [subband][slot]
+ * @param subband_samples_hi NULL, or 64 subband buffers: the first 32 are
+ *                           added to subband_samples_lo, the last 32 feed
+ *                           subbands 32..63 directly. When NULL, subbands
+ *                           32..63 are zero.
+ * @param hist1, offset, hist2, filter_coeff, scale
+ *                           passed through to the callback unchanged
+ * @param npcmblocks         number of time slots to synthesise
+ */
+static void sub_qmf64_float_c(SynthFilterContext *synth,
+                              FFTContext *imdct,
+                              float *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,
+                              float *hist1, int *offset, float *hist2,
+                              const float *filter_coeff, intptr_t npcmblocks,
+                              float scale)
+{
+    LOCAL_ALIGNED(32, float, input, [64]);
+    int blk, sb;
+
+    // Without an upper half the top 32 inputs stay zero for every slot
+    if (!subband_samples_hi)
+        memset(&input[32], 0, sizeof(input[0]) * 32);
+
+    for (blk = 0; blk < npcmblocks; blk++, pcm_samples += 64) {
+        if (!subband_samples_hi) {
+            // Only the first 32 subbands carry data
+            for (sb = 0; sb < 32; sb++) {
+                const int32_t lo = subband_samples_lo[sb][blk];
+
+                input[sb] = ((sb - 1) & 2) ? -lo : lo;
+            }
+        } else {
+            // Full 64 subbands; the first 32 are the sum of both inputs
+            for (sb = 0; sb < 32; sb++) {
+                const int32_t lo = subband_samples_lo[sb][blk];
+                const int32_t hi = subband_samples_hi[sb][blk];
+
+                input[sb] = ((sb - 1) & 2) ? -lo - hi : lo + hi;
+            }
+            for (sb = 32; sb < 64; sb++) {
+                const int32_t hi = subband_samples_hi[sb][blk];
+
+                input[sb] = ((sb - 1) & 2) ? -hi : hi;
+            }
+        }
+
+        // 64 subband inputs in, 64 PCM samples out
+        synth->synth_filter_float_64(imdct, hist1, offset,
+                                     hist2, filter_coeff,
+                                     pcm_samples, input, scale);
+    }
+}
+
+/**
+ * Interpolate decimated LFE samples with a 256-entry FIR table
+ * (fixed point).
+ *
+ * Every LFE sample produces 64 output samples, each the sum of 8
+ * coefficient * sample products accumulated in 64 bits and then reduced
+ * with norm23() and clip23() (dcamath.h; presumably a rounding shift by
+ * 23 bits and a saturation to the signed 24-bit range). The first 32
+ * outputs walk filter_coeff forwards, the last 32 walk it backwards from
+ * index 255.
+ *
+ * @param pcm_samples  output, 64 samples per processed LFE sample
+ * @param lfe_samples  first LFE sample to process; the 7 preceding
+ *                     samples are read at negative offsets
+ * @param filter_coeff interpolation filter table
+ * @param npcmblocks   npcmblocks >> 1 LFE samples are processed
+ */
+static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
+                            const int32_t *filter_coeff, intptr_t npcmblocks)
+{
+    const int nsamples = npcmblocks >> 1;
+    int n, phase, t;
+
+    for (n = 0; n < nsamples; n++, lfe_samples++, pcm_samples += 64) {
+        for (phase = 0; phase < 32; phase++) {
+            const int32_t *fwd = filter_coeff + phase * 8;
+            const int32_t *rev = filter_coeff + 255 - phase * 8;
+            int64_t lo = 0;
+            int64_t hi = 0;
+
+            for (t = 0; t < 8; t++) {
+                lo += (int64_t)fwd[ t] * lfe_samples[-t];
+                hi += (int64_t)rev[-t] * lfe_samples[-t];
+            }
+
+            pcm_samples[     phase] = clip23(norm23(lo));
+            pcm_samples[32 + phase] = clip23(norm23(hi));
+        }
+    }
+}
+
+/**
+ * Double the length of a fixed-point signal by interpolation.
+ *
+ * Each input sample yields two outputs that mix the previous and the
+ * current input sample with the weights 6291137 and 2097471 (close to
+ * 0.75 and 0.25 when scaled by 2^23). The 64-bit sums are reduced with
+ * norm23() and clip23() from dcamath.h.
+ *
+ * @param dst  output, 2 * len samples
+ * @param src  input, len samples
+ * @param hist in: last input sample of the previous call;
+ *             out: last input sample of this call
+ * @param len  number of input samples
+ */
+static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
+                            int32_t *hist, intptr_t len)
+{
+    int32_t last = *hist;
+    int n;
+
+    for (n = 0; n < len; n++, dst += 2) {
+        const int32_t cur = src[n];
+        int64_t first  = INT64_C(2097471) * cur + INT64_C(6291137) * last;
+        int64_t second = INT64_C(6291137) * cur + INT64_C(2097471) * last;
+
+        dst[0] = clip23(norm23(first));
+        dst[1] = clip23(norm23(second));
+        last   = cur;
+    }
+
+    *hist = last;
+}
+
+/**
+ * 32-band subband synthesis, fixed point output.
+ *
+ * For each of the npcmblocks time slots one sample is gathered from every
+ * subband and handed to synth->synth_filter_fixed(), which is given 32
+ * output slots per call.
+ *
+ * @param synth              provides the synth_filter_fixed() callback
+ * @param imdct              passed through to the callback
+ * @param pcm_samples        output, 32 * npcmblocks samples
+ * @param subband_samples_lo 32 subband buffers, indexed [subband][slot]
+ * @param subband_samples_hi unused by this variant
+ * @param hist1, offset, hist2, filter_coeff
+ *                           passed through to the callback unchanged
+ * @param npcmblocks         number of time slots to synthesise
+ */
+static void sub_qmf32_fixed_c(SynthFilterContext *synth,
+                              DCADCTContext *imdct,
+                              int32_t *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,
+                              int32_t *hist1, int *offset, int32_t *hist2,
+                              const int32_t *filter_coeff, intptr_t npcmblocks)
+{
+    LOCAL_ALIGNED(32, int32_t, input, [32]);
+    int blk, sb;
+
+    for (blk = 0; blk < npcmblocks; blk++, pcm_samples += 32) {
+        // Gather one sample per subband for this time slot
+        for (sb = 0; sb < 32; sb++)
+            input[sb] = subband_samples_lo[sb][blk];
+
+        // 32 subband inputs in, 32 PCM samples out
+        synth->synth_filter_fixed(imdct, hist1, offset,
+                                  hist2, filter_coeff,
+                                  pcm_samples, input);
+    }
+}
+
+/**
+ * 64-band subband synthesis, fixed point output.
+ *
+ * For each of the npcmblocks time slots one sample per subband is
+ * gathered and handed to synth->synth_filter_fixed_64(), which is given
+ * 64 output slots per call.
+ *
+ * @param synth              provides the synth_filter_fixed_64() callback
+ * @param imdct              passed through to the callback
+ * @param pcm_samples        output, 64 * npcmblocks samples
+ * @param subband_samples_lo 32 subband buffers, indexed [subband][slot]
+ * @param subband_samples_hi NULL, or 64 subband buffers: the first 32 are
+ *                           added to subband_samples_lo, the last 32 feed
+ *                           subbands 32..63 directly. When NULL, subbands
+ *                           32..63 are zero.
+ * @param hist1, offset, hist2, filter_coeff
+ *                           passed through to the callback unchanged
+ * @param npcmblocks         number of time slots to synthesise
+ */
+static void sub_qmf64_fixed_c(SynthFilterContext *synth,
+                              DCADCTContext *imdct,
+                              int32_t *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,
+                              int32_t *hist1, int *offset, int32_t *hist2,
+                              const int32_t *filter_coeff, intptr_t npcmblocks)
+{
+    LOCAL_ALIGNED(32, int32_t, input, [64]);
+    int blk, sb;
+
+    // Without an upper half the top 32 inputs stay zero for every slot
+    if (!subband_samples_hi)
+        memset(&input[32], 0, sizeof(input[0]) * 32);
+
+    for (blk = 0; blk < npcmblocks; blk++, pcm_samples += 64) {
+        if (!subband_samples_hi) {
+            // Only the first 32 subbands carry data
+            for (sb = 0; sb < 32; sb++)
+                input[sb] = subband_samples_lo[sb][blk];
+        } else {
+            // Full 64 subbands; the first 32 are the sum of both inputs
+            for (sb = 0; sb < 32; sb++)
+                input[sb] = subband_samples_lo[sb][blk] + subband_samples_hi[sb][blk];
+            for (sb = 32; sb < 64; sb++)
+                input[sb] = subband_samples_hi[sb][blk];
+        }
+
+        // 64 subband inputs in, 64 PCM samples out
+        synth->synth_filter_fixed_64(imdct, hist1, offset,
+                                     hist2, filter_coeff,
+                                     pcm_samples, input);
+    }
+}
+
+/**
+ * Add a scaled, rounded copy of src to dst:
+ *     dst[i] += (src[i] * coeff + 4) >> 3
+ * so coeff acts as a factor with 3 fractional bits.
+ *
+ * The product is formed explicitly in 64 bits. intptr_t is only 32 bits
+ * wide on 32-bit targets, where a plain `src[i] * coeff` can overflow
+ * (undefined behaviour) and yield results that differ from 64-bit builds.
+ * On targets with a 64-bit intptr_t the result is unchanged.
+ *
+ * @param dst   samples to update in place
+ * @param src   samples to add
+ * @param coeff scale factor with 3 fractional bits
+ * @param len   number of samples
+ */
+static void decor_c(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] += ((int64_t)src[i] * coeff + (1 << 2)) >> 3;
+}
+
+/**
+ * Subtract a scaled copy of src from two destination channels.
+ *
+ * The scale is the constant 5931520, i.e. 23170 << 8, which approximates
+ * M_SQRT1_2 * (1 << 23). It is applied with mul23() from dcamath.h.
+ *
+ * @param dst1 first channel, updated in place
+ * @param dst2 second channel, updated in place
+ * @param src  channel to remove from both
+ * @param len  number of samples
+ */
+static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
+                           const int32_t *src, intptr_t len)
+{
+    int n;
+
+    for (n = 0; n < len; n++) {
+        const int32_t share = mul23(src[n], 5931520 /* M_SQRT1_2 * (1 << 23) */);
+
+        dst1[n] -= share;
+        dst2[n] -= share;
+    }
+}
+
+/**
+ * Subtract a scaled copy of src from dst:
+ *     dst[n] -= mul15(src[n], coeff)
+ * mul15() comes from dcamath.h (presumably a fixed-point multiply with
+ * 15 fractional bits).
+ *
+ * @param dst   samples to update in place
+ * @param src   samples to subtract
+ * @param coeff scale factor
+ * @param len   number of samples
+ */
+static void dmix_sub_c(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len)
+{
+    int n;
+
+    for (n = 0; n < len; n++) {
+        dst[n] -= mul15(src[n], coeff);
+    }
+}
+
+/**
+ * Add a scaled copy of src to dst:
+ *     dst[n] += mul15(src[n], coeff)
+ * mul15() comes from dcamath.h (presumably a fixed-point multiply with
+ * 15 fractional bits).
+ *
+ * @param dst   samples to update in place
+ * @param src   samples to add
+ * @param coeff scale factor
+ * @param len   number of samples
+ */
+static void dmix_add_c(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len)
+{
+    int n;
+
+    for (n = 0; n < len; n++) {
+        dst[n] += mul15(src[n], coeff);
+    }
+}
+
+/**
+ * Scale a buffer in place:
+ *     dst[n] = mul15(dst[n], scale)
+ * mul15() comes from dcamath.h (presumably a fixed-point multiply with
+ * 15 fractional bits).
+ *
+ * @param dst   samples to scale in place
+ * @param scale scale factor
+ * @param len   number of samples
+ */
+static void dmix_scale_c(int32_t *dst, intptr_t scale, intptr_t len)
+{
+    int n;
+
+    for (n = 0; n < len; n++) {
+        const int32_t sample = dst[n];
+
+        dst[n] = mul15(sample, scale);
+    }
+}
+
+/**
+ * Scale a buffer in place with an inverse scale factor:
+ *     dst[n] = mul16(dst[n], scale_inv)
+ * mul16() comes from dcamath.h (presumably a fixed-point multiply with
+ * 16 fractional bits).
+ *
+ * @param dst       samples to scale in place
+ * @param scale_inv scale factor
+ * @param len       number of samples
+ */
+static void dmix_scale_inv_c(int32_t *dst, intptr_t scale_inv, intptr_t len)
+{
+    int n;
+
+    for (n = 0; n < len; n++) {
+        const int32_t sample = dst[n];
+
+        dst[n] = mul16(sample, scale_inv);
+    }
+}
+
+/**
+ * One filter pass used by assemble_freq_bands_c():
+ *     dst[n] -= mul22(src[n], coeff)
+ * mul22() comes from dcamath.h (presumably a fixed-point multiply with
+ * 22 fractional bits).
+ */
+static void filter0(int32_t *dst, const int32_t *src, int32_t coeff, intptr_t len)
+{
+    int n;
+
+    for (n = 0; n < len; n++) {
+        dst[n] -= mul22(src[n], coeff);
+    }
+}
+
+/**
+ * One filter pass used by assemble_freq_bands_c():
+ *     dst[n] -= mul23(src[n], coeff)
+ * mul23() comes from dcamath.h (presumably a fixed-point multiply with
+ * 23 fractional bits).
+ */
+static void filter1(int32_t *dst, const int32_t *src, int32_t coeff, intptr_t len)
+{
+    int n;
+
+    for (n = 0; n < len; n++) {
+        dst[n] -= mul23(src[n], coeff);
+    }
+}
+
+/**
+ * Combine two frequency bands into one interleaved output signal.
+ *
+ * Both inputs are filtered in place by alternately subtracting a scaled
+ * copy of one band from the other: four filter0() passes (coeff[0..3]),
+ * then eight groups of three filter1() passes (coeff[4..19]), each group
+ * working on src0 shifted one more sample towards the past. Finally the
+ * bands are interleaved as src1[i], src0[i - 7].
+ *
+ * @param dst   output, 2 * len samples
+ * @param src0  first band, modified; src0[-7 .. len - 1] is accessed
+ * @param src1  second band, modified; src1[0 .. len - 1] is accessed
+ * @param coeff 20 filter coefficients
+ * @param len   number of samples per band
+ */
+static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
+                                  const int32_t *coeff, intptr_t len)
+{
+    const int32_t *delayed;
+    int i;
+
+    for (i = 0; i < 4; i += 2) {
+        filter0(src0, src1, coeff[i    ], len);
+        filter0(src1, src0, coeff[i + 1], len);
+    }
+
+    for (i = 0; i < 8; i++) {
+        int32_t *past = src0 - i;
+
+        // The first and third pass deliberately reuse the same coefficient
+        filter1(past, src1, coeff[i +  4], len);
+        filter1(src1, past, coeff[i + 12], len);
+        filter1(past, src1, coeff[i +  4], len);
+    }
+
+    // src0 is emitted with a delay of 7 samples relative to src1
+    delayed = src0 - 7;
+    for (i = 0; i < len; i++) {
+        *dst++ = src1[i];
+        *dst++ = delayed[i];
+    }
+}
+/**
+ * Fill every function pointer of the DSP context with the C
+ * implementations defined in this file.
+ *
+ * @param s context to initialise; all members assigned below are
+ *          overwritten
+ */
+av_cold void ff_dcadsp_init(DCADSPContext *s)
+{
+ s->decode_hf = decode_hf_c;
+ s->decode_joint = decode_joint_c;
+ // Floating point path: lfe_fir_float[0]/[1] use dec_select 0/1, sub_qmf_float[0]/[1] are the 32-/64-band filters
+ s->lfe_fir_float[0] = lfe_fir1_float_c;
+ s->lfe_fir_float[1] = lfe_fir2_float_c;
+ s->lfe_x96_float = lfe_x96_float_c;
+ s->sub_qmf_float[0] = sub_qmf32_float_c;
+ s->sub_qmf_float[1] = sub_qmf64_float_c;
+ // Fixed point path: a single LFE filter, sub_qmf_fixed[0]/[1] are the 32-/64-band filters
+ s->lfe_fir_fixed = lfe_fir_fixed_c;
+ s->lfe_x96_fixed = lfe_x96_fixed_c;
+ s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
+ s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
+ // dst[i] += (src[i] * coeff + 4) >> 3
+ s->decor = decor_c;
+ // Helpers that add, subtract or scale whole sample buffers
+ s->dmix_sub_xch = dmix_sub_xch_c;
+ s->dmix_sub = dmix_sub_c;
+ s->dmix_add = dmix_add_c;
+ s->dmix_scale = dmix_scale_c;
+ s->dmix_scale_inv = dmix_scale_inv_c;
+ // Filters two bands against each other and interleaves them
+ s->assemble_freq_bands = assemble_freq_bands_c;
+}
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
new file mode 100644
index 0000000..d8acf37
--- /dev/null
+++ b/libavcodec/dcadsp.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCADSP_H
+#define AVCODEC_DCADSP_H
+
+#include "libavutil/common.h"
+
+#include "fft.h"
+#include "dcadct.h"
+#include "synth_filter.h"
+/**
+ * Table of DSP routines; ff_dcadsp_init() installs the C versions from
+ * dcadsp.c. The descriptions below state what those C versions do.
+ *
+ * decode_hf (first member): for each subband sb in [sb_start, sb_end),
+ * scales the first len entries of hf_vq[vq_index[sb]] by
+ * scale_factors[sb][0] and stores them at dst[sb][ofs ..].
+ */
+typedef struct DCADSPContext {
+ void (*decode_hf)(int32_t **dst,
+ const int32_t *vq_index,
+ const int8_t hf_vq[1024][32],
+ int32_t scale_factors[32][2],
+ intptr_t sb_start, intptr_t sb_end,
+ intptr_t ofs, intptr_t len);
+ /*
+  * For each subband sb in [sb_start, sb_end), writes src[sb][ofs ..]
+  * scaled by scale_factors[sb] to dst[sb][ofs ..], len samples each.
+  */
+ void (*decode_joint)(int32_t **dst, int32_t **src,
+ const int32_t *scale_factors,
+ intptr_t sb_start, intptr_t sb_end,
+ intptr_t ofs, intptr_t len);
+ /*
+  * Float LFE interpolation. [0]: 64 outputs per LFE sample, 8 taps,
+  * npcmblocks >> 1 samples; [1]: 128 outputs, 4 taps, npcmblocks >> 2
+  * samples. Older LFE samples are read at negative offsets.
+  */
+ void (*lfe_fir_float[2])(float *pcm_samples, int32_t *lfe_samples,
+ const float *filter_coeff, intptr_t npcmblocks);
+ /*
+  * Writes 2 * len samples interpolated between consecutive src samples;
+  * *hist carries the last src sample from one call to the next.
+  */
+ void (*lfe_x96_float)(float *dst, const float *src,
+ float *hist, intptr_t len);
+ /*
+  * Float subband synthesis. [0]: 32 bands, subband_samples_hi unused;
+  * [1]: 64 bands, subband_samples_hi may be NULL (upper bands zero).
+  * Writes 32 resp. 64 samples to pcm_samples per block.
+  */
+ void (*sub_qmf_float[2])(SynthFilterContext *synth,
+ FFTContext *imdct,
+ float *pcm_samples,
+ int32_t **subband_samples_lo,
+ int32_t **subband_samples_hi,
+ float *hist1, int *offset, float *hist2,
+ const float *filter_coeff, intptr_t npcmblocks,
+ float scale);
+ /*
+  * Fixed point LFE interpolation: 64 outputs per LFE sample, 8 taps,
+  * npcmblocks >> 1 samples. Older LFE samples are read at negative offsets.
+  */
+ void (*lfe_fir_fixed)(int32_t *pcm_samples, int32_t *lfe_samples,
+ const int32_t *filter_coeff, intptr_t npcmblocks);
+ /* Fixed point counterpart of lfe_x96_float: 2 * len outputs, *hist updated. */
+ void (*lfe_x96_fixed)(int32_t *dst, const int32_t *src,
+ int32_t *hist, intptr_t len);
+ /*
+  * Fixed point subband synthesis. [0]: 32 bands, subband_samples_hi
+  * unused; [1]: 64 bands, subband_samples_hi may be NULL.
+  */
+ void (*sub_qmf_fixed[2])(SynthFilterContext *synth,
+ DCADCTContext *imdct,
+ int32_t *pcm_samples,
+ int32_t **subband_samples_lo,
+ int32_t **subband_samples_hi,
+ int32_t *hist1, int *offset, int32_t *hist2,
+ const int32_t *filter_coeff, intptr_t npcmblocks);
+ /* dst[i] += (src[i] * coeff + 4) >> 3 for len samples. */
+ void (*decor)(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len);
+ /* Subtracts src scaled by 5931520 (via mul23) from both dst1 and dst2. */
+ void (*dmix_sub_xch)(int32_t *dst1, int32_t *dst2,
+ const int32_t *src, intptr_t len);
+ /* dst[i] -= mul15(src[i], coeff) for len samples. */
+ void (*dmix_sub)(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len);
+ /* dst[i] += mul15(src[i], coeff) for len samples. */
+ void (*dmix_add)(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len);
+ /* dst[i] = mul15(dst[i], scale) for len samples. */
+ void (*dmix_scale)(int32_t *dst, intptr_t scale, intptr_t len);
+ /* dst[i] = mul16(dst[i], scale_inv) for len samples. */
+ void (*dmix_scale_inv)(int32_t *dst, intptr_t scale_inv, intptr_t len);
+ /*
+  * Filters src0 and src1 against each other in place (both are modified,
+  * src0 is accessed from src0[-7]) and writes 2 * len interleaved samples
+  * to dst. Reads coeff[0..19].
+  */
+ void (*assemble_freq_bands)(int32_t *dst, int32_t *src0, int32_t *src1,
+ const int32_t *coeff, intptr_t len);
+} DCADSPContext;
+/** Set all DCADSPContext function pointers to the C implementations in dcadsp.c. */
+av_cold void ff_dcadsp_init(DCADSPContext *s);
+
+#endif
--
2.1.4
More information about the ffmpeg-devel
mailing list