[FFmpeg-cvslog] Merge commit '43717469f9daa402f6acb48997255827a56034e9'

Clément Bœsch git at videolan.org
Wed Mar 22 12:30:31 EET 2017


ffmpeg | branch: master | Clément Bœsch <u at pkh.me> | Wed Mar 22 11:11:28 2017 +0100| [e39d4ff150f45d82aabafa5a34f5c9ec7a829d15] | committer: Clément Bœsch

Merge commit '43717469f9daa402f6acb48997255827a56034e9'

* commit '43717469f9daa402f6acb48997255827a56034e9':
  ac3dsp: Reverse matrix in/out order in downmix()

Merged-by: Clément Bœsch <u at pkh.me>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e39d4ff150f45d82aabafa5a34f5c9ec7a829d15
---

 libavcodec/ac3dec.c          | 46 +++++++++++++++++++++++++++++---------------
 libavcodec/ac3dec.h          |  2 +-
 libavcodec/ac3dec_fixed.c    |  8 ++++----
 libavcodec/ac3dsp.c          | 16 +++++++--------
 libavcodec/ac3dsp.h          |  4 ++--
 libavcodec/x86/ac3dsp_init.c | 36 ++++++++++++++++++----------------
 6 files changed, 64 insertions(+), 48 deletions(-)

diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 003ce17..f9bab94 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -362,52 +362,62 @@ static int parse_frame_header(AC3DecodeContext *s)
  * Set stereo downmixing coefficients based on frame header info.
  * reference: Section 7.8.2 Downmixing Into Two Channels
  */
-static void set_downmix_coeffs(AC3DecodeContext *s)
+static int set_downmix_coeffs(AC3DecodeContext *s)
 {
     int i;
     float cmix = gain_levels[s->  center_mix_level];
     float smix = gain_levels[s->surround_mix_level];
     float norm0, norm1;
-    float downmix_coeffs[AC3_MAX_CHANNELS][2];
+    float downmix_coeffs[2][AC3_MAX_CHANNELS];
+
+    if (!s->downmix_coeffs[0]) {
+        s->downmix_coeffs[0] = av_malloc_array(2 * AC3_MAX_CHANNELS,
+                                               sizeof(**s->downmix_coeffs));
+        if (!s->downmix_coeffs[0])
+            return AVERROR(ENOMEM);
+        s->downmix_coeffs[1] = s->downmix_coeffs[0] + AC3_MAX_CHANNELS;
+    }
 
     for (i = 0; i < s->fbw_channels; i++) {
-        downmix_coeffs[i][0] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]];
-        downmix_coeffs[i][1] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]];
+        downmix_coeffs[0][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]];
+        downmix_coeffs[1][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]];
     }
     if (s->channel_mode > 1 && s->channel_mode & 1) {
-        downmix_coeffs[1][0] = downmix_coeffs[1][1] = cmix;
+        downmix_coeffs[0][1] = downmix_coeffs[1][1] = cmix;
     }
     if (s->channel_mode == AC3_CHMODE_2F1R || s->channel_mode == AC3_CHMODE_3F1R) {
         int nf = s->channel_mode - 2;
-        downmix_coeffs[nf][0] = downmix_coeffs[nf][1] = smix * LEVEL_MINUS_3DB;
+        downmix_coeffs[0][nf] = downmix_coeffs[1][nf] = smix * LEVEL_MINUS_3DB;
     }
     if (s->channel_mode == AC3_CHMODE_2F2R || s->channel_mode == AC3_CHMODE_3F2R) {
         int nf = s->channel_mode - 4;
-        downmix_coeffs[nf][0] = downmix_coeffs[nf+1][1] = smix;
+        downmix_coeffs[0][nf] = downmix_coeffs[1][nf+1] = smix;
     }
 
     /* renormalize */
     norm0 = norm1 = 0.0;
     for (i = 0; i < s->fbw_channels; i++) {
-        norm0 += downmix_coeffs[i][0];
-        norm1 += downmix_coeffs[i][1];
+        norm0 += downmix_coeffs[0][i];
+        norm1 += downmix_coeffs[1][i];
     }
     norm0 = 1.0f / norm0;
     norm1 = 1.0f / norm1;
     for (i = 0; i < s->fbw_channels; i++) {
-        downmix_coeffs[i][0] *= norm0;
-        downmix_coeffs[i][1] *= norm1;
+        downmix_coeffs[0][i] *= norm0;
+        downmix_coeffs[1][i] *= norm1;
     }
 
     if (s->output_mode == AC3_CHMODE_MONO) {
         for (i = 0; i < s->fbw_channels; i++)
-            downmix_coeffs[i][0] = (downmix_coeffs[i][0] +
-                                    downmix_coeffs[i][1]) * LEVEL_MINUS_3DB;
+            downmix_coeffs[0][i] = (downmix_coeffs[0][i] +
+                                    downmix_coeffs[1][i]) * LEVEL_MINUS_3DB;
     }
     for (i = 0; i < s->fbw_channels; i++) {
-        s->downmix_coeffs[i][0] = FIXR12(downmix_coeffs[i][0]);
-        s->downmix_coeffs[i][1] = FIXR12(downmix_coeffs[i][1]);
+        s->downmix_coeffs[0][i] = FIXR12(downmix_coeffs[0][i]);
+        s->downmix_coeffs[1][i] = FIXR12(downmix_coeffs[1][i]);
     }
+
+    return 0;
 }
 
 /**
@@ -1562,7 +1572,10 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
         /* set downmixing coefficients if needed */
         if (s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) &&
                 s->fbw_channels == s->out_channels)) {
-            set_downmix_coeffs(s);
+            if ((ret = set_downmix_coeffs(s)) < 0) {
+                av_log(avctx, AV_LOG_ERROR, "error setting downmix coeffs\n");
+                return ret;
+            }
         }
     } else if (!s->channels) {
         av_log(avctx, AV_LOG_ERROR, "unable to determine channel mode\n");
@@ -1685,6 +1698,7 @@ static av_cold int ac3_decode_end(AVCodecContext *avctx)
     ff_mdct_end(&s->imdct_512);
     ff_mdct_end(&s->imdct_256);
     av_freep(&s->fdsp);
+    av_freep(&s->downmix_coeffs[0]);
 
     return 0;
 }
diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h
index 495e9a6..bac661c 100644
--- a/libavcodec/ac3dec.h
+++ b/libavcodec/ac3dec.h
@@ -159,7 +159,7 @@ typedef struct AC3DecodeContext {
     int fbw_channels;                           ///< number of full-bandwidth channels
     int channels;                               ///< number of total channels
     int lfe_ch;                                 ///< index of LFE channel
-    SHORTFLOAT downmix_coeffs[AC3_MAX_CHANNELS][2];  ///< stereo downmix coefficients
+    SHORTFLOAT *downmix_coeffs[2];              ///< stereo downmix coefficients
     int downmixed;                              ///< indicates if coeffs are currently downmixed
     int output_mode;                            ///< output channel configuration
     int out_channels;                           ///< number of output channels
diff --git a/libavcodec/ac3dec_fixed.c b/libavcodec/ac3dec_fixed.c
index 1f79ade..682fe93 100644
--- a/libavcodec/ac3dec_fixed.c
+++ b/libavcodec/ac3dec_fixed.c
@@ -139,7 +139,7 @@ static void scale_coefs (
  * Downmix samples from original signal to stereo or mono (this is for 16-bit samples
  * and fixed point decoder - original (for 32-bit samples) is in ac3dsp.c).
  */
-static void ac3_downmix_c_fixed16(int16_t **samples, int16_t (*matrix)[2],
+static void ac3_downmix_c_fixed16(int16_t **samples, int16_t **matrix,
                                   int out_ch, int in_ch, int len)
 {
     int i, j;
@@ -148,8 +148,8 @@ static void ac3_downmix_c_fixed16(int16_t **samples, int16_t (*matrix)[2],
         for (i = 0; i < len; i++) {
             v0 = v1 = 0;
             for (j = 0; j < in_ch; j++) {
-                v0 += samples[j][i] * matrix[j][0];
-                v1 += samples[j][i] * matrix[j][1];
+                v0 += samples[j][i] * matrix[0][j];
+                v1 += samples[j][i] * matrix[1][j];
             }
             samples[0][i] = (v0+2048)>>12;
             samples[1][i] = (v1+2048)>>12;
@@ -158,7 +158,7 @@ static void ac3_downmix_c_fixed16(int16_t **samples, int16_t (*matrix)[2],
         for (i = 0; i < len; i++) {
             v0 = 0;
             for (j = 0; j < in_ch; j++)
-                v0 += samples[j][i] * matrix[j][0];
+                v0 += samples[j][i] * matrix[0][j];
             samples[0][i] = (v0+2048)>>12;
         }
     }
diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 9902f90..23abc56 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -213,7 +213,7 @@ static void ac3_sum_square_butterfly_float_c(float sum[4],
     }
 }
 
-static void ac3_downmix_c(float **samples, float (*matrix)[2],
+static void ac3_downmix_c(float **samples, float **matrix,
                           int out_ch, int in_ch, int len)
 {
     int i, j;
@@ -222,8 +222,8 @@ static void ac3_downmix_c(float **samples, float (*matrix)[2],
         for (i = 0; i < len; i++) {
             v0 = v1 = 0.0f;
             for (j = 0; j < in_ch; j++) {
-                v0 += samples[j][i] * matrix[j][0];
-                v1 += samples[j][i] * matrix[j][1];
+                v0 += samples[j][i] * matrix[0][j];
+                v1 += samples[j][i] * matrix[1][j];
             }
             samples[0][i] = v0;
             samples[1][i] = v1;
@@ -232,13 +232,13 @@ static void ac3_downmix_c(float **samples, float (*matrix)[2],
         for (i = 0; i < len; i++) {
             v0 = 0.0f;
             for (j = 0; j < in_ch; j++)
-                v0 += samples[j][i] * matrix[j][0];
+                v0 += samples[j][i] * matrix[0][j];
             samples[0][i] = v0;
         }
     }
 }
 
-static void ac3_downmix_c_fixed(int32_t **samples, int16_t (*matrix)[2],
+static void ac3_downmix_c_fixed(int32_t **samples, int16_t **matrix,
                                 int out_ch, int in_ch, int len)
 {
     int i, j;
@@ -247,8 +247,8 @@ static void ac3_downmix_c_fixed(int32_t **samples, int16_t (*matrix)[2],
         for (i = 0; i < len; i++) {
             v0 = v1 = 0;
             for (j = 0; j < in_ch; j++) {
-                v0 += (int64_t)samples[j][i] * matrix[j][0];
-                v1 += (int64_t)samples[j][i] * matrix[j][1];
+                v0 += (int64_t)samples[j][i] * matrix[0][j];
+                v1 += (int64_t)samples[j][i] * matrix[1][j];
             }
             samples[0][i] = (v0+2048)>>12;
             samples[1][i] = (v1+2048)>>12;
@@ -257,7 +257,7 @@ static void ac3_downmix_c_fixed(int32_t **samples, int16_t (*matrix)[2],
         for (i = 0; i < len; i++) {
             v0 = 0;
             for (j = 0; j < in_ch; j++)
-                v0 += (int64_t)samples[j][i] * matrix[j][0];
+                v0 += (int64_t)samples[j][i] * matrix[0][j];
             samples[0][i] = (v0+2048)>>12;
         }
     }
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index ed98c8c..b4de307 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -132,10 +132,10 @@ typedef struct AC3DSPContext {
     void (*sum_square_butterfly_float)(float sum[4], const float *coef0,
                                        const float *coef1, int len);
 
-    void (*downmix)(float **samples, float (*matrix)[2], int out_ch,
+    void (*downmix)(float **samples, float **matrix, int out_ch,
                     int in_ch, int len);
 
-    void (*downmix_fixed)(int32_t **samples, int16_t (*matrix)[2], int out_ch,
+    void (*downmix_fixed)(int32_t **samples, int16_t **matrix, int out_ch,
                           int in_ch, int len);
 
     /**
diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
index 9fd0aef..edb6c60 100644
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -76,8 +76,8 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
 #define MIX5(mono, stereo)                                      \
     __asm__ volatile (                                          \
         "movss           0(%1), %%xmm5          \n"             \
-        "movss           8(%1), %%xmm6          \n"             \
-        "movss          24(%1), %%xmm7          \n"             \
+        "movss           4(%1), %%xmm6          \n"             \
+        "movss          12(%1), %%xmm7          \n"             \
         "shufps     $0, %%xmm5, %%xmm5          \n"             \
         "shufps     $0, %%xmm6, %%xmm6          \n"             \
         "shufps     $0, %%xmm7, %%xmm7          \n"             \
@@ -102,7 +102,7 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
         "add               $16, %0              \n"             \
         "jl                 1b                  \n"             \
         : "+&r"(i)                                              \
-        : "r"(matrix),                                          \
+        : "r"(matrix[0]),                                          \
           "r"(samples[0] + len),                                \
           "r"(samples[1] + len),                                \
           "r"(samples[2] + len),                                \
@@ -146,22 +146,22 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
         : "memory"                                              \
     );
 
-static void ac3_downmix_sse(float **samples, float (*matrix)[2],
+static void ac3_downmix_sse(float **samples, float **matrix,
                             int out_ch, int in_ch, int len)
 {
-    int (*matrix_cmp)[2] = (int(*)[2])matrix;
+    int **matrix_cmp = (int **)matrix;
     intptr_t i, j, k, m;
 
     i = -len * sizeof(float);
     if (in_ch == 5 && out_ch == 2 &&
-        !(matrix_cmp[0][1] | matrix_cmp[2][0]   |
-          matrix_cmp[3][1] | matrix_cmp[4][0]   |
-          (matrix_cmp[1][0] ^ matrix_cmp[1][1]) |
-          (matrix_cmp[0][0] ^ matrix_cmp[2][1]))) {
+        !(matrix_cmp[1][0] | matrix_cmp[0][2]   |
+          matrix_cmp[1][3] | matrix_cmp[0][4]   |
+          (matrix_cmp[0][1] ^ matrix_cmp[1][1]) |
+          (matrix_cmp[0][0] ^ matrix_cmp[1][2]))) {
         MIX5(IF0, IF1);
     } else if (in_ch == 5 && out_ch == 1 &&
-               matrix_cmp[0][0] == matrix_cmp[2][0] &&
-               matrix_cmp[3][0] == matrix_cmp[4][0]) {
+               matrix_cmp[0][0] == matrix_cmp[0][2] &&
+               matrix_cmp[0][3] == matrix_cmp[0][4]) {
         MIX5(IF1, IF0);
     } else {
         LOCAL_ALIGNED(16, float, matrix_simd, [AC3_MAX_CHANNELS], [2][4]);
@@ -171,18 +171,20 @@ static void ac3_downmix_sse(float **samples, float (*matrix)[2],
             samp[j] = samples[j] + len;
 
         j = 2 * in_ch * sizeof(float);
+        k =     in_ch * sizeof(float);
         __asm__ volatile (
             "1:                                 \n"
+            "sub             $4, %1             \n"
             "sub             $8, %0             \n"
-            "movss     (%2, %0), %%xmm4         \n"
-            "movss    4(%2, %0), %%xmm5         \n"
+            "movss     (%3, %1), %%xmm4         \n"
+            "movss     (%4, %1), %%xmm5         \n"
             "shufps          $0, %%xmm4, %%xmm4 \n"
             "shufps          $0, %%xmm5, %%xmm5 \n"
-            "movaps      %%xmm4,   (%1, %0, 4)  \n"
-            "movaps      %%xmm5, 16(%1, %0, 4)  \n"
+            "movaps      %%xmm4,   (%2, %0, 4)  \n"
+            "movaps      %%xmm5, 16(%2, %0, 4)  \n"
             "jg              1b                 \n"
-            : "+&r"(j)
-            : "r"(matrix_simd), "r"(matrix)
+            : "+&r"(j), "+&r"(k)
+            : "r"(matrix_simd), "r"(matrix[0]), "r"(matrix[1])
             : "memory"
         );
         if (out_ch == 2) {


======================================================================

diff --cc libavcodec/ac3dec.c
index 003ce17,aba3119..f9bab94
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@@ -368,46 -338,51 +368,56 @@@ static int set_downmix_coeffs(AC3Decode
      float cmix = gain_levels[s->  center_mix_level];
      float smix = gain_levels[s->surround_mix_level];
      float norm0, norm1;
-     float downmix_coeffs[AC3_MAX_CHANNELS][2];
++    float downmix_coeffs[2][AC3_MAX_CHANNELS];
+ 
+     if (!s->downmix_coeffs[0]) {
 -        s->downmix_coeffs[0] = av_malloc(2 * AC3_MAX_CHANNELS *
 -                                         sizeof(**s->downmix_coeffs));
++        s->downmix_coeffs[0] = av_malloc_array(2 * AC3_MAX_CHANNELS,
++                                               sizeof(**s->downmix_coeffs));
+         if (!s->downmix_coeffs[0])
+             return AVERROR(ENOMEM);
+         s->downmix_coeffs[1] = s->downmix_coeffs[0] + AC3_MAX_CHANNELS;
+     }
  
      for (i = 0; i < s->fbw_channels; i++) {
-         downmix_coeffs[i][0] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]];
-         downmix_coeffs[i][1] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]];
 -        s->downmix_coeffs[0][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]];
 -        s->downmix_coeffs[1][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]];
++        downmix_coeffs[0][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]];
++        downmix_coeffs[1][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]];
      }
      if (s->channel_mode > 1 && s->channel_mode & 1) {
-         downmix_coeffs[1][0] = downmix_coeffs[1][1] = cmix;
 -        s->downmix_coeffs[0][1] = s->downmix_coeffs[1][1] = cmix;
++        downmix_coeffs[0][1] = downmix_coeffs[1][1] = cmix;
      }
      if (s->channel_mode == AC3_CHMODE_2F1R || s->channel_mode == AC3_CHMODE_3F1R) {
          int nf = s->channel_mode - 2;
-         downmix_coeffs[nf][0] = downmix_coeffs[nf][1] = smix * LEVEL_MINUS_3DB;
 -        s->downmix_coeffs[0][nf] = s->downmix_coeffs[1][nf] = smix * LEVEL_MINUS_3DB;
++        downmix_coeffs[0][nf] = downmix_coeffs[1][nf] = smix * LEVEL_MINUS_3DB;
      }
      if (s->channel_mode == AC3_CHMODE_2F2R || s->channel_mode == AC3_CHMODE_3F2R) {
          int nf = s->channel_mode - 4;
-         downmix_coeffs[nf][0] = downmix_coeffs[nf+1][1] = smix;
 -        s->downmix_coeffs[0][nf] = s->downmix_coeffs[1][nf+1] = smix;
++        downmix_coeffs[0][nf] = downmix_coeffs[1][nf+1] = smix;
      }
  
      /* renormalize */
      norm0 = norm1 = 0.0;
      for (i = 0; i < s->fbw_channels; i++) {
-         norm0 += downmix_coeffs[i][0];
-         norm1 += downmix_coeffs[i][1];
 -        norm0 += s->downmix_coeffs[0][i];
 -        norm1 += s->downmix_coeffs[1][i];
++        norm0 += downmix_coeffs[0][i];
++        norm1 += downmix_coeffs[1][i];
      }
      norm0 = 1.0f / norm0;
      norm1 = 1.0f / norm1;
      for (i = 0; i < s->fbw_channels; i++) {
-         downmix_coeffs[i][0] *= norm0;
-         downmix_coeffs[i][1] *= norm1;
 -        s->downmix_coeffs[0][i] *= norm0;
 -        s->downmix_coeffs[1][i] *= norm1;
++        downmix_coeffs[0][i] *= norm0;
++        downmix_coeffs[1][i] *= norm1;
      }
  
      if (s->output_mode == AC3_CHMODE_MONO) {
          for (i = 0; i < s->fbw_channels; i++)
-             downmix_coeffs[i][0] = (downmix_coeffs[i][0] +
-                                     downmix_coeffs[i][1]) * LEVEL_MINUS_3DB;
 -            s->downmix_coeffs[0][i] = (s->downmix_coeffs[0][i] +
 -                                       s->downmix_coeffs[1][i]) * LEVEL_MINUS_3DB;
++            downmix_coeffs[0][i] = (downmix_coeffs[0][i] +
++                                    downmix_coeffs[1][i]) * LEVEL_MINUS_3DB;
 +    }
 +    for (i = 0; i < s->fbw_channels; i++) {
-         s->downmix_coeffs[i][0] = FIXR12(downmix_coeffs[i][0]);
-         s->downmix_coeffs[i][1] = FIXR12(downmix_coeffs[i][1]);
++        s->downmix_coeffs[0][i] = FIXR12(downmix_coeffs[0][i]);
++        s->downmix_coeffs[1][i] = FIXR12(downmix_coeffs[1][i]);
      }
+ 
+     return 0;
  }
  
  /**
@@@ -1684,7 -1579,7 +1697,8 @@@ static av_cold int ac3_decode_end(AVCod
      AC3DecodeContext *s = avctx->priv_data;
      ff_mdct_end(&s->imdct_512);
      ff_mdct_end(&s->imdct_256);
 +    av_freep(&s->fdsp);
+     av_freep(&s->downmix_coeffs[0]);
  
      return 0;
  }
diff --cc libavcodec/ac3dec.h
index 495e9a6,4a7e281..bac661c
--- a/libavcodec/ac3dec.h
+++ b/libavcodec/ac3dec.h
@@@ -159,7 -147,7 +159,7 @@@ typedef struct AC3DecodeContext 
      int fbw_channels;                           ///< number of full-bandwidth channels
      int channels;                               ///< number of total channels
      int lfe_ch;                                 ///< index of LFE channel
-     SHORTFLOAT downmix_coeffs[AC3_MAX_CHANNELS][2];  ///< stereo downmix coefficients
 -    float *downmix_coeffs[2];                   ///< stereo downmix coefficients
++    SHORTFLOAT *downmix_coeffs[2];              ///< stereo downmix coefficients
      int downmixed;                              ///< indicates if coeffs are currently downmixed
      int output_mode;                            ///< output channel configuration
      int out_channels;                           ///< number of output channels
diff --cc libavcodec/ac3dec_fixed.c
index 1f79ade,0000000..682fe93
mode 100644,000000..100644
--- a/libavcodec/ac3dec_fixed.c
+++ b/libavcodec/ac3dec_fixed.c
@@@ -1,197 -1,0 +1,197 @@@
 +/*
 + * Copyright (c) 2012
 + *      MIPS Technologies, Inc., California.
 + *
 + * Redistribution and use in source and binary forms, with or without
 + * modification, are permitted provided that the following conditions
 + * are met:
 + * 1. Redistributions of source code must retain the above copyright
 + *    notice, this list of conditions and the following disclaimer.
 + * 2. Redistributions in binary form must reproduce the above copyright
 + *    notice, this list of conditions and the following disclaimer in the
 + *    documentation and/or other materials provided with the distribution.
 + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 + *    contributors may be used to endorse or promote products derived from
 + *    this software without specific prior written permission.
 + *
 + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 + * SUCH DAMAGE.
 + *
 + * Author:  Stanislav Ocovaj (socovaj at mips.com)
 + *
 + * AC3 fixed-point decoder for MIPS platforms
 + *
 + * This file is part of FFmpeg.
 + *
 + * FFmpeg is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 2.1 of the License, or (at your option) any later version.
 + *
 + * FFmpeg is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with FFmpeg; if not, write to the Free Software
 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 + */
 +
 +#define FFT_FLOAT 0
 +#define USE_FIXED 1
 +#define FFT_FIXED_32 1
 +#include "ac3dec.h"
 +
 +
 +static const int end_freq_inv_tab[8] =
 +{
 +    50529027, 44278013, 39403370, 32292987, 27356480, 23729101, 20951060, 18755316
 +};
 +
 +static void scale_coefs (
 +    int32_t *dst,
 +    const int32_t *src,
 +    int dynrng,
 +    int len)
 +{
 +    int i, shift, round;
 +    int16_t mul;
 +    int temp, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
 +
 +    mul = (dynrng & 0x1f) + 0x20;
 +    shift = 4 - ((dynrng << 23) >> 28);
 +    if (shift > 0 ) {
 +      round = 1 << (shift-1);
 +      for (i=0; i<len; i+=8) {
 +
 +          temp = src[i] * mul;
 +          temp1 = src[i+1] * mul;
 +          temp = temp + round;
 +          temp2 = src[i+2] * mul;
 +
 +          temp1 = temp1 + round;
 +          dst[i] = temp >> shift;
 +          temp3 = src[i+3] * mul;
 +          temp2 = temp2 + round;
 +
 +          dst[i+1] = temp1 >> shift;
 +          temp4 = src[i + 4] * mul;
 +          temp3 = temp3 + round;
 +          dst[i+2] = temp2 >> shift;
 +
 +          temp5 = src[i+5] * mul;
 +          temp4 = temp4 + round;
 +          dst[i+3] = temp3 >> shift;
 +          temp6 = src[i+6] * mul;
 +
 +          dst[i+4] = temp4 >> shift;
 +          temp5 = temp5 + round;
 +          temp7 = src[i+7] * mul;
 +          temp6 = temp6 + round;
 +
 +          dst[i+5] = temp5 >> shift;
 +          temp7 = temp7 + round;
 +          dst[i+6] = temp6 >> shift;
 +          dst[i+7] = temp7 >> shift;
 +
 +      }
 +    } else {
 +      shift = -shift;
 +      for (i=0; i<len; i+=8) {
 +
 +          temp = src[i] * mul;
 +          temp1 = src[i+1] * mul;
 +          temp2 = src[i+2] * mul;
 +
 +          dst[i] = temp << shift;
 +          temp3 = src[i+3] * mul;
 +
 +          dst[i+1] = temp1 << shift;
 +          temp4 = src[i + 4] * mul;
 +          dst[i+2] = temp2 << shift;
 +
 +          temp5 = src[i+5] * mul;
 +          dst[i+3] = temp3 << shift;
 +          temp6 = src[i+6] * mul;
 +
 +          dst[i+4] = temp4 << shift;
 +          temp7 = src[i+7] * mul;
 +
 +          dst[i+5] = temp5 << shift;
 +          dst[i+6] = temp6 << shift;
 +          dst[i+7] = temp7 << shift;
 +
 +      }
 +    }
 +}
 +
 +/**
 + * Downmix samples from original signal to stereo or mono (this is for 16-bit samples
 + * and fixed point decoder - original (for 32-bit samples) is in ac3dsp.c).
 + */
- static void ac3_downmix_c_fixed16(int16_t **samples, int16_t (*matrix)[2],
++static void ac3_downmix_c_fixed16(int16_t **samples, int16_t **matrix,
 +                                  int out_ch, int in_ch, int len)
 +{
 +    int i, j;
 +    int v0, v1;
 +    if (out_ch == 2) {
 +        for (i = 0; i < len; i++) {
 +            v0 = v1 = 0;
 +            for (j = 0; j < in_ch; j++) {
-                 v0 += samples[j][i] * matrix[j][0];
-                 v1 += samples[j][i] * matrix[j][1];
++                v0 += samples[j][i] * matrix[0][j];
++                v1 += samples[j][i] * matrix[1][j];
 +            }
 +            samples[0][i] = (v0+2048)>>12;
 +            samples[1][i] = (v1+2048)>>12;
 +        }
 +    } else if (out_ch == 1) {
 +        for (i = 0; i < len; i++) {
 +            v0 = 0;
 +            for (j = 0; j < in_ch; j++)
-                 v0 += samples[j][i] * matrix[j][0];
++                v0 += samples[j][i] * matrix[0][j];
 +            samples[0][i] = (v0+2048)>>12;
 +        }
 +    }
 +}
 +
 +#include "eac3dec.c"
 +#include "ac3dec.c"
 +
 +static const AVOption options[] = {
 +    { "cons_noisegen", "enable consistent noise generation", OFFSET(consistent_noise_generation), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, PAR },
 +    { "drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, 0.0, 6.0, PAR },
 +    { "heavy_compr", "enable heavy dynamic range compression", OFFSET(heavy_compression), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, PAR },
 +    { NULL},
 +};
 +
 +static const AVClass ac3_decoder_class = {
 +    .class_name = "Fixed-Point AC-3 Decoder",
 +    .item_name  = av_default_item_name,
 +    .option     = options,
 +    .version    = LIBAVUTIL_VERSION_INT,
 +};
 +
 +AVCodec ff_ac3_fixed_decoder = {
 +    .name           = "ac3_fixed",
 +    .type           = AVMEDIA_TYPE_AUDIO,
 +    .id             = AV_CODEC_ID_AC3,
 +    .priv_data_size = sizeof (AC3DecodeContext),
 +    .init           = ac3_decode_init,
 +    .close          = ac3_decode_end,
 +    .decode         = ac3_decode_frame,
 +    .capabilities   = AV_CODEC_CAP_DR1,
 +    .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
 +    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
 +                                                      AV_SAMPLE_FMT_NONE },
 +    .priv_class     = &ac3_decoder_class,
 +};
diff --cc libavcodec/ac3dsp.c
index 9902f90,d1bf37e..23abc56
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@@ -171,49 -171,7 +171,49 @@@ static void ac3_extract_exponents_c(uin
      }
  }
  
 +static void ac3_sum_square_butterfly_int32_c(int64_t sum[4],
 +                                             const int32_t *coef0,
 +                                             const int32_t *coef1,
 +                                             int len)
 +{
 +    int i;
 +
 +    sum[0] = sum[1] = sum[2] = sum[3] = 0;
 +
 +    for (i = 0; i < len; i++) {
 +        int lt = coef0[i];
 +        int rt = coef1[i];
 +        int md = lt + rt;
 +        int sd = lt - rt;
 +        MAC64(sum[0], lt, lt);
 +        MAC64(sum[1], rt, rt);
 +        MAC64(sum[2], md, md);
 +        MAC64(sum[3], sd, sd);
 +    }
 +}
 +
 +static void ac3_sum_square_butterfly_float_c(float sum[4],
 +                                             const float *coef0,
 +                                             const float *coef1,
 +                                             int len)
 +{
 +    int i;
 +
 +    sum[0] = sum[1] = sum[2] = sum[3] = 0;
 +
 +    for (i = 0; i < len; i++) {
 +        float lt = coef0[i];
 +        float rt = coef1[i];
 +        float md = lt + rt;
 +        float sd = lt - rt;
 +        sum[0] += lt * lt;
 +        sum[1] += rt * rt;
 +        sum[2] += md * md;
 +        sum[3] += sd * sd;
 +    }
 +}
 +
- static void ac3_downmix_c(float **samples, float (*matrix)[2],
+ static void ac3_downmix_c(float **samples, float **matrix,
                            int out_ch, int in_ch, int len)
  {
      int i, j;
@@@ -238,31 -196,6 +238,31 @@@
      }
  }
  
- static void ac3_downmix_c_fixed(int32_t **samples, int16_t (*matrix)[2],
++static void ac3_downmix_c_fixed(int32_t **samples, int16_t **matrix,
 +                                int out_ch, int in_ch, int len)
 +{
 +    int i, j;
 +    int64_t v0, v1;
 +    if (out_ch == 2) {
 +        for (i = 0; i < len; i++) {
 +            v0 = v1 = 0;
 +            for (j = 0; j < in_ch; j++) {
-                 v0 += (int64_t)samples[j][i] * matrix[j][0];
-                 v1 += (int64_t)samples[j][i] * matrix[j][1];
++                v0 += (int64_t)samples[j][i] * matrix[0][j];
++                v1 += (int64_t)samples[j][i] * matrix[1][j];
 +            }
 +            samples[0][i] = (v0+2048)>>12;
 +            samples[1][i] = (v1+2048)>>12;
 +        }
 +    } else if (out_ch == 1) {
 +        for (i = 0; i < len; i++) {
 +            v0 = 0;
 +            for (j = 0; j < in_ch; j++)
-                 v0 += (int64_t)samples[j][i] * matrix[j][0];
++                v0 += (int64_t)samples[j][i] * matrix[0][j];
 +            samples[0][i] = (v0+2048)>>12;
 +        }
 +    }
 +}
 +
  static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                   const int16_t *window, unsigned int len)
  {
diff --cc libavcodec/ac3dsp.h
index ed98c8c,cdce21a..b4de307
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@@ -126,18 -126,9 +126,18 @@@ typedef struct AC3DSPContext 
  
      void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs);
  
 +    void (*sum_square_butterfly_int32)(int64_t sum[4], const int32_t *coef0,
 +                                       const int32_t *coef1, int len);
 +
 +    void (*sum_square_butterfly_float)(float sum[4], const float *coef0,
 +                                       const float *coef1, int len);
 +
-     void (*downmix)(float **samples, float (*matrix)[2], int out_ch,
+     void (*downmix)(float **samples, float **matrix, int out_ch,
                      int in_ch, int len);
  
-     void (*downmix_fixed)(int32_t **samples, int16_t (*matrix)[2], int out_ch,
++    void (*downmix_fixed)(int32_t **samples, int16_t **matrix, int out_ch,
 +                          int in_ch, int len);
 +
      /**
       * Apply symmetric window in 16-bit fixed-point.
       * @param output destination array
diff --cc libavcodec/x86/ac3dsp_init.c
index 9fd0aef,9036389..edb6c60
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@@ -154,17 -149,17 +154,17 @@@ static void ac3_downmix_sse(float **sam
  
      i = -len * sizeof(float);
      if (in_ch == 5 && out_ch == 2 &&
-         !(matrix_cmp[0][1] | matrix_cmp[2][0]   |
-           matrix_cmp[3][1] | matrix_cmp[4][0]   |
-           (matrix_cmp[1][0] ^ matrix_cmp[1][1]) |
-           (matrix_cmp[0][0] ^ matrix_cmp[2][1]))) {
+         !(matrix_cmp[1][0] | matrix_cmp[0][2]   |
+           matrix_cmp[1][3] | matrix_cmp[0][4]   |
+           (matrix_cmp[0][1] ^ matrix_cmp[1][1]) |
+           (matrix_cmp[0][0] ^ matrix_cmp[1][2]))) {
          MIX5(IF0, IF1);
      } else if (in_ch == 5 && out_ch == 1 &&
-                matrix_cmp[0][0] == matrix_cmp[2][0] &&
-                matrix_cmp[3][0] == matrix_cmp[4][0]) {
+                matrix_cmp[0][0] == matrix_cmp[0][2] &&
+                matrix_cmp[0][3] == matrix_cmp[0][4]) {
          MIX5(IF1, IF0);
      } else {
 -        DECLARE_ALIGNED(16, float, matrix_simd)[AC3_MAX_CHANNELS][2][4];
 +        LOCAL_ALIGNED(16, float, matrix_simd, [AC3_MAX_CHANNELS], [2][4]);
          float *samp[AC3_MAX_CHANNELS];
  
          for (j = 0; j < in_ch; j++)



More information about the ffmpeg-cvslog mailing list