[FFmpeg-devel] [PATCH] avcodec/takdec: add x86 SIMD for rest of decorrelation modes

Paul B Mahol onemda at gmail.com
Tue Oct 6 01:04:24 CEST 2015


Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
 libavcodec/Makefile          |   2 +-
 libavcodec/takdec.c          |  44 ++++++++----------
 libavcodec/takdsp.c          |  82 +++++++++++++++++++++++++++++++++
 libavcodec/takdsp.h          |  34 ++++++++++++++
 libavcodec/x86/Makefile      |   2 +
 libavcodec/x86/takdsp.asm    | 105 +++++++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/takdsp_init.c |  45 +++++++++++++++++++
 7 files changed, 288 insertions(+), 26 deletions(-)
 create mode 100644 libavcodec/takdsp.c
 create mode 100644 libavcodec/takdsp.h
 create mode 100644 libavcodec/x86/takdsp.asm
 create mode 100644 libavcodec/x86/takdsp_init.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 9075077..60491ce 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -491,7 +491,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o    \
                                           h263.o ituh263enc.o
 OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o mpegutils.o
 OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
-OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o
+OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
 OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
 OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
 OBJS-$(CONFIG_TARGA_Y216_DECODER)      += targa_y216dec.o
diff --git a/libavcodec/takdec.c b/libavcodec/takdec.c
index 5395596..e5c0723 100644
--- a/libavcodec/takdec.c
+++ b/libavcodec/takdec.c
@@ -28,6 +28,7 @@
 #include "libavutil/internal.h"
 #include "libavutil/samplefmt.h"
 #include "tak.h"
+#include "takdsp.h"
 #include "audiodsp.h"
 #include "thread.h"
 #include "avcodec.h"
@@ -47,6 +48,7 @@ typedef struct MCDParam {
 typedef struct TAKDecContext {
     AVCodecContext *avctx;                          ///< parent AVCodecContext
     AudioDSPContext adsp;
+    TAKDSPContext   tdsp;
     TAKStreamInfo   ti;
     GetBitContext   gb;                             ///< bitstream reader initialized to start at the current frame
 
@@ -172,6 +174,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx)
     TAKDecContext *s = avctx->priv_data;
 
     ff_audiodsp_init(&s->adsp);
+    ff_takdsp_init(&s->tdsp);
 
     s->avctx = avctx;
     avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
@@ -541,46 +544,32 @@ static int decode_channel(TAKDecContext *s, int chan)
 static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
 {
     GetBitContext *gb = &s->gb;
-    int32_t *p1       = s->decoded[c1] + 1;
-    int32_t *p2       = s->decoded[c2] + 1;
+    int32_t *p1       = s->decoded[c1] + (s->dmode > 5);
+    int32_t *p2       = s->decoded[c2] + (s->dmode > 5);
+    int32_t bp1       = p1[0];
+    int32_t bp2       = p2[0];
     int i;
     int dshift, dfactor;
 
+    length += s->dmode < 6;
+
     switch (s->dmode) {
     case 1: /* left/side */
-        for (i = 0; i < length; i++) {
-            int32_t a = p1[i];
-            int32_t b = p2[i];
-            p2[i]     = a + b;
-        }
+        s->tdsp.decorrelate_ls(p1, p2, length);
         break;
     case 2: /* side/right */
-        for (i = 0; i < length; i++) {
-            int32_t a = p1[i];
-            int32_t b = p2[i];
-            p1[i]     = b - a;
-        }
+        s->tdsp.decorrelate_sr(p1, p2, length);
         break;
     case 3: /* side/mid */
-        for (i = 0; i < length; i++) {
-            int32_t a = p1[i];
-            int32_t b = p2[i];
-            a        -= b >> 1;
-            p1[i]     = a;
-            p2[i]     = a + b;
-        }
+        s->tdsp.decorrelate_sm(p1, p2, length);
         break;
     case 4: /* side/left with scale factor */
         FFSWAP(int32_t*, p1, p2);
+        FFSWAP(int32_t, bp1, bp2);
     case 5: /* side/right with scale factor */
         dshift  = get_bits_esc4(gb);
         dfactor = get_sbits(gb, 10);
-        for (i = 0; i < length; i++) {
-            int32_t a = p1[i];
-            int32_t b = p2[i];
-            b         = dfactor * (b >> dshift) + 128 >> 8 << dshift;
-            p1[i]     = b - a;
-        }
+        s->tdsp.decorrelate_sf(p1, p2, length, dshift, dfactor);
         break;
     case 6:
         FFSWAP(int32_t*, p1, p2);
@@ -664,6 +653,11 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
     }
     }
 
+    if (s->dmode > 0 && s->dmode < 6) {
+        p1[0] = bp1;
+        p2[0] = bp2;
+    }
+
     return 0;
 }
 
diff --git a/libavcodec/takdsp.c b/libavcodec/takdsp.c
new file mode 100644
index 0000000..2441c2b
--- /dev/null
+++ b/libavcodec/takdsp.c
@@ -0,0 +1,82 @@
+/*
+ * TAK decoder
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "takdsp.h"
+#include "config.h"
+
+static void decorrelate_ls(int32_t *p1, int32_t *p2, int length) /* dmode 1, left/side */
+{
+    int i;
+    /* p2[i] += p1[i] for i in [0, length); p1 is only read. */
+    for (i = 0; i < length; i++) {
+        uint32_t a = p1[i]; /* unsigned so the sum wraps instead of hitting signed-overflow UB */
+        uint32_t b = p2[i];
+        p2[i]     = a + b;
+    }
+}
+
+static void decorrelate_sr(int32_t *p1, int32_t *p2, int length) /* dmode 2, side/right */
+{
+    int i;
+    /* p1[i] = p2[i] - p1[i] for i in [0, length); p2 is only read. */
+    for (i = 0; i < length; i++) {
+        uint32_t a = p1[i]; /* unsigned so the difference wraps instead of hitting signed-overflow UB */
+        uint32_t b = p2[i];
+        p1[i]     = b - a;
+    }
+}
+
+static void decorrelate_sm(int32_t *p1, int32_t *p2, int length) /* dmode 3, side/mid */
+{
+    int i;
+    /* For i in [0, length): a = p1[i] - (p2[i] >> 1); p1[i] = a; p2[i] += a. */
+    for (i = 0; i < length; i++) {
+        uint32_t a = p1[i]; /* unsigned accumulator: wraps instead of signed-overflow UB */
+        int32_t b = p2[i];  /* stays signed so that b >> 1 keeps the sign */
+        a        -= b >> 1;
+        p1[i]     = a;
+        p2[i]     = a + b;
+    }
+}
+
+static void decorrelate_sf(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor)
+{
+    int i;
+    /* dmodes 4/5: p1[i] = (((p2[i] >> dshift) * dfactor + 128) >> 8 << dshift) - p1[i]; p2 is only read. */
+    for (i = 0; i < length; i++) {
+        uint32_t a = p1[i]; /* unsigned so the final subtraction wraps instead of overflowing */
+        int32_t b = p2[i];
+        b         = (unsigned)((int)(dfactor * (unsigned)(b >> dshift) + 128) >> 8) << dshift; /* multiply, add and left shift done unsigned (no UB); both right shifts stay signed */
+        p1[i]     = b - a;
+    }
+}
+
+av_cold void ff_takdsp_init(TAKDSPContext *c) /* fill every function pointer of c */
+{
+    c->decorrelate_ls = decorrelate_ls; /* start with the C implementations from this file */
+    c->decorrelate_sr = decorrelate_sr;
+    c->decorrelate_sm = decorrelate_sm;
+    c->decorrelate_sf = decorrelate_sf;
+    /* On x86 builds the arch-specific init may then replace some of them. */
+    if (ARCH_X86)
+        ff_takdsp_init_x86(c);
+}
diff --git a/libavcodec/takdsp.h b/libavcodec/takdsp.h
new file mode 100644
index 0000000..c05b574
--- /dev/null
+++ b/libavcodec/takdsp.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_TAKDSP_H
+#define AVCODEC_TAKDSP_H
+
+#include <stdint.h>
+
+typedef struct TAKDSPContext { /* decorrelation routines called by takdec.c decorrelate() */
+    void (*decorrelate_ls)(int32_t *p1, int32_t *p2, int length); /* dmode 1: p2[i] += p1[i] */
+    void (*decorrelate_sr)(int32_t *p1, int32_t *p2, int length); /* dmode 2: p1[i] = p2[i] - p1[i] */
+    void (*decorrelate_sm)(int32_t *p1, int32_t *p2, int length); /* dmode 3: updates both p1 and p2 */
+    void (*decorrelate_sf)(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor); /* dmodes 4/5: scaled, writes p1 */
+} TAKDSPContext;
+
+void ff_takdsp_init(TAKDSPContext *c);
+void ff_takdsp_init_x86(TAKDSPContext *c);
+
+#endif /* AVCODEC_TAKDSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 5ff3a77..7d6ce8a 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -55,6 +55,7 @@ OBJS-$(CONFIG_PRORES_DECODER)          += x86/proresdsp_init.o
 OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp_init.o
 OBJS-$(CONFIG_RV40_DECODER)            += x86/rv40dsp_init.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
+OBJS-$(CONFIG_TAK_DECODER)             += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp_init.o
 OBJS-$(CONFIG_TTA_DECODER)             += x86/ttadsp_init.o
 OBJS-$(CONFIG_V210_DECODER)            += x86/v210-init.o
@@ -150,6 +151,7 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv40dsp.o
 YASM-OBJS-$(CONFIG_SVQ1_ENCODER)       += x86/svq1enc.o
+YASM-OBJS-$(CONFIG_TAK_DECODER)        += x86/takdsp.o
 YASM-OBJS-$(CONFIG_TRUEHD_DECODER)     += x86/mlpdsp.o
 YASM-OBJS-$(CONFIG_TTA_DECODER)        += x86/ttadsp.o
 YASM-OBJS-$(CONFIG_V210_ENCODER)       += x86/v210enc.o
diff --git a/libavcodec/x86/takdsp.asm b/libavcodec/x86/takdsp.asm
new file mode 100644
index 0000000..bc881bf
--- /dev/null
+++ b/libavcodec/x86/takdsp.asm
@@ -0,0 +1,105 @@
+;******************************************************************************
+;* TAK DSP SIMD optimizations
+;*
+;* Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pd_128: times 4 dd 128
+
+SECTION .text
+
+INIT_XMM sse2
+cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
+    .loop:                                  ; p2[i] += p1[i], 8 int32 samples (two XMM vectors) per pass
+    mova                 m0, [p1q+mmsize*0] ; aligned loads: p1/p2 must be 16-byte aligned
+    mova                 m1, [p1q+mmsize*1]
+    paddd                m0, [p2q+mmsize*0]
+    paddd                m1, [p2q+mmsize*1]
+    mova     [p2q+mmsize*0], m0
+    mova     [p2q+mmsize*1], m1
+    add                 p1q, mmsize*2
+    add                 p2q, mmsize*2
+    sub             lengthd, mmsize/2       ; 2*mmsize bytes done = mmsize/2 samples of 4 bytes
+    jg .loop                                ; body runs at least once; a partial tail still does 8 samples, so buffers need padding
+    REP_RET
+
+cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
+    .loop:                                  ; p1[i] = p2[i] - p1[i], 8 int32 samples (two XMM vectors) per pass
+    mova                 m0, [p2q+mmsize*0] ; aligned loads: p1/p2 must be 16-byte aligned
+    mova                 m1, [p2q+mmsize*1]
+    psubd                m0, [p1q+mmsize*0]
+    psubd                m1, [p1q+mmsize*1]
+    mova     [p1q+mmsize*0], m0
+    mova     [p1q+mmsize*1], m1
+    add                 p1q, mmsize*2
+    add                 p2q, mmsize*2
+    sub             lengthd, mmsize/2       ; 2*mmsize bytes done = mmsize/2 samples of 4 bytes
+    jg .loop                                ; body runs at least once; a partial tail still does 8 samples, so buffers need padding
+    REP_RET
+
+cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
+    .loop:                                  ; a = p1[i] - (p2[i] >> 1); p1[i] = a; p2[i] += a; 8 samples per pass
+    mova                 m0, [p1q]          ; aligned loads: p1/p2 must be 16-byte aligned
+    mova                 m1, [p2q]
+    mova                 m3, [p1q+mmsize]
+    mova                 m4, [p2q+mmsize]
+    mova                 m2, m1
+    mova                 m5, m4
+    psrad                m2, 1              ; arithmetic shift: samples are signed, matches the C reference b >> 1
+    psrad                m5, 1
+    psubd                m0, m2
+    psubd                m3, m5
+    paddd                m1, m0
+    paddd                m4, m3
+    mova              [p1q], m0
+    mova              [p2q], m1
+    mova       [p1q+mmsize], m3
+    mova       [p2q+mmsize], m4
+    add                 p1q, mmsize*2
+    add                 p2q, mmsize*2
+    sub             lengthd, mmsize/2       ; 2*mmsize bytes done = mmsize/2 samples of 4 bytes
+    jg .loop                                ; body runs at least once; a partial tail still does 8 samples
+    REP_RET
+
+INIT_XMM sse4
+cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
+    movd                 m2, dshiftm        ; m2 = shift count for the variable shifts below
+    movd                 m3, dfactorm
+    pshufd               m3, m3, 0          ; broadcast dfactor to all four lanes
+    mova                 m4, [pd_128]       ; rounding term added before the >> 8
+
+    .loop:                                  ; p1[i] = (((p2[i] >> dshift) * dfactor + 128) >> 8 << dshift) - p1[i]
+    mova                 m0, [p1q]          ; aligned loads: p1/p2 must be 16-byte aligned
+    mova                 m1, [p2q]
+    psrad                m1, m2             ; arithmetic shift: samples are signed, matches the C reference b >> dshift
+    pmulld               m1, m3             ; low 32 bits of the product (SSE4.1)
+    paddd                m1, m4
+    psrad                m1, 8              ; arithmetic again: the scaled value can be negative
+    pslld                m1, m2
+    psubd                m1, m0
+    mova              [p1q], m1
+    add                 p1q, mmsize
+    add                 p2q, mmsize
+    sub             lengthd, mmsize/4       ; mmsize bytes done = mmsize/4 samples of 4 bytes
+    jg .loop                                ; body runs at least once; a partial tail still does 4 samples
+    REP_RET
diff --git a/libavcodec/x86/takdsp_init.c b/libavcodec/x86/takdsp_init.c
new file mode 100644
index 0000000..555d064
--- /dev/null
+++ b/libavcodec/x86/takdsp_init.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/takdsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor);
+
+av_cold void ff_takdsp_init_x86(TAKDSPContext *c) /* replace entries of c with x86 SIMD versions */
+{
+#if HAVE_YASM /* without the assembler this function leaves c untouched */
+    int cpu_flags = av_get_cpu_flags();
+    /* ls, sr and sm are provided as SSE2 routines. */
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->decorrelate_ls = ff_tak_decorrelate_ls_sse2;
+        c->decorrelate_sr = ff_tak_decorrelate_sr_sse2;
+        c->decorrelate_sm = ff_tak_decorrelate_sm_sse2;
+    }
+    /* sf only has an SSE4 routine; if that check fails, c->decorrelate_sf is not modified. */
+    if (EXTERNAL_SSE4(cpu_flags)) {
+        c->decorrelate_sf = ff_tak_decorrelate_sf_sse4;
+    }
+#endif
+}
-- 
1.9.1



More information about the ffmpeg-devel mailing list