[FFmpeg-devel] [PATCH] avcodec/takdec: add x86 SIMD for rest of decorrelation modes
Paul B Mahol
onemda at gmail.com
Tue Oct 6 01:04:24 CEST 2015
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
libavcodec/Makefile | 2 +-
libavcodec/takdec.c | 44 ++++++++----------
libavcodec/takdsp.c | 82 +++++++++++++++++++++++++++++++++
libavcodec/takdsp.h | 34 ++++++++++++++
libavcodec/x86/Makefile | 2 +
libavcodec/x86/takdsp.asm | 105 +++++++++++++++++++++++++++++++++++++++++++
libavcodec/x86/takdsp_init.c | 45 +++++++++++++++++++
7 files changed, 288 insertions(+), 26 deletions(-)
create mode 100644 libavcodec/takdsp.c
create mode 100644 libavcodec/takdsp.h
create mode 100644 libavcodec/x86/takdsp.asm
create mode 100644 libavcodec/x86/takdsp_init.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 9075077..60491ce 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -491,7 +491,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o \
h263.o ituh263enc.o
OBJS-$(CONFIG_SVQ3_DECODER) += svq3.o svq13.o mpegutils.o
OBJS-$(CONFIG_TEXT_DECODER) += textdec.o ass.o
-OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o
+OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o takdsp.o
OBJS-$(CONFIG_TARGA_DECODER) += targa.o
OBJS-$(CONFIG_TARGA_ENCODER) += targaenc.o rle.o
OBJS-$(CONFIG_TARGA_Y216_DECODER) += targa_y216dec.o
diff --git a/libavcodec/takdec.c b/libavcodec/takdec.c
index 5395596..e5c0723 100644
--- a/libavcodec/takdec.c
+++ b/libavcodec/takdec.c
@@ -28,6 +28,7 @@
#include "libavutil/internal.h"
#include "libavutil/samplefmt.h"
#include "tak.h"
+#include "takdsp.h"
#include "audiodsp.h"
#include "thread.h"
#include "avcodec.h"
@@ -47,6 +48,7 @@ typedef struct MCDParam {
typedef struct TAKDecContext {
AVCodecContext *avctx; ///< parent AVCodecContext
AudioDSPContext adsp;
+ TAKDSPContext tdsp;
TAKStreamInfo ti;
GetBitContext gb; ///< bitstream reader initialized to start at the current frame
@@ -172,6 +174,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx)
TAKDecContext *s = avctx->priv_data;
ff_audiodsp_init(&s->adsp);
+ ff_takdsp_init(&s->tdsp);
s->avctx = avctx;
avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
@@ -541,46 +544,32 @@ static int decode_channel(TAKDecContext *s, int chan)
static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
{
GetBitContext *gb = &s->gb;
- int32_t *p1 = s->decoded[c1] + 1;
- int32_t *p2 = s->decoded[c2] + 1;
+ int32_t *p1 = s->decoded[c1] + (s->dmode > 5);
+ int32_t *p2 = s->decoded[c2] + (s->dmode > 5);
+ int32_t bp1 = p1[0];
+ int32_t bp2 = p2[0];
int i;
int dshift, dfactor;
+ length += s->dmode < 6;
+
switch (s->dmode) {
case 1: /* left/side */
- for (i = 0; i < length; i++) {
- int32_t a = p1[i];
- int32_t b = p2[i];
- p2[i] = a + b;
- }
+ s->tdsp.decorrelate_ls(p1, p2, length);
break;
case 2: /* side/right */
- for (i = 0; i < length; i++) {
- int32_t a = p1[i];
- int32_t b = p2[i];
- p1[i] = b - a;
- }
+ s->tdsp.decorrelate_sr(p1, p2, length);
break;
case 3: /* side/mid */
- for (i = 0; i < length; i++) {
- int32_t a = p1[i];
- int32_t b = p2[i];
- a -= b >> 1;
- p1[i] = a;
- p2[i] = a + b;
- }
+ s->tdsp.decorrelate_sm(p1, p2, length);
break;
case 4: /* side/left with scale factor */
FFSWAP(int32_t*, p1, p2);
+ FFSWAP(int32_t, bp1, bp2);
case 5: /* side/right with scale factor */
dshift = get_bits_esc4(gb);
dfactor = get_sbits(gb, 10);
- for (i = 0; i < length; i++) {
- int32_t a = p1[i];
- int32_t b = p2[i];
- b = dfactor * (b >> dshift) + 128 >> 8 << dshift;
- p1[i] = b - a;
- }
+ s->tdsp.decorrelate_sf(p1, p2, length, dshift, dfactor);
break;
case 6:
FFSWAP(int32_t*, p1, p2);
@@ -664,6 +653,11 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
}
}
+ if (s->dmode > 0 && s->dmode < 6) {
+ p1[0] = bp1;
+ p2[0] = bp2;
+ }
+
return 0;
}
diff --git a/libavcodec/takdsp.c b/libavcodec/takdsp.c
new file mode 100644
index 0000000..2441c2b
--- /dev/null
+++ b/libavcodec/takdsp.c
@@ -0,0 +1,82 @@
+/*
+ * TAK decoder
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "takdsp.h"
+#include "config.h"
+
+/**
+ * Left/side decorrelation: replace the second channel by the sum of both.
+ *
+ * For every i in [0, length), p2[i] becomes p1[i] + p2[i]; p1 is only read.
+ *
+ * Sample values are not range-checked here, so the addition is carried out
+ * on unsigned operands: an out-of-range sum wraps around instead of being
+ * signed-overflow undefined behaviour.
+ *
+ * @param p1     first channel (read only by this routine)
+ * @param p2     second channel, updated in place
+ * @param length number of samples to process; nothing is done if <= 0
+ */
+static void decorrelate_ls(int32_t *p1, int32_t *p2, int length)
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        int32_t a = p1[i];
+        int32_t b = p2[i];
+        p2[i]     = a + (unsigned)b;
+    }
+}
+
+/**
+ * Side/right decorrelation: replace the first channel by (second - first).
+ *
+ * For every i in [0, length), p1[i] becomes p2[i] - p1[i]; p2 is only read.
+ *
+ * Sample values are not range-checked here, so the subtraction is carried
+ * out on unsigned operands: an out-of-range difference wraps around instead
+ * of being signed-overflow undefined behaviour.
+ *
+ * @param p1     first channel, updated in place
+ * @param p2     second channel (read only by this routine)
+ * @param length number of samples to process; nothing is done if <= 0
+ */
+static void decorrelate_sr(int32_t *p1, int32_t *p2, int length)
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        int32_t a = p1[i];
+        int32_t b = p2[i];
+        p1[i]     = b - (unsigned)a;
+    }
+}
+
+/**
+ * Side/mid decorrelation, updating both channels in place.
+ *
+ * For every i in [0, length):
+ *   p1[i] = p1[i] - (p2[i] >> 1)
+ *   p2[i] = (new p1[i]) + p2[i]
+ *
+ * The halving of p2[i] stays a shift of the signed value. The subtraction
+ * and addition use an unsigned accumulator so that out-of-range results
+ * wrap around instead of being signed-overflow undefined behaviour.
+ *
+ * @param p1     first channel, updated in place
+ * @param p2     second channel, updated in place
+ * @param length number of samples to process; nothing is done if <= 0
+ */
+static void decorrelate_sm(int32_t *p1, int32_t *p2, int length)
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        unsigned a = p1[i];
+        int32_t  b = p2[i];
+        a         -= (unsigned)(b >> 1);
+        p1[i]      = a;
+        p2[i]      = a + (unsigned)b;
+    }
+}
+
+/**
+ * Scale-factor decorrelation: rebuild p1 from a scaled copy of p2.
+ *
+ * For every i in [0, length):
+ *   scaled = ((dfactor * (p2[i] >> dshift) + 128) >> 8) << dshift
+ *   p1[i]  = scaled - p1[i]
+ *
+ * The two right shifts operate on signed values. The multiply, the +128
+ * rounding term, the final left shift and the subtraction are done on
+ * unsigned operands: overflow there wraps around, and a negative
+ * intermediate is never left-shifted as a signed value (both would be
+ * undefined behaviour).
+ *
+ * @param p1      channel updated in place
+ * @param p2      channel that is scaled (read only by this routine)
+ * @param length  number of samples to process; nothing is done if <= 0
+ * @param dshift  shift applied before and undone after the multiplication
+ * @param dfactor signed scale factor; the product is divided by 256
+ */
+static void decorrelate_sf(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor)
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        int32_t a = p1[i];
+        int32_t b = p2[i];
+        b         = (unsigned)((int)(dfactor * (unsigned)(b >> dshift) + 128) >> 8) << dshift;
+        p1[i]     = b - (unsigned)a;
+    }
+}
+
+/**
+ * Populate a TAKDSPContext with the portable C decorrelation routines and,
+ * when ARCH_X86 is set, hand it to ff_takdsp_init_x86() so that it can
+ * substitute its own implementations.
+ *
+ * @param c context to fill in
+ */
+av_cold void ff_takdsp_init(TAKDSPContext *c)
+{
+    *c = (TAKDSPContext) {
+        .decorrelate_ls = decorrelate_ls,
+        .decorrelate_sr = decorrelate_sr,
+        .decorrelate_sm = decorrelate_sm,
+        .decorrelate_sf = decorrelate_sf,
+    };
+
+    if (ARCH_X86)
+        ff_takdsp_init_x86(c);
+}
diff --git a/libavcodec/takdsp.h b/libavcodec/takdsp.h
new file mode 100644
index 0000000..c05b574
--- /dev/null
+++ b/libavcodec/takdsp.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_TAKDSP_H
+#define AVCODEC_TAKDSP_H
+
+#include <stdint.h>
+
+/**
+ * Table of stereo decorrelation routines used by the TAK decoder.
+ *
+ * Each routine processes the first @c length samples of the two channel
+ * buffers p1 and p2. The formulas below are those of the C implementations
+ * in takdsp.c.
+ */
+typedef struct TAKDSPContext {
+    /** Left/side: p2[i] = p1[i] + p2[i]. */
+    void (*decorrelate_ls)(int32_t *p1, int32_t *p2, int length);
+    /** Side/right: p1[i] = p2[i] - p1[i]. */
+    void (*decorrelate_sr)(int32_t *p1, int32_t *p2, int length);
+    /** Side/mid: p1[i] -= p2[i] >> 1, then p2[i] += the updated p1[i]. */
+    void (*decorrelate_sm)(int32_t *p1, int32_t *p2, int length);
+    /**
+     * Scale factor:
+     * p1[i] = (((dfactor * (p2[i] >> dshift) + 128) >> 8) << dshift) - p1[i].
+     */
+    void (*decorrelate_sf)(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor);
+} TAKDSPContext;
+
+/** Fill @p c with the C routines, then apply x86 overrides when ARCH_X86 is set. */
+void ff_takdsp_init(TAKDSPContext *c);
+/** Replace entries of @p c with x86 assembly versions selected from the CPU flags. */
+void ff_takdsp_init_x86(TAKDSPContext *c);
+
+#endif /* AVCODEC_TAKDSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 5ff3a77..7d6ce8a 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -55,6 +55,7 @@ OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o
OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
+OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
@@ -150,6 +151,7 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
+YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
diff --git a/libavcodec/x86/takdsp.asm b/libavcodec/x86/takdsp.asm
new file mode 100644
index 0000000..bc881bf
--- /dev/null
+++ b/libavcodec/x86/takdsp.asm
@@ -0,0 +1,105 @@
+;******************************************************************************
+;* TAK DSP SIMD optimizations
+;*
+;* Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+; Four dwords of 128: the term added in tak_decorrelate_sf before the shift by 8.
+pd_128: times 4 dd 128
+
+SECTION .text
+
+INIT_XMM sse2
+; Left/side decorrelation: p2[i] = p1[i] + p2[i] (dword adds), p1 is only read.
+; Each iteration handles two vector registers (mmsize*2 bytes) per buffer and
+; decrements length by mmsize/2 samples; the loop body runs at least once.
+; NOTE(review): mova is presumably the aligned move from x86inc, and whole
+; mmsize*2-byte groups are always read/written, so both buffers are assumed
+; to be suitably aligned and padded beyond length -- confirm against callers.
+cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
+.loop:
+    mova       m0, [p1q+mmsize*0]      ; load two vectors of p1
+    mova       m1, [p1q+mmsize*1]
+    paddd      m0, [p2q+mmsize*0]      ; add the matching p2 samples
+    paddd      m1, [p2q+mmsize*1]
+    mova       [p2q+mmsize*0], m0      ; result goes back to p2
+    mova       [p2q+mmsize*1], m1
+    add        p1q, mmsize*2           ; advance both pointers
+    add        p2q, mmsize*2
+    sub        lengthd, mmsize/2       ; samples consumed this iteration
+    jg .loop                           ; continue while length > 0
+    REP_RET
+
+; Side/right decorrelation: p1[i] = p2[i] - p1[i] (dword subtracts), p2 is
+; only read. Each iteration handles two vector registers (mmsize*2 bytes) per
+; buffer and decrements length by mmsize/2 samples; the body runs at least once.
+; NOTE(review): mova is presumably the aligned move from x86inc, and whole
+; mmsize*2-byte groups are always read/written, so both buffers are assumed
+; to be suitably aligned and padded beyond length -- confirm against callers.
+cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
+.loop:
+    mova       m0, [p2q+mmsize*0]      ; load two vectors of p2
+    mova       m1, [p2q+mmsize*1]
+    psubd      m0, [p1q+mmsize*0]      ; p2 - p1
+    psubd      m1, [p1q+mmsize*1]
+    mova       [p1q+mmsize*0], m0      ; result goes back to p1
+    mova       [p1q+mmsize*1], m1
+    add        p1q, mmsize*2           ; advance both pointers
+    add        p2q, mmsize*2
+    sub        lengthd, mmsize/2       ; samples consumed this iteration
+    jg .loop                           ; continue while length > 0
+    REP_RET
+
+; Side/mid decorrelation, both buffers updated in place:
+;   p1[i] = p1[i] - (p2[i] >> 1)
+;   p2[i] = p2[i] + (new p1[i])
+; The samples are signed 32-bit values, so the halving must be an arithmetic
+; shift (psrad); a logical shift (psrld) gives wrong results for negative p2
+; and disagrees with the C implementation.
+; Each iteration handles two vector registers (mmsize*2 bytes) per buffer and
+; decrements length by mmsize/2 samples; the loop body runs at least once.
+; NOTE(review): mova is presumably the aligned move from x86inc, and whole
+; mmsize*2-byte groups are always read/written, so both buffers are assumed
+; to be suitably aligned and padded beyond length -- confirm against callers.
+cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
+.loop:
+    mova       m0, [p1q]               ; a  (first vector)
+    mova       m1, [p2q]               ; b
+    mova       m3, [p1q+mmsize]        ; a  (second vector)
+    mova       m4, [p2q+mmsize]        ; b
+    mova       m2, m1
+    mova       m5, m4
+    psrad      m2, 1                   ; b >> 1, sign-preserving
+    psrad      m5, 1
+    psubd      m0, m2                  ; a -= b >> 1
+    psubd      m3, m5
+    paddd      m1, m0                  ; b += updated a
+    paddd      m4, m3
+    mova       [p1q], m0
+    mova       [p2q], m1
+    mova       [p1q+mmsize], m3
+    mova       [p2q+mmsize], m4
+    add        p1q, mmsize*2           ; advance both pointers
+    add        p2q, mmsize*2
+    sub        lengthd, mmsize/2       ; samples consumed this iteration
+    jg .loop                           ; continue while length > 0
+    REP_RET
+
+INIT_XMM sse4
+; Scale-factor decorrelation, p1 updated in place, p2 only read:
+;   p1[i] = (((dfactor * (p2[i] >> dshift) + 128) >> 8) << dshift) - p1[i]
+; Both right shifts act on signed 32-bit values, so they must be arithmetic
+; (psrad); logical shifts (psrld) give wrong results for negative
+; intermediates and disagree with the C implementation.
+; One vector register (mmsize bytes, mmsize/4 samples) is handled per
+; iteration; the loop body runs at least once.
+; NOTE(review): mova is presumably the aligned move from x86inc, and whole
+; mmsize-byte groups are always read/written, so both buffers are assumed to
+; be suitably aligned and padded beyond length -- confirm against callers.
+cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
+    movd       m2, dshiftm             ; shift count for psrad/pslld
+    movd       m3, dfactorm
+    pshufd     m3, m3, 0               ; broadcast dfactor to every dword
+    mova       m4, [pd_128]            ; +128 term added before the >> 8
+
+.loop:
+    mova       m0, [p1q]               ; a
+    mova       m1, [p2q]               ; b
+    psrad      m1, m2                  ; b >> dshift, sign-preserving
+    pmulld     m1, m3                  ; * dfactor (low 32 bits)
+    paddd      m1, m4                  ; + 128
+    psrad      m1, 8                   ; >> 8, sign-preserving
+    pslld      m1, m2                  ; << dshift
+    psubd      m1, m0                  ; scaled b - a
+    mova       [p1q], m1
+    add        p1q, mmsize             ; advance both pointers
+    add        p2q, mmsize
+    sub        lengthd, mmsize/4       ; samples consumed this iteration
+    jg .loop                           ; continue while length > 0
+    REP_RET
diff --git a/libavcodec/x86/takdsp_init.c b/libavcodec/x86/takdsp_init.c
new file mode 100644
index 0000000..555d064
--- /dev/null
+++ b/libavcodec/x86/takdsp_init.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/takdsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+/*
+ * Prototypes of the external assembly routines installed below; their
+ * parameter lists match the corresponding TAKDSPContext members.
+ * NOTE(review): presumably these are the cglobal tak_decorrelate_* functions
+ * of x86/takdsp.asm with the ff_ prefix and ISA suffix added by the assembly
+ * macros -- confirm.
+ */
+void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor);
+
+/**
+ * Override entries of a TAKDSPContext with external x86 assembly routines,
+ * chosen from the flags reported by av_get_cpu_flags().
+ *
+ * With SSE2 available, the ls, sr and sm routines are replaced; with SSE4
+ * available, the sf routine is replaced. When HAVE_YASM is 0 the function
+ * leaves @p c untouched.
+ *
+ * @param c context whose function pointers may be replaced
+ */
+av_cold void ff_takdsp_init_x86(TAKDSPContext *c)
+{
+#if HAVE_YASM
+    const int flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE2(flags)) {
+        c->decorrelate_ls = ff_tak_decorrelate_ls_sse2;
+        c->decorrelate_sr = ff_tak_decorrelate_sr_sse2;
+        c->decorrelate_sm = ff_tak_decorrelate_sm_sse2;
+    }
+
+    if (EXTERNAL_SSE4(flags))
+        c->decorrelate_sf = ff_tak_decorrelate_sf_sse4;
+#endif
+}
--
1.9.1
More information about the ffmpeg-devel
mailing list