[FFmpeg-devel] [PATCH] x86/ttadsp: add ff_ttafilter_process_enc_{ssse3, sse4}
James Almer
jamrial at gmail.com
Mon Aug 1 04:27:46 EEST 2016
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavcodec/Makefile | 2 +-
libavcodec/ttadsp.c | 41 ++++++++++++++++++++++++++++++++++++-----
libavcodec/ttadsp.h | 3 +++
libavcodec/ttaenc.c | 38 ++++++--------------------------------
libavcodec/x86/Makefile | 2 ++
libavcodec/x86/ttadsp.asm | 24 ++++++++++++++++--------
libavcodec/x86/ttadsp_init.c | 25 +++++++++++++++++++------
7 files changed, 83 insertions(+), 52 deletions(-)
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 33ac2b3..4355c13 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -552,7 +552,7 @@ OBJS-$(CONFIG_TRUESPEECH_DECODER) += truespeech.o
OBJS-$(CONFIG_TSCC_DECODER) += tscc.o msrledec.o
OBJS-$(CONFIG_TSCC2_DECODER) += tscc2.o
OBJS-$(CONFIG_TTA_DECODER) += tta.o ttadata.o ttadsp.o
-OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttadata.o
+OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttadata.o ttadsp.o
OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o
OBJS-$(CONFIG_TXD_DECODER) += txd.o
OBJS-$(CONFIG_ULTI_DECODER) += ulti.o
diff --git a/libavcodec/ttadsp.c b/libavcodec/ttadsp.c
index 30b7ab9..32a87b2 100644
--- a/libavcodec/ttadsp.c
+++ b/libavcodec/ttadsp.c
@@ -18,9 +18,10 @@
#include "ttadsp.h"
-static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl,
- int32_t *error, int32_t *in, int32_t shift,
- int32_t round) {
+static inline void ttafilter_process(int32_t *qm, int32_t *dx, int32_t *dl,
+ int32_t *error, int32_t *in, int32_t shift,
+ int32_t round, int enc)
+{
if (*error < 0) {
qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3];
qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7];
@@ -40,17 +41,47 @@ static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl,
dx[6] = ((dl[6] >> 30) | 2) & ~1;
dx[7] = ((dl[7] >> 30) | 4) & ~3;
- *error = *in;
- *in += (round >> shift);
+ if (!enc) {
+ *error = *in;
+ *in += (round >> shift);
+ }
dl[4] = -dl[5]; dl[5] = -dl[6];
dl[6] = *in - dl[7]; dl[7] = *in;
dl[5] += dl[6]; dl[4] += dl[5];
+
+ if (enc) {
+ *in -= (round >> shift);
+ *error = *in;
+ }
+}
+
+#if CONFIG_TTA_DECODER
+static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl,
+ int32_t *error, int32_t *in, int32_t shift,
+ int32_t round)
+{
+ ttafilter_process(qm, dx, dl, error, in, shift, round, 0);
+}
+#endif
+
+#if CONFIG_TTA_ENCODER
+static void ttafilter_process_enc_c(int32_t *qm, int32_t *dx, int32_t *dl,
+ int32_t *error, int32_t *in, int32_t shift,
+ int32_t round)
+{
+ ttafilter_process(qm, dx, dl, error, in, shift, round, 1);
}
+#endif
av_cold void ff_ttadsp_init(TTADSPContext *c)
{
+#if CONFIG_TTA_DECODER
c->ttafilter_process_dec = ttafilter_process_dec_c;
+#endif
+#if CONFIG_TTA_ENCODER
+ c->ttafilter_process_enc = ttafilter_process_enc_c;
+#endif
if (ARCH_X86)
ff_ttadsp_init_x86(c);
diff --git a/libavcodec/ttadsp.h b/libavcodec/ttadsp.h
index 56930f1..df73998 100644
--- a/libavcodec/ttadsp.h
+++ b/libavcodec/ttadsp.h
@@ -26,6 +26,9 @@ typedef struct TTADSPContext {
void (*ttafilter_process_dec)(int32_t *qm, int32_t *dx, int32_t *dl,
int32_t *error, int32_t *in, int32_t shift,
int32_t round);
+ void (*ttafilter_process_enc)(int32_t *qm, int32_t *dx, int32_t *dl,
+ int32_t *error, int32_t *in, int32_t shift,
+ int32_t round);
} TTADSPContext;
void ff_ttadsp_init(TTADSPContext *c);
diff --git a/libavcodec/ttaenc.c b/libavcodec/ttaenc.c
index 2f1c8db..5ccf98b 100644
--- a/libavcodec/ttaenc.c
+++ b/libavcodec/ttaenc.c
@@ -20,6 +20,7 @@
#define BITSTREAM_WRITER_LE
#include "ttadata.h"
+#include "ttadsp.h"
#include "avcodec.h"
#include "put_bits.h"
#include "internal.h"
@@ -29,6 +30,7 @@ typedef struct TTAEncContext {
const AVCRC *crc_table;
int bps;
TTAChannel *ch_ctx;
+ TTADSPContext dsp;
} TTAEncContext;
static av_cold int tta_encode_init(AVCodecContext *avctx)
@@ -57,38 +59,9 @@ static av_cold int tta_encode_init(AVCodecContext *avctx)
if (!s->ch_ctx)
return AVERROR(ENOMEM);
- return 0;
-}
-
-static inline void ttafilter_process(TTAFilter *c, int32_t *in)
-{
- register int32_t *dl = c->dl, *qm = c->qm, *dx = c->dx, sum = c->round;
-
- if (c->error < 0) {
- qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3];
- qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7];
- } else if (c->error > 0) {
- qm[0] += dx[0]; qm[1] += dx[1]; qm[2] += dx[2]; qm[3] += dx[3];
- qm[4] += dx[4]; qm[5] += dx[5]; qm[6] += dx[6]; qm[7] += dx[7];
- }
+ ff_ttadsp_init(&s->dsp);
- sum += dl[0] * qm[0] + dl[1] * qm[1] + dl[2] * qm[2] + dl[3] * qm[3] +
- dl[4] * qm[4] + dl[5] * qm[5] + dl[6] * qm[6] + dl[7] * qm[7];
-
- dx[0] = dx[1]; dx[1] = dx[2]; dx[2] = dx[3]; dx[3] = dx[4];
- dl[0] = dl[1]; dl[1] = dl[2]; dl[2] = dl[3]; dl[3] = dl[4];
-
- dx[4] = ((dl[4] >> 30) | 1);
- dx[5] = ((dl[5] >> 30) | 2) & ~1;
- dx[6] = ((dl[6] >> 30) | 2) & ~1;
- dx[7] = ((dl[7] >> 30) | 4) & ~3;
-
- dl[4] = -dl[5]; dl[5] = -dl[6];
- dl[6] = *in - dl[7]; dl[7] = *in;
- dl[5] += dl[6]; dl[4] += dl[5];
-
- *in -= (sum >> c->shift);
- c->error = *in;
+ return 0;
}
static int32_t get_sample(const AVFrame *frame, int sample,
@@ -155,7 +128,8 @@ pkt_alloc:
}
c->predictor = temp;
- ttafilter_process(filter, &value);
+ s->dsp.ttafilter_process_enc(filter->qm, filter->dx, filter->dl, &filter->error, &value,
+ filter->shift, filter->round);
outval = (value > 0) ? (value << 1) - 1: -value << 1;
k = rice->k0;
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 839b5bc..cc2b3c4 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -61,6 +61,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
+OBJS-$(CONFIG_TTA_ENCODER) += x86/ttadsp_init.o
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
@@ -160,6 +161,7 @@ YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
+YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttadsp.o
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
diff --git a/libavcodec/x86/ttadsp.asm b/libavcodec/x86/ttadsp.asm
index 8f48949..1c664f2 100644
--- a/libavcodec/x86/ttadsp.asm
+++ b/libavcodec/x86/ttadsp.asm
@@ -29,9 +29,9 @@ pd_1224: dd 1, 2, 2, 4
SECTION .text
-%macro TTA_FILTER 2
+%macro TTA_FILTER 3
INIT_XMM %1
-cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round
+cglobal ttafilter_process_%2, 5,5,%3, qm, dx, dl, error, in, shift, round
mova m2, [qmq ]
mova m3, [qmq + 0x10]
mova m4, [dxq ]
@@ -94,13 +94,19 @@ cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round
mova [dlq ], m2
mova [dxq ], m5
mova [dxq + 0x10], m4
- movd m0, [inq] ; filter->error = *in;
- movd [errorq], m0 ;
- movd m2, shiftm ; *in += (sum >> filter->shift);
+ movd m2, shiftm ;
+ movd m0, [inq]
psrad m6, m2 ;
- paddd m0, m6 ;
+%ifidn %2, dec
+ movd [errorq], m0 ; filter->error = *in;
+ paddd m0, m6 ; *in += (sum >> filter->shift);
movd [inq], m0 ;
+%else
+ psubd m3, m0, m6 ;
+ movd [inq], m3 ; *in -= (sum >> filter->shift);
+ movd [errorq], m3 ; filter->error = *in;
+%endif
psrldq m1, 4 ;
pslldq m0, 12 ; filter->dl[4] = -filter->dl[5];
@@ -115,5 +121,7 @@ cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round
RET
%endmacro
-TTA_FILTER ssse3, 8
-TTA_FILTER sse4, 7
+TTA_FILTER ssse3, dec, 8
+TTA_FILTER sse4, dec, 7
+TTA_FILTER ssse3, enc, 8
+TTA_FILTER sse4, enc, 7
diff --git a/libavcodec/x86/ttadsp_init.c b/libavcodec/x86/ttadsp_init.c
index 47dc87f..75c444c 100644
--- a/libavcodec/x86/ttadsp_init.c
+++ b/libavcodec/x86/ttadsp_init.c
@@ -22,21 +22,34 @@
#include "libavutil/x86/cpu.h"
#include "config.h"
-void ff_ttafilter_process_dec_ssse3(int32_t *qm, int32_t *dx, int32_t *dl,
- int32_t *error, int32_t *in, int32_t shift,
- int32_t round);
-void ff_ttafilter_process_dec_sse4(int32_t *qm, int32_t *dx, int32_t *dl,
- int32_t *error, int32_t *in, int32_t shift,
- int32_t round);
+#define TTAFILTER_PROCESS(opt) \
+void ff_ttafilter_process_dec_##opt(int32_t *qm, int32_t *dx, int32_t *dl, \
+ int32_t *error, int32_t *in, int32_t shift, \
+ int32_t round); \
+void ff_ttafilter_process_enc_##opt(int32_t *qm, int32_t *dx, int32_t *dl, \
+ int32_t *error, int32_t *in, int32_t shift, \
+ int32_t round)
+
+TTAFILTER_PROCESS(ssse3);
+TTAFILTER_PROCESS(sse4);
av_cold void ff_ttadsp_init_x86(TTADSPContext *c)
{
#if HAVE_YASM
int cpu_flags = av_get_cpu_flags();
+#if CONFIG_TTA_DECODER
if (EXTERNAL_SSSE3(cpu_flags))
c->ttafilter_process_dec = ff_ttafilter_process_dec_ssse3;
if (EXTERNAL_SSE4(cpu_flags))
c->ttafilter_process_dec = ff_ttafilter_process_dec_sse4;
#endif
+
+#if CONFIG_TTA_ENCODER
+ if (EXTERNAL_SSSE3(cpu_flags))
+ c->ttafilter_process_enc = ff_ttafilter_process_enc_ssse3;
+ if (EXTERNAL_SSE4(cpu_flags))
+ c->ttafilter_process_enc = ff_ttafilter_process_enc_sse4;
+#endif
+#endif
}
--
2.9.1
More information about the ffmpeg-devel mailing list