[FFmpeg-devel] [PATCH 5/6] diracdec: avx2 dd97
James Darnley
jdarnley at obe.tv
Thu Jul 19 17:52:51 EEST 2018
---
libavcodec/x86/dirac_dwt_10bit.asm | 3 ++-
libavcodec/x86/dirac_dwt_init_10bit.c | 13 +++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/libavcodec/x86/dirac_dwt_10bit.asm b/libavcodec/x86/dirac_dwt_10bit.asm
index ae110d2945..2e039e11ea 100644
--- a/libavcodec/x86/dirac_dwt_10bit.asm
+++ b/libavcodec/x86/dirac_dwt_10bit.asm
@@ -25,7 +25,7 @@ SECTION_RODATA
cextern pd_1
pd_2: times 8 dd 2
-pd_8: times 4 dd 8
+pd_8: times 8 dd 8
SECTION .text
@@ -202,6 +202,7 @@ HAAR_HORIZONTAL
HAAR_VERTICAL
INIT_YMM avx2
+DD97_VERTICAL_HI
HAAR_HORIZONTAL
HAAR_VERTICAL
LEGALL53_VERTICAL_HI
diff --git a/libavcodec/x86/dirac_dwt_init_10bit.c b/libavcodec/x86/dirac_dwt_init_10bit.c
index 51d6eeae93..f103a56176 100644
--- a/libavcodec/x86/dirac_dwt_init_10bit.c
+++ b/libavcodec/x86/dirac_dwt_init_10bit.c
@@ -24,6 +24,7 @@
#include "libavcodec/dirac_dwt.h"
void ff_dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int32_t *b3, int32_t *b4, int width);
+void ff_dd97_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int32_t *b3, int32_t *b4, int width);
void ff_legall53_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
void ff_legall53_vertical_lo_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
@@ -137,7 +138,15 @@ static void dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2,
ff_dd97_vertical_hi_sse2(b0, b1, b2, b3, b4, i);
for(; i<width; i++)
b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]);
+}
+static void dd97_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2, /* wrapper: external AVX2 routine for the bulk, scalar C loop for the tail */
+ int32_t *b3, int32_t *b4, int width)
+{
+ int i = width & ~7; /* width rounded down to a multiple of 8 */
+ ff_dd97_vertical_hi_avx2(b0, b1, b2, b3, b4, i); /* routine is handed only the first i elements; presumably it consumes 8 int32 per iteration -- confirm against the asm */
+ for(; i<width; i++) /* remaining (width % 8) elements, one at a time */
+ b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); /* result is stored back into b2 */
}
av_cold void ff_spatial_idwt_init_10bit_x86(DWTContext *d, enum dwt_type type)
@@ -179,6 +188,10 @@ av_cold void ff_spatial_idwt_init_10bit_x86(DWTContext *d, enum dwt_type type)
if (EXTERNAL_AVX2(cpu_flags)) {
switch (type) {
+ case DWT_DIRAC_DD9_7:
+ d->vertical_compose_h0 = (void*)dd97_vertical_hi_avx2;
+ d->vertical_compose_l0 = (void*)legall53_vertical_lo_avx2;
+ break;
case DWT_DIRAC_LEGALL5_3:
d->vertical_compose_h0 = (void*)legall53_vertical_hi_avx2;
d->vertical_compose_l0 = (void*)legall53_vertical_lo_avx2;
--
2.17.1
More information about the ffmpeg-devel mailing list