[FFmpeg-devel] [PATCH] x86/swr: make int32_to_int32 un/pack_2ch functions SSE

Wed Jan 14 05:53:48 CET 2015

unpack_2ch already uses only SSE float instructions, and switching pack_2ch
to SSE float unpack instructions is a trivial change.
Rename both to float_to_float for consistency with the existing pack_6ch functions.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libswresample/x86/audio_convert.asm    | 14 ++++++++------
 libswresample/x86/audio_convert_init.c | 11 +++++++----
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index 1617e0b..c13c26f 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -60,8 +60,8 @@ pack_2ch_%2_to_%1_u_int %+ SUFFIX
     punpcklwd m0, m2
     punpckhwd m1, m2
 %else
-    punpckldq m0, m2
-    punpckhdq m1, m2
+    unpcklps  m0, m2
+    unpckhps  m1, m2
 %endif
     %6 m0,m1,m2,m3,m4,m5
 %else
@@ -597,6 +597,12 @@ PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
 PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
 
 INIT_XMM sse
+PACK_2CH float, float, u, 2, 2, NOP_N, NOP_N
+PACK_2CH float, float, a, 2, 2, NOP_N, NOP_N
+
+UNPACK_2CH float, float, u, 2, 2, NOP_N, NOP_N
+UNPACK_2CH float, float, a, 2, 2, NOP_N, NOP_N
+
 PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
 PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
 
@@ -611,8 +617,6 @@ CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
 
 PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
 PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
-PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
-PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
 PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
 PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
 PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
@@ -620,8 +624,6 @@ PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
 
 UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
 UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
-UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
-UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
 UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
 UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
 UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c
index bd5f741..f800b4e 100644
--- a/libswresample/x86/audio_convert_init.c
+++ b/libswresample/x86/audio_convert_init.c
@@ -61,6 +61,13 @@ MULTI_CAPS_FUNC(SSE2, sse2)
         }
     }
     if(EXTERNAL_SSE(mm_flags)) {
+        if(channels == 2) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_2ch_float_to_float_a_sse;
+
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_2ch_float_to_float_a_sse;
+        }
         if(channels == 6) {
             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
                 ac->simd_f =  ff_pack_6ch_float_to_float_a_sse;
@@ -80,8 +87,6 @@ MULTI_CAPS_FUNC(SSE2, sse2)
             ac->simd_f =  ff_float_to_int16_a_sse2;
 
         if(channels == 2) {
-            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
-                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
             if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
                 ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
@@ -89,8 +94,6 @@ MULTI_CAPS_FUNC(SSE2, sse2)
             if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
                 ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;
 
-            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
-                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
             if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
                 ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
             if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
-- 
2.2.2