[FFmpeg-devel] [PATCH 1/2] swresample: Refactor resample asm and port it to yasm
James Almer
jamrial at gmail.com
Wed Mar 19 22:45:03 CET 2014
This reduces code duplication and makes it easier to implement new asm
functions in the future.
Signed-off-by: James Almer <jamrial at gmail.com>
---
libswresample/resample.c | 96 ++++++++++---------------------------
libswresample/resample_template.c | 49 +++++++------------
libswresample/swresample_internal.h | 24 ++++++++++
libswresample/x86/Makefile | 1 +
libswresample/x86/resample.asm | 64 +++++++++++++++++++++++++
libswresample/x86/resample_mmx.h | 74 ----------------------------
libswresample/x86/swresample_x86.c | 16 +++++++
7 files changed, 148 insertions(+), 176 deletions(-)
create mode 100644 libswresample/x86/resample.asm
delete mode 100644 libswresample/x86/resample_mmx.h
diff --git a/libswresample/resample.c b/libswresample/resample.c
index 034b47a..c7e9c02 100644
--- a/libswresample/resample.c
+++ b/libswresample/resample.c
@@ -29,29 +29,6 @@
#include "libavutil/avassert.h"
#include "swresample_internal.h"
-
-typedef struct ResampleContext {
- const AVClass *av_class;
- uint8_t *filter_bank;
- int filter_length;
- int filter_alloc;
- int ideal_dst_incr;
- int dst_incr;
- int index;
- int frac;
- int src_incr;
- int compensation_distance;
- int phase_shift;
- int phase_mask;
- int linear;
- enum SwrFilterType filter_type;
- int kaiser_beta;
- double factor;
- enum AVSampleFormat format;
- int felem_size;
- int filter_shift;
-} ResampleContext;
-
/**
* 0th order modified bessel function of the first kind.
*/
@@ -195,6 +172,22 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
return 0;
}
+#define TEMPLATE_RESAMPLE_S16
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_S16
+
+#define TEMPLATE_RESAMPLE_S32
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_S32
+
+#define TEMPLATE_RESAMPLE_FLT
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_FLT
+
+#define TEMPLATE_RESAMPLE_DBL
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_DBL
+
static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
double cutoff0, enum AVSampleFormat format, enum SwrFilterType filter_type, int kaiser_beta,
double precision, int cheby){
@@ -216,13 +209,19 @@ static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_r
switch(c->format){
case AV_SAMPLE_FMT_S16P:
c->filter_shift = 15;
+ c->scalarproduct = scalarproduct_int16;
break;
case AV_SAMPLE_FMT_S32P:
c->filter_shift = 30;
+ c->scalarproduct = scalarproduct_int32;
break;
case AV_SAMPLE_FMT_FLTP:
+ c->filter_shift = 0;
+ c->scalarproduct = scalarproduct_float;
+ break;
case AV_SAMPLE_FMT_DBLP:
c->filter_shift = 0;
+ c->scalarproduct = scalarproduct_double;
break;
default:
av_log(NULL, AV_LOG_ERROR, "Unsupported sample format\n");
@@ -254,6 +253,9 @@ static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_r
c->index= -phase_count*((c->filter_length-1)/2);
c->frac= 0;
+ if (ARCH_X86)
+ swri_audio_resample_init_x86(c);
+
return c;
error:
av_freep(&c->filter_bank);
@@ -277,62 +279,16 @@ static int set_compensation(ResampleContext *c, int sample_delta, int compensati
return 0;
}
-#define TEMPLATE_RESAMPLE_S16
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_S16
-
-#define TEMPLATE_RESAMPLE_S32
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_S32
-
-#define TEMPLATE_RESAMPLE_FLT
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_FLT
-
-#define TEMPLATE_RESAMPLE_DBL
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_DBL
-
-// XXX FIXME the whole C loop should be written in asm so this x86 specific code here isnt needed
-#if HAVE_MMXEXT_INLINE
-
-#include "x86/resample_mmx.h"
-
-#define TEMPLATE_RESAMPLE_S16_MMX2
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_S16_MMX2
-
-#if HAVE_SSE2_INLINE
-#define TEMPLATE_RESAMPLE_S16_SSE2
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_S16_SSE2
-#endif
-
-#endif // HAVE_MMXEXT_INLINE
-
static int multiple_resample(ResampleContext *c, AudioData *dst, int dst_size, AudioData *src, int src_size, int *consumed){
int i, ret= -1;
- int av_unused mm_flags = av_get_cpu_flags();
- int need_emms= 0;
for(i=0; i<dst->ch_count; i++){
-#if HAVE_MMXEXT_INLINE
-#if HAVE_SSE2_INLINE
- if(c->format == AV_SAMPLE_FMT_S16P && (mm_flags&AV_CPU_FLAG_SSE2)) ret= swri_resample_int16_sse2 (c, (int16_t*)dst->ch[i], (const int16_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
- else
-#endif
- if(c->format == AV_SAMPLE_FMT_S16P && (mm_flags&AV_CPU_FLAG_MMX2 )){
- ret= swri_resample_int16_mmx2 (c, (int16_t*)dst->ch[i], (const int16_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
- need_emms= 1;
- } else
-#endif
if(c->format == AV_SAMPLE_FMT_S16P) ret= swri_resample_int16(c, (int16_t*)dst->ch[i], (const int16_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
else if(c->format == AV_SAMPLE_FMT_S32P) ret= swri_resample_int32(c, (int32_t*)dst->ch[i], (const int32_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
else if(c->format == AV_SAMPLE_FMT_FLTP) ret= swri_resample_float(c, (float *)dst->ch[i], (const float *)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
else if(c->format == AV_SAMPLE_FMT_DBLP) ret= swri_resample_double(c,(double *)dst->ch[i], (const double *)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
}
- if(need_emms)
- emms_c();
+
return ret;
}
diff --git a/libswresample/resample_template.c b/libswresample/resample_template.c
index f11053d..123786a 100644
--- a/libswresample/resample_template.c
+++ b/libswresample/resample_template.c
@@ -55,10 +55,8 @@
# define OUT(d, v) v = (v + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;\
d = (uint64_t)(v + 0x80000000) > 0xFFFFFFFF ? (v>>63) ^ 0x7FFFFFFF : v
-#elif defined(TEMPLATE_RESAMPLE_S16) \
- || defined(TEMPLATE_RESAMPLE_S16_MMX2) \
- || defined(TEMPLATE_RESAMPLE_S16_SSE2)
-
+#elif defined(TEMPLATE_RESAMPLE_S16)
+# define RENAME(N) N ## _int16
# define FILTER_SHIFT 15
# define DELEM int16_t
# define FELEM int16_t
@@ -68,18 +66,21 @@
# define FELEM_MIN INT16_MIN
# define OUT(d, v) v = (v + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;\
d = (unsigned)(v + 32768) > 65535 ? (v>>31) ^ 32767 : v
+#endif
-# if defined(TEMPLATE_RESAMPLE_S16)
-# define RENAME(N) N ## _int16
-# elif defined(TEMPLATE_RESAMPLE_S16_MMX2)
-# define COMMON_CORE COMMON_CORE_INT16_MMX2
-# define RENAME(N) N ## _int16_mmx2
-# elif defined(TEMPLATE_RESAMPLE_S16_SSE2)
-# define COMMON_CORE COMMON_CORE_INT16_SSE2
-# define RENAME(N) N ## _int16_sse2
-# endif
+static void RENAME(scalarproduct)(const void *source, void *dest, void *filter, int len)
+{
+ const DELEM *src = (const DELEM*)source;
+ DELEM *dst = (DELEM*)dest;
+ FELEM *flt = (FELEM*)filter;
+ FELEM2 val=0;
+ int i;
-#endif
+ for(i = 0; i < len; i++){
+ val += src[i] * (FELEM2)flt[i];
+ }
+ OUT(*dst, val);
+}
int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int *consumed, int src_size, int dst_size, int update_ctx){
int dst_index, i;
@@ -118,15 +119,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
if(sample_index + c->filter_length > src_size){
break;
}else{
-#ifdef COMMON_CORE
- COMMON_CORE
-#else
- FELEM2 val=0;
- for(i=0; i<c->filter_length; i++){
- val += src[sample_index + i] * (FELEM2)filter[i];
- }
- OUT(dst[dst_index], val);
-#endif
+ c->scalarproduct(src+sample_index, dst+dst_index, filter, c->filter_length);
}
frac += dst_incr_frac;
@@ -162,14 +155,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
val+=(v2-val)*(FELEML)frac / c->src_incr;
OUT(dst[dst_index], val);
}else{
-#ifdef COMMON_CORE
- COMMON_CORE
-#else
- for(i=0; i<c->filter_length; i++){
- val += src[sample_index + i] * (FELEM2)filter[i];
- }
- OUT(dst[dst_index], val);
-#endif
+ c->scalarproduct(src+sample_index, dst+dst_index, filter, c->filter_length);
}
frac += dst_incr_frac;
@@ -204,7 +190,6 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
return dst_index;
}
-#undef COMMON_CORE
#undef RENAME
#undef FILTER_SHIFT
#undef DELEM
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index ab19f21..95a803c 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -66,6 +66,29 @@ struct DitherContext {
int output_sample_bits; ///< the number of used output bits, needed to scale dither correctly
};
+typedef struct ResampleContext {
+ const AVClass *av_class;
+ uint8_t *filter_bank;
+ int filter_length;
+ int filter_alloc;
+ int ideal_dst_incr;
+ int dst_incr;
+ int index;
+ int frac;
+ int src_incr;
+ int compensation_distance;
+ int phase_shift;
+ int phase_mask;
+ int linear;
+ enum SwrFilterType filter_type;
+ int kaiser_beta;
+ double factor;
+ enum AVSampleFormat format;
+ int felem_size;
+ int filter_shift;
+ void (*scalarproduct)(const void *src, void *dst, void *filter, int length);
+} ResampleContext;
+
struct SwrContext {
const AVClass *av_class; ///< AVClass used for AVOption and av_log()
int log_level_offset; ///< logging level offset
@@ -196,4 +219,5 @@ void swri_audio_convert_init_x86(struct AudioConvert *ac,
enum AVSampleFormat out_fmt,
enum AVSampleFormat in_fmt,
int channels);
+void swri_audio_resample_init_x86(struct ResampleContext *c);
#endif
diff --git a/libswresample/x86/Makefile b/libswresample/x86/Makefile
index 1d1ab6e..0f8e75d 100644
--- a/libswresample/x86/Makefile
+++ b/libswresample/x86/Makefile
@@ -1,5 +1,6 @@
YASM-OBJS += x86/swresample_x86.o\
x86/audio_convert.o\
x86/rematrix.o\
+ x86/resample.o\
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
new file mode 100644
index 0000000..0204387
--- /dev/null
+++ b/libswresample/x86/resample.asm
@@ -0,0 +1,64 @@
+;******************************************************************************
+;* Copyright (c) 2012 Michael Niedermayer
+;* Copyright (c) 2014 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+
+SECTION_RODATA
+round: dq 0x4000, 0
+
+SECTION .text
+
+%macro RESAMPLE_SCALARPRODUCT_INT16 0
+cglobal resample_scalarproduct_int16, 4,4,2, src, dst, filter, len
+ shl lenq, 1
+ neg lenq
+ sub srcq, lenq
+ sub filterq, lenq
+ mova m0, [round]
+.loop
+ movu m1, [srcq + lenq]
+ pmaddwd m1, [filterq + lenq]
+ paddd m0, m1
+ add lenq, mmsize
+ js .loop
+%if mmsize == 8
+ pshufw m1, m0, 0xe
+ paddd m0, m1
+%else
+ pshufd m1, m0, 0xe
+ paddd m0, m1
+ pshufd m1, m0, 1
+ paddd m0, m1
+%endif
+ psrad m0, 15
+ packssdw m0, m0
+ movd [dstq], m0
+%if mmsize == 8
+ emms
+%endif
+ RET
+%endmacro
+
+INIT_MMX mmxext
+RESAMPLE_SCALARPRODUCT_INT16
+INIT_XMM sse2
+RESAMPLE_SCALARPRODUCT_INT16
diff --git a/libswresample/x86/resample_mmx.h b/libswresample/x86/resample_mmx.h
deleted file mode 100644
index f366cc7..0000000
--- a/libswresample/x86/resample_mmx.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2012 Michael Niedermayer <michaelni at gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/x86/asm.h"
-#include "libavutil/cpu.h"
-#include "libswresample/swresample_internal.h"
-
-int swri_resample_int16_mmx2 (struct ResampleContext *c, int16_t *dst, const int16_t *src, int *consumed, int src_size, int dst_size, int update_ctx);
-int swri_resample_int16_sse2 (struct ResampleContext *c, int16_t *dst, const int16_t *src, int *consumed, int src_size, int dst_size, int update_ctx);
-
-DECLARE_ALIGNED(16, const uint64_t, ff_resample_int16_rounder)[2] = { 0x0000000000004000ULL, 0x0000000000000000ULL};
-
-#define COMMON_CORE_INT16_MMX2 \
- x86_reg len= -2*c->filter_length;\
-__asm__ volatile(\
- "movq "MANGLE(ff_resample_int16_rounder)", %%mm0 \n\t"\
- "1: \n\t"\
- "movq (%1, %0), %%mm1 \n\t"\
- "pmaddwd (%2, %0), %%mm1 \n\t"\
- "paddd %%mm1, %%mm0 \n\t"\
- "add $8, %0 \n\t"\
- " js 1b \n\t"\
- "pshufw $0x0E, %%mm0, %%mm1 \n\t"\
- "paddd %%mm1, %%mm0 \n\t"\
- "psrad $15, %%mm0 \n\t"\
- "packssdw %%mm0, %%mm0 \n\t"\
- "movd %%mm0, (%3) \n\t"\
- : "+r" (len)\
- : "r" (((uint8_t*)(src+sample_index))-len),\
- "r" (((uint8_t*)filter)-len),\
- "r" (dst+dst_index)\
- NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
-);
-
-#define COMMON_CORE_INT16_SSE2 \
- x86_reg len= -2*c->filter_length;\
-__asm__ volatile(\
- "movdqa "MANGLE(ff_resample_int16_rounder)", %%xmm0 \n\t"\
- "1: \n\t"\
- "movdqu (%1, %0), %%xmm1 \n\t"\
- "pmaddwd (%2, %0), %%xmm1 \n\t"\
- "paddd %%xmm1, %%xmm0 \n\t"\
- "add $16, %0 \n\t"\
- " js 1b \n\t"\
- "pshufd $0x0E, %%xmm0, %%xmm1 \n\t"\
- "paddd %%xmm1, %%xmm0 \n\t"\
- "pshufd $0x01, %%xmm0, %%xmm1 \n\t"\
- "paddd %%xmm1, %%xmm0 \n\t"\
- "psrad $15, %%xmm0 \n\t"\
- "packssdw %%xmm0, %%xmm0 \n\t"\
- "movd %%xmm0, (%3) \n\t"\
- : "+r" (len)\
- : "r" (((uint8_t*)(src+sample_index))-len),\
- "r" (((uint8_t*)filter)-len),\
- "r" (dst+dst_index)\
- NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
-);
diff --git a/libswresample/x86/swresample_x86.c b/libswresample/x86/swresample_x86.c
index 7483ba0..f38b069 100644
--- a/libswresample/x86/swresample_x86.c
+++ b/libswresample/x86/swresample_x86.c
@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavutil/x86/cpu.h"
#include "libswresample/swresample_internal.h"
#include "libswresample/audioconvert.h"
@@ -198,3 +199,18 @@ av_cold void swri_rematrix_init_x86(struct SwrContext *s){
memcpy(s->native_simd_one, s->native_one, sizeof(float));
}
}
+
+void ff_resample_scalarproduct_int16_mmxext(const void *src, void *dst, void *filter, int length);
+void ff_resample_scalarproduct_int16_sse2 (const void *src, void *dst, void *filter, int length);
+
+void swri_audio_resample_init_x86(ResampleContext *c)
+{
+ int cpuflags = av_get_cpu_flags();
+
+ if (c->format == AV_SAMPLE_FMT_S16P) {
+ if (EXTERNAL_MMXEXT(cpuflags))
+ c->scalarproduct = ff_resample_scalarproduct_int16_mmxext;
+ if (EXTERNAL_SSE2(cpuflags))
+ c->scalarproduct = ff_resample_scalarproduct_int16_sse2;
+ }
+}
--
1.8.3.2
More information about the ffmpeg-devel
mailing list