[FFmpeg-devel] [RFC/PATCH] More flexible variant of float_to_int16, WMA optimization, Vorbis
Loren Merritt
lorenm
Tue Jul 15 16:58:23 CEST 2008
On Tue, 15 Jul 2008, Michael Niedermayer wrote:
> May I suggest an array of src pointers instead of stride?
> Reason is, if we want to use this function in the future in a generic
> converter, stride will not be enough because we likely will have an
> array of source pointers from the user.
> Besides it would allow reordering channels.
Done. (The WMA part breaks with the C version, since it doesn't use the [384,386] bias yet.)
> It might also be worth looking at mplayer/liba52/resample_mmx.c; maybe some
> of that code could be reused, especially as we do not have an MMX
> float_to_int16. Besides, the trick used there could be tried with SSE2.
I'm not very interested in optimizing for pentium2 / k6-1. I'm not sure I
could, anyway; that's so far removed from anything I can benchmark on.
--Loren Merritt
-------------- next part --------------
Index: dsputil.c
===================================================================
--- dsputil.c (revision 14207)
+++ dsputil.c (working copy)
@@ -3962,17 +3962,17 @@
dst[i] = float_to_int16_one(src+i);
}
-void ff_float_to_int16_interleave_c(int16_t *dst, const float *src, long len, int channels){
+void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
int i,j,c;
if(channels==2){
for(i=0; i<len; i++){
- dst[2*i] = float_to_int16_one(src+i);
- dst[2*i+1] = float_to_int16_one(src+i+len);
+ dst[2*i] = float_to_int16_one(src[0]+i);
+ dst[2*i+1] = float_to_int16_one(src[1]+i);
}
}else{
- for(c=0; c<channels; c++, src+=len)
+ for(c=0; c<channels; c++)
for(i=0, j=c; i<len; i++, j+=channels)
- dst[j] = float_to_int16_one(src+i);
+ dst[j] = float_to_int16_one(src[c]+i);
}
}
Index: dsputil.h
===================================================================
--- dsputil.h (revision 14207)
+++ dsputil.h (working copy)
@@ -372,7 +372,7 @@
/* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
* simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
void (*float_to_int16)(int16_t *dst, const float *src, long len);
- void (*float_to_int16_interleave)(int16_t *dst, const float *src, long len, int channels);
+ void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
Index: i386/dsputil_mmx.c
===================================================================
--- i386/dsputil_mmx.c (revision 14236)
+++ i386/dsputil_mmx.c (working copy)
@@ -2156,32 +2156,32 @@
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
-static av_noinline void float_to_int16_interleave2_##cpu(int16_t *dst, const float *src, long len, int channels){\
- DECLARE_ALIGNED_16(int16_t, tmp[len*channels]);\
+static av_noinline void float_to_int16_interleave2_##cpu(int16_t *dst, const float **src, long len, int channels){\
+ DECLARE_ALIGNED_16(int16_t, tmp[len]);\
int i,j,c;\
- float_to_int16_##cpu(tmp, src, len*channels);\
for(c=0; c<channels; c++){\
- int16_t *ptmp = tmp+c*len;\
+ float_to_int16_##cpu(tmp, src[c], len);\
for(i=0, j=c; i<len; i++, j+=channels)\
- dst[j] = ptmp[i];\
+ dst[j] = tmp[i];\
}\
}\
\
-static void float_to_int16_interleave_##cpu(int16_t *dst, const float *src, long len, int channels){\
+static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
if(channels==1)\
- float_to_int16_##cpu(dst, src, len);\
+ float_to_int16_##cpu(dst, src[0], len);\
else if(channels>2)\
float_to_int16_interleave2_##cpu(dst, src, len, channels);\
else{\
- float *src1;\
+ const float *src0 = src[0];\
+ const float *src1 = src[1];\
asm volatile(\
"shl $2, %0 \n"\
"add %0, %1 \n"\
"add %0, %2 \n"\
- "lea (%2,%0), %3 \n"\
+ "add %0, %3 \n"\
"neg %0 \n"\
body\
- :"+r"(len), "+r"(dst), "+r"(src), "=r"(src1)\
+ :"+r"(len), "+r"(dst), "+r"(src0), "+r"(src1)\
);\
}\
}
Index: vorbis_dec.c
===================================================================
--- vorbis_dec.c (revision 14207)
+++ vorbis_dec.c (working copy)
@@ -1551,6 +1551,8 @@
{
vorbis_context *vc = avccontext->priv_data ;
GetBitContext *gb = &(vc->gb);
+ const float *channel_ptrs[vc->audio_channels];
+ int i;
int_fast16_t len;
@@ -1577,7 +1579,9 @@
AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len);
- vc->dsp.float_to_int16_interleave(data, vc->ret, len, vc->audio_channels);
+ for(i=0; i<vc->audio_channels; i++)
+ channel_ptrs[i] = vc->ret+i*len;
+ vc->dsp.float_to_int16_interleave(data, channel_ptrs, len, vc->audio_channels);
*data_size=len*2*vc->audio_channels;
return buf_size ;
Index: wmadec.c
===================================================================
--- wmadec.c (revision 14207)
+++ wmadec.c (working copy)
@@ -715,9 +715,8 @@
/* decode a frame of frame_len samples */
static int wma_decode_frame(WMACodecContext *s, int16_t *samples)
{
- int ret, i, n, ch, incr;
- int16_t *ptr;
- float *iptr;
+ int ret, ch;
+ const float *channel_ptrs[s->nb_channels];
#ifdef TRACE
tprintf(s->avctx, "***decode_frame: %d size=%d\n", s->frame_count++, s->frame_len);
@@ -734,19 +733,13 @@
break;
}
- /* convert frame to integer */
- n = s->frame_len;
- incr = s->nb_channels;
- for(ch = 0; ch < s->nb_channels; ch++) {
- ptr = samples + ch;
- iptr = s->frame_out[ch];
+ for(ch=0; ch<s->nb_channels; ch++)
+ channel_ptrs[ch] = s->frame_out[ch];
+ s->dsp.float_to_int16_interleave(samples, channel_ptrs, s->frame_len, s->nb_channels);
- for(i=0;i<n;i++) {
- *ptr = av_clip_int16(lrintf(*iptr++));
- ptr += incr;
- }
+ for(ch = 0; ch < s->nb_channels; ch++) {
/* prepare for next block */
- memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
+ memcpy(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
s->frame_len * sizeof(float));
}
More information about the ffmpeg-devel
mailing list