[Ffmpeg-devel] [PATCH] Snow mc_block mmx optimization
Oded Shimon
ods15
Fri Mar 24 15:18:04 CET 2006
On Thu, Mar 23, 2006 at 09:43:29PM +0100, Guillaume POIRIER wrote:
> Hi,
>
> On 3/22/06, Oded Shimon <ods15 at ods15.dyndns.org> wrote:
> > My turn!
> >
> > Just for qpel (I think? maybe also for odd resolutions)
> >
> > C version is faster as well, by a factor of ~2, and mmx version is about ~8
> > times faster... md5sums pass. I think total speed increase is 10-20% for
> > files encoded with qpel...
>
> Doesn't apply here...
New patch. Also has the _mmx function names you wanted.
- ods15
-------------- next part --------------
Index: libavcodec/dsputil.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dsputil.c,v
retrieving revision 1.137
diff -u -r1.137 dsputil.c
--- libavcodec/dsputil.c 23 Mar 2006 20:16:35 -0000 1.137
+++ libavcodec/dsputil.c 24 Mar 2006 14:13:27 -0000
@@ -3775,6 +3775,85 @@
static void just_return() { return; }
+static always_inline void mc_block_x(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ /* horizontal pass of the snow 6-tap subpel filter; dx = x offset in 1/16 pel (even values only) */
+ int x, y;
+ if (dy == 0) { /* no vertical pass will follow: write final pixels straight into dst */
+ b_h -= 5; /* the 5 extra rows are only needed as input for the vertical pass */
+ tmp = dst;
+ src += 2*stride; /* skip the 2-row top margin the vertical pass would have consumed */
+ }
+ if (dx != 0) for (y = 0; y < b_h+5; y++) { /* dx==0: nothing to do, the y pass reads src directly */
+ for (x = 0; x < b_w; x++) {
+ int a0= src[x ];
+ int a1= src[x + 1];
+ int a2= src[x + 2];
+ int a3= src[x + 3];
+ int a4= src[x + 4];
+ int a5= src[x + 5];
+ int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); /* 6-tap halfpel value, scaled x32 (20+20-5-5+1+1) */
+
+ if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8; /* blend nearest full pel with the halfpel value, round, drop the x32 scale */
+ else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
+
+ if(am&(~255)) am= ~(am>>31); /* branchless clip to 0..255 */
+
+ tmp[x] = am;
+ }
+ tmp += stride;
+ src += stride;
+ }
+}
+
+static always_inline void mc_block_y(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ /* vertical pass of the snow 6-tap subpel filter; dy = y offset in 1/16 pel (even values only) */
+ int x, y;
+ if (dx == 0) tmp = (uint8_t*)src + 2; /* x pass did nothing: filter src directly (+2 undoes the 2-pixel left margin) */
+ if (dy != 0) for (y = 0; y < b_h; y++) {
+ for (x = 0; x < b_w; x++) {
+ int a0= tmp[x + 0*stride];
+ int a1= tmp[x + 1*stride];
+ int a2= tmp[x + 2*stride];
+ int a3= tmp[x + 3*stride];
+ int a4= tmp[x + 4*stride];
+ int a5= tmp[x + 5*stride];
+ int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); /* 6-tap halfpel value, scaled x32 */
+
+ if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8; /* blend nearest row with the halfpel value, round, drop the x32 scale */
+ else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
+
+ if(am&(~255)) am= ~(am>>31); /* branchless clip to 0..255 */
+
+ dst[x] = am;
+ }
+ dst += stride;
+ tmp += stride;
+ } else if (dx == 0) { // dy==0 too: plain copy (when dy==0 and dx!=0 the x pass already wrote dst)
+ tmp += 2*stride; /* skip the 2-row top margin */
+ for (y = 0; y < b_h; y++) {
+ memcpy(dst, tmp, b_w);
+ dst += stride;
+ tmp += stride;
+ }
+ }
+}
+
+#define mca(a)\
+static void mc_block_x ## a(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){\
+ mc_block_x(dst, src, tmp, stride, b_w, b_h, a, dy);\
+}\
+static void mc_block_y ## a(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){\
+ mc_block_y(dst, src, tmp, stride, b_w, b_h, dx, a);\
+}
+/* one x and one y wrapper per even 1/16-pel offset, so the constant dx/dy folds into each always_inline body */
+mca(0)
+mca(2)
+mca(4)
+mca(6)
+mca(8)
+mca(10)
+mca(12)
+mca(14)
+#undef mca
+
/* init static data */
void dsputil_static_init(void)
{
@@ -4054,6 +4133,24 @@
c->vertical_compose97i = ff_snow_vertical_compose97i;
c->horizontal_compose97i = ff_snow_horizontal_compose97i;
c->inner_add_yblock = ff_snow_inner_add_yblock;
+
+ c->mc_block_x[0] = mc_block_x0;
+ c->mc_block_x[1] = mc_block_x2;
+ c->mc_block_x[2] = mc_block_x4;
+ c->mc_block_x[3] = mc_block_x6;
+ c->mc_block_x[4] = mc_block_x8;
+ c->mc_block_x[5] = mc_block_x10;
+ c->mc_block_x[6] = mc_block_x12;
+ c->mc_block_x[7] = mc_block_x14;
+
+ c->mc_block_y[0] = mc_block_y0;
+ c->mc_block_y[1] = mc_block_y2;
+ c->mc_block_y[2] = mc_block_y4;
+ c->mc_block_y[3] = mc_block_y6;
+ c->mc_block_y[4] = mc_block_y8;
+ c->mc_block_y[5] = mc_block_y10;
+ c->mc_block_y[6] = mc_block_y12;
+ c->mc_block_y[7] = mc_block_y14;
#endif
c->prefetch= just_return;
Index: libavcodec/dsputil.h
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dsputil.h,v
retrieving revision 1.132
diff -u -r1.132 dsputil.h
--- libavcodec/dsputil.h 24 Mar 2006 01:33:22 -0000 1.132
+++ libavcodec/dsputil.h 24 Mar 2006 14:13:28 -0000
@@ -133,6 +133,7 @@
// allthough currently h<4 is not used as functions with width <8 are not used and neither implemented
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
+typedef void (*mc_block_func)(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy);
// for snow slices
typedef struct slice_buffer_s slice_buffer;
@@ -344,6 +345,9 @@
void (*horizontal_compose97i)(DWTELEM *b, int width);
void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+ mc_block_func mc_block_x[8];
+ mc_block_func mc_block_y[8];
+
void (*prefetch)(void *mem, int stride, int h);
} DSPContext;
Index: libavcodec/snow.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/snow.c,v
retrieving revision 1.94
diff -u -r1.94 snow.c
--- libavcodec/snow.c 20 Mar 2006 05:52:23 -0000 1.94
+++ libavcodec/snow.c 24 Mar 2006 14:13:33 -0000
@@ -2294,91 +2294,6 @@
}
}
-static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
- int x, y;
-START_TIMER
- for(y=0; y < b_h+5; y++){
- for(x=0; x < b_w; x++){
- int a0= src[x ];
- int a1= src[x + 1];
- int a2= src[x + 2];
- int a3= src[x + 3];
- int a4= src[x + 4];
- int a5= src[x + 5];
-// int am= 9*(a1+a2) - (a0+a3);
- int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
-// int am= 18*(a2+a3) - 2*(a1+a4);
-// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
-// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
-
-// if(b_w==16) am= 8*(a1+a2);
-
- if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
- else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
-
- /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
- if(am&(~255)) am= ~(am>>31);
-
- tmp[x] = am;
-
-/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
- else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
- else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
- else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
- }
- tmp += stride;
- src += stride;
- }
- tmp -= (b_h+5)*stride;
-
- for(y=0; y < b_h; y++){
- for(x=0; x < b_w; x++){
- int a0= tmp[x + 0*stride];
- int a1= tmp[x + 1*stride];
- int a2= tmp[x + 2*stride];
- int a3= tmp[x + 3*stride];
- int a4= tmp[x + 4*stride];
- int a5= tmp[x + 5*stride];
- int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
-// int am= 18*(a2+a3) - 2*(a1+a4);
-/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
- int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
-
-// if(b_w==16) am= 8*(a1+a2);
-
- if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
- else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
-
- if(am&(~255)) am= ~(am>>31);
-
- dst[x] = am;
-/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
- else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
- else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
- else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
- }
- dst += stride;
- tmp += stride;
- }
-STOP_TIMER("mc_block")
-}
-
-#define mca(dx,dy,b_w)\
-static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
- uint8_t tmp[stride*(b_w+5)];\
- assert(h==b_w);\
- mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
-}
-
-mca( 0, 0,16)
-mca( 8, 0,16)
-mca( 0, 8,16)
-mca( 8, 8,16)
-mca( 0, 0,8)
-mca( 8, 0,8)
-mca( 0, 8,8)
-mca( 8, 8,8)
-
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
if(block->type & BLOCK_INTRA){
int x, y;
@@ -2437,9 +2352,13 @@
// assert(!(b_w&(b_w-1)));
assert(b_w>1 && b_h>1);
assert(tab_index>=0 && tab_index<4 || b_w==32);
- if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
- mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
- else if(b_w==32){
+ if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1))) {
+ START_TIMER
+ assert(!(dx&1) && !(dy&1));
+ s->dsp.mc_block_x[dx>>1](dst, src, tmp, stride, b_w, b_h, dx, dy);
+ s->dsp.mc_block_y[dy>>1](dst, src, tmp, stride, b_w, b_h, dx, dy);
+ STOP_TIMER("mc_block")
+ } else if(b_w==32){
int y;
for(y=0; y<b_h; y+=16){
s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
@@ -3765,19 +3684,6 @@
mcf( 8,12)
mcf(12,12)
-#define mcfh(dx,dy)\
- s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
- s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
- mc_block_hpel ## dx ## dy ## 16;\
- s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
- s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
- mc_block_hpel ## dx ## dy ## 8;
-
- mcfh(0, 0)
- mcfh(8, 0)
- mcfh(0, 8)
- mcfh(8, 8)
-
if(!qexp[0])
init_qexp();
Index: libavcodec/i386/dsputil_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx.c,v
retrieving revision 1.118
diff -u -r1.118 dsputil_mmx.c
--- libavcodec/i386/dsputil_mmx.c 23 Mar 2006 20:16:36 -0000 1.118
+++ libavcodec/i386/dsputil_mmx.c 24 Mar 2006 14:13:35 -0000
@@ -2585,6 +2585,181 @@
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
extern void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+
+static always_inline void mc_block_core_mmx(int dx, uint8_t * dst) { /* filter+blend 4 pixels: expects a0..a5 in mm0..mm5, 0 in mm6 and 64 in mm7 (set up by mc_block_x_mmx) */
+ asm volatile("punpcklbw %%mm6, %%mm0 \n\t" // widen the 4 bytes of each tap to words
+ "punpcklbw %%mm6, %%mm1 \n\t"
+ "punpcklbw %%mm6, %%mm2 \n\t"
+ "punpcklbw %%mm6, %%mm3 \n\t"
+ "punpcklbw %%mm6, %%mm4 \n\t"
+ "punpcklbw %%mm6, %%mm5 \n\t"
+
+ "paddsw %%mm5, %%mm0 \n\t" // am = a0+a5;
+
+ "paddsw %%mm4, %%mm1 \n\t" // a1 += a4;
+ "movq %%mm1, %%mm4 \n\t" // a4 = a1;
+ "psllw $2, %%mm1 \n\t" // a1 *= 4;
+ "paddsw %%mm4, %%mm1 \n\t" // a1 += a4; -> a1 = 5*(a1+a4)
+ "psubsw %%mm1, %%mm0 \n\t" // am -= 5*(a1+a4);
+
+ "movq %%mm2, %%mm4 \n\t" // a4 = a2;
+ "paddsw %%mm3, %%mm4 \n\t" // a4 += a3;
+
+ "psllw $2, %%mm4 \n\t" // a4 *= 4;
+ "paddsw %%mm4, %%mm0 \n\t" // am += 4*(a2+a3);
+ "psllw $2, %%mm4 \n\t" // a4 *= 4;
+ "paddsw %%mm4, %%mm0 \n\t" // am += 16*(a2+a3); am now = 20*(a2+a3) - 5*(a1+a4) + a0 + a5
+ ::);
+ switch (dx) { /* fold in the subpel blend; each case keeps an x32 total scale so +64 >> 7 below matches the C (+128) >> 8 rounding */
+ case 2: asm volatile("psllw $5, %%mm2 \n\t" // a2 <<= 5;
+ "movq %%mm2, %%mm3 \n\t" // a3 = a2;
+ "psllw $1, %%mm2 \n\t" // a2 <<= 1;
+ "paddsw %%mm3, %%mm0 \n\t" // am += a3;
+ "paddsw %%mm2, %%mm0 \n\t" // am += a2;
+ ::); break; /* am = am + 96*a2 */
+ case 4: asm volatile("psllw $6, %%mm2 \n\t" // a2 <<= 6;
+ "psllw $1, %%mm0 \n\t" // am <<= 1;
+ "paddsw %%mm2, %%mm0 \n\t" // am += a2;
+ ::); break; /* am = 2*am + 64*a2 */
+ case 6: asm volatile("psllw $5, %%mm2 \n\t" // a2 <<= 5;
+ "movq %%mm0, %%mm3 \n\t" // a3 = am;
+ "psllw $1, %%mm0 \n\t" // am <<= 1;
+ "paddsw %%mm3, %%mm0 \n\t" // am += a3;
+ "paddsw %%mm2, %%mm0 \n\t" // am += a2;
+ ::); break; /* am = 3*am + 32*a2 */
+ case 8: asm volatile("psllw $2, %%mm0 \n\t" // am <<= 2;
+ ::); break; /* am = 4*am (pure halfpel) */
+ case 10: asm volatile("psllw $5, %%mm3 \n\t" // a3 <<= 5;
+ "movq %%mm0, %%mm2 \n\t" // a2 = am;
+ "psllw $1, %%mm0 \n\t" // am <<= 1;
+ "paddsw %%mm3, %%mm0 \n\t" // am += a3;
+ "paddsw %%mm2, %%mm0 \n\t" // am += a2;
+ ::); break; /* am = 3*am + 32*a3 */
+ case 12: asm volatile("psllw $6, %%mm3 \n\t" // a3 <<= 6;
+ "psllw $1, %%mm0 \n\t" // am <<= 1;
+ "paddsw %%mm3, %%mm0 \n\t" // am += a3;
+ ::); break; /* am = 2*am + 64*a3 */
+ case 14: asm volatile("psllw $5, %%mm3 \n\t" // a3 <<= 5;
+ "movq %%mm3, %%mm2 \n\t" // a2 = a3;
+ "psllw $1, %%mm3 \n\t" // a3 <<= 1;
+ "paddsw %%mm2, %%mm0 \n\t" // am += a2;
+ "paddsw %%mm3, %%mm0 \n\t" // am += a3;
+ ::); break; /* am = am + 96*a3 */
+ }
+ asm volatile("paddsw %%mm7, %%mm0 \n\t" // am += 64; (rounding)
+ "psraw $7, %%mm0 \n\t" // am >>= 7;
+ "packuswb %%mm6, %%mm0 \n\t" // saturate words to 0..255 (replaces the C clip)
+ "movd %%mm0, (%0) \n\t" // tmp[x] = am;
+ ::"r"(dst));
+}
+
+static always_inline void mc_block_x_mmx(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ /* MMX horizontal pass; mirrors mc_block_x in dsputil.c, 4 pixels per iteration */
+ int x, y;
+ assert(!(b_w&3) && !(b_h&3) && !(dx&1));
+ asm volatile("pcmpeqw %%mm7, %%mm7 \n\t"
+ "psllw $15, %%mm7 \n\t"
+ "psrlw $9, %%mm7 \n\t" // 64 (rounding constant for mc_block_core_mmx)
+ "pxor %%mm6, %%mm6 \n\t" // 0 -- NOTE: no emms here; mm6/mm7 stay live for mc_block_y_mmx, which must run right after
+ ::);
+ if (dy == 0) { /* no vertical pass will follow: write final pixels straight into dst */
+ b_h -= 5;
+ tmp = dst;
+ src += 2*stride;
+ }
+ if (dx != 0) for (y = 0; y < b_h+5; y++) {
+ for (x = 0; x < b_w-3; x += 4) {
+ asm volatile("movd (%0), %%mm0 \n\t" // am = src[x ];
+ "movd 1(%0), %%mm1 \n\t" // a1 = src[x + 1];
+ "movd 2(%0), %%mm2 \n\t" // a2 = src[x + 2];
+ "movd 3(%0), %%mm3 \n\t" // a3 = src[x + 3];
+ "movd 4(%0), %%mm4 \n\t" // a4 = src[x + 4];
+ "movd 5(%0), %%mm5 \n\t" // a5 = src[x + 5];
+ ::"r"(&src[x]));
+
+ mc_block_core_mmx(dx, &tmp[x]);
+ }
+ for (; x < b_w; x++) { /* scalar tail; unreachable while b_w is a multiple of 4 (see assert), kept as a safety net */
+ int a0= src[x ];
+ int a1= src[x + 1];
+ int a2= src[x + 2];
+ int a3= src[x + 3];
+ int a4= src[x + 4];
+ int a5= src[x + 5];
+ int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
+
+ if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
+ else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
+
+ if(am&(~255)) am= ~(am>>31);
+
+ tmp[x] = am;
+ }
+ tmp += stride;
+ src += stride;
+ }
+}
+
+static always_inline void mc_block_y_mmx(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ /* MMX vertical pass; relies on mm6/mm7 being set up by mc_block_x_mmx, which is always called first */
+ int x, y;
+ if (dx == 0) tmp = (uint8_t*)src + 2; /* x pass did nothing: filter src directly (+2 undoes the 2-pixel left margin) */
+ if (dy != 0) for (y = 0; y < b_h; y++) {
+ for (x = 0; x < b_w-3; x += 4) {
+ asm volatile("movd (%0), %%mm0 \n\t" // am = tmp[x + 0*stride];
+ "movd (%1), %%mm1 \n\t" // a1 = tmp[x + 1*stride];
+ "movd (%0,%2,2), %%mm2 \n\t" // a2 = tmp[x + 2*stride];
+ "movd (%1,%2,2), %%mm3 \n\t" // a3 = tmp[x + 3*stride];
+ "movd (%0,%2,4), %%mm4 \n\t" // a4 = tmp[x + 4*stride];
+ "movd (%1,%2,4), %%mm5 \n\t" // a5 = tmp[x + 5*stride];
+ ::"r"(&tmp[x]),"r"(&tmp[x+stride]),"a"(stride));
+
+ mc_block_core_mmx(dy, &dst[x]);
+ }
+ for (; x < b_w; x++){ /* scalar tail; unreachable while b_w is a multiple of 4, kept as a safety net */
+ int a0= tmp[x + 0*stride];
+ int a1= tmp[x + 1*stride];
+ int a2= tmp[x + 2*stride];
+ int a3= tmp[x + 3*stride];
+ int a4= tmp[x + 4*stride];
+ int a5= tmp[x + 5*stride];
+ int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
+
+ if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
+ else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
+
+ if(am&(~255)) am= ~(am>>31);
+
+ dst[x] = am;
+ }
+ dst += stride;
+ tmp += stride;
+ } else if (dx == 0) { // dy==0 too: plain copy (when dy==0 and dx!=0 the x pass already wrote dst)
+ tmp += 2*stride; /* skip the 2-row top margin */
+ for (y = 0; y < b_h; y++) {
+ memcpy(dst, tmp, b_w);
+ dst += stride;
+ tmp += stride;
+ }
+ }
+ asm volatile("emms"::); // leave the MMX/FPU state clean for callers
+}
+
+#define mca(a)\
+static void mc_block_x ## a ## _mmx(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){\
+ mc_block_x_mmx(dst, src, tmp, stride, b_w, b_h, a, dy);\
+}\
+static void mc_block_y ## a ## _mmx(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){\
+ mc_block_y_mmx(dst, src, tmp, stride, b_w, b_h, dx, a);\
+}
+/* one x and one y wrapper per even 1/16-pel offset, constant-folded into the always_inline bodies */
+mca(0)
+mca(2)
+mca(4)
+mca(6)
+mca(8)
+mca(10)
+mca(12)
+mca(14) /* NOTE(review): unlike the dsputil.c copy, mca is not #undef'd here -- harmless but inconsistent */
+
#endif
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
@@ -2991,6 +3166,24 @@
c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
}
+
+ c->mc_block_x[0] = mc_block_x0_mmx;
+ c->mc_block_x[1] = mc_block_x2_mmx;
+ c->mc_block_x[2] = mc_block_x4_mmx;
+ c->mc_block_x[3] = mc_block_x6_mmx;
+ c->mc_block_x[4] = mc_block_x8_mmx;
+ c->mc_block_x[5] = mc_block_x10_mmx;
+ c->mc_block_x[6] = mc_block_x12_mmx;
+ c->mc_block_x[7] = mc_block_x14_mmx;
+
+ c->mc_block_y[0] = mc_block_y0_mmx;
+ c->mc_block_y[1] = mc_block_y2_mmx;
+ c->mc_block_y[2] = mc_block_y4_mmx;
+ c->mc_block_y[3] = mc_block_y6_mmx;
+ c->mc_block_y[4] = mc_block_y8_mmx;
+ c->mc_block_y[5] = mc_block_y10_mmx;
+ c->mc_block_y[6] = mc_block_y12_mmx;
+ c->mc_block_y[7] = mc_block_y14_mmx;
#endif
}
More information about the ffmpeg-devel
mailing list