[FFmpeg-devel] [PATCH 1/2] avfilter/transpose: refactor for asm
    Paul B Mahol 
    onemda at gmail.com
       
    Thu Sep 12 18:52:54 CEST 2013
    
    
  
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
 libavfilter/vf_transpose.c | 124 ++++++++++++++++++++++++++++++---------------
 1 file changed, 82 insertions(+), 42 deletions(-)
diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 8daeeaf..d19198c 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -58,6 +58,9 @@ typedef struct {
 
     PassthroughType passthrough; ///< landscape passthrough mode enabled
     enum TransposeDir dir;
+
+    void (*transpose_block)(uint8_t *src, int src_linesize,
+                            uint8_t *dst, int dst_linesize);
 } TransContext;
 
 static int query_formats(AVFilterContext *ctx)
@@ -79,6 +82,67 @@ static int query_formats(AVFilterContext *ctx)
     return 0;
 }
 
+/* Transpose an 8x8 block of 1-byte pixels; linesizes are byte strides between rows. */
+static void transpose_8_c(uint8_t *src, int src_linesize, uint8_t *dst, int dst_linesize)
+{
+    int row, col;
+    for (row = 0; row < 8; row++)
+        for (col = 0; col < 8; col++)
+            dst[row*dst_linesize + col] = src[col*src_linesize + row];
+}
+
+/* Transpose an 8x8 block of 2-byte pixels; pixels are copied through uint16_t pointers. */
+static void transpose_16_c(uint8_t *src, int src_linesize, uint8_t *dst, int dst_linesize)
+{
+    int row, col;
+    for (row = 0; row < 8; row++, src += 2, dst += dst_linesize)
+        for (col = 0; col < 8; col++)
+            ((uint16_t *)dst)[col] = *(uint16_t *)(src + col*src_linesize);
+}
+
+/* Transpose an 8x8 block of 3-byte pixels using AV_RB24 reads and AV_WB24 writes. */
+static void transpose_24_c(uint8_t *src, int src_linesize, uint8_t *dst, int dst_linesize)
+{
+    int row, col;
+    for (row = 0; row < 8; row++, src += 3, dst += dst_linesize) {
+        for (col = 0; col < 8; col++) {
+            int32_t pixel = AV_RB24(src + col*src_linesize);
+            AV_WB24(dst + 3*col, pixel);
+        }
+    }
+}
+
+/* Transpose an 8x8 block of 4-byte pixels; linesizes are byte strides between rows.
+ * Every pixel is copied through uint32_t pointers. */
+static void transpose_32_c(uint8_t *src, int src_linesize, uint8_t *dst, int dst_linesize)
+{
+    int row, col;
+    for (row = 0; row < 8; row++, src += 4, dst += dst_linesize)
+        for (col = 0; col < 8; col++)
+            ((uint32_t *)dst)[col] = *(uint32_t *)(src + col*src_linesize);
+}
+
+/* Transpose an 8x8 block of 6-byte pixels using AV_RB48 reads and AV_WB48 writes. */
+static void transpose_48_c(uint8_t *src, int src_linesize, uint8_t *dst, int dst_linesize)
+{
+    int row, col;
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++) {
+            int64_t pixel = AV_RB48(src + col*src_linesize + 6*row);
+            AV_WB48(dst + row*dst_linesize + 6*col, pixel);
+        }
+    }
+}
+
+/* Transpose an 8x8 block of 8-byte pixels; pixels are copied through uint64_t pointers. */
+static void transpose_64_c(uint8_t *src, int src_linesize, uint8_t *dst, int dst_linesize)
+{
+    int row, col;
+    for (row = 0; row < 8; row++, src += 8, dst += dst_linesize)
+        for (col = 0; col < 8; col++)
+            ((uint64_t *)dst)[col] = *(uint64_t *)(src + col*src_linesize);
+}
+
 static int config_props_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
@@ -117,6 +181,15 @@ static int config_props_output(AVFilterLink *outlink)
     } else
         outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
 
+    switch (trans->pixsteps[0]) {
+    case 1: trans->transpose_block = transpose_8_c;  break;
+    case 2: trans->transpose_block = transpose_16_c; break;
+    case 3: trans->transpose_block = transpose_24_c; break;
+    case 4: trans->transpose_block = transpose_32_c; break;
+    case 6: trans->transpose_block = transpose_48_c; break;
+    case 8: trans->transpose_block = transpose_64_c; break;
+    }
+
     av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
            inlink->w, inlink->h, trans->dir, outlink->w, outlink->h,
            trans->dir == 1 || trans->dir == 3 ? "clockwise" : "counterclockwise",
@@ -174,47 +247,12 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr,
             dstlinesize *= -1;
         }
 
-        switch (pixstep) {
-        case 1:
-            for (y = start; y < end; y++, dst += dstlinesize)
-                for (x = 0; x < outw; x++)
-                    dst[x] = src[x*srclinesize + y];
-            break;
-        case 2:
-            for (y = start; y < end; y++, dst += dstlinesize) {
-                for (x = 0; x < outw; x++)
-                    *((uint16_t *)(dst + 2*x)) = *((uint16_t *)(src + x*srclinesize + y*2));
-            }
-            break;
-        case 3:
-            for (y = start; y < end; y++, dst += dstlinesize) {
-                for (x = 0; x < outw; x++) {
-                    int32_t v = AV_RB24(src + x*srclinesize + y*3);
-                    AV_WB24(dst + 3*x, v);
-                }
-            }
-            break;
-        case 4:
-            for (y = start; y < end; y++, dst += dstlinesize) {
-                for (x = 0; x < outw; x++)
-                    *((uint32_t *)(dst + 4*x)) = *((uint32_t *)(src + x*srclinesize + y*4));
-            }
-            break;
-        case 6:
-            for (y = start; y < end; y++, dst += dstlinesize) {
-                for (x = 0; x < outw; x++) {
-                    int64_t v = AV_RB48(src + x*srclinesize + y*6);
-                    AV_WB48(dst + 6*x, v);
-                }
-            }
-            break;
-        case 8:
-            for (y = start; y < end; y++, dst += dstlinesize) {
-                for (x = 0; x < outw; x++)
-                    *((uint64_t *)(dst + 8*x)) = *((uint64_t *)(src + x*srclinesize + y*8));
-            }
-            break;
-        }
+        for (y = start; y < end; y += 8)
+            for (x = 0; x < outw; x += 8)
+                trans->transpose_block(src + x * srclinesize + y * pixstep,
+                                       srclinesize,
+                                       dst + (y - start) * dstlinesize + x * pixstep,
+                                       dstlinesize);
     }
 
     return 0;
@@ -231,12 +269,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     if (trans->passthrough)
         return ff_filter_frame(outlink, in);
 
-    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    out = ff_get_video_buffer(outlink, FFALIGN(outlink->w, 8), FFALIGN(outlink->h, 8));
     if (!out) {
         av_frame_free(&in);
         return AVERROR(ENOMEM);
     }
     av_frame_copy_props(out, in);
+    out->height = outlink->h;
+    out->width  = outlink->w;
 
     if (in->sample_aspect_ratio.num == 0) {
         out->sample_aspect_ratio = in->sample_aspect_ratio;
-- 
1.7.11.2
    
    
More information about the ffmpeg-devel
mailing list