[FFmpeg-devel] [PATCH 1/2] avfilter/transpose: refactor for asm
Paul B Mahol
onemda at gmail.com
Thu Sep 12 18:52:54 CEST 2013
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
libavfilter/vf_transpose.c | 124 ++++++++++++++++++++++++++++++---------------
1 file changed, 82 insertions(+), 42 deletions(-)
diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 8daeeaf..d19198c 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -58,6 +58,9 @@ typedef struct {
PassthroughType passthrough; ///< landscape passthrough mode enabled
enum TransposeDir dir;
+
+ void (*transpose_block)(uint8_t *src, int src_linesize,
+ uint8_t *dst, int dst_linesize);
} TransContext;
static int query_formats(AVFilterContext *ctx)
@@ -79,6 +82,67 @@ static int query_formats(AVFilterContext *ctx)
return 0;
}
+static void transpose_8_c(uint8_t *src, int src_linesize, /* Transpose one 8x8 block of 1-byte pixels from src into dst. */
+ uint8_t *dst, int dst_linesize) /* Both linesizes are used as byte strides between consecutive rows. */
+{ /* Always touches a full 8x8 block: 8 src rows are read and 8 dst rows are written, with no bounds checks. */
+ int x, y;
+ for (y = 0; y < 8; y++, dst += dst_linesize, src++) /* each dst row y is built from src column y (src advances 1 byte) */
+ for (x = 0; x < 8; x++)
+ dst[x] = src[x*src_linesize]; /* dst(row y, col x) = src(row x, col y) */
+}
+
+static void transpose_16_c(uint8_t *src, int src_linesize, /* Transpose one 8x8 block of 2-byte pixels from src into dst. */
+ uint8_t *dst, int dst_linesize) /* Both linesizes are used as byte strides between consecutive rows. */
+{ /* Always touches a full 8x8 block (8 rows of 16 bytes on each side), with no bounds checks. */
+ int x, y;
+ for (y = 0; y < 8; y++, dst += dst_linesize, src += 2) /* each dst row y is built from src column y (src advances one 2-byte pixel) */
+ for (x = 0; x < 8; x++)
+ *((uint16_t *)(dst + 2*x)) = *((uint16_t *)(src + x*src_linesize)); /* native-order 16-bit copy; NOTE(review): assumes both addresses are suitably aligned for uint16_t - confirm */
+}
+
+static void transpose_24_c(uint8_t *src, int src_linesize, /* Transpose one 8x8 block of 3-byte pixels from src into dst. */
+ uint8_t *dst, int dst_linesize) /* Both linesizes are used as byte strides between consecutive rows. */
+{ /* Always touches a full 8x8 block (8 rows of 24 bytes on each side), with no bounds checks. */
+ int x, y;
+ for (y = 0; y < 8; y++, dst += dst_linesize) { /* src is NOT advanced here; the column offset y*3 is added in the index below instead */
+ for (x = 0; x < 8; x++) {
+ int32_t v = AV_RB24(src + x*src_linesize + y*3); /* AV_RB24 is defined outside this patch; presumably a 24-bit read helper - confirm */
+ AV_WB24(dst + 3*x, v); /* store the same 3-byte value at dst(row y, col x) */
+ }
+ }
+}
+
+static void transpose_32_c(uint8_t *src, int src_linesize, /* Transpose one 8x8 block of 4-byte pixels from src into dst. */
+ uint8_t *dst, int dst_linesize) /* Both linesizes are used as byte strides between consecutive rows. */
+{ /* Always touches a full 8x8 block (8 rows of 32 bytes on each side), with no bounds checks. */
+ int x, y;
+ for (y = 0; y < 8; y++, dst += dst_linesize, src += 4) { /* each dst row y is built from src column y (src advances one 4-byte pixel) */
+ for (x = 0; x < 8; x++)
+ *((uint32_t *)(dst + 4*x)) = *((uint32_t *)(src + x*src_linesize)); /* native-order 32-bit copy; NOTE(review): assumes both addresses are suitably aligned for uint32_t - confirm */
+ }
+}
+
+static void transpose_48_c(uint8_t *src, int src_linesize, /* Transpose one 8x8 block of 6-byte pixels from src into dst. */
+ uint8_t *dst, int dst_linesize) /* Both linesizes are used as byte strides between consecutive rows. */
+{ /* Always touches a full 8x8 block (8 rows of 48 bytes on each side), with no bounds checks. */
+ int x, y;
+ for (y = 0; y < 8; y++, dst += dst_linesize, src += 6) { /* each dst row y is built from src column y (src advances one 6-byte pixel) */
+ for (x = 0; x < 8; x++) {
+ int64_t v = AV_RB48(src + x*src_linesize); /* AV_RB48 is defined outside this patch; presumably a 48-bit read helper - confirm */
+ AV_WB48(dst + 6*x, v); /* store the same 6-byte value at dst(row y, col x) */
+ }
+ }
+}
+
+static void transpose_64_c(uint8_t *src, int src_linesize, /* Transpose one 8x8 block of 8-byte pixels from src into dst. */
+ uint8_t *dst, int dst_linesize) /* Both linesizes are used as byte strides between consecutive rows. */
+{ /* Always touches a full 8x8 block (8 rows of 64 bytes on each side), with no bounds checks. */
+ int x, y;
+ for (y = 0; y < 8; y++, dst += dst_linesize, src += 8) /* each dst row y is built from src column y (src advances one 8-byte pixel) */
+ for (x = 0; x < 8; x++)
+ *((uint64_t *)(dst + 8*x)) = *((uint64_t *)(src + x*src_linesize)); /* native-order 64-bit copy; NOTE(review): assumes both addresses are suitably aligned for uint64_t - confirm */
+}
+
static int config_props_output(AVFilterLink *outlink)
{
AVFilterContext *ctx = outlink->src;
@@ -117,6 +181,15 @@ static int config_props_output(AVFilterLink *outlink)
} else
outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+ switch (trans->pixsteps[0]) {
+ case 1: trans->transpose_block = transpose_8_c; break;
+ case 2: trans->transpose_block = transpose_16_c; break;
+ case 3: trans->transpose_block = transpose_24_c; break;
+ case 4: trans->transpose_block = transpose_32_c; break;
+ case 6: trans->transpose_block = transpose_48_c; break;
+ case 8: trans->transpose_block = transpose_64_c; break;
+ }
+
av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
inlink->w, inlink->h, trans->dir, outlink->w, outlink->h,
trans->dir == 1 || trans->dir == 3 ? "clockwise" : "counterclockwise",
@@ -174,47 +247,12 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr,
dstlinesize *= -1;
}
- switch (pixstep) {
- case 1:
- for (y = start; y < end; y++, dst += dstlinesize)
- for (x = 0; x < outw; x++)
- dst[x] = src[x*srclinesize + y];
- break;
- case 2:
- for (y = start; y < end; y++, dst += dstlinesize) {
- for (x = 0; x < outw; x++)
- *((uint16_t *)(dst + 2*x)) = *((uint16_t *)(src + x*srclinesize + y*2));
- }
- break;
- case 3:
- for (y = start; y < end; y++, dst += dstlinesize) {
- for (x = 0; x < outw; x++) {
- int32_t v = AV_RB24(src + x*srclinesize + y*3);
- AV_WB24(dst + 3*x, v);
- }
- }
- break;
- case 4:
- for (y = start; y < end; y++, dst += dstlinesize) {
- for (x = 0; x < outw; x++)
- *((uint32_t *)(dst + 4*x)) = *((uint32_t *)(src + x*srclinesize + y*4));
- }
- break;
- case 6:
- for (y = start; y < end; y++, dst += dstlinesize) {
- for (x = 0; x < outw; x++) {
- int64_t v = AV_RB48(src + x*srclinesize + y*6);
- AV_WB48(dst + 6*x, v);
- }
- }
- break;
- case 8:
- for (y = start; y < end; y++, dst += dstlinesize) {
- for (x = 0; x < outw; x++)
- *((uint64_t *)(dst + 8*x)) = *((uint64_t *)(src + x*srclinesize + y*8));
- }
- break;
- }
+ for (y = start; y < end; y += 8)
+ for (x = 0; x < outw; x += 8)
+ trans->transpose_block(src + x * srclinesize + y * pixstep,
+ srclinesize,
+ dst + (y - start) * dstlinesize + x * pixstep,
+ dstlinesize);
}
return 0;
@@ -231,12 +269,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
if (trans->passthrough)
return ff_filter_frame(outlink, in);
- out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ out = ff_get_video_buffer(outlink, FFALIGN(outlink->w, 8), FFALIGN(outlink->h, 8));
if (!out) {
av_frame_free(&in);
return AVERROR(ENOMEM);
}
av_frame_copy_props(out, in);
+ out->height = outlink->h;
+ out->width = outlink->w;
if (in->sample_aspect_ratio.num == 0) {
out->sample_aspect_ratio = in->sample_aspect_ratio;
--
1.7.11.2
More information about the ffmpeg-devel
mailing list