[FFmpeg-devel] [PATCH 1/2] avfilter/transpose: refactor for asm
Michael Niedermayer
michaelni at gmx.at
Thu Sep 12 20:08:38 CEST 2013
On Thu, Sep 12, 2013 at 04:52:54PM +0000, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
> libavfilter/vf_transpose.c | 124 ++++++++++++++++++++++++++++++---------------
> 1 file changed, 82 insertions(+), 42 deletions(-)
>
> diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
> index 8daeeaf..d19198c 100644
> --- a/libavfilter/vf_transpose.c
> +++ b/libavfilter/vf_transpose.c
> @@ -58,6 +58,9 @@ typedef struct {
>
> PassthroughType passthrough; ///< landscape passthrough mode enabled
> enum TransposeDir dir;
> +
> + void (*transpose_block)(uint8_t *src, int src_linesize,
> + uint8_t *dst, int dst_linesize);
> } TransContext;
>
> static int query_formats(AVFilterContext *ctx)
> @@ -79,6 +82,67 @@ static int query_formats(AVFilterContext *ctx)
> return 0;
> }
>
> +static void transpose_8_c(uint8_t *src, int src_linesize,
> + uint8_t *dst, int dst_linesize)
> +{
> + int x, y;
> + for (y = 0; y < 8; y++, dst += dst_linesize, src++)
> + for (x = 0; x < 8; x++)
> + dst[x] = src[x*src_linesize];
> +}
> +
> +static void transpose_16_c(uint8_t *src, int src_linesize,
> + uint8_t *dst, int dst_linesize)
> +{
> + int x, y;
> + for (y = 0; y < 8; y++, dst += dst_linesize, src += 2)
> + for (x = 0; x < 8; x++)
> + *((uint16_t *)(dst + 2*x)) = *((uint16_t *)(src + x*src_linesize));
> +}
> +
> +static void transpose_24_c(uint8_t *src, int src_linesize,
> + uint8_t *dst, int dst_linesize)
> +{
> + int x, y;
> + for (y = 0; y < 8; y++, dst += dst_linesize) {
> + for (x = 0; x < 8; x++) {
> + int32_t v = AV_RB24(src + x*src_linesize + y*3);
> + AV_WB24(dst + 3*x, v);
> + }
> + }
> +}
> +
> +static void transpose_32_c(uint8_t *src, int src_linesize,
> + uint8_t *dst, int dst_linesize)
> +{
> + int x, y;
> + for (y = 0; y < 8; y++, dst += dst_linesize, src += 4) {
> + for (x = 0; x < 8; x++)
> + *((uint32_t *)(dst + 4*x)) = *((uint32_t *)(src + x*src_linesize));
> + }
> +}
> +
> +static void transpose_48_c(uint8_t *src, int src_linesize,
> + uint8_t *dst, int dst_linesize)
> +{
> + int x, y;
> + for (y = 0; y < 8; y++, dst += dst_linesize, src += 6) {
> + for (x = 0; x < 8; x++) {
> + int64_t v = AV_RB48(src + x*src_linesize);
> + AV_WB48(dst + 6*x, v);
> + }
> + }
> +}
> +
> +static void transpose_64_c(uint8_t *src, int src_linesize,
> + uint8_t *dst, int dst_linesize)
> +{
> + int x, y;
> + for (y = 0; y < 8; y++, dst += dst_linesize, src += 8)
> + for (x = 0; x < 8; x++)
> + *((uint64_t *)(dst + 8*x)) = *((uint64_t *)(src + x*src_linesize));
> +}
> +
> static int config_props_output(AVFilterLink *outlink)
> {
> AVFilterContext *ctx = outlink->src;
> @@ -117,6 +181,15 @@ static int config_props_output(AVFilterLink *outlink)
> } else
> outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
>
> + switch (trans->pixsteps[0]) {
> + case 1: trans->transpose_block = transpose_8_c; break;
> + case 2: trans->transpose_block = transpose_16_c; break;
> + case 3: trans->transpose_block = transpose_24_c; break;
> + case 4: trans->transpose_block = transpose_32_c; break;
> + case 6: trans->transpose_block = transpose_48_c; break;
> + case 8: trans->transpose_block = transpose_64_c; break;
> + }
> +
> av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
> inlink->w, inlink->h, trans->dir, outlink->w, outlink->h,
> trans->dir == 1 || trans->dir == 3 ? "clockwise" : "counterclockwise",
> @@ -174,47 +247,12 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr,
> dstlinesize *= -1;
> }
>
> - switch (pixstep) {
> - case 1:
> - for (y = start; y < end; y++, dst += dstlinesize)
> - for (x = 0; x < outw; x++)
> - dst[x] = src[x*srclinesize + y];
> - break;
> - case 2:
> - for (y = start; y < end; y++, dst += dstlinesize) {
> - for (x = 0; x < outw; x++)
> - *((uint16_t *)(dst + 2*x)) = *((uint16_t *)(src + x*srclinesize + y*2));
> - }
> - break;
> - case 3:
> - for (y = start; y < end; y++, dst += dstlinesize) {
> - for (x = 0; x < outw; x++) {
> - int32_t v = AV_RB24(src + x*srclinesize + y*3);
> - AV_WB24(dst + 3*x, v);
> - }
> - }
> - break;
> - case 4:
> - for (y = start; y < end; y++, dst += dstlinesize) {
> - for (x = 0; x < outw; x++)
> - *((uint32_t *)(dst + 4*x)) = *((uint32_t *)(src + x*srclinesize + y*4));
> - }
> - break;
> - case 6:
> - for (y = start; y < end; y++, dst += dstlinesize) {
> - for (x = 0; x < outw; x++) {
> - int64_t v = AV_RB48(src + x*srclinesize + y*6);
> - AV_WB48(dst + 6*x, v);
> - }
> - }
> - break;
> - case 8:
> - for (y = start; y < end; y++, dst += dstlinesize) {
> - for (x = 0; x < outw; x++)
> - *((uint64_t *)(dst + 8*x)) = *((uint64_t *)(src + x*srclinesize + y*8));
> - }
> - break;
> - }
> + for (y = start; y < end; y += 8)
> + for (x = 0; x < outw; x += 8)
> + trans->transpose_block(src + x * srclinesize + y * pixstep,
> + srclinesize,
> + dst + (y - start) * dstlinesize + x * pixstep,
> + dstlinesize);
Raster scan order still looks like a bad idea to me.
Also, one call per 64 pixels (one 8x8 block) has its overhead.
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
When you are offended at any man's fault, turn to yourself and study your
own failings. Then you will forget your anger. -- Epictetus
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130912/1a460e32/attachment.asc>
More information about the ffmpeg-devel
mailing list