[FFmpeg-devel] [PATCH] swscale: add unscaled copy from yuv420p10 to p010

Oliver Collyer ovcollyer at mac.com
Thu Sep 1 21:52:17 EEST 2016


Just sticking my head above the parapet, but shouldn’t things like...

> +            for (x = 0; x < c->srcW / 2; x++) {
> +                dstUV[x*2  ] = src[1][x] << 6;
> +                dstUV[x*2+1] = src[2][x] << 6;
> +            }

…be written more efficiently as…

uint16_t* tdstUV = dstUV;
uint16_t* tsrc1 = src[1];
uint16_t* tsrc2 = src[2];
for (x = c->srcW / 2; x > 0; x--) {
    *tdstUV++ = *tsrc1++ << 6;
    *tdstUV++ = *tsrc2++ << 6;
}

…or is that really old-school, and does a modern compiler do all of that when optimising?

Or is readability considered more important than marginal gains in performance?

Oliver (time travelling from the 1980s)

> On 1 Sep 2016, at 20:49, Timo Rothenpieler <timo at rothenpieler.org> wrote:
> 
> ---
> libswscale/swscale_unscaled.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 42 insertions(+)
> 
> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
> index b231abe..f47e1f4 100644
> --- a/libswscale/swscale_unscaled.c
> +++ b/libswscale/swscale_unscaled.c
> @@ -197,6 +197,43 @@ static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
>     return srcSliceH;
> }
> 
> +static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[],
> +                               int srcStride[], int srcSliceY,
> +                               int srcSliceH, uint8_t *dstParam8[],
> +                               int dstStride[])
> +{
> +    uint16_t *src[] = {
> +        (uint16_t*)(src8[0] + srcStride[0] * srcSliceY),
> +        (uint16_t*)(src8[1] + srcStride[1] * srcSliceY),
> +        (uint16_t*)(src8[2] + srcStride[2] * srcSliceY)
> +    };
> +    uint16_t *dstY = (uint16_t*)(dstParam8[0] + dstStride[0] * srcSliceY);
> +    uint16_t *dstUV = (uint16_t*)(dstParam8[1] + dstStride[1] * srcSliceY / 2);
> +    int x, y;
> +
> +    av_assert0(!(srcStride[0] % 2 || srcStride[1] % 2 || srcStride[2] % 2 ||
> +                 dstStride[0] % 2 || dstStride[1] % 2));
> +
> +    for (y = srcSliceY; y < srcSliceY + srcSliceH; y++) {
> +        if (!(y & 1)) {
> +            for (x = 0; x < c->srcW / 2; x++) {
> +                dstUV[x*2  ] = src[1][x] << 6;
> +                dstUV[x*2+1] = src[2][x] << 6;
> +            }
> +            src[1] += srcStride[1] / 2;
> +            src[2] += srcStride[2] / 2;
> +            dstUV += dstStride[1] / 2;
> +        }
> +        for (x = 0; x < c->srcW; x++) {
> +            dstY[x] = src[0][x] << 6;
> +        }
> +        src[0] += srcStride[0] / 2;
> +        dstY += dstStride[0] / 2;
> +    }
> +
> +    return srcSliceH;
> +}
> +
> static int planarToYuy2Wrapper(SwsContext *c, const uint8_t *src[],
>                                int srcStride[], int srcSliceY, int srcSliceH,
>                                uint8_t *dstParam[], int dstStride[])
> @@ -1600,6 +1637,11 @@ void ff_get_unscaled_swscale(SwsContext *c)
>         !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) {
>         c->swscale = ff_yuv2rgb_get_func_ptr(c);
>     }
> +    /* yuv420p10le_to_p010le */
> +    if ((srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == AV_PIX_FMT_YUVA420P10) &&
> +        dstFormat == AV_PIX_FMT_P010) {
> +        c->swscale = planarToP010Wrapper;
> +    }
> 
>     if (srcFormat == AV_PIX_FMT_YUV410P && !(dstH & 3) &&
>         (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
> -- 
> 2.9.2
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel



More information about the ffmpeg-devel mailing list