[FFmpeg-devel] [PATCH 06/21] swscale/output: add AYUV output support

Sat Oct 12 01:54:48 EEST 2024

On 10/11/2024 7:46 PM, Michael Niedermayer wrote:
> On Tue, Oct 08, 2024 at 07:50:11PM -0300, James Almer wrote:
>> Signed-off-by: James Almer <jamrial at gmail.com>
>> ---
>>   libswscale/output.c                      | 323 ++++++++++++-----------
>>   libswscale/utils.c                       |   2 +-
>>   tests/ref/fate/filter-pixdesc-ayuv       |   1 +
>>   tests/ref/fate/filter-pixfmts-copy       |   1 +
>>   tests/ref/fate/filter-pixfmts-crop       |   1 +
>>   tests/ref/fate/filter-pixfmts-field      |   1 +
>>   tests/ref/fate/filter-pixfmts-fieldorder |   1 +
>>   tests/ref/fate/filter-pixfmts-hflip      |   1 +
>>   tests/ref/fate/filter-pixfmts-il         |   1 +
>>   tests/ref/fate/filter-pixfmts-null       |   1 +
>>   tests/ref/fate/filter-pixfmts-pad        |   1 +
>>   tests/ref/fate/filter-pixfmts-scale      |   1 +
>>   tests/ref/fate/filter-pixfmts-transpose  |   1 +
>>   tests/ref/fate/filter-pixfmts-vflip      |   1 +
>>   14 files changed, 183 insertions(+), 154 deletions(-)
>>   create mode 100644 tests/ref/fate/filter-pixdesc-ayuv
>>
>> diff --git a/libswscale/output.c b/libswscale/output.c
>> index c9dfd6f60a..328b108089 100644
>> --- a/libswscale/output.c
>> +++ b/libswscale/output.c
>> @@ -2668,165 +2668,177 @@ yuv2xv36le_X_c(SwsContext *c, const int16_t *lumFilter,
>>       }
>>   }
>>   
>> -static void
>> -yuv2vuyX_1_c(SwsContext *c, const int16_t *buf0,
>> -             const int16_t *ubuf[2], const int16_t *vbuf[2],
>> -             const int16_t *abuf0, uint8_t *dest, int dstW,
>> -             int uvalpha, int y)
>> -{
>> -    int hasAlpha = !!abuf0;
>> -    int i;
>> -
>> -    if (uvalpha < 2048) {
>> -        for (i = 0; i < dstW; i++) {
>> -            int Y = (buf0[i] + 64) >> 7;
>> -            int U = (ubuf[0][i] + 64) >> 7;
>> -            int V = (vbuf[0][i] + 64) >> 7;
>> -            int A = 255;
>> -
>> -            if (Y & 0x100)
>> -                Y = av_clip_uint8(Y);
>> -            if (U & 0x100)
>> -                U = av_clip_uint8(U);
>> -            if (V & 0x100)
>> -                V = av_clip_uint8(V);
>> -
>> -            if (hasAlpha) {
>> -                A = (abuf0[i] + 64) >> 7;
>> -                if (A & 0x100)
>> -                    A = av_clip_uint8(A);
>> -            }
>> -
>> -            dest[4 * i    ] = V;
>> -            dest[4 * i + 1] = U;
>> -            dest[4 * i + 2] = Y;
>> -            dest[4 * i + 3] = A;
>> -        }
>> -    } else {
>> -        for (i = 0; i < dstW; i++) {
>> -            int Y = (buf0[i] + 64) >> 7;
>> -            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;
>> -            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;
>> -            int A = 255;
>> -
>> -            if (Y & 0x100)
>> -                Y = av_clip_uint8(Y);
>> -            if (U & 0x100)
>> -                U = av_clip_uint8(U);
>> -            if (V & 0x100)
>> -                V = av_clip_uint8(V);
>> -
>> -            if (hasAlpha) {
>> -                A = (abuf0[i] + 64) >> 7;
>> -                if (A & 0x100)
>> -                    A = av_clip_uint8(A);
>> -            }
>> -
>> -            dest[4 * i    ] = V;
>> -            dest[4 * i + 1] = U;
>> -            dest[4 * i + 2] = Y;
>> -            dest[4 * i + 3] = A;
>> -        }
>> -    }
>> +#define AYUV_1_WRAPPER(fmt, C0, C1, C2, C3)                        \
>> +static void                                                        \
>> +yuv2 ## fmt ##_1_c(SwsContext *c, const int16_t *buf0,             \
>> +                   const int16_t *ubuf[2], const int16_t *vbuf[2], \
>> +                   const int16_t *abuf0, uint8_t *dest, int dstW,  \
>> +                   int uvalpha, int y)                             \
>> +{                                                                  \
>> +    int hasAlpha = !!abuf0;                                        \
>> +    int i;                                                         \
>> +                                                                   \
>> +    if (uvalpha < 2048) {                                          \
>> +        for (i = 0; i < dstW; i++) {                               \
>> +            int Y = (buf0[i] + 64) >> 7;                           \
>> +            int U = (ubuf[0][i] + 64) >> 7;                        \
>> +            int V = (vbuf[0][i] + 64) >> 7;                        \
>> +            int A = 255;                                           \
>> +                                                                   \
>> +            if (Y & 0x100)                                         \
>> +                Y = av_clip_uint8(Y);                              \
>> +            if (U & 0x100)                                         \
>> +                U = av_clip_uint8(U);                              \
>> +            if (V & 0x100)                                         \
>> +                V = av_clip_uint8(V);                              \
>> +                                                                   \
>> +            if (hasAlpha) {                                        \
>> +                A = (abuf0[i] + 64) >> 7;                          \
>> +                if (A & 0x100)                                     \
>> +                    A = av_clip_uint8(A);                          \
>> +            }                                                      \
>> +                                                                   \
>> +            dest[4 * i    ] = (C0);                                \
>> +            dest[4 * i + 1] = (C1);                                \
>> +            dest[4 * i + 2] = (C2);                                \
>> +            dest[4 * i + 3] = (C3);                                \
>> +        }                                                          \
>> +    } else {                                                       \
>> +        for (i = 0; i < dstW; i++) {                               \
>> +            int Y = (buf0[i] + 64) >> 7;                           \
>> +            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;          \
>> +            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;          \
>> +            int A = 255;                                           \
>> +                                                                   \
>> +            if (Y & 0x100)                                         \
>> +                Y = av_clip_uint8(Y);                              \
>> +            if (U & 0x100)                                         \
>> +                U = av_clip_uint8(U);                              \
>> +            if (V & 0x100)                                         \
>> +                V = av_clip_uint8(V);                              \
>> +                                                                   \
>> +            if (hasAlpha) {                                        \
>> +                A = (abuf0[i] + 64) >> 7;                          \
>> +                if (A & 0x100)                                     \
>> +                    A = av_clip_uint8(A);                          \
>> +            }                                                      \
>> +                                                                   \
>> +            dest[4 * i    ] = (C0);                                \
>> +            dest[4 * i + 1] = (C1);                                \
>> +            dest[4 * i + 2] = (C2);                                \
>> +            dest[4 * i + 3] = (C3);                                \
>> +        }                                                          \
>> +    }                                                              \
>>   }
> 
> Is there an advantage to using huge multiline macros here?
> 
> This is ugly, hard-to-maintain code. Simply writing an always-inline function
> and trusting the compiler to inline it should result in more conventional
> C code with the same result.
> 
> (Is it faster? Or does it have some other advantage?)

No, I just figured I'd do it like this. I can make it an always-inline
function.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature.asc
Type: application/pgp-signature
Size: 495 bytes
Desc: OpenPGP digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20241011/0924f2d2/attachment.sig>