[FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Thu Sep 8 05:38:11 EEST 2022
Up until now, libswscale/input.c used a macro to read
an input pixel which involved a call to av_pix_fmt_desc_get()
to find out whether the input pixel format is BE or LE
despite this being known at compile-time (there are templates
per pixfmt). Even worse, these calls are made in a loop,
so that e.g. there are six calls to av_pix_fmt_desc_get()
for every pair of UV pixel processed in
rgb64ToUV_half_c_template().
This commit modifies these macros to ensure that isBE()
is evaluated at compile-time. This saved 9743B of .text
for me (GCC 11.2, -O3).
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
---
output.c suffers from the same issue (and according to coverage
is responsible for quite a large chunk of the calls to
av_pix_fmt_desc_get() (144699832 of 321342234)), so I will
be looking into this tomorrow.
libswscale/input.c | 96 +++++++++++++++++++++++++---------------------
1 file changed, 52 insertions(+), 44 deletions(-)
diff --git a/libswscale/input.c b/libswscale/input.c
index 88e318e664..4144ff81a9 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -28,14 +28,19 @@
#include "config.h"
#include "swscale_internal.h"
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+#define input_pixel(pos) (is_be ? AV_RB16(pos) : AV_RL16(pos))
+
+#define IS_BE_LE 0
+#define IS_BE_BE 1
+#define IS_BE_ 0
+#define IS_BE(BE_LE) IS_BE_ ## BE_LE
#define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? b_r : r_b)
#define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? r_b : b_r)
static av_always_inline void
rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
- enum AVPixelFormat origin, int32_t *rgb2yuv)
+ enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
{
int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
int i;
@@ -51,7 +56,7 @@ rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
static av_always_inline void
rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
const uint16_t *src1, const uint16_t *src2,
- int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+ int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
{
int i;
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -70,7 +75,7 @@ rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
static av_always_inline void
rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
const uint16_t *src1, const uint16_t *src2,
- int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+ int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
{
int i;
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -92,7 +97,7 @@ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src,
{ \
const uint16_t *src = (const uint16_t *) _src; \
uint16_t *dst = (uint16_t *) _dst; \
- rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \
+ rgb64ToY_c_template(dst, src, width, origin, rgb2yuv, IS_BE(BE_LE)); \
} \
\
static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
@@ -102,7 +107,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
const uint16_t *src1 = (const uint16_t *) _src1, \
*src2 = (const uint16_t *) _src2; \
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
- rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+ rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
} \
\
static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
@@ -112,7 +117,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV
const uint16_t *src1 = (const uint16_t *) _src1, \
*src2 = (const uint16_t *) _src2; \
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
- rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+ rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
}
rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE)
@@ -123,7 +128,7 @@ rgb64funcs(bgr, BE, AV_PIX_FMT_BGRA64BE)
static av_always_inline void rgb48ToY_c_template(uint16_t *dst,
const uint16_t *src, int width,
enum AVPixelFormat origin,
- int32_t *rgb2yuv)
+ int32_t *rgb2yuv, int is_be)
{
int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
int i;
@@ -142,7 +147,7 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU,
const uint16_t *src2,
int width,
enum AVPixelFormat origin,
- int32_t *rgb2yuv)
+ int32_t *rgb2yuv, int is_be)
{
int i;
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -164,7 +169,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
const uint16_t *src2,
int width,
enum AVPixelFormat origin,
- int32_t *rgb2yuv)
+ int32_t *rgb2yuv, int is_be)
{
int i;
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -197,7 +202,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \
{ \
const uint16_t *src = (const uint16_t *)_src; \
uint16_t *dst = (uint16_t *)_dst; \
- rgb48ToY_c_template(dst, src, width, origin, rgb2yuv); \
+ rgb48ToY_c_template(dst, src, width, origin, rgb2yuv, IS_BE(BE_LE));\
} \
\
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \
@@ -213,7 +218,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \
*src2 = (const uint16_t *)_src2; \
uint16_t *dstU = (uint16_t *)_dstU, \
*dstV = (uint16_t *)_dstV; \
- rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+ rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
} \
\
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \
@@ -229,7 +234,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \
*src2 = (const uint16_t *)_src2; \
uint16_t *dstU = (uint16_t *)_dstU, \
*dstV = (uint16_t *)_dstV; \
- rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+ rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
}
rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE)
@@ -245,7 +250,7 @@ rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE)
: ((origin == AV_PIX_FMT_X2RGB10LE || \
origin == AV_PIX_FMT_X2BGR10LE) \
? AV_RL32(&src[(i) * 4]) \
- : (isBE(origin) ? AV_RB16(&src[(i) * 2]) \
+ : (is_be ? AV_RB16(&src[(i) * 2]) \
: AV_RL16(&src[(i) * 2]))))
static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
@@ -257,7 +262,7 @@ static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
int maskr, int maskg,
int maskb, int rsh,
int gsh, int bsh, int S,
- int32_t *rgb2yuv)
+ int32_t *rgb2yuv, int is_be)
{
const int ry = rgb2yuv[RY_IDX]<<rsh, gy = rgb2yuv[GY_IDX]<<gsh, by = rgb2yuv[BY_IDX]<<bsh;
const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
@@ -283,7 +288,7 @@ static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU,
int maskr, int maskg,
int maskb, int rsh,
int gsh, int bsh, int S,
- int32_t *rgb2yuv)
+ int32_t *rgb2yuv, int is_be)
{
const int ru = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh),
rv = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh);
@@ -311,7 +316,7 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
int maskr, int maskg,
int maskb, int rsh,
int gsh, int bsh, int S,
- int32_t *rgb2yuv)
+ int32_t *rgb2yuv, int is_be)
{
const int ru = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh),
rv = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh),
@@ -345,13 +350,13 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
#undef input_pixel
-#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
- maskg, maskb, rsh, gsh, bsh, S) \
+#define rgb16_32_wrapper_0(fmt, name, shr, shg, shb, shp, maskr, \
+ maskg, maskb, rsh, gsh, bsh, S, is_be) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
int width, uint32_t *tab, void *opq) \
{ \
rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \
- maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \
+ maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \
} \
\
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
@@ -360,7 +365,7 @@ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
{ \
rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
shr, shg, shb, shp, \
- maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\
+ maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \
} \
\
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
@@ -371,27 +376,32 @@ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
shr, shg, shb, shp, \
maskr, maskg, maskb, \
- rsh, gsh, bsh, S, tab); \
-}
-
-rgb16_32_wrapper(AV_PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_X2RGB10LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6)
-rgb16_32_wrapper(AV_PIX_FMT_X2BGR10LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6)
+ rsh, gsh, bsh, S, tab, is_be); \
+}
+
+#define rgb16_32_wrapper(base_fmt, endianness, name, shr, shg, shb, shp, maskr, \
+ maskg, maskb, rsh, gsh, bsh, S) \
+ rgb16_32_wrapper_0(base_fmt ## endianness, name, shr, shg, shb, shp, maskr, \
+ maskg, maskb, rsh, gsh, bsh, S, IS_BE(endianness))
+
+rgb16_32_wrapper(AV_PIX_FMT_BGR32, , bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR32_1, , bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB32, , rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB32_1, , rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR565, LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR555, LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_BGR444, LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_RGB565, LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB555, LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_RGB444, LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_BGR565, BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR555, BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_BGR444, BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_RGB565, BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB555, BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_RGB444, BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_X2RGB10, LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6)
+rgb16_32_wrapper(AV_PIX_FMT_X2BGR10, LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6)
static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV,
const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
@@ -832,8 +842,6 @@ p01x_wrapper(10, 6)
p01x_wrapper(12, 4)
p01x_uv_wrapper(16, 0)
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
-
static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
int width, uint32_t *rgb2yuv, void *opq)
{
--
2.34.1
More information about the ffmpeg-devel
mailing list