[FFmpeg-devel] [PATCH v2 1/2] swscale/input: unify grayf32 funcs with rgbf32 funcs
mindmark at gmail.com
mindmark at gmail.com
Sun Nov 14 04:56:52 EET 2021
From: Mark Reid <mindmark at gmail.com>
This is meant to be a cosmetic change.
old timings:
42780 UNITS in grayf32le, 1 runs, 0 skips
56720 UNITS in grayf32le, 2 runs, 0 skips
67265 UNITS in grayf32le, 4 runs, 0 skips
58082 UNITS in grayf32le, 8 runs, 0 skips
63512 UNITS in grayf32le, 16 runs, 0 skips
52720 UNITS in grayf32le, 32 runs, 0 skips
46491 UNITS in grayf32le, 64 runs, 0 skips
68500 UNITS in grayf32be, 1 runs, 0 skips
66930 UNITS in grayf32be, 2 runs, 0 skips
62305 UNITS in grayf32be, 4 runs, 0 skips
55510 UNITS in grayf32be, 8 runs, 0 skips
50216 UNITS in grayf32be, 16 runs, 0 skips
44480 UNITS in grayf32be, 32 runs, 0 skips
42394 UNITS in grayf32be, 64 runs, 0 skips
new timings:
46660 UNITS in grayf32le, 1 runs, 0 skips
51830 UNITS in grayf32le, 2 runs, 0 skips
53390 UNITS in grayf32le, 4 runs, 0 skips
50910 UNITS in grayf32le, 8 runs, 0 skips
44968 UNITS in grayf32le, 16 runs, 0 skips
40349 UNITS in grayf32le, 32 runs, 0 skips
38330 UNITS in grayf32le, 64 runs, 0 skips
39980 UNITS in grayf32be, 1 runs, 0 skips
49630 UNITS in grayf32be, 2 runs, 0 skips
53540 UNITS in grayf32be, 4 runs, 0 skips
59767 UNITS in grayf32be, 8 runs, 0 skips
51206 UNITS in grayf32be, 16 runs, 0 skips
44743 UNITS in grayf32be, 32 runs, 0 skips
41468 UNITS in grayf32be, 64 runs, 0 skips
---
libswscale/input.c | 36 +++++++++++-------------------------
1 file changed, 11 insertions(+), 25 deletions(-)
diff --git a/libswscale/input.c b/libswscale/input.c
index 336f957c8c..90efdd2ffc 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1013,31 +1013,19 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s
}
}
-#undef rdpx
-
static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1,
- const uint8_t *unused2, int width, uint32_t *unused)
+ const uint8_t *unused2, int width, int is_be, uint32_t *unused)
{
int i;
const float *src = (const float *)_src;
uint16_t *dst = (uint16_t *)_dst;
for (i = 0; i < width; ++i){
- dst[i] = av_clip_uint16(lrintf(65535.0f * src[i]));
+ dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i)));
}
}
-static av_always_inline void grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1,
- const uint8_t *unused2, int width, uint32_t *unused)
-{
- int i;
- const uint32_t *src = (const uint32_t *)_src;
- uint16_t *dst = (uint16_t *)_dst;
-
- for (i = 0; i < width; ++i){
- dst[i] = av_clip_uint16(lrintf(65535.0f * av_int2float(av_bswap32(src[i]))));
- }
-}
+#undef rdpx
#define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \
static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \
@@ -1092,6 +1080,12 @@ static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4
int w, int32_t *rgb2yuv) \
{ \
planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv); \
+} \
+static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src, \
+ const uint8_t *unused1, const uint8_t *unused2, \
+ int width, uint32_t *unused) \
+{ \
+ grayf32ToY16_c(dst, src, unused1, unused2, width, endian, unused); \
}
rgbf32_planar_funcs_endian(le, 0)
@@ -1699,18 +1693,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
c->lumToYV12 = p010BEToY_c;
break;
case AV_PIX_FMT_GRAYF32LE:
-#if HAVE_BIGENDIAN
- c->lumToYV12 = grayf32ToY16_bswap_c;
-#else
- c->lumToYV12 = grayf32ToY16_c;
-#endif
+ c->lumToYV12 = grayf32leToY16_c;
break;
case AV_PIX_FMT_GRAYF32BE:
-#if HAVE_BIGENDIAN
- c->lumToYV12 = grayf32ToY16_c;
-#else
- c->lumToYV12 = grayf32ToY16_bswap_c;
-#endif
+ c->lumToYV12 = grayf32beToY16_c;
break;
case AV_PIX_FMT_Y210LE:
c->lumToYV12 = y210le_Y_c;
--
2.31.1.windows.1
More information about the ffmpeg-devel
mailing list