[FFmpeg-devel] [PATCH v1 4/6] swscale: RGB24->YUV allow odd widths & improve C rounding
John Cox
jc at kynesim.co.uk
Sun Aug 20 18:10:20 EEST 2023
Allow odd widths for conversion; it costs very little and simplifies
setup slightly. The x86 asm will fall back to the C code if the width is odd.
Round to nearest rather than just down. This reduces the Y error
reported by tests/swscale from 3 to 1. The x86 asm doesn't mirror the C
code, so exact correspondence isn't an issue there.
Signed-off-by: John Cox <jc at kynesim.co.uk>
---
libswscale/rgb2rgb_template.c | 42 ++++++++++++++++++-------------
libswscale/swscale_unscaled.c | 5 ++--
libswscale/x86/rgb2rgb_template.c | 5 ++++
3 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index e57bfa6545..5503e58a29 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -656,6 +656,8 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
int y;
const int chromWidth = width >> 1;
+ const int32_t ky = ((16 << 1) + 1) << (RGB2YUV_SHIFT - 1);
+ const int32_t kc = ((128 << 1) + 1) << (RGB2YUV_SHIFT - 1);
for (y = 0; y < height; y += 2) {
int i;
@@ -664,9 +666,9 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
unsigned int g = src[6 * i + 1];
unsigned int r = src[6 * i + 2];
- unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
- unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
- unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
+ unsigned int Y = (ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT;
+ unsigned int V = (rv * r + gv * g + bv * b + kc) >> RGB2YUV_SHIFT;
+ unsigned int U = (ru * r + gu * g + bu * b + kc) >> RGB2YUV_SHIFT;
udst[i] = U;
vdst[i] = V;
@@ -676,30 +678,36 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
g = src[6 * i + 4];
r = src[6 * i + 5];
- Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
+ Y = ((ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT);
ydst[2 * i + 1] = Y;
}
- ydst += lumStride;
- src += srcStride;
-
- if (y+1 == height)
- break;
-
- for (i = 0; i < chromWidth; i++) {
+ if ((width & 1) != 0) {
unsigned int b = src[6 * i + 0];
unsigned int g = src[6 * i + 1];
unsigned int r = src[6 * i + 2];
- unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
+ unsigned int Y = (ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT;
+ unsigned int V = (rv * r + gv * g + bv * b + kc) >> RGB2YUV_SHIFT;
+ unsigned int U = (ru * r + gu * g + bu * b + kc) >> RGB2YUV_SHIFT;
+ udst[i] = U;
+ vdst[i] = V;
ydst[2 * i] = Y;
+ }
+ ydst += lumStride;
+ src += srcStride;
- b = src[6 * i + 3];
- g = src[6 * i + 4];
- r = src[6 * i + 5];
+ if (y+1 == height)
+ break;
- Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
- ydst[2 * i + 1] = Y;
+ for (i = 0; i < width; i++) {
+ unsigned int b = src[3 * i + 0];
+ unsigned int g = src[3 * i + 1];
+ unsigned int r = src[3 * i + 2];
+
+ unsigned int Y = (ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT;
+
+ ydst[i] = Y;
}
udst += chromStride;
vdst += chromStride;
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 751bdcb2e4..e10f967755 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1994,7 +1994,6 @@ void ff_get_unscaled_swscale(SwsContext *c)
const enum AVPixelFormat dstFormat = c->dstFormat;
const int flags = c->flags;
const int dstH = c->dstH;
- const int dstW = c->dstW;
int needsDither;
needsDither = isAnyRGB(dstFormat) &&
@@ -2052,12 +2051,12 @@ void ff_get_unscaled_swscale(SwsContext *c)
/* bgr24toYV12 */
if (srcFormat == AV_PIX_FMT_BGR24 &&
(dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
- !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)) && !(dstW&1))
+ !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)))
c->convert_unscaled = bgr24ToYv12Wrapper;
/* rgb24toYV12 */
if (srcFormat == AV_PIX_FMT_RGB24 &&
(dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
- !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)) && !(dstW&1))
+ !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)))
c->convert_unscaled = rgb24ToYv12Wrapper;
/* RGB/BGR -> RGB/BGR (no dither needed forms) */
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index dc2b4e205a..f90527aa08 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -1555,6 +1555,11 @@ static inline void RENAME(bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
int y;
const x86_reg chromWidth= width>>1;
+ if ((width & 1) != 0) {
+ ff_bgr24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv);
+ return;
+ }
+
if (height > 2) {
ff_bgr24toyv12_c(src, ydst, udst, vdst, width, 2, lumStride, chromStride, srcStride, rgb2yuv);
src += 2*srcStride;
--
2.39.2
More information about the ffmpeg-devel
mailing list