[FFmpeg-devel] [PATCH] lavfi/lut: use FF_CEIL_RSHIFT for chroma w/h rounding.
Clément Bœsch
ubitux at gmail.com
Sat May 11 02:00:59 CEST 2013
On Sat, May 11, 2013 at 01:04:24AM +0200, Clément Bœsch wrote:
> ---
> We might consider adding a FF_CEIL_RSHIFT2 with this second form which,
> according to Jason and Ronald, might be more interesting in case of a
> constant shift.
>
Confirmed with a bench:
322 decicycles in const shift CEIL_RSHIFT_1, 67108153 runs, 711 skips
322 decicycles in const shift CEIL_RSHIFT_1, 134216426 runs, 1302 skips
321 decicycles in const shift CEIL_RSHIFT_1, 268432723 runs, 2733 skips
321 decicycles in const shift CEIL_RSHIFT_1, 536865187 runs, 5725 skips
243 decicycles in const shift CEIL_RSHIFT_2, 67108220 runs, 644 skips
242 decicycles in const shift CEIL_RSHIFT_2, 134216551 runs, 1177 skips
242 decicycles in const shift CEIL_RSHIFT_2, 268432973 runs, 2483 skips
242 decicycles in const shift CEIL_RSHIFT_2, 536866036 runs, 4876 skips
436 decicycles in non-const shift + CEIL_RSHIFT_1, 67107406 runs, 1458 skips
435 decicycles in non-const shift + CEIL_RSHIFT_1, 134214761 runs, 2967 skips
434 decicycles in non-const shift + CEIL_RSHIFT_1, 268429706 runs, 5750 skips
433 decicycles in non-const shift + CEIL_RSHIFT_1, 536860455 runs, 10457 skips
451 decicycles in non-const shift + CEIL_RSHIFT_2, 67107498 runs, 1366 skips
451 decicycles in non-const shift + CEIL_RSHIFT_2, 134214901 runs, 2827 skips
450 decicycles in non-const shift + CEIL_RSHIFT_2, 268429866 runs, 5590 skips
450 decicycles in non-const shift + CEIL_RSHIFT_2, 536859571 runs, 11341 skips
(non-const shift tested with max_shift=13)
Test patch attached, feel free to try it out, with for instance (what I
used for the above bench):
make libavutil/ceilrshift-test
./libavutil/ceilrshift-test 1
./libavutil/ceilrshift-test 2
./libavutil/ceilrshift-test 1 13
./libavutil/ceilrshift-test 2 13
So anyway, I'll keep the current form for the macro, but we could make use
of a second version for constant shifts. AFAICT, these are the only
occurrences with a constant shift:
libavcodec/snow.c: int w= FF_CEIL_RSHIFT(s->avctx->width, LOG2_MB_SIZE);
libavcodec/snow.c: int h= FF_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
libswscale/x86/rgb2rgb_template.c: const int chromWidth = FF_CEIL_RSHIFT(width, 1);
libswscale/x86/rgb2rgb_template.c: const int chromWidth = FF_CEIL_RSHIFT(width, 1);
libswscale/x86/rgb2rgb_template.c: const int chromWidth = FF_CEIL_RSHIFT(width, 1);
libswscale/x86/rgb2rgb_template.c: const int chromWidth = FF_CEIL_RSHIFT(width, 1);
Anyway, sorry for the totally overkill benchmark :)
--
Clément B.
-------------- next part --------------
From d9ce927978b19c4eb6b698ae48fd570b407cf953 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <ubitux at gmail.com>
Date: Sat, 11 May 2013 01:47:11 +0200
Subject: [PATCH] rshift test
---
libavutil/Makefile | 1 +
libavutil/ceilrshift.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 70 insertions(+)
create mode 100644 libavutil/ceilrshift.c
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 30f733f..7a72e25 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -126,6 +126,7 @@ TESTPROGS = adler32 \
base64 \
blowfish \
bprint \
+ ceilrshift \
cpu \
crc \
des \
diff --git a/libavutil/ceilrshift.c b/libavutil/ceilrshift.c
new file mode 100644
index 0000000..5b5d983
--- /dev/null
+++ b/libavutil/ceilrshift.c
@@ -0,0 +1,69 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "libavutil/log.h"
+#include "libavutil/timer.h"
+
+#define CEIL_RSHIFT_1(x, n) (-((-(x)) >> (n))) /* negate, shift right, negate again; note that >> on a negative operand is implementation-defined in C */
+#define CEIL_RSHIFT_2(x, n) (((x) + (1<<(n)) - 1) >> (n)) /* add 2^n - 1 before shifting right; evaluates n twice */
+
+#ifdef TEST
+int main(int ac, char **av) /* Benchmark driver: av[1] picks CEIL_RSHIFT_1 or CEIL_RSHIFT_2, optional av[2] is max_shift. */
+{
+ int a, mode, max_shift; /* a is the value being shifted; it is iterated by the test macros below */
+
+ if (ac != 2 && ac != 3) { /* exactly one or two command-line arguments are accepted */
+ printf("%s mode <max_shift>\n", av[0]);
+ return -1;
+ }
+
+ mode = av[1][0] - '0'; /* only the first character of av[1] is examined */
+ max_shift = ac == 3 ? atoi(av[2]) : 0; /* 0 (no av[2], or atoi() yielding 0) selects the constant-shift test below */
+
+ if (mode != 1 && mode != 2) /* any mode other than 1 or 2 is rejected without a message */
+ return -1;
+
+#define NON_CONST_SHIFT_TEST(m) do { /* per value of a, times CEIL_RSHIFT_<m>(a, b) for b = 0 .. max_shift-1 */ \
+ int b; \
+ volatile int r; /* volatile: every assignment to r is an observable store */ \
+ for (a = 0; a < 1<<16; a++) { \
+ START_TIMER; \
+ for (b = 0; b < max_shift; b++) \
+ r = CEIL_RSHIFT_##m(a, b); \
+ STOP_TIMER("non-const shift + CEIL_RSHIFT_" #m); \
+ } \
+ (void)r; \
+} while (1) /* repeats the whole sweep forever; no exit path is visible here, so presumably the run is stopped by hand */
+
+#define CONST_SHIFT_TEST(m) do { /* per value of a, times 13 uses of CEIL_RSHIFT_<m> with the literal shifts 1 .. 13 */ \
+ volatile int r; /* volatile: every assignment to r is an observable store */ \
+ for (a = 0; a < 1<<16; a++) { \
+ START_TIMER; \
+ r = CEIL_RSHIFT_##m(a, 1); \
+ r = CEIL_RSHIFT_##m(a, 2); \
+ r = CEIL_RSHIFT_##m(a, 3); \
+ r = CEIL_RSHIFT_##m(a, 4); \
+ r = CEIL_RSHIFT_##m(a, 5); \
+ r = CEIL_RSHIFT_##m(a, 6); \
+ r = CEIL_RSHIFT_##m(a, 7); \
+ r = CEIL_RSHIFT_##m(a, 8); \
+ r = CEIL_RSHIFT_##m(a, 9); \
+ r = CEIL_RSHIFT_##m(a, 10); \
+ r = CEIL_RSHIFT_##m(a, 11); \
+ r = CEIL_RSHIFT_##m(a, 12); \
+ r = CEIL_RSHIFT_##m(a, 13); \
+ STOP_TIMER("const shift CEIL_RSHIFT_" #m); \
+ } \
+ (void)r; \
+} while (1) /* repeats the whole sweep forever; no exit path is visible here, so presumably the run is stopped by hand */
+
+ if (mode == 1) { /* dispatch: mode picks the macro variant, max_shift picks variable vs. constant shifts */
+ if (max_shift) NON_CONST_SHIFT_TEST(1);
+ else CONST_SHIFT_TEST(1);
+ } else {
+ if (max_shift) NON_CONST_SHIFT_TEST(2);
+ else CONST_SHIFT_TEST(2);
+ }
+
+ return 0; /* NOTE(review): appears unreachable, since both test macros end in while (1) */
+}
+#endif
--
1.8.2.2
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130511/1da4a79b/attachment.asc>
More information about the ffmpeg-devel
mailing list