[FFmpeg-devel] [PATCH] avutil/mathematics: speed up av_gcd by using Stein's binary GCD algorithm

Sat Oct 10 23:06:10 CEST 2015

This uses Stein's binary GCD algorithm:
https://en.wikipedia.org/wiki/Binary_GCD_algorithm
to get a reported 1.7-2.1x speedup over Euclidean GCD on standard architectures.
Have not benchmarked, so I can't comment.
Note that there may be some room for optimization of the bit operations.

Quick note regarding overflow: yes, subtractions on int64_t can, but the
llabs takes care of that. The llabs is also guaranteed to be safe, with
no annoying INT64_MIN business since INT64_MIN being a power of 2, is
shifted down before being sent to llabs.

The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On
GCC, this is provided by a built-in. On Microsoft, there is a
BitScanForward64 analog of BitScanForward that should work; but I can't confirm.
Apparently it is not available on 32 bit builds; so this may or may not
work correctly. On Intel, per the documentation there is only an
intrinsic for _bit_scan_forward and people have posted on forums
regarding _bit_scan_forward64, but often their documentation is
woeful. Again, I don't have it, so I can't test.

As such, to be safe, for now only the GCC intrinsic is added, the rest
use a compiled version.

Tested with FATE.

Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>
---
 libavutil/intmath.h     | 39 +++++++++++++++++++++++++++++++++++++++
 libavutil/mathematics.c | 26 +++++++++++++++++++++-----
 2 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/libavutil/intmath.h b/libavutil/intmath.h
index 08d54a6..63657d9 100644
--- a/libavutil/intmath.h
+++ b/libavutil/intmath.h
@@ -114,6 +114,9 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
 #ifndef ff_ctz
 #define ff_ctz(v) __builtin_ctz(v)
 #endif
+#ifndef ff_ctzll
+#define ff_ctzll(v) __builtin_ctzll(v)
+#endif
 #endif
 #endif
 
@@ -158,6 +161,42 @@ static av_always_inline av_const int ff_ctz_c( int v )
 #endif
 #endif
 
+#ifndef ff_ctzll
+#define ff_ctzll ff_ctzll_c
+static av_always_inline av_const int ff_ctzll_c(long long v)
+{
+    long long c;
+
+    if (v & 0x1)
+        return 0;
+
+    c = 1;
+    if (!(v & 0xffffffff)) {
+        v >>= 32;
+        c += 32;
+    }
+    if (!(v & 0xffff)) {
+        v >>= 16;
+        c += 16;
+    }
+    if (!(v & 0xff)) {
+        v >>= 8;
+        c += 8;
+    }
+    if (!(v & 0xf)) {
+        v >>= 4;
+        c += 4;
+    }
+    if (!(v & 0x3)) {
+        v >>= 2;
+        c += 2;
+    }
+    c -= v & 0x1;
+
+    return c;
+}
+#endif
+
 /**
  * Trailing zero bit count.
  *
diff --git a/libavutil/mathematics.c b/libavutil/mathematics.c
index 252794e..16e4eba 100644
--- a/libavutil/mathematics.c
+++ b/libavutil/mathematics.c
@@ -27,16 +27,32 @@
 #include <limits.h>
 
 #include "mathematics.h"
+#include "libavutil/intmath.h"
 #include "libavutil/common.h"
 #include "avassert.h"
 #include "version.h"
 
-int64_t av_gcd(int64_t a, int64_t b)
-{
-    if (b)
-        return av_gcd(b, a % b);
-    else
+/* Stein's binary GCD algorithm:
+ * https://en.wikipedia.org/wiki/Binary_GCD_algorithm */
+int64_t av_gcd(int64_t a, int64_t b) {
+    int za, zb, k;
+    int64_t u, v;
+    if (a == 0)
+        return b;
+    if (b == 0)
         return a;
+    za = ff_ctzll(a);
+    zb = ff_ctzll(b);
+    k  = FFMIN(za, zb);
+    u = llabs(a >> za);
+    v = llabs(b >> zb);
+    while (u != v) {
+        if (u > v)
+            FFSWAP(int64_t, v, u);
+        v -= u;
+        v >>= ff_ctzll(v);
+    }
+    return u << k;
 }
 
 int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
-- 
2.6.1