[FFmpeg-devel] [PATCH 2/2] x86: hevc_mt: use proxy functions for WP
Christophe Gisquet
christophe.gisquet at gmail.com
Thu Oct 2 20:52:45 CEST 2014
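Route the weighted-prediction (WP) wrappers for the wider blocks through
shared proxy functions that receive the block width as an argument,
instead of instantiating one wrapper per width.

On Win64: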
Before: 155576b
64765 decicycles in qpel_bi_w, 8185 runs, 7 skips
13676 decicycles in epel_bi_w, 16378 runs, 6 skips
54402 decicycles in qpel_uni_w, 1023 runs, 1 skips
12328 decicycles in epel_uni_w, 2048 runs, 0 skips
After: 94260b
65037 decicycles in qpel_bi_w, 8185 runs, 7 skips
13752 decicycles in epel_bi_w, 16380 runs, 4 skips
54709 decicycles in qpel_uni_w, 1021 runs, 3 skips
12037 decicycles in epel_uni_w, 2047 runs, 1 skips
---
libavcodec/x86/hevcdsp_init.c | 542 +++++++++++++++++++++++++++++++++++-------
1 file changed, 461 insertions(+), 81 deletions(-)
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 4c536ac..a8284db 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -550,9 +550,23 @@ mc_rep_proxies(qpel_hv,12, 8, sse4);
#define ff_hevc_put_hevc_bi_qpel_hv16_12_sse4 proxy_bi_qpel_hv8_12_sse4
mc_rep_funcs(qpel_hv,12, 4, 12, sse4);
+#define mc_rep_uni_w_proxy(bitd, step, opt) \
+static void proxy_uni_w##step##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \
+ int height, int denom, int _wx, int _ox, int width) \
+{ \
+ int i; \
+ int16_t *src; \
+ uint8_t *dst; \
+ for (i = 0; i < width; i += step) { \
+ src= _src + i; \
+ dst= _dst + (i * ((bitd + 7) / 8)); \
+ ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, height, denom, _wx, _ox); \
+ } \
+}
+
#define mc_rep_uni_w(bitd, step, W, opt) \
-void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride,\
- int height, int denom, int _wx, int _ox) \
+static void no_proxy_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \
+ int height, int denom, int _wx, int _ox, int width) \
{ \
int i; \
int16_t *src; \
@@ -560,36 +574,84 @@ void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststri
for (i = 0; i < W; i += step) { \
src= _src + i; \
dst= _dst + (i * ((bitd + 7) / 8)); \
- ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
- height, denom, _wx, _ox); \
+ ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, height, denom, _wx, _ox); \
} \
}
+#define mc_rep_uni_w_unproxy(bitd, W, opt) \
+static void unproxy_uni_w##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, int16_t *src, ptrdiff_t srcstride, \
+ int height, int denom, int _wx, int _ox, int width) \
+{ \
+ ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(dst, dststride, src, srcstride, height, denom, _wx, _ox); \
+}
+
mc_rep_uni_w(8, 6, 12, sse4);
-mc_rep_uni_w(8, 8, 16, sse4);
-mc_rep_uni_w(8, 8, 24, sse4);
-mc_rep_uni_w(8, 8, 32, sse4);
-mc_rep_uni_w(8, 8, 48, sse4);
-mc_rep_uni_w(8, 8, 64, sse4);
+#define ff_hevc_put_hevc_uni_w12_8_sse4 no_proxy_uni_w12_8_sse4
+mc_rep_uni_w_proxy(8, 8, sse4);
+#define ff_hevc_put_hevc_uni_w64_8_sse4 proxy_uni_w8_8_sse4
+#define ff_hevc_put_hevc_uni_w48_8_sse4 proxy_uni_w8_8_sse4
+#define ff_hevc_put_hevc_uni_w32_8_sse4 proxy_uni_w8_8_sse4
+#define ff_hevc_put_hevc_uni_w24_8_sse4 proxy_uni_w8_8_sse4
+#define ff_hevc_put_hevc_uni_w16_8_sse4 proxy_uni_w8_8_sse4
+mc_rep_uni_w_unproxy(8, 4, sse4);
+mc_rep_uni_w_unproxy(8, 6, sse4);
+mc_rep_uni_w_unproxy(8, 8, sse4);
+#define ff_hevc_put_hevc_uni_w4_8_sse4 unproxy_uni_w4_8_sse4
+#define ff_hevc_put_hevc_uni_w6_8_sse4 unproxy_uni_w6_8_sse4
+#define ff_hevc_put_hevc_uni_w8_8_sse4 unproxy_uni_w8_8_sse4
mc_rep_uni_w(10, 6, 12, sse4);
-mc_rep_uni_w(10, 8, 16, sse4);
-mc_rep_uni_w(10, 8, 24, sse4);
-mc_rep_uni_w(10, 8, 32, sse4);
-mc_rep_uni_w(10, 8, 48, sse4);
-mc_rep_uni_w(10, 8, 64, sse4);
+#define ff_hevc_put_hevc_uni_w12_10_sse4 no_proxy_uni_w12_10_sse4
+mc_rep_uni_w_proxy(10, 8, sse4);
+#define ff_hevc_put_hevc_uni_w64_10_sse4 proxy_uni_w8_10_sse4
+#define ff_hevc_put_hevc_uni_w48_10_sse4 proxy_uni_w8_10_sse4
+#define ff_hevc_put_hevc_uni_w32_10_sse4 proxy_uni_w8_10_sse4
+#define ff_hevc_put_hevc_uni_w24_10_sse4 proxy_uni_w8_10_sse4
+#define ff_hevc_put_hevc_uni_w16_10_sse4 proxy_uni_w8_10_sse4
+mc_rep_uni_w_unproxy(10, 4, sse4);
+mc_rep_uni_w_unproxy(10, 6, sse4);
+mc_rep_uni_w_unproxy(10, 8, sse4);
+#define ff_hevc_put_hevc_uni_w4_10_sse4 unproxy_uni_w4_10_sse4
+#define ff_hevc_put_hevc_uni_w6_10_sse4 unproxy_uni_w6_10_sse4
+#define ff_hevc_put_hevc_uni_w8_10_sse4 unproxy_uni_w8_10_sse4
mc_rep_uni_w(12, 6, 12, sse4);
-mc_rep_uni_w(12, 8, 16, sse4);
-mc_rep_uni_w(12, 8, 24, sse4);
-mc_rep_uni_w(12, 8, 32, sse4);
-mc_rep_uni_w(12, 8, 48, sse4);
-mc_rep_uni_w(12, 8, 64, sse4);
+#define ff_hevc_put_hevc_uni_w12_12_sse4 no_proxy_uni_w12_12_sse4
+mc_rep_uni_w_proxy(12, 8, sse4);
+#define ff_hevc_put_hevc_uni_w64_12_sse4 proxy_uni_w8_12_sse4
+#define ff_hevc_put_hevc_uni_w48_12_sse4 proxy_uni_w8_12_sse4
+#define ff_hevc_put_hevc_uni_w32_12_sse4 proxy_uni_w8_12_sse4
+#define ff_hevc_put_hevc_uni_w24_12_sse4 proxy_uni_w8_12_sse4
+#define ff_hevc_put_hevc_uni_w16_12_sse4 proxy_uni_w8_12_sse4
+mc_rep_uni_w_unproxy(12, 4, sse4);
+mc_rep_uni_w_unproxy(12, 6, sse4);
+mc_rep_uni_w_unproxy(12, 8, sse4);
+#define ff_hevc_put_hevc_uni_w4_12_sse4 unproxy_uni_w4_12_sse4
+#define ff_hevc_put_hevc_uni_w6_12_sse4 unproxy_uni_w6_12_sse4
+#define ff_hevc_put_hevc_uni_w8_12_sse4 unproxy_uni_w8_12_sse4
+
+#define mc_rep_bi_w_proxy(bitd, step, opt) \
+static void proxy_bi_w##step##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \
+ int16_t *_src2, int height, \
+ int denom, int _wx0, int _wx1, int _ox0, int _ox1, int width) \
+{ \
+ int i; \
+ int16_t *src; \
+ int16_t *src2; \
+ uint8_t *dst; \
+ for (i = 0; i < width; i += step) { \
+ src = _src + i; \
+ src2 = _src2 + i; \
+ dst = _dst + (i * ((bitd + 7) / 8)); \
+ ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
+ height, denom, _wx0, _wx1, _ox0, _ox1); \
+ } \
+}
#define mc_rep_bi_w(bitd, step, W, opt) \
-void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \
+static void no_proxy_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \
int16_t *_src2, int height, \
- int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
+ int denom, int _wx0, int _wx1, int _ox0, int _ox1, int width) \
{ \
int i; \
int16_t *src; \
@@ -604,26 +666,69 @@ void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststrid
} \
}
+#define mc_rep_bi_w_unproxy(bitd, W, opt) \
+static void unproxy_bi_w##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dstride, int16_t *src, ptrdiff_t sstride, \
+ int16_t *src2, int h, int denom, int w0, int w1, int o0, int o1, int w) \
+{ \
+ ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(dst, dstride, src, sstride, src2, h, denom, w0, w1, o0, o1); \
+}
+
mc_rep_bi_w(8, 6, 12, sse4);
-mc_rep_bi_w(8, 8, 16, sse4);
-mc_rep_bi_w(8, 8, 24, sse4);
-mc_rep_bi_w(8, 8, 32, sse4);
-mc_rep_bi_w(8, 8, 48, sse4);
-mc_rep_bi_w(8, 8, 64, sse4);
+#define ff_hevc_put_hevc_bi_w12_8_sse4 no_proxy_bi_w12_8_sse4
+mc_rep_bi_w_proxy(8, 8, sse4);
+#define ff_hevc_put_hevc_bi_w64_8_sse4 proxy_bi_w8_8_sse4
+#define ff_hevc_put_hevc_bi_w48_8_sse4 proxy_bi_w8_8_sse4
+#define ff_hevc_put_hevc_bi_w32_8_sse4 proxy_bi_w8_8_sse4
+#define ff_hevc_put_hevc_bi_w24_8_sse4 proxy_bi_w8_8_sse4
+#define ff_hevc_put_hevc_bi_w16_8_sse4 proxy_bi_w8_8_sse4
+mc_rep_bi_w_unproxy(8, 4, sse4);
+mc_rep_bi_w_unproxy(8, 6, sse4);
+mc_rep_bi_w_unproxy(8, 8, sse4);
+#define ff_hevc_put_hevc_bi_w4_8_sse4 unproxy_bi_w4_8_sse4
+#define ff_hevc_put_hevc_bi_w6_8_sse4 unproxy_bi_w6_8_sse4
+#define ff_hevc_put_hevc_bi_w8_8_sse4 unproxy_bi_w8_8_sse4
mc_rep_bi_w(10, 6, 12, sse4);
-mc_rep_bi_w(10, 8, 16, sse4);
-mc_rep_bi_w(10, 8, 24, sse4);
-mc_rep_bi_w(10, 8, 32, sse4);
-mc_rep_bi_w(10, 8, 48, sse4);
-mc_rep_bi_w(10, 8, 64, sse4);
+#define ff_hevc_put_hevc_bi_w12_10_sse4 no_proxy_bi_w12_10_sse4
+mc_rep_bi_w_proxy(10, 8, sse4);
+#define ff_hevc_put_hevc_bi_w64_10_sse4 proxy_bi_w8_10_sse4
+#define ff_hevc_put_hevc_bi_w48_10_sse4 proxy_bi_w8_10_sse4
+#define ff_hevc_put_hevc_bi_w32_10_sse4 proxy_bi_w8_10_sse4
+#define ff_hevc_put_hevc_bi_w24_10_sse4 proxy_bi_w8_10_sse4
+#define ff_hevc_put_hevc_bi_w16_10_sse4 proxy_bi_w8_10_sse4
+mc_rep_bi_w_unproxy(10, 4, sse4);
+mc_rep_bi_w_unproxy(10, 6, sse4);
+mc_rep_bi_w_unproxy(10, 8, sse4);
+#define ff_hevc_put_hevc_bi_w4_10_sse4 unproxy_bi_w4_10_sse4
+#define ff_hevc_put_hevc_bi_w6_10_sse4 unproxy_bi_w6_10_sse4
+#define ff_hevc_put_hevc_bi_w8_10_sse4 unproxy_bi_w8_10_sse4
mc_rep_bi_w(12, 6, 12, sse4);
-mc_rep_bi_w(12, 8, 16, sse4);
-mc_rep_bi_w(12, 8, 24, sse4);
-mc_rep_bi_w(12, 8, 32, sse4);
-mc_rep_bi_w(12, 8, 48, sse4);
-mc_rep_bi_w(12, 8, 64, sse4);
+#define ff_hevc_put_hevc_bi_w12_12_sse4 no_proxy_bi_w12_12_sse4
+mc_rep_bi_w_proxy(12, 8, sse4);
+#define ff_hevc_put_hevc_bi_w64_12_sse4 proxy_bi_w8_12_sse4
+#define ff_hevc_put_hevc_bi_w48_12_sse4 proxy_bi_w8_12_sse4
+#define ff_hevc_put_hevc_bi_w32_12_sse4 proxy_bi_w8_12_sse4
+#define ff_hevc_put_hevc_bi_w24_12_sse4 proxy_bi_w8_12_sse4
+#define ff_hevc_put_hevc_bi_w16_12_sse4 proxy_bi_w8_12_sse4
+mc_rep_bi_w_unproxy(12, 4, sse4);
+mc_rep_bi_w_unproxy(12, 6, sse4);
+mc_rep_bi_w_unproxy(12, 8, sse4);
+#define ff_hevc_put_hevc_bi_w4_12_sse4 unproxy_bi_w4_12_sse4
+#define ff_hevc_put_hevc_bi_w6_12_sse4 unproxy_bi_w6_12_sse4
+#define ff_hevc_put_hevc_bi_w8_12_sse4 unproxy_bi_w8_12_sse4
+
+#define mc_uni_w_func_proxy(name, bitd, step, opt) \
+static void proxy_uni_w_##name##step##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
+ uint8_t *src, ptrdiff_t srcstride, \
+ int height, int denom, \
+ int wx, int ox, \
+ intptr_t mx, intptr_t my, int width) \
+{ \
+ LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
+ proxy_##name##step##_##bitd##_##opt(temp, src, srcstride, height, mx, my, width); \
+ proxy_uni_w8##_##bitd##_##opt(dst, dststride, temp, MAX_PB_SIZE, height, denom, wx, ox, width);\
+}
#define mc_uni_w_func(name, bitd, W, opt) \
void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
@@ -634,54 +739,199 @@ void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t
{ \
LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
- ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, height, denom, _wx, _ox);\
+ ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, height, denom, _wx, _ox, width);\
}
#define mc_uni_w_funcs(name, bitd, opt) \
mc_uni_w_func(name, bitd, 4, opt); \
mc_uni_w_func(name, bitd, 8, opt); \
mc_uni_w_func(name, bitd, 12, opt); \
- mc_uni_w_func(name, bitd, 16, opt); \
mc_uni_w_func(name, bitd, 24, opt); \
+ mc_uni_w_func(name, bitd, 16, opt); \
mc_uni_w_func(name, bitd, 32, opt); \
mc_uni_w_func(name, bitd, 48, opt); \
mc_uni_w_func(name, bitd, 64, opt)
-mc_uni_w_funcs(pel_pixels, 8, sse4);
+#define mc_uni_w_proxy_funcs(name, bitd, step, opt) \
+ mc_uni_w_func(name, bitd, 4, opt); \
+ mc_uni_w_func(name, bitd, 8, opt); \
+ mc_uni_w_func(name, bitd, 12, opt); \
+ mc_uni_w_func_proxy(name, bitd, step, opt)
+
+
+mc_uni_w_proxy_funcs(pel_pixels, 8, 16, sse4);
+#define ff_hevc_put_hevc_uni_w_pel_pixels16_8_sse4 proxy_uni_w_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels32_8_sse4 proxy_uni_w_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels48_8_sse4 proxy_uni_w_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels64_8_sse4 proxy_uni_w_pel_pixels16_8_sse4
+mc_uni_w_func(pel_pixels, 8, 24, sse4);
mc_uni_w_func(pel_pixels, 8, 6, sse4);
-mc_uni_w_funcs(epel_h, 8, sse4);
+
+mc_uni_w_proxy_funcs(epel_h, 8, 16, sse4);
+#define ff_hevc_put_hevc_uni_w_epel_h16_8_sse4 proxy_uni_w_epel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h32_8_sse4 proxy_uni_w_epel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h48_8_sse4 proxy_uni_w_epel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h64_8_sse4 proxy_uni_w_epel_h16_8_sse4
+mc_uni_w_func(epel_h, 8, 24, sse4);
mc_uni_w_func(epel_h, 8, 6, sse4);
-mc_uni_w_funcs(epel_v, 8, sse4);
+
+mc_uni_w_proxy_funcs(epel_v, 8, 16, sse4);
+#define ff_hevc_put_hevc_uni_w_epel_v16_8_sse4 proxy_uni_w_epel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v32_8_sse4 proxy_uni_w_epel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v48_8_sse4 proxy_uni_w_epel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v64_8_sse4 proxy_uni_w_epel_v16_8_sse4
+mc_uni_w_func(epel_v, 8, 24, sse4);
mc_uni_w_func(epel_v, 8, 6, sse4);
-mc_uni_w_funcs(epel_hv, 8, sse4);
+
+mc_uni_w_proxy_funcs(epel_hv, 8, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_epel_hv16_8_sse4 proxy_uni_w_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv24_8_sse4 proxy_uni_w_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv32_8_sse4 proxy_uni_w_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv48_8_sse4 proxy_uni_w_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv64_8_sse4 proxy_uni_w_epel_hv8_8_sse4
mc_uni_w_func(epel_hv, 8, 6, sse4);
-mc_uni_w_funcs(qpel_h, 8, sse4);
-mc_uni_w_funcs(qpel_v, 8, sse4);
-mc_uni_w_funcs(qpel_hv, 8, sse4);
-mc_uni_w_funcs(pel_pixels, 10, sse4);
+mc_uni_w_proxy_funcs(qpel_h, 8, 16, sse4);
+#define ff_hevc_put_hevc_uni_w_qpel_h16_8_sse4 proxy_uni_w_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h32_8_sse4 proxy_uni_w_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h48_8_sse4 proxy_uni_w_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h64_8_sse4 proxy_uni_w_qpel_h16_8_sse4
+mc_uni_w_func(qpel_h, 8, 24, sse4);
+
+mc_uni_w_proxy_funcs(qpel_v, 8, 16, sse4);
+#define ff_hevc_put_hevc_uni_w_qpel_v16_8_sse4 proxy_uni_w_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v32_8_sse4 proxy_uni_w_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v48_8_sse4 proxy_uni_w_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v64_8_sse4 proxy_uni_w_qpel_v16_8_sse4
+mc_uni_w_func(qpel_v, 8, 24, sse4);
+
+mc_uni_w_proxy_funcs(qpel_hv, 8, 8, sse4);
+mc_uni_w_func(qpel_hv, 8, 16, sse4);
+#define ff_hevc_put_hevc_uni_w_qpel_hv24_8_sse4 proxy_uni_w_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv32_8_sse4 proxy_uni_w_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv48_8_sse4 proxy_uni_w_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv64_8_sse4 proxy_uni_w_qpel_hv8_8_sse4
+
+mc_uni_w_proxy_funcs(pel_pixels, 10, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_pel_pixels16_10_sse4 proxy_uni_w_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels24_10_sse4 proxy_uni_w_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels32_10_sse4 proxy_uni_w_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels48_10_sse4 proxy_uni_w_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels64_10_sse4 proxy_uni_w_pel_pixels8_10_sse4
mc_uni_w_func(pel_pixels, 10, 6, sse4);
-mc_uni_w_funcs(epel_h, 10, sse4);
+
+mc_uni_w_proxy_funcs(epel_h, 10, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_epel_h16_10_sse4 proxy_uni_w_epel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h24_10_sse4 proxy_uni_w_epel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h32_10_sse4 proxy_uni_w_epel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h48_10_sse4 proxy_uni_w_epel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h64_10_sse4 proxy_uni_w_epel_h8_10_sse4
mc_uni_w_func(epel_h, 10, 6, sse4);
-mc_uni_w_funcs(epel_v, 10, sse4);
+
+mc_uni_w_proxy_funcs(epel_v, 10, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_epel_v16_10_sse4 proxy_uni_w_epel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v24_10_sse4 proxy_uni_w_epel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v32_10_sse4 proxy_uni_w_epel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v48_10_sse4 proxy_uni_w_epel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v64_10_sse4 proxy_uni_w_epel_v8_10_sse4
mc_uni_w_func(epel_v, 10, 6, sse4);
-mc_uni_w_funcs(epel_hv, 10, sse4);
+
+mc_uni_w_proxy_funcs(epel_hv, 10, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_epel_hv16_10_sse4 proxy_uni_w_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv24_10_sse4 proxy_uni_w_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv32_10_sse4 proxy_uni_w_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv48_10_sse4 proxy_uni_w_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv64_10_sse4 proxy_uni_w_epel_hv8_10_sse4
mc_uni_w_func(epel_hv, 10, 6, sse4);
-mc_uni_w_funcs(qpel_h, 10, sse4);
-mc_uni_w_funcs(qpel_v, 10, sse4);
-mc_uni_w_funcs(qpel_hv, 10, sse4);
-mc_uni_w_funcs(pel_pixels, 12, sse4);
+mc_uni_w_proxy_funcs(qpel_h, 10, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_qpel_h16_10_sse4 proxy_uni_w_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h24_10_sse4 proxy_uni_w_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h32_10_sse4 proxy_uni_w_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h48_10_sse4 proxy_uni_w_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h64_10_sse4 proxy_uni_w_qpel_h8_10_sse4
+
+mc_uni_w_proxy_funcs(qpel_v, 10, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_qpel_v16_10_sse4 proxy_uni_w_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v24_10_sse4 proxy_uni_w_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v32_10_sse4 proxy_uni_w_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v48_10_sse4 proxy_uni_w_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v64_10_sse4 proxy_uni_w_qpel_v8_10_sse4
+
+mc_uni_w_proxy_funcs(qpel_hv, 10, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_qpel_hv16_10_sse4 proxy_uni_w_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv24_10_sse4 proxy_uni_w_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv32_10_sse4 proxy_uni_w_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv48_10_sse4 proxy_uni_w_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv64_10_sse4 proxy_uni_w_qpel_hv8_10_sse4
+
+mc_uni_w_proxy_funcs(pel_pixels, 12, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_pel_pixels16_12_sse4 proxy_uni_w_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels24_12_sse4 proxy_uni_w_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels32_12_sse4 proxy_uni_w_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels48_12_sse4 proxy_uni_w_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_uni_w_pel_pixels64_12_sse4 proxy_uni_w_pel_pixels8_12_sse4
mc_uni_w_func(pel_pixels, 12, 6, sse4);
-mc_uni_w_funcs(epel_h, 12, sse4);
+
+mc_uni_w_proxy_funcs(epel_h, 12, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_epel_h16_12_sse4 proxy_uni_w_epel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h24_12_sse4 proxy_uni_w_epel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h32_12_sse4 proxy_uni_w_epel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h48_12_sse4 proxy_uni_w_epel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_h64_12_sse4 proxy_uni_w_epel_h8_12_sse4
mc_uni_w_func(epel_h, 12, 6, sse4);
-mc_uni_w_funcs(epel_v, 12, sse4);
+
+mc_uni_w_proxy_funcs(epel_v, 12, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_epel_v16_12_sse4 proxy_uni_w_epel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v24_12_sse4 proxy_uni_w_epel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v32_12_sse4 proxy_uni_w_epel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v48_12_sse4 proxy_uni_w_epel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_v64_12_sse4 proxy_uni_w_epel_v8_12_sse4
mc_uni_w_func(epel_v, 12, 6, sse4);
-mc_uni_w_funcs(epel_hv, 12, sse4);
+
+mc_uni_w_proxy_funcs(epel_hv, 12, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_epel_hv16_12_sse4 proxy_uni_w_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv24_12_sse4 proxy_uni_w_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv32_12_sse4 proxy_uni_w_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv48_12_sse4 proxy_uni_w_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_w_epel_hv64_12_sse4 proxy_uni_w_epel_hv8_12_sse4
mc_uni_w_func(epel_hv, 12, 6, sse4);
-mc_uni_w_funcs(qpel_h, 12, sse4);
-mc_uni_w_funcs(qpel_v, 12, sse4);
-mc_uni_w_funcs(qpel_hv, 12, sse4);
+
+mc_uni_w_proxy_funcs(qpel_h, 12, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_qpel_h16_12_sse4 proxy_uni_w_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h24_12_sse4 proxy_uni_w_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h32_12_sse4 proxy_uni_w_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h48_12_sse4 proxy_uni_w_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_h64_12_sse4 proxy_uni_w_qpel_h8_12_sse4
+
+mc_uni_w_proxy_funcs(qpel_v, 12, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_qpel_v16_12_sse4 proxy_uni_w_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v24_12_sse4 proxy_uni_w_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v32_12_sse4 proxy_uni_w_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v48_12_sse4 proxy_uni_w_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_v64_12_sse4 proxy_uni_w_qpel_v8_12_sse4
+
+mc_uni_w_proxy_funcs(qpel_hv, 12, 8, sse4);
+#define ff_hevc_put_hevc_uni_w_qpel_hv16_12_sse4 proxy_uni_w_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv24_12_sse4 proxy_uni_w_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv32_12_sse4 proxy_uni_w_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv48_12_sse4 proxy_uni_w_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_w_qpel_hv64_12_sse4 proxy_uni_w_qpel_hv8_12_sse4
+
+// 'step' only selects the first proxy (the filter call); the weighting call always uses the 8-wide proxy
+#define mc_bi_w_func_proxy(name, bitd, step, opt) \
+static void proxy_bi_w_##name##step##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
+ uint8_t *_src, ptrdiff_t _srcstride, \
+ int16_t *_src2, \
+ int height, int denom, \
+ int _wx0, int _wx1, int _ox0, int _ox1, \
+ intptr_t mx, intptr_t my, int width) \
+{ \
+ LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
+ proxy_##name##step##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
+ proxy_bi_w8##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, _src2, \
+ height, denom, _wx0, _wx1, _ox0, _ox1, width); \
+}
#define mc_bi_w_func(name, bitd, W, opt) \
void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
@@ -694,7 +944,7 @@ void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _
LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, _src2, \
- height, denom, _wx0, _wx1, _ox0, _ox1); \
+ height, denom, _wx0, _wx1, _ox0, _ox1, width); \
}
#define mc_bi_w_funcs(name, bitd, opt) \
@@ -707,41 +957,171 @@ void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _
mc_bi_w_func(name, bitd, 48, opt); \
mc_bi_w_func(name, bitd, 64, opt)
-mc_bi_w_funcs(pel_pixels, 8, sse4);
+#define mc_bi_w_proxy_funcs(name, bitd, step, opt) \
+ mc_bi_w_func(name, bitd, 4, opt); \
+ mc_bi_w_func(name, bitd, 8, opt); \
+ mc_bi_w_func(name, bitd, 12, opt); \
+ mc_bi_w_func_proxy(name, bitd, step, opt)
+
+mc_bi_w_proxy_funcs(pel_pixels, 8, 16, sse4);
+#define ff_hevc_put_hevc_bi_w_pel_pixels16_8_sse4 proxy_bi_w_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels32_8_sse4 proxy_bi_w_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels48_8_sse4 proxy_bi_w_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels64_8_sse4 proxy_bi_w_pel_pixels16_8_sse4
+mc_bi_w_func(pel_pixels, 8, 24, sse4);
mc_bi_w_func(pel_pixels, 8, 6, sse4);
-mc_bi_w_funcs(epel_h, 8, sse4);
+
+mc_bi_w_proxy_funcs(epel_h, 8, 16, sse4);
+#define ff_hevc_put_hevc_bi_w_epel_h16_8_sse4 proxy_bi_w_epel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h32_8_sse4 proxy_bi_w_epel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h48_8_sse4 proxy_bi_w_epel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h64_8_sse4 proxy_bi_w_epel_h16_8_sse4
+mc_bi_w_func(epel_h, 8, 24, sse4);
mc_bi_w_func(epel_h, 8, 6, sse4);
-mc_bi_w_funcs(epel_v, 8, sse4);
+
+mc_bi_w_proxy_funcs(epel_v, 8, 16, sse4);
+#define ff_hevc_put_hevc_bi_w_epel_v16_8_sse4 proxy_bi_w_epel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v32_8_sse4 proxy_bi_w_epel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v48_8_sse4 proxy_bi_w_epel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v64_8_sse4 proxy_bi_w_epel_v16_8_sse4
+mc_bi_w_func(epel_v, 8, 24, sse4);
mc_bi_w_func(epel_v, 8, 6, sse4);
-mc_bi_w_funcs(epel_hv, 8, sse4);
+
+mc_bi_w_proxy_funcs(epel_hv, 8, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_epel_hv16_8_sse4 proxy_bi_w_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv24_8_sse4 proxy_bi_w_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv32_8_sse4 proxy_bi_w_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv48_8_sse4 proxy_bi_w_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv64_8_sse4 proxy_bi_w_epel_hv8_8_sse4
mc_bi_w_func(epel_hv, 8, 6, sse4);
-mc_bi_w_funcs(qpel_h, 8, sse4);
-mc_bi_w_funcs(qpel_v, 8, sse4);
-mc_bi_w_funcs(qpel_hv, 8, sse4);
-mc_bi_w_funcs(pel_pixels, 10, sse4);
+mc_bi_w_proxy_funcs(qpel_h, 8, 16, sse4);
+#define ff_hevc_put_hevc_bi_w_qpel_h16_8_sse4 proxy_bi_w_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h32_8_sse4 proxy_bi_w_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h48_8_sse4 proxy_bi_w_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h64_8_sse4 proxy_bi_w_qpel_h16_8_sse4
+mc_bi_w_func(qpel_h, 8, 24, sse4);
+
+mc_bi_w_proxy_funcs(qpel_v, 8, 16, sse4);
+#define ff_hevc_put_hevc_bi_w_qpel_v16_8_sse4 proxy_bi_w_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v32_8_sse4 proxy_bi_w_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v48_8_sse4 proxy_bi_w_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v64_8_sse4 proxy_bi_w_qpel_v16_8_sse4
+mc_bi_w_func(qpel_v, 8, 24, sse4);
+
+mc_bi_w_proxy_funcs(qpel_hv, 8, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_qpel_hv16_8_sse4 proxy_bi_w_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv24_8_sse4 proxy_bi_w_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv32_8_sse4 proxy_bi_w_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv48_8_sse4 proxy_bi_w_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv64_8_sse4 proxy_bi_w_qpel_hv8_8_sse4
+
+mc_bi_w_proxy_funcs(pel_pixels, 10, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_pel_pixels16_10_sse4 proxy_bi_w_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels24_10_sse4 proxy_bi_w_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels32_10_sse4 proxy_bi_w_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels48_10_sse4 proxy_bi_w_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels64_10_sse4 proxy_bi_w_pel_pixels8_10_sse4
mc_bi_w_func(pel_pixels, 10, 6, sse4);
-mc_bi_w_funcs(epel_h, 10, sse4);
+
+mc_bi_w_proxy_funcs(epel_h, 10, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_epel_h16_10_sse4 proxy_bi_w_epel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h24_10_sse4 proxy_bi_w_epel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h32_10_sse4 proxy_bi_w_epel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h48_10_sse4 proxy_bi_w_epel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h64_10_sse4 proxy_bi_w_epel_h8_10_sse4
mc_bi_w_func(epel_h, 10, 6, sse4);
-mc_bi_w_funcs(epel_v, 10, sse4);
+
+mc_bi_w_proxy_funcs(epel_v, 10, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_epel_v16_10_sse4 proxy_bi_w_epel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v24_10_sse4 proxy_bi_w_epel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v32_10_sse4 proxy_bi_w_epel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v48_10_sse4 proxy_bi_w_epel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v64_10_sse4 proxy_bi_w_epel_v8_10_sse4
mc_bi_w_func(epel_v, 10, 6, sse4);
-mc_bi_w_funcs(epel_hv, 10, sse4);
+
+mc_bi_w_proxy_funcs(epel_hv, 10, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_epel_hv16_10_sse4 proxy_bi_w_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv24_10_sse4 proxy_bi_w_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv32_10_sse4 proxy_bi_w_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv48_10_sse4 proxy_bi_w_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv64_10_sse4 proxy_bi_w_epel_hv8_10_sse4
mc_bi_w_func(epel_hv, 10, 6, sse4);
-mc_bi_w_funcs(qpel_h, 10, sse4);
-mc_bi_w_funcs(qpel_v, 10, sse4);
-mc_bi_w_funcs(qpel_hv, 10, sse4);
-mc_bi_w_funcs(pel_pixels, 12, sse4);
+mc_bi_w_proxy_funcs(qpel_h, 10, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_qpel_h16_10_sse4 proxy_bi_w_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h24_10_sse4 proxy_bi_w_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h32_10_sse4 proxy_bi_w_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h48_10_sse4 proxy_bi_w_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h64_10_sse4 proxy_bi_w_qpel_h8_10_sse4
+
+mc_bi_w_proxy_funcs(qpel_v, 10, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_qpel_v16_10_sse4 proxy_bi_w_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v24_10_sse4 proxy_bi_w_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v32_10_sse4 proxy_bi_w_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v48_10_sse4 proxy_bi_w_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v64_10_sse4 proxy_bi_w_qpel_v8_10_sse4
+
+mc_bi_w_proxy_funcs(qpel_hv, 10, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_qpel_hv16_10_sse4 proxy_bi_w_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv24_10_sse4 proxy_bi_w_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv32_10_sse4 proxy_bi_w_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv48_10_sse4 proxy_bi_w_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv64_10_sse4 proxy_bi_w_qpel_hv8_10_sse4
+
+mc_bi_w_proxy_funcs(pel_pixels, 12, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_pel_pixels16_12_sse4 proxy_bi_w_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels24_12_sse4 proxy_bi_w_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels32_12_sse4 proxy_bi_w_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels48_12_sse4 proxy_bi_w_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_bi_w_pel_pixels64_12_sse4 proxy_bi_w_pel_pixels8_12_sse4
mc_bi_w_func(pel_pixels, 12, 6, sse4);
-mc_bi_w_funcs(epel_h, 12, sse4);
+
+mc_bi_w_proxy_funcs(epel_h, 12, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_epel_h16_12_sse4 proxy_bi_w_epel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h24_12_sse4 proxy_bi_w_epel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h32_12_sse4 proxy_bi_w_epel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h48_12_sse4 proxy_bi_w_epel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_h64_12_sse4 proxy_bi_w_epel_h8_12_sse4
mc_bi_w_func(epel_h, 12, 6, sse4);
-mc_bi_w_funcs(epel_v, 12, sse4);
+
+mc_bi_w_proxy_funcs(epel_v, 12, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_epel_v16_12_sse4 proxy_bi_w_epel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v24_12_sse4 proxy_bi_w_epel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v32_12_sse4 proxy_bi_w_epel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v48_12_sse4 proxy_bi_w_epel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_v64_12_sse4 proxy_bi_w_epel_v8_12_sse4
mc_bi_w_func(epel_v, 12, 6, sse4);
-mc_bi_w_funcs(epel_hv, 12, sse4);
+
+mc_bi_w_proxy_funcs(epel_hv, 12, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_epel_hv16_12_sse4 proxy_bi_w_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv24_12_sse4 proxy_bi_w_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv32_12_sse4 proxy_bi_w_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv48_12_sse4 proxy_bi_w_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_w_epel_hv64_12_sse4 proxy_bi_w_epel_hv8_12_sse4
mc_bi_w_func(epel_hv, 12, 6, sse4);
-mc_bi_w_funcs(qpel_h, 12, sse4);
-mc_bi_w_funcs(qpel_v, 12, sse4);
-mc_bi_w_funcs(qpel_hv, 12, sse4);
+
+mc_bi_w_proxy_funcs(qpel_h, 12, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_qpel_h16_12_sse4 proxy_bi_w_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h24_12_sse4 proxy_bi_w_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h32_12_sse4 proxy_bi_w_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h48_12_sse4 proxy_bi_w_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_h64_12_sse4 proxy_bi_w_qpel_h8_12_sse4
+
+mc_bi_w_proxy_funcs(qpel_v, 12, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_qpel_v16_12_sse4 proxy_bi_w_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v24_12_sse4 proxy_bi_w_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v32_12_sse4 proxy_bi_w_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v48_12_sse4 proxy_bi_w_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_v64_12_sse4 proxy_bi_w_qpel_v8_12_sse4
+
+mc_bi_w_proxy_funcs(qpel_hv, 12, 8, sse4);
+#define ff_hevc_put_hevc_bi_w_qpel_hv16_12_sse4 proxy_bi_w_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv24_12_sse4 proxy_bi_w_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv32_12_sse4 proxy_bi_w_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv48_12_sse4 proxy_bi_w_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_w_qpel_hv64_12_sse4 proxy_bi_w_qpel_hv8_12_sse4
+
#endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
--
1.9.2.msysgit.0
More information about the ffmpeg-devel mailing list