[FFmpeg-devel] [PATCH 1/2] wasm/hevc: Add sao_band_filter

Zhao Zhili quinkblack at foxmail.com
Sat Jun 7 13:18:11 EEST 2025


From: Zhao Zhili <zhilizhao at tencent.com>

hevc_sao_band_8_8_c:                                    63.0 ( 1.00x)
hevc_sao_band_8_8_simd128:                              10.4 ( 6.06x)
hevc_sao_band_16_8_c:                                  230.4 ( 1.00x)
hevc_sao_band_16_8_simd128:                             22.9 (10.07x)
hevc_sao_band_32_8_c:                                  900.4 ( 1.00x)
hevc_sao_band_32_8_simd128:                             81.5 (11.05x)
hevc_sao_band_48_8_c:                                 2009.1 ( 1.00x)
hevc_sao_band_48_8_simd128:                            170.2 (11.80x)
hevc_sao_band_64_8_c:                                 3535.0 ( 1.00x)
hevc_sao_band_64_8_simd128:                            297.5 (11.88x)

Signed-off-by: Zhao Zhili <zhilizhao at tencent.com>
---
 libavcodec/wasm/hevc/Makefile   |   3 +-
 libavcodec/wasm/hevc/dsp_init.c |   7 ++
 libavcodec/wasm/hevc/sao.c      | 113 ++++++++++++++++++++++++++++++++
 libavcodec/wasm/hevc/sao.h      |  41 ++++++++++++
 4 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/wasm/hevc/sao.c
 create mode 100644 libavcodec/wasm/hevc/sao.h

diff --git a/libavcodec/wasm/hevc/Makefile b/libavcodec/wasm/hevc/Makefile
index 132daa3106..7e8ab3776e 100644
--- a/libavcodec/wasm/hevc/Makefile
+++ b/libavcodec/wasm/hevc/Makefile
@@ -1,3 +1,4 @@
 OBJS-$(CONFIG_HEVC_DECODER)        += wasm/hevc/dsp_init.o
 
-SIMD128-OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/idct.o
+SIMD128-OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/idct.o \
+                                       wasm/hevc/sao.o
diff --git a/libavcodec/wasm/hevc/dsp_init.c b/libavcodec/wasm/hevc/dsp_init.c
index e5c8a2ebb6..76a1031ff4 100644
--- a/libavcodec/wasm/hevc/dsp_init.c
+++ b/libavcodec/wasm/hevc/dsp_init.c
@@ -21,6 +21,7 @@
 #include "libavutil/cpu_internal.h"
 #include "libavcodec/hevc/dsp.h"
 #include "libavcodec/wasm/hevc/idct.h"
+#include "libavcodec/wasm/hevc/sao.h"
 
 av_cold void ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth)
 {
@@ -35,6 +36,12 @@ av_cold void ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth)
         c->idct[1] = ff_hevc_idct_8x8_8_simd128;
         c->idct[2] = ff_hevc_idct_16x16_8_simd128;
         c->idct[3] = ff_hevc_idct_32x32_8_simd128;
+
+        c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_simd128;
+        c->sao_band_filter[1] =
+        c->sao_band_filter[2] =
+        c->sao_band_filter[3] =
+        c->sao_band_filter[4] = ff_hevc_sao_band_filter_16x16_8_simd128;
     } else if (bit_depth == 10) {
         c->idct[0] = ff_hevc_idct_4x4_10_simd128;
         c->idct[1] = ff_hevc_idct_8x8_10_simd128;
diff --git a/libavcodec/wasm/hevc/sao.c b/libavcodec/wasm/hevc/sao.c
new file mode 100644
index 0000000000..82134af7f3
--- /dev/null
+++ b/libavcodec/wasm/hevc/sao.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2025 Zhao Zhili
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "sao.h"
+
+#include <wasm_simd128.h>
+
+void ff_hevc_sao_band_filter_8x8_8_simd128(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride_dst, /* bytes added to dst after each row */
+                                       ptrdiff_t stride_src, /* bytes added to src after each row */
+                                       const int16_t *sao_offset_val, /* entries [1]..[4] are read */
+                                       int sao_left_class, int width, /* width is not read here */
+                                       int height)
+{ /* Adds a per-band offset to 8-byte-wide rows of 8-bit samples, with clamping. */
+    int8_t offset_table[32] = {0}; /* one slot per (sample >> 3); unset slots add 0 */
+    v128_t offset_low, offset_high;
+
+    for (int k = 0; k < 4; k++) /* 4 consecutive slots from sao_left_class, wrapping mod 32 */
+        offset_table[(k + sao_left_class) & 31] = (int8_t)sao_offset_val[k + 1];
+
+    offset_low = wasm_v128_load(offset_table); /* table slots 0..15 */
+    offset_high = wasm_v128_load(&offset_table[16]); /* table slots 16..31 */
+
+    for (int y = height; y > 0; y -= 2) { /* two rows per pass; an odd height still does a full pair */
+        v128_t src_v, src_high;
+        v128_t v0, v1;
+
+        src_v = wasm_v128_load64_zero(src); /* first row -> low 8 bytes */
+        src += stride_src;
+        src_v = wasm_v128_load64_lane(src, src_v, 1); /* second row -> high 8 bytes */
+        src += stride_src;
+
+        v0 = wasm_u8x16_shr(src_v, 3); /* band index 0..31 for every sample */
+        v1 = wasm_i8x16_sub(v0, wasm_i8x16_const_splat(16)); /* index rebased for the upper half */
+        v0 = wasm_i8x16_swizzle(offset_low, v0); /* swizzle gives 0 where index is outside 0..15 */
+        v1 = wasm_i8x16_swizzle(offset_high, v1); /* hits only where the band index was 16..31 */
+        v0 = wasm_v128_or(v0, v1); /* at most one of the two lookups is non-zero per lane */
+        src_high = wasm_u16x8_extend_high_u8x16(src_v); /* samples: zero-extend to 16 bits */
+        v1 = wasm_i16x8_extend_high_i8x16(v0); /* offsets: sign-extend to 16 bits */
+        src_v = wasm_u16x8_extend_low_u8x16(src_v);
+        v0 = wasm_i16x8_extend_low_i8x16(v0);
+
+        v0 = wasm_i16x8_add_sat(src_v, v0); /* sample + offset in 16-bit lanes */
+        v1 = wasm_i16x8_add_sat(src_high, v1);
+        v0 = wasm_u8x16_narrow_i16x8(v0, v1); /* back to bytes, saturating to 0..255 */
+
+        wasm_v128_store64_lane(dst, v0, 0); /* low 8 bytes -> first row */
+        dst += stride_dst;
+        wasm_v128_store64_lane(dst, v0, 1); /* high 8 bytes -> second row */
+        dst += stride_dst;
+    }
+}
+
+void ff_hevc_sao_band_filter_16x16_8_simd128(uint8_t *dst, const uint8_t *src,
+                                           ptrdiff_t stride_dst, /* bytes added to dst after each row */
+                                           ptrdiff_t stride_src, /* bytes added to src after each row */
+                                           const int16_t *sao_offset_val, /* entries [1]..[4] are read */
+                                           int sao_left_class, int width, /* width bounds the x loop */
+                                           int height)
+{ /* Adds a per-band offset to rows of 8-bit samples, 16 bytes per step, with clamping. */
+    int8_t offset_table[32] = {0}; /* one slot per (sample >> 3); unset slots add 0 */
+    v128_t offset_low, offset_high;
+
+    for (int k = 0; k < 4; k++) /* 4 consecutive slots from sao_left_class, wrapping mod 32 */
+        offset_table[(k + sao_left_class) & 31] = (int8_t)sao_offset_val[k + 1];
+
+    offset_low = wasm_v128_load(offset_table); /* table slots 0..15 */
+    offset_high = wasm_v128_load(&offset_table[16]); /* table slots 16..31 */
+
+    for (int y = height; y > 0; y--) { /* one row per pass */
+        for (int x = 0; x < width; x += 16) { /* full 16-byte steps, even if width % 16 != 0 */
+            v128_t src_v, src_high;
+            v128_t v0, v1;
+
+            src_v = wasm_v128_load(&src[x]); /* 16 samples of the current row */
+
+            v0 = wasm_u8x16_shr(src_v, 3); /* band index 0..31 for every sample */
+            v1 = wasm_i8x16_sub(v0, wasm_i8x16_const_splat(16)); /* index rebased for the upper half */
+            v0 = wasm_i8x16_swizzle(offset_low, v0); /* swizzle gives 0 where index is outside 0..15 */
+            v1 = wasm_i8x16_swizzle(offset_high, v1); /* hits only where the band index was 16..31 */
+            v0 = wasm_v128_or(v0, v1); /* at most one of the two lookups is non-zero per lane */
+            src_high = wasm_u16x8_extend_high_u8x16(src_v); /* samples: zero-extend to 16 bits */
+            v1 = wasm_i16x8_extend_high_i8x16(v0); /* offsets: sign-extend to 16 bits */
+            src_v = wasm_u16x8_extend_low_u8x16(src_v);
+            v0 = wasm_i16x8_extend_low_i8x16(v0);
+
+            v0 = wasm_i16x8_add_sat(src_v, v0); /* sample + offset in 16-bit lanes */
+            v1 = wasm_i16x8_add_sat(src_high, v1);
+            v0 = wasm_u8x16_narrow_i16x8(v0, v1); /* back to bytes, saturating to 0..255 */
+            wasm_v128_store(&dst[x], v0); /* writes all 16 result bytes */
+        }
+
+        dst += stride_dst;
+        src += stride_src;
+    }
+}
diff --git a/libavcodec/wasm/hevc/sao.h b/libavcodec/wasm/hevc/sao.h
new file mode 100644
index 0000000000..6119ec90f1
--- /dev/null
+++ b/libavcodec/wasm/hevc/sao.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2025 Zhao Zhili
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_WASM_HEVC_SAO_H
+#define AVCODEC_WASM_HEVC_SAO_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_hevc_sao_band_filter_8x8_8_simd128(uint8_t *_dst, const uint8_t *_src,
+                                           ptrdiff_t _stride_dst,
+                                           ptrdiff_t _stride_src,
+                                           const int16_t *sao_offset_val,
+                                           int sao_left_class, int width,
+                                           int height);
+
+void ff_hevc_sao_band_filter_16x16_8_simd128(uint8_t *_dst, const uint8_t *_src,
+                                             ptrdiff_t _stride_dst,
+                                             ptrdiff_t _stride_src,
+                                             const int16_t *sao_offset_val,
+                                             int sao_left_class, int width,
+                                             int height);
+
+#endif
\ No newline at end of file
-- 
2.43.0



More information about the ffmpeg-devel mailing list