[FFmpeg-devel] [PATCH 1/4] lavc/aarch64: add hevc sao edge 16x16
Martin Storsjö
martin at martin.st
Tue Oct 19 11:38:45 EEST 2021
On Thu, 7 Oct 2021, J. Dekker wrote:
> --bench on AWS Graviton:
>
> hevc_sao_edge_16x16_8_c: 1857.0
> hevc_sao_edge_16x16_8_neon: 211.0
> hevc_sao_edge_32x32_8_c: 7802.2
> hevc_sao_edge_32x32_8_neon: 808.2
> hevc_sao_edge_48x48_8_c: 16764.2
> hevc_sao_edge_48x48_8_neon: 1796.5
> hevc_sao_edge_64x64_8_c: 32647.5
> hevc_sao_edge_64x64_8_neon: 3118.5
>
> Signed-off-by: J. Dekker <jdek at itanimul.li>
> ---
> libavcodec/aarch64/hevcdsp_init_aarch64.c | 8 ++-
> libavcodec/aarch64/hevcdsp_sao_neon.S | 66 +++++++++++++++++++++++
> 2 files changed, 72 insertions(+), 2 deletions(-)
>
> diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> index c785e46f79..747ff0412d 100644
> --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
> +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> @@ -57,8 +57,8 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, uint8_t *_src,
> ptrdiff_t stride_dst, ptrdiff_t stride_src,
> int16_t *sao_offset_val, int sao_left_class,
> int width, int height);
> -
> -
> +void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
> + int16_t *sao_offset_val, int eo, int width, int height);
>
> av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
> {
> @@ -76,6 +76,10 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
> c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
> c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
> c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_neon;
> + c->sao_edge_filter[1] =
> + c->sao_edge_filter[2] =
> + c->sao_edge_filter[3] =
> + c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_16x16_8_neon;
> }
> if (bit_depth == 10) {
> c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
> diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S
> index f9fed8345b..a7f054c075 100644
> --- a/libavcodec/aarch64/hevcdsp_sao_neon.S
> +++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
> @@ -85,3 +85,69 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
> bne 1b
> ret
> endfunc
> +
> +// ASSUMES STRIDE_SRC = 192
> +.Lsao_edge_pos:
> +.word 1 // horizontal
> +.word 192 // vertical
> +.word 192 + 1 // 45 degree
> +.word 192 - 1 // 135 degree
> +
> +// ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff stride_dst,
> +// int16 *sao_offset_val, int eo, int width, int height)
> +function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
> + lsl w4, w4, #2
Actually, the left indentation here is one character too small, compared with
the existing function in this same file and with the other asm sources. So
rather than reindenting the old function, please indent the new one to match
all the other existing asm.
// Martin
More information about the ffmpeg-devel mailing list