[FFmpeg-devel] [PATCH v2_2 6/6] lavc/vvc_mc R-V V sad

flow gg hlefthleft at gmail.com
Tue Dec 17 11:50:26 EET 2024


> Don't clobber v8 here.
> Use vsub.vv here to avoid the sequential dependency.

Thanks, I will update this later.

> Are you sure this does not require tail-undisturbed mode? I think you're
> setting tail-agnostic mode up.

I’m not sure I understood correctly.
My understanding is that tail-undisturbed mode is used when the largest LMUL
is applied, where it is necessary to prevent the tail values from being
incorrectly accumulated.
In this case, however, vset uses the smallest LMUL, so this issue will not
occur.

Rémi Denis-Courmont <remi at remlab.net> 于2024年12月17日周二 15:07写道:

> Le sunnuntaina 15. joulukuuta 2024, 17.56.34 EET
> uk7b-at-foxmail.com at ffmpeg.org
> a écrit :
> > From: sunyuechi <sunyuechi at iscas.ac.cn>
> >
> >                             k230               banana_f3
> > sad_8x16_c:                 387.7 ( 1.00x)    394.9 ( 1.00x)
> > sad_8x16_rvv_i32:           109.7 ( 3.53x)    103.5 ( 3.82x)
> > sad_16x8_c:                 378.2 ( 1.00x)    384.7 ( 1.00x)
> > sad_16x8_rvv_i32:            82.0 ( 4.61x)    61.7 ( 6.24x)
> > sad_16x16_c:                748.7 ( 1.00x)    759.7 ( 1.00x)
> > sad_16x16_rvv_i32:          128.5 ( 5.83x)    113.7 ( 6.68x)
> > ---
> >  libavcodec/riscv/vvc/Makefile      |  3 +-
> >  libavcodec/riscv/vvc/vvc_sad_rvv.S | 61 ++++++++++++++++++++++++++++++
> >  libavcodec/riscv/vvc/vvcdsp_init.c |  7 ++++
> >  3 files changed, 70 insertions(+), 1 deletion(-)
> >  create mode 100644 libavcodec/riscv/vvc/vvc_sad_rvv.S
> >
> > diff --git a/libavcodec/riscv/vvc/Makefile
> b/libavcodec/riscv/vvc/Makefile
> > index 582b051579..6b9c618b33 100644
> > --- a/libavcodec/riscv/vvc/Makefile
> > +++ b/libavcodec/riscv/vvc/Makefile
> > @@ -1,2 +1,3 @@
> >  OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o
> > -RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o
> > +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \
> > +                                  riscv/vvc/vvc_sad_rvv.o
> > diff --git a/libavcodec/riscv/vvc/vvc_sad_rvv.S
> > b/libavcodec/riscv/vvc/vvc_sad_rvv.S new file mode 100644
> > index 0000000000..341167be1f
> > --- /dev/null
> > +++ b/libavcodec/riscv/vvc/vvc_sad_rvv.S
> > @@ -0,0 +1,61 @@
> > +/*
> > + * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences
> > (ISCAS). + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA + */
> > +
> > +#include "libavcodec/riscv/h26x/asm.S"
> > +
> > +.macro func_sad vlen
> > +func ff_vvc_sad_rvv_\vlen, zve32x, zbb, zba
> > +        lpad    0
> > +        slli              t2, a3, 7  // dy * 128
> > +        li                t1, 4*128+4
> > +        add               t3, t2, a2 // dy * 128 + dx
> > +        sub               t1, t1, t2
> > +        sub               t1, t1, a2
> > +        sh1add            a0, t3, a0
> > +        sh1add            a1, t1, a1
> > +        li                t3, 16
> > +        beq               a4, t3, SADVSET\vlen\()16
> > +        .irp w,8,16
> > +SADVSET\vlen\w:
> > +        vsetvlstatic32    \w, \vlen
> > +        vmv.v.i           v0, 0
> > +        vmv.s.x           v24, zero
> > +        vsetvlstatic16    \w, \vlen
> > +SAD\vlen\w:
> > +        addi              a5, a5, -2
> > +        vle16.v           v8, (a0)
> > +        vle16.v           v16, (a1)
> > +        vsub.vv           v8, v8, v16
>
> Don't clobber v8 here.
>
> > +        vneg.v            v16, v8
>
> Use vsub.vv here to avoid the sequential dependency.
>
> > +        addi              a0, a0, 2 * 128 * 2
> > +        vmax.vv           v8, v8, v16
> > +        vwaddu.wv         v0, v0, v8
> > +        addi              a1, a1, 2 * 128 * 2
> > +        bnez              a5, SAD\vlen\w
> > +        vsetvlstatic32    \w, \vlen
> > +        vredsum.vs        v24, v0, v24
>
> Are you sure this does not require tail-undisturbed mode? I think you're
> setting tail-agnostic mode up.
>
> > +        vmv.x.s           a0, v24
> > +        ret
> > +        .endr
> > +endfunc
> > +.endm
> > +
> > +func_sad 256
> > +func_sad 128
> > diff --git a/libavcodec/riscv/vvc/vvcdsp_init.c
> > b/libavcodec/riscv/vvc/vvcdsp_init.c index 2fe93029aa..1b228cc9f5 100644
> > --- a/libavcodec/riscv/vvc/vvcdsp_init.c
> > +++ b/libavcodec/riscv/vvc/vvcdsp_init.c
> > @@ -59,6 +59,9 @@ DMVR_PROTOTYPES(8, rvv_256)
> >      c->inter.dmvr[1][1]   = ff_vvc_dmvr_hv_##bd##_##opt;           \
> >  } while (0)
> >
> > +int ff_vvc_sad_rvv_128(const int16_t *src0, const int16_t *src1, int dx,
> > int dy, int block_w, int block_h); +int ff_vvc_sad_rvv_256(const int16_t
> > *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); +
> >  #define PUT_PIXELS_PROTOTYPES2(bd, opt)
>
> >      \ void bf(ff_vvc_put_pixels, bd, opt)(int16_t *dst,
>
> >             \ const uint8_t *_src, const ptrdiff_t _src_stride,
>
> >                \ @@ -97,6 +100,8 @@ void
> > ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd) FUNCS(LUMA,
> > rvv_256);
> >                  FUNCS(CHROMA, rvv_256);
> >                  break;
> > +            case 10:
> > +                c->inter.sad      = ff_vvc_sad_rvv_256;
> >              default:
> >                  break;
> >          }
> > @@ -111,6 +116,8 @@ void ff_vvc_dsp_init_riscv(VVCDSPContext *const c,
> const
> > int bd) FUNCS(LUMA, rvv_128);
> >                  FUNCS(CHROMA, rvv_128);
> >                  break;
> > +            case 10:
> > +                c->inter.sad      = ff_vvc_sad_rvv_128;
> >              default:
> >                  break;
> >          }
>
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>


More information about the ffmpeg-devel mailing list