[FFmpeg-devel] [PATCH 2/2] jpeg2000dec: add ff_rct_int_{sse2, avx2}
Paul B Mahol
onemda at gmail.com
Sat Jun 13 20:26:00 CEST 2015
Dana 13. 6. 2015. 18:28 osoba "James Almer" <jamrial at gmail.com> napisala je:
>
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
> The only sample I could find using the reversible wavelet transform is http://www.fnordware.com/j2k/relax.jp2
>
> libavcodec/jpeg2000.c | 1 +
> libavcodec/x86/jpeg2000dsp.asm | 36 ++++++++++++++++++++++++++++++++++++
> libavcodec/x86/jpeg2000dsp_init.c | 10 ++++++++++
> 3 files changed, 47 insertions(+)
>
> diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
> index af24e99..ec00ebc 100644
> --- a/libavcodec/jpeg2000.c
> +++ b/libavcodec/jpeg2000.c
> @@ -221,6 +221,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
> if (!comp->f_data)
> return AVERROR(ENOMEM);
> } else {
> + csize += FF_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
> comp->f_data = NULL;
> comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data));
> if (!comp->i_data)
> diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm
> index 0d79ab7..712a298 100644
> --- a/libavcodec/x86/jpeg2000dsp.asm
> +++ b/libavcodec/x86/jpeg2000dsp.asm
> @@ -106,3 +106,39 @@ INIT_XMM sse
> ICT_FLOAT 10
> INIT_YMM avx
> ICT_FLOAT 9
> +
> +;***************************************************************************
> +; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
> +;***************************************************************************
> +%macro RCT_INT 0
> +cglobal rct_int, 4, 4, 4, src0, src1, src2, csize
> + shl csized, 2
> + add src0q, csizeq
> + add src1q, csizeq
> + add src2q, csizeq
> + neg csizeq
> +
> +align 16
> +.loop:
> + mova m1, [src1q+csizeq]
> + mova m2, [src2q+csizeq]
> + mova m0, [src0q+csizeq]
> + paddd m3, m1, m2
> + psrad m3, 2
> + psubd m0, m3
> + paddd m1, m0
> + paddd m2, m0
> + mova [src1q+csizeq], m0
> + mova [src2q+csizeq], m1
> + mova [src0q+csizeq], m2
> + add csizeq, mmsize
> + jl .loop
> + REP_RET
> +%endmacro
> +
> +INIT_XMM sse2
> +RCT_INT
> +%if HAVE_AVX2_EXTERNAL
> +INIT_YMM avx2
> +RCT_INT
> +%endif
> diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c
> index 43b9ccd..0dbd2db 100644
> --- a/libavcodec/x86/jpeg2000dsp_init.c
> +++ b/libavcodec/x86/jpeg2000dsp_init.c
> @@ -26,6 +26,8 @@
>
> void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize);
> void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize);
> +void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize);
> +void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize);
>
> av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
> {
> @@ -34,7 +36,15 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
> c->mct_decode[FF_DWT97] = ff_ict_float_sse;
> }
>
> + if (EXTERNAL_SSE2(cpu_flags)) {
> + c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
> + }
> +
> if (EXTERNAL_AVX_FAST(cpu_flags)) {
> c->mct_decode[FF_DWT97] = ff_ict_float_avx;
> }
> +
> + if (EXTERNAL_AVX2(cpu_flags)) {
> + c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
> + }
> }
> --
> 2.4.3
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
How much faster is this?
More information about the ffmpeg-devel
mailing list