[FFmpeg-devel] [RFC PATCH] avfilter/fastdeint: import simple cpu-optimized deinterlacing algorithms from VLC
Paul B Mahol
onemda at gmail.com
Mon Sep 9 23:38:14 EEST 2019
On 9/9/19, Aman Gupta <ffmpeg at tmm1.net> wrote:
> From: Aman Gupta <aman at tmm1.net>
>
> These are simple algorithms which can be run efficiently
> on low-powered devices to produce deinterlaced images.
>
> Signed-off-by: Aman Gupta <aman at tmm1.net>
> ---
> doc/filters.texi | 27 ++
> libavfilter/Makefile | 1 +
> libavfilter/aarch64/Makefile | 1 +
> libavfilter/aarch64/merge_neon.S | 98 ++++++
> libavfilter/allfilters.c | 1 +
> libavfilter/arm/Makefile | 3 +
> libavfilter/arm/merge_armv6.S | 70 ++++
> libavfilter/arm/merge_neon.S | 109 ++++++
> libavfilter/vf_fastdeint.c | 588 +++++++++++++++++++++++++++++++
> 9 files changed, 898 insertions(+)
> create mode 100644 libavfilter/aarch64/merge_neon.S
> create mode 100644 libavfilter/arm/Makefile
> create mode 100644 libavfilter/arm/merge_armv6.S
> create mode 100644 libavfilter/arm/merge_neon.S
> create mode 100644 libavfilter/vf_fastdeint.c
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 6c81e1da40..55d9adeb81 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -9796,6 +9796,33 @@ fade=t=in:st=5.5:d=0.5
>
> @end itemize
>
> + at section fastdeint
> +Fast deinterlacing algorithms.
> +
> + at table @option
> + at item mode
> +Deinterlacing algorithm to use.
> +
> +It accepts the following values:
> + at table @samp
> + at item discard
> +Discard bottom frame.
> +
> + at item mean
> +Half resolution blender.
> +
> + at item blend
> +Full resolution blender.
> +
> + at item bob
> +Bob doubler.
> +
> + at item linear
> +Bob doubler with linear interpolation.
> + at end table
> +
> + at end table
> +
> @section fftdnoiz
> Denoise frames using 3D FFT (frequency domain filtering).
>
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 3ef4191d9a..a2b3566ec0 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -234,6 +234,7 @@ OBJS-$(CONFIG_EROSION_OPENCL_FILTER) +=
> vf_neighbor_opencl.o opencl.o \
> opencl/neighbor.o
> OBJS-$(CONFIG_EXTRACTPLANES_FILTER) += vf_extractplanes.o
> OBJS-$(CONFIG_FADE_FILTER) += vf_fade.o
> +OBJS-$(CONFIG_FASTDEINT_FILTER) += vf_fastdeint.o
> OBJS-$(CONFIG_FFTDNOIZ_FILTER) += vf_fftdnoiz.o
> OBJS-$(CONFIG_FFTFILT_FILTER) += vf_fftfilt.o
> OBJS-$(CONFIG_FIELD_FILTER) += vf_field.o
> diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile
> index b58daa3a3f..2b0ad92893 100644
> --- a/libavfilter/aarch64/Makefile
> +++ b/libavfilter/aarch64/Makefile
> @@ -1,3 +1,4 @@
> OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o
>
> +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += aarch64/merge_neon.o
> NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o
> diff --git a/libavfilter/aarch64/merge_neon.S
> b/libavfilter/aarch64/merge_neon.S
> new file mode 100644
> index 0000000000..62377331a4
> --- /dev/null
> +++ b/libavfilter/aarch64/merge_neon.S
> @@ -0,0 +1,98 @@
> +/*
> + * Copyright (c) 2009-2016 Rémi Denis-Courmont, Janne Grunau, VLC authors
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#include "libavutil/aarch64/asm.S"
> +
> +#define dest x0
> +#define src1 x1
> +#define src2 x2
> +#define size x3
> +
> + .align 2
> + // NOTE: Offset and pitch must be multiple of 16-bytes.
> +function ff_merge8_neon, export=1
> + ands x5, size, #~63
> + b.eq 2f
> + mov x10, #64
> + add x11, src1, #32
> + add x12, src2, #32
> +1:
> + ld1 {v0.16b,v1.16b}, [src1], x10
> + ld1 {v4.16b,v5.16b}, [src2], x10
> + ld1 {v2.16b,v3.16b}, [x11], x10
> + uhadd v0.16b, v0.16b, v4.16b
> + ld1 {v6.16b,v7.16b}, [x12], x10
> + subs x5, x5, #64
> + uhadd v1.16b, v1.16b, v5.16b
> + uhadd v2.16b, v2.16b, v6.16b
> + uhadd v3.16b, v3.16b, v7.16b
> + st1 {v0.16b,v1.16b}, [dest], #32
> + st1 {v2.16b,v3.16b}, [dest], #32
> + b.gt 1b
> +2:
> + tbz size, #5, 3f
> + ld1 {v0.16b,v1.16b}, [src1], #32
> + ld1 {v4.16b,v5.16b}, [src2], #32
> + uhadd v0.16b, v0.16b, v4.16b
> + uhadd v1.16b, v1.16b, v5.16b
> + st1 {v0.16b,v1.16b}, [dest], #32
> +3:
> + tbz size, #4, 4f
> + ld1 {v0.16b}, [src1]
> + ld1 {v4.16b}, [src2]
> + uhadd v0.16b, v0.16b, v4.16b
> + st1 {v0.16b}, [dest]
> +4:
> + ret
> +endfunc
> +
> + .align 2
> +function ff_merge16_neon, export=1
> + ands x5, size, #~63
> + b.eq 2f
> +1:
> + ld1 {v0.8h,v1.8h}, [src1], #32
> + ld1 {v4.8h,v5.8h}, [src2], #32
> + ld1 {v2.8h,v3.8h}, [src1], #32
> + uhadd v0.8h, v0.8h, v4.8h
> + ld1 {v6.8h,v7.8h}, [src2], #32
> + uhadd v1.8h, v1.8h, v5.8h
> + uhadd v2.8h, v2.8h, v6.8h
> + uhadd v3.8h, v3.8h, v7.8h
> + st1 {v0.8h,v1.8h}, [dest], #32
> + st1 {v2.8h,v3.8h}, [dest], #32
> + subs x5, x5, #64
> + b.gt 1b
> +2:
> + tbz size, #5, 3f
> + ld1 {v0.8h,v1.8h}, [src1], #32
> + ld1 {v4.8h,v5.8h}, [src2], #32
> + uhadd v0.8h, v0.8h, v4.8h
> + uhadd v1.8h, v1.8h, v5.8h
> + st1 {v0.8h,v1.8h}, [dest], #32
> +3:
> + tbz size, #4, 4f
> + ld1 {v0.8h}, [src1]
> + ld1 {v4.8h}, [src2]
> + uhadd v0.8h, v0.8h,v4.8h
> + st1 {v0.8h}, [dest]
> +4:
> + ret
> +endfunc
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index b675c688ee..6631af2ffe 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -219,6 +219,7 @@ extern AVFilter ff_vf_erosion;
> extern AVFilter ff_vf_erosion_opencl;
> extern AVFilter ff_vf_extractplanes;
> extern AVFilter ff_vf_fade;
> +extern AVFilter ff_vf_fastdeint;
> extern AVFilter ff_vf_fftdnoiz;
> extern AVFilter ff_vf_fftfilt;
> extern AVFilter ff_vf_field;
> diff --git a/libavfilter/arm/Makefile b/libavfilter/arm/Makefile
> new file mode 100644
> index 0000000000..c92d62fac9
> --- /dev/null
> +++ b/libavfilter/arm/Makefile
> @@ -0,0 +1,3 @@
> +ARMV6-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_armv6.o
> +
> +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_neon.o
> diff --git a/libavfilter/arm/merge_armv6.S b/libavfilter/arm/merge_armv6.S
> new file mode 100644
> index 0000000000..9b551c2c6c
> --- /dev/null
> +++ b/libavfilter/arm/merge_armv6.S
> @@ -0,0 +1,70 @@
> +/*
> + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#include "libavutil/arm/asm.S"
> +
> +#define dest r0
> +#define src1 r1
> +#define src2 r2
> +#define size r3
> +
> + .align 2
> +function ff_merge8_armv6, export=1
> + push {r4-r9,lr}
> +1:
> + pld [src1, #64]
> + ldm src1!, {r4-r5}
> + pld [src2, #64]
> + ldm src2!, {r8-r9}
> + subs size, size, #16
> + uhadd8 r4, r4, r8
> + ldm src1!, {r6-r7}
> + uhadd8 r5, r5, r9
> + ldm src2!, {ip,lr}
> + uhadd8 r6, r6, ip
> + stm dest!, {r4-r5}
> + uhadd8 r7, r7, lr
> + stm dest!, {r6-r7}
> + it eq
> + popeq {r4-r9,pc}
> + b 1b
> +endfunc
> +
> + .align 2
> +function ff_merge16_armv6, export=1
> + push {r4-r9,lr}
> +1:
> + pld [src1, #64]
> + ldm src1!, {r4-r5}
> + pld [src2, #64]
> + ldm src2!, {r8-r9}
> + subs size, size, #16
> + uhadd16 r4, r4, r8
> + ldm src1!, {r6-r7}
> + uhadd16 r5, r5, r9
> + ldm src2!, {ip,lr}
> + uhadd16 r6, r6, ip
> + stm dest!, {r4-r5}
> + uhadd16 r7, r7, lr
> + stm dest!, {r6-r7}
> + it eq
> + popeq {r4-r9,pc}
> + b 1b
> +endfunc
> \ No newline at end of file
> diff --git a/libavfilter/arm/merge_neon.S b/libavfilter/arm/merge_neon.S
> new file mode 100644
> index 0000000000..ae36cf3ca9
> --- /dev/null
> +++ b/libavfilter/arm/merge_neon.S
> @@ -0,0 +1,109 @@
> +/*
> + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#include "libavutil/arm/asm.S"
> +
> +#define dest r0
> +#define src1 r1
> +#define src2 r2
> +#define size r3
> +
> + .align 2
> + @ NOTE: Offset and pitch must be multiple of 16-bytes.
> +function ff_merge8_neon, export=1
> + cmp size, #64
> + blo 2f
> +1:
> + pld [src1, #64]
> + vld1.u8 {q0-q1}, [src1,:128]!
> + pld [src2, #64]
> + vld1.u8 {q8-q9}, [src2,:128]!
> + vhadd.u8 q0, q0, q8
> + sub size, size, #64
> + vld1.u8 {q2-q3}, [src1,:128]!
> + vhadd.u8 q1, q1, q9
> + vld1.u8 {q10-q11}, [src2,:128]!
> + vhadd.u8 q2, q2, q10
> + cmp size, #64
> + vhadd.u8 q3, q3, q11
> + vst1.u8 {q0-q1}, [dest,:128]!
> + vst1.u8 {q2-q3}, [dest,:128]!
> + bhs 1b
> +2:
> + cmp size, #32
> + blo 3f
> + vld1.u8 {q0-q1}, [src1,:128]!
> + sub size, size, #32
> + vld1.u8 {q8-q9}, [src2,:128]!
> + vhadd.u8 q0, q0, q8
> + vhadd.u8 q1, q1, q9
> + vst1.u8 {q0-q1}, [dest,:128]!
> +3:
> + cmp size, #16
> + it lo
> + bxlo lr
> + vld1.u8 {q0}, [src1,:128]!
> + sub size, size, #16
> + vld1.u8 {q8}, [src2,:128]!
> + vhadd.u8 q0, q0, q8
> + vst1.u8 {q0}, [dest,:128]!
> + bx lr
> +endfunc
> +
> + .align 2
> +function ff_merge16_neon, export=1
> + cmp size, #64
> + blo 2f
> +1:
> + pld [src1, #64]
> + vld1.u16 {q0-q1}, [src1,:128]!
> + pld [src2, #64]
> + vld1.u16 {q8-q9}, [src2,:128]!
> + vhadd.u16 q0, q0, q8
> + sub size, size, #64
> + vld1.u16 {q2-q3}, [src1,:128]!
> + vhadd.u16 q1, q1, q9
> + vld1.u16 {q10-q11}, [src2,:128]!
> + vhadd.u16 q2, q2, q10
> + cmp size, #64
> + vhadd.u16 q3, q3, q11
> + vst1.u16 {q0-q1}, [dest,:128]!
> + vst1.u16 {q2-q3}, [dest,:128]!
> + bhs 1b
> +2:
> + cmp size, #32
> + blo 3f
> + vld1.u16 {q0-q1}, [src1,:128]!
> + sub size, size, #32
> + vld1.u16 {q8-q9}, [src2,:128]!
> + vhadd.u16 q0, q0, q8
> + vhadd.u16 q1, q1, q9
> + vst1.u16 {q0-q1}, [dest,:128]!
> +3:
> + cmp size, #16
> + it lo
> + bxlo lr
> + vld1.u16 {q0}, [src1,:128]!
> + sub size, size, #16
> + vld1.u16 {q8}, [src2,:128]!
> + vhadd.u16 q0, q0, q8
> + vst1.u16 {q0}, [dest,:128]!
> + bx lr
> +endfunc
> \ No newline at end of file
> diff --git a/libavfilter/vf_fastdeint.c b/libavfilter/vf_fastdeint.c
> new file mode 100644
> index 0000000000..5ddd8be392
> --- /dev/null
> +++ b/libavfilter/vf_fastdeint.c
> @@ -0,0 +1,588 @@
> +/*
> + * Copyright (C) 2015 Aman Gupta <aman at tmm1.net>
> + * 2000-2011 VLC authors and VideoLAN
> + *
> + * Author: Sam Hocevar <sam at zoy.org>
> + * Damien Lucas <nitrox at videolan.org>
> + * Laurent Aimar <fenrir at videolan.org>
> + * Sigmund Augdal Helberg <sigmunau at videolan.org>
> + *
> + * These algorithms are derived from the VLC project's
> + * modules/video_filter/deinterlace/algo_basic.c
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#include "libavutil/avassert.h"
> +#include "libavutil/cpu.h"
> +#include "libavutil/common.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/timestamp.h"
> +#include "avfilter.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "video.h"
> +
> +enum Mode {
> + MODE_DISCARD,
> + MODE_MEAN,
> + MODE_BLEND,
> + MODE_BOB,
> + MODE_LINEAR,
> + MODE_MAX,
> +};
> +
> +typedef void (*merge_fn)(void *dst, const void *src1, const void *src2,
> size_t len);
> +
> +typedef struct FastDeintContext {
> + const AVClass *class;
> + merge_fn merge;
> + int merge_size;
> + int merge_aligned;
> + AVFrame *cur, *next;
> + enum Mode mode;
> + int eof;
> +} FastDeintContext;
> +
> +static void merge8_c(uint8_t *dst, const uint8_t *src1, const uint8_t
> *src2, size_t bytes)
> +{
> + for (; bytes > 0; bytes--)
> + *dst++ = ( *src1++ + *src2++ ) >> 1;
> +}
> +
> +static void merge16_c(uint16_t *dst, const uint16_t *src1, const uint16_t
> *src2, size_t bytes)
> +{
> + for (size_t words = bytes / 2; words > 0; words--)
> + *dst++ = ( *src1++ + *src2++ ) >> 1;
> +}
> +
> +static void merge8_unaligned(FastDeintContext *s, uint8_t *dst, const
> uint8_t *src1, const uint8_t *src2, size_t bytes)
> +{
> + if (s->merge_aligned) {
> + size_t remainder = bytes % 16;
> + if (remainder > 0) {
> + merge8_c(dst, src1, src2, remainder);
> + bytes -= remainder;
> + dst += remainder;
> + src1 += remainder;
> + src2 += remainder;
> + }
> + }
> + s->merge(dst, src1, src2, bytes);
> +}
> +
> +static void merge16_unaligned(FastDeintContext *s, uint16_t *dst, const
> uint16_t *src1, const uint16_t *src2, size_t bytes)
> +{
> + if (s->merge_aligned) {
> + size_t words = bytes / 2;
> + size_t remainder = words % 8;
> + if (remainder > 0) {
> + merge16_c(dst, src1, src2, remainder);
> + words -= remainder;
> + dst += remainder;
> + src1 += remainder;
> + src2 += remainder;
> + }
> + }
> + s->merge(dst, src1, src2, bytes);
> +}
> +
> +static void merge_unaligned(FastDeintContext *s, void *dst, const void
> *src1, const void *src2, size_t bytes)
> +{
> + if (s->merge_size == 16)
> + merge16_unaligned(s, dst, src1, src2, bytes);
> + else
> + merge8_unaligned(s, dst, src1, src2, bytes);
> +}
> +
> +#if HAVE_SSE2_INLINE && defined(__x86_64__)
> +static void merge8_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t
> *src2, size_t bytes)
> +{
> + for(; bytes > 0 && ((uintptr_t)src1 & 15); bytes--)
> + *dst++ = ( *src1++ + *src2++ ) >> 1;
> +
> + for (; bytes >= 16; bytes -= 16) {
> + __asm__ __volatile__( "movdqu %2,%%xmm1;"
> + "pavgb %1, %%xmm1;"
> + "movdqu %%xmm1, %0" :"=m" (*dst):
> + "m" (*src1),
> + "m" (*src2) : "xmm1" );
> + dst += 16;
> + src1 += 16;
> + src2 += 16;
> + }
> +
> + if (bytes > 0) {
> + merge8_c(dst, src1, src2, bytes);
> + }
> +}
> +static void merge16_sse2(uint16_t *dst, const uint16_t *src1, const
> uint16_t *src2, size_t bytes)
> +{
> + size_t words = bytes / 2;
> +
> + for(; words > 0 && ((uintptr_t)src1 & 15); words--)
> + *dst++ = ( *src1++ + *src2++ ) >> 1;
> +
> + for (; words >= 8; words -= 8) {
> + __asm__ __volatile__( "movdqu %2,%%xmm1;"
> + "pavgw %1, %%xmm1;"
> + "movdqu %%xmm1, %0" :"=m" (*dst):
> + "m" (*src1),
> + "m" (*src2) : "xmm1" );
> + dst += 8;
> + src1 += 8;
> + src2 += 8;
> + }
> +
Unacceptable code. Inline assembly is forbidden.
> + if (words > 0) {
> + merge16_c(dst, src1, src2, words * 2);
> + }
> +}
> +#define merge8 merge8_sse2
> +#define merge16 merge16_sse2
> +#else
> +#define merge8 merge8_c
> +#define merge16 merge16_c
> +#endif
> +
> +static void render_image_single(FastDeintContext *s, AVFrame *out, AVFrame
> *frame)
> +{
> + int i, planes_nb = 0;
> + enum Mode mode = s->mode;
> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format);
> +
> + for (i = 0; i < desc->nb_components; i++)
> + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
> +
> + for (i = 0; i < planes_nb; i++) {
> + int height, bwidth;
> + int dst_linesize, src_linesize;
> + const uint8_t *src;
> + uint8_t *dst;
> +
> + bwidth = av_image_get_linesize(out->format, out->width, i);
> + if (bwidth < 0) {
> + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n");
> + return;
> + }
> +
> + height = out->height;
> + if (i == 1 || i == 2) {
> + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h);
> + }
> +
> + src = frame->data[i];
> + dst = out->data[i];
> + dst_linesize = out->linesize[i];
> + src_linesize = frame->linesize[i];
> +
> + if (mode == MODE_BLEND) {
> + // Copy first line
> + memcpy(dst, src, bwidth);
> + dst += dst_linesize;
> + height--;
> + }
> +
> + // Merge remaining lines
> + for (; height > 0; height--) {
> + if (mode == MODE_DISCARD)
> + memcpy(dst, src, bwidth);
> + else
> + merge_unaligned(s, dst, src, src + src_linesize, bwidth);
> + dst += dst_linesize;
> + src += src_linesize;
> + if (mode == MODE_MEAN || mode == MODE_DISCARD) {
> + src += src_linesize;
> + height--;
> + }
> + }
> + }
> + if (mode != MODE_DISCARD)
> + emms_c();
> +}
> +
> +static void render_image_doubler(FastDeintContext *s, AVFrame *out, AVFrame
> *frame, int field)
> +{
> + int i, planes_nb = 0;
> + enum Mode mode = s->mode;
> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format);
> +
> + for (i = 0; i < desc->nb_components; i++)
> + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
> +
> + for (i = 0; i < planes_nb; i++) {
> + int height, bwidth;
> + int dst_linesize, src_linesize;
> + const uint8_t *src;
> + uint8_t *dst;
> +
> + bwidth = av_image_get_linesize(out->format, out->width, i);
> + if (bwidth < 0) {
> + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n");
> + return;
> + }
> + height = out->height;
> + if (i == 1 || i == 2) {
> + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h);
> + }
> +
> + src = frame->data[i];
> + dst = out->data[i];
> + src_linesize = frame->linesize[i];
> + dst_linesize = out->linesize[i];
> +
> + // For BOTTOM field we need to add the first line
> + if (field == 1) {
> + memcpy(dst, src, bwidth);
> + dst += dst_linesize;
> + src += src_linesize;
> + height--;
> + }
> +
> + height -= 2;
> +
> + for (; height > 0; height-=2) {
> + memcpy(dst, src, bwidth);
> + dst += dst_linesize;
> +
> + if (mode == MODE_LINEAR)
> + merge_unaligned(s, dst, src, src + 2 * src_linesize,
> bwidth);
> + else
> + memcpy(dst, src, bwidth);
> + dst += dst_linesize;
> +
> + src += src_linesize * 2;
> + }
> +
> + memcpy(dst, src, bwidth);
> +
> + // For TOP field we need to add the last line
> + if (field == 0)
> + {
> + dst += dst_linesize;
> + src += src_linesize;
> + memcpy(dst, src, bwidth);
> + }
> + }
> + if (mode == MODE_LINEAR)
> + emms_c();
> +}
> +
> +static int filter_frame_single(AVFilterLink *link, AVFrame *frame)
> +{
> + AVFilterContext *ctx = link->dst;
> + AVFrame *out;
> + FastDeintContext *s = ctx->priv;
> +
> + if (!frame->interlaced_frame) {
> + return ff_filter_frame(ctx->outputs[0], frame);
> + }
> +
> + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h);
> + if (!out) {
> + av_frame_free(&frame);
> + return AVERROR(ENOMEM);
> + }
> +
> + av_frame_copy_props(out, frame);
> + out->interlaced_frame = 0;
> + render_image_single(s, out, frame);
> +
> + av_frame_free(&frame);
> + return ff_filter_frame(ctx->outputs[0], out);
> +}
> +
> +static AVFrame *copy_frame(AVFilterLink *link, AVFrame *frame)
> +{
> + AVFilterContext *ctx = link->dst;
> + AVFrame *out;
> +
> + if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX)
> + out = av_frame_alloc();
> + else
> + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h);
> +
> + if (!out)
> + return NULL;
> +
> + av_frame_copy_props(out, frame);
> + return out;
> +}
> +
> +static int filter_frame_double(AVFilterLink *link, AVFrame *in)
> +{
> + AVFilterContext *ctx = link->dst;
> + FastDeintContext *s = ctx->priv;
> + AVFrame *frame, *out, *out2;
> + int tff, ret;
> +
> + s->cur = s->next;
> + s->next = in;
> +
> + if (!s->cur) {
> + return 0;
> + }
> +
> + frame = s->cur;
> +
> + if (!frame->interlaced_frame) {
> + if (frame->pts != AV_NOPTS_VALUE)
> + frame->pts *= 2;
> + s->cur = NULL;
> + return ff_filter_frame(ctx->outputs[0], frame);
> + }
> +
> + tff = frame->top_field_first;
> + out = copy_frame(link, frame);
> + if (!out) {
> + av_frame_free(&frame);
> + s->cur = NULL;
> + return AVERROR(ENOMEM);
> + }
> +
> + out->interlaced_frame = 0;
> + if (out->pts != AV_NOPTS_VALUE)
> + out->pts = out->pts * 2;
> + render_image_doubler(s, out, frame, !tff);
> +
> + ret = ff_filter_frame(ctx->outputs[0], out);
> + if (ret < 0) {
> + av_frame_free(&frame);
> + s->cur = NULL;
> + return ret;
> + }
> +
> + out2 = copy_frame(link, frame);
> + if (!out2) {
> + av_frame_free(&frame);
> + s->cur = NULL;
> + return AVERROR(ENOMEM);
> + }
> +
> + out2->interlaced_frame = 0;
> + av_frame_remove_side_data(out2, AV_FRAME_DATA_A53_CC);
> + if (out2->pts != AV_NOPTS_VALUE) {
> + out2->pts = frame->pts + s->next->pts;
> + }
> + render_image_doubler(s, out2, frame, tff);
> +
> + av_frame_free(&frame);
> + s->cur = NULL;
> +
> + return ff_filter_frame(ctx->outputs[0], out2);
> +}
> +
> +static int filter_frame(AVFilterLink *link, AVFrame *frame)
> +{
> + AVFilterContext *ctx = link->dst;
> + FastDeintContext *s = ctx->priv;
> +
> + av_assert0(frame);
> +
> + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) {
> + return filter_frame_double(link, frame);
> + } else {
> + return filter_frame_single(link, frame);
> + }
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> + FastDeintContext *s = ctx->priv;
> + av_frame_free(&s->cur);
> + av_frame_free(&s->next);
> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> + static const enum AVPixelFormat pix_fmts[] = {
> + AV_PIX_FMT_YUV420P,
> + AV_PIX_FMT_YUV422P,
> + AV_PIX_FMT_YUV444P,
> + AV_PIX_FMT_YUV410P,
> + AV_PIX_FMT_YUV411P,
> + AV_PIX_FMT_GRAY8,
> + AV_PIX_FMT_YUVJ420P,
> + AV_PIX_FMT_YUVJ422P,
> + AV_PIX_FMT_YUVJ444P,
> + AV_PIX_FMT_GRAY16,
> + AV_PIX_FMT_YUV440P,
> + AV_PIX_FMT_YUVJ440P,
> + AV_PIX_FMT_YUV420P9,
> + AV_PIX_FMT_YUV422P9,
> + AV_PIX_FMT_YUV444P9,
> + AV_PIX_FMT_YUV420P10,
> + AV_PIX_FMT_YUV422P10,
> + AV_PIX_FMT_YUV444P10,
> + AV_PIX_FMT_YUV420P12,
> + AV_PIX_FMT_YUV422P12,
> + AV_PIX_FMT_YUV444P12,
> + AV_PIX_FMT_YUV420P14,
> + AV_PIX_FMT_YUV422P14,
> + AV_PIX_FMT_YUV444P14,
> + AV_PIX_FMT_YUV420P16,
> + AV_PIX_FMT_YUV422P16,
> + AV_PIX_FMT_YUV444P16,
> + AV_PIX_FMT_YUVA420P,
> + AV_PIX_FMT_YUVA422P,
> + AV_PIX_FMT_YUVA444P,
> + AV_PIX_FMT_GBRP,
> + AV_PIX_FMT_GBRP9,
> + AV_PIX_FMT_GBRP10,
> + AV_PIX_FMT_GBRP12,
> + AV_PIX_FMT_GBRP14,
> + AV_PIX_FMT_GBRP16,
> + AV_PIX_FMT_GBRAP,
> + AV_PIX_FMT_NONE
> + };
Group these onto fewer lines somehow.
> +
> + AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
> + if (!fmts_list)
> + return AVERROR(ENOMEM);
> + return ff_set_common_formats(ctx, fmts_list);
> +}
> +
> +#if ARCH_ARM
> +#include "libavutil/arm/cpu.h"
> +#endif
> +#if ARCH_AARCH64
> +#include "libavutil/aarch64/cpu.h"
> +#endif
> +#if ARCH_AARCH64 || ARCH_ARM
> +void ff_merge8_neon(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
> size_t bytes);
> +void ff_merge16_neon(uint16_t *dst, const uint16_t *src1, const uint16_t
> *src2, size_t bytes);
> +void ff_merge8_armv6(uint8_t *dst, const uint8_t *src1, const uint8_t
> *src2, size_t bytes);
> +void ff_merge16_armv6(uint16_t *dst, const uint16_t *src1, const uint16_t
> *src2, size_t bytes);
> +#endif
> +
I do not like this style. Look at what other filters do, such as one that
adds x86 SIMD.
> +static int config_props(AVFilterLink *link)
> +{
> + AVFilterContext *ctx = link->src;
> + FastDeintContext *s = ctx->priv;
> + const AVPixFmtDescriptor *pix;
> +#if ARCH_AARCH64 || ARCH_ARM
> + int cpu_flags = av_get_cpu_flags();
> +#endif
This belongs in a separate directory and file. See the aarch64 directory.
> +
> + link->w = link->src->inputs[0]->w;
> + link->h = link->src->inputs[0]->h;
> + link->time_base = link->src->inputs[0]->time_base;
> + link->frame_rate = link->src->inputs[0]->frame_rate;
> + link->sample_aspect_ratio = link->src->inputs[0]->sample_aspect_ratio;
> +
> + if (s->mode == MODE_MEAN || s->mode == MODE_DISCARD) {
> + link->h /= 2;
> + link->sample_aspect_ratio = av_mul_q(link->sample_aspect_ratio,
> av_make_q(1, 2));
> + }
> + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) {
> + link->time_base = av_mul_q(link->time_base, av_make_q(1, 2));
> + link->frame_rate = av_mul_q(link->frame_rate, av_make_q(2, 1));
> + }
> +
> + pix = av_pix_fmt_desc_get(link->format);
> + s->merge_size = (pix->comp[0].depth > 8) ? 16 : 8;
> + s->merge = s->merge_size == 16 ? (merge_fn)merge16 : (merge_fn)merge8;
> +
> +#if ARCH_ARM
> + if (have_armv6(cpu_flags)) {
> + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_armv6 :
> (merge_fn)ff_merge8_armv6;
> + s->merge_aligned = 1;
> + }
> +#endif
> +#if ARCH_AARCH64 || ARCH_ARM
> + if (have_neon(cpu_flags)) {
> + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_neon :
> (merge_fn)ff_merge8_neon;
> + s->merge_aligned = 1;
> + }
> +#endif
> +
> + return 0;
> +}
> +
> +static int request_frame(AVFilterLink *link)
> +{
> + AVFilterContext *ctx = link->src;
> + FastDeintContext *s = ctx->priv;
> + int ret;
> +
> + if (s->eof)
> + return AVERROR_EOF;
> +
> + ret = ff_request_frame(ctx->inputs[0]);
> +
> + if (ret == AVERROR_EOF && s->cur) {
> + AVFrame *next = av_frame_clone(s->next);
> + if (!next)
> + return AVERROR(ENOMEM);
> +
> + next->pts = s->next->pts * 2 - s->cur->pts;
> + filter_frame(ctx->inputs[0], next);
> + s->eof = 1;
> + } else if (ret < 0) {
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +#define OFFSET(x) offsetof(FastDeintContext, x)
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +
> +#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST,
> {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
> +
> +static const AVOption fastdeint_options[] = {
> + { "mode", "specify the deinterlacing mode", OFFSET(mode),
> AV_OPT_TYPE_INT, {.i64=MODE_BLEND}, 0, MODE_MAX-1, FLAGS, "mode" },
> + CONST("discard", "discard bottom frame", MODE_DISCARD, "mode"),
> + CONST("mean", "half resolution blender", MODE_MEAN, "mode"),
> + CONST("blend", "full resolution blender", MODE_BLEND, "mode"),
> + CONST("bob", "bob doubler", MODE_BOB, "mode"),
> + CONST("linear", "bob doubler with linear interpolation", MODE_LINEAR,
> "mode"),
> +
> + { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(fastdeint);
> +
> +static const AVFilterPad fastdeint_inputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .filter_frame = filter_frame,
> + },
> + { NULL }
> +};
> +
> +static const AVFilterPad fastdeint_outputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .config_props = config_props,
> + .request_frame = request_frame
> + },
> + { NULL }
> +};
> +
> +AVFilter ff_vf_fastdeint = {
> + .name = "fastdeint",
> + .description = NULL_IF_CONFIG_SMALL("fast deinterlacing algorithms"),
First letter should be capitalized.
> + .priv_size = sizeof(FastDeintContext),
> + .priv_class = &fastdeint_class,
> + .uninit = uninit,
> + .query_formats = query_formats,
> + .inputs = fastdeint_inputs,
> + .outputs = fastdeint_outputs,
> +};
> --
> 2.20.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel
mailing list