[FFmpeg-devel] [RFC PATCH] avfilter/fastdeint: import simple cpu-optimized deinterlacing algorithms from VLC
James Almer
jamrial at gmail.com
Mon Sep 9 23:40:33 EEST 2019
On 9/9/2019 5:12 PM, Aman Gupta wrote:
> From: Aman Gupta <aman at tmm1.net>
>
> These are simple algorithms which can be run efficiently
> on low-powered devices to produce deinterlaced images.
>
> Signed-off-by: Aman Gupta <aman at tmm1.net>
> ---
> doc/filters.texi | 27 ++
> libavfilter/Makefile | 1 +
> libavfilter/aarch64/Makefile | 1 +
> libavfilter/aarch64/merge_neon.S | 98 ++++++
> libavfilter/allfilters.c | 1 +
> libavfilter/arm/Makefile | 3 +
> libavfilter/arm/merge_armv6.S | 70 ++++
> libavfilter/arm/merge_neon.S | 109 ++++++
> libavfilter/vf_fastdeint.c | 588 +++++++++++++++++++++++++++++++
> 9 files changed, 898 insertions(+)
> create mode 100644 libavfilter/aarch64/merge_neon.S
> create mode 100644 libavfilter/arm/Makefile
> create mode 100644 libavfilter/arm/merge_armv6.S
> create mode 100644 libavfilter/arm/merge_neon.S
> create mode 100644 libavfilter/vf_fastdeint.c
Asm stuff should be in a separate entry.
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 6c81e1da40..55d9adeb81 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -9796,6 +9796,33 @@ fade=t=in:st=5.5:d=0.5
>
> @end itemize
>
> + at section fastdeint
> +Fast deinterlacing algorithms.
> +
> + at table @option
> + at item mode
> +Deinterlacing algorithm to use.
> +
> +It accepts the following values:
> + at table @samp
> + at item discard
> +Discard the bottom field.
> +
> + at item mean
> +Half resolution blender.
> +
> + at item blend
> +Full resolution blender.
> +
> + at item bob
> +Bob doubler.
> +
> + at item linear
> +Bob doubler with linear interpolation.
> + at end table
> +
> + at end table
> +
> @section fftdnoiz
> Denoise frames using 3D FFT (frequency domain filtering).
>
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 3ef4191d9a..a2b3566ec0 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -234,6 +234,7 @@ OBJS-$(CONFIG_EROSION_OPENCL_FILTER) += vf_neighbor_opencl.o opencl.o \
> opencl/neighbor.o
> OBJS-$(CONFIG_EXTRACTPLANES_FILTER) += vf_extractplanes.o
> OBJS-$(CONFIG_FADE_FILTER) += vf_fade.o
> +OBJS-$(CONFIG_FASTDEINT_FILTER) += vf_fastdeint.o
> OBJS-$(CONFIG_FFTDNOIZ_FILTER) += vf_fftdnoiz.o
> OBJS-$(CONFIG_FFTFILT_FILTER) += vf_fftfilt.o
> OBJS-$(CONFIG_FIELD_FILTER) += vf_field.o
> diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile
> index b58daa3a3f..2b0ad92893 100644
> --- a/libavfilter/aarch64/Makefile
> +++ b/libavfilter/aarch64/Makefile
> @@ -1,3 +1,4 @@
> OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o
>
> +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += aarch64/merge_neon.o
> NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o
> diff --git a/libavfilter/aarch64/merge_neon.S b/libavfilter/aarch64/merge_neon.S
> new file mode 100644
> index 0000000000..62377331a4
> --- /dev/null
> +++ b/libavfilter/aarch64/merge_neon.S
> @@ -0,0 +1,98 @@
> +/*
> + * Copyright (c) 2009-2016 Rémi Denis-Courmont, Janne Grunau, VLC authors
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/aarch64/asm.S"
> +
> +#define dest x0
> +#define src1 x1
> +#define src2 x2
> +#define size x3
> +
> + .align 2
> + // NOTE: Offset and pitch must be multiple of 16-bytes.
> +function ff_merge8_neon, export=1
> + ands x5, size, #~63
> + b.eq 2f
> + mov x10, #64
> + add x11, src1, #32
> + add x12, src2, #32
> +1:
> + ld1 {v0.16b,v1.16b}, [src1], x10
> + ld1 {v4.16b,v5.16b}, [src2], x10
> + ld1 {v2.16b,v3.16b}, [x11], x10
> + uhadd v0.16b, v0.16b, v4.16b
> + ld1 {v6.16b,v7.16b}, [x12], x10
> + subs x5, x5, #64
> + uhadd v1.16b, v1.16b, v5.16b
> + uhadd v2.16b, v2.16b, v6.16b
> + uhadd v3.16b, v3.16b, v7.16b
> + st1 {v0.16b,v1.16b}, [dest], #32
> + st1 {v2.16b,v3.16b}, [dest], #32
> + b.gt 1b
> +2:
> + tbz size, #5, 3f
> + ld1 {v0.16b,v1.16b}, [src1], #32
> + ld1 {v4.16b,v5.16b}, [src2], #32
> + uhadd v0.16b, v0.16b, v4.16b
> + uhadd v1.16b, v1.16b, v5.16b
> + st1 {v0.16b,v1.16b}, [dest], #32
> +3:
> + tbz size, #4, 4f
> + ld1 {v0.16b}, [src1]
> + ld1 {v4.16b}, [src2]
> + uhadd v0.16b, v0.16b, v4.16b
> + st1 {v0.16b}, [dest]
> +4:
> + ret
> +endfunc
> +
> + .align 2
> +function ff_merge16_neon, export=1
> + ands x5, size, #~63
> + b.eq 2f
> +1:
> + ld1 {v0.8h,v1.8h}, [src1], #32
> + ld1 {v4.8h,v5.8h}, [src2], #32
> + ld1 {v2.8h,v3.8h}, [src1], #32
> + uhadd v0.8h, v0.8h, v4.8h
> + ld1 {v6.8h,v7.8h}, [src2], #32
> + uhadd v1.8h, v1.8h, v5.8h
> + uhadd v2.8h, v2.8h, v6.8h
> + uhadd v3.8h, v3.8h, v7.8h
> + st1 {v0.8h,v1.8h}, [dest], #32
> + st1 {v2.8h,v3.8h}, [dest], #32
> + subs x5, x5, #64
> + b.gt 1b
> +2:
> + tbz size, #5, 3f
> + ld1 {v0.8h,v1.8h}, [src1], #32
> + ld1 {v4.8h,v5.8h}, [src2], #32
> + uhadd v0.8h, v0.8h, v4.8h
> + uhadd v1.8h, v1.8h, v5.8h
> + st1 {v0.8h,v1.8h}, [dest], #32
> +3:
> + tbz size, #4, 4f
> + ld1 {v0.8h}, [src1]
> + ld1 {v4.8h}, [src2]
> + uhadd v0.8h, v0.8h,v4.8h
> + st1 {v0.8h}, [dest]
> +4:
> + ret
> +endfunc
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index b675c688ee..6631af2ffe 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -219,6 +219,7 @@ extern AVFilter ff_vf_erosion;
> extern AVFilter ff_vf_erosion_opencl;
> extern AVFilter ff_vf_extractplanes;
> extern AVFilter ff_vf_fade;
> +extern AVFilter ff_vf_fastdeint;
> extern AVFilter ff_vf_fftdnoiz;
> extern AVFilter ff_vf_fftfilt;
> extern AVFilter ff_vf_field;
> diff --git a/libavfilter/arm/Makefile b/libavfilter/arm/Makefile
> new file mode 100644
> index 0000000000..c92d62fac9
> --- /dev/null
> +++ b/libavfilter/arm/Makefile
> @@ -0,0 +1,3 @@
> +ARMV6-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_armv6.o
> +
> +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_neon.o
> diff --git a/libavfilter/arm/merge_armv6.S b/libavfilter/arm/merge_armv6.S
> new file mode 100644
> index 0000000000..9b551c2c6c
> --- /dev/null
> +++ b/libavfilter/arm/merge_armv6.S
> @@ -0,0 +1,70 @@
> +/*
> + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/arm/asm.S"
> +
> +#define dest r0
> +#define src1 r1
> +#define src2 r2
> +#define size r3
> +
> + .align 2
> +function ff_merge8_armv6, export=1
> + push {r4-r9,lr}
> +1:
> + pld [src1, #64]
> + ldm src1!, {r4-r5}
> + pld [src2, #64]
> + ldm src2!, {r8-r9}
> + subs size, size, #16
> + uhadd8 r4, r4, r8
> + ldm src1!, {r6-r7}
> + uhadd8 r5, r5, r9
> + ldm src2!, {ip,lr}
> + uhadd8 r6, r6, ip
> + stm dest!, {r4-r5}
> + uhadd8 r7, r7, lr
> + stm dest!, {r6-r7}
> + it eq
> + popeq {r4-r9,pc}
> + b 1b
> +endfunc
> +
> + .align 2
> +function ff_merge16_armv6, export=1
> + push {r4-r9,lr}
> +1:
> + pld [src1, #64]
> + ldm src1!, {r4-r5}
> + pld [src2, #64]
> + ldm src2!, {r8-r9}
> + subs size, size, #16
> + uhadd16 r4, r4, r8
> + ldm src1!, {r6-r7}
> + uhadd16 r5, r5, r9
> + ldm src2!, {ip,lr}
> + uhadd16 r6, r6, ip
> + stm dest!, {r4-r5}
> + uhadd16 r7, r7, lr
> + stm dest!, {r6-r7}
> + it eq
> + popeq {r4-r9,pc}
> + b 1b
> +endfunc
> \ No newline at end of file
This shouldn't happen; the file should end with a newline.
> diff --git a/libavfilter/arm/merge_neon.S b/libavfilter/arm/merge_neon.S
> new file mode 100644
> index 0000000000..ae36cf3ca9
> --- /dev/null
> +++ b/libavfilter/arm/merge_neon.S
> @@ -0,0 +1,109 @@
> +/*
> + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/arm/asm.S"
> +
> +#define dest r0
> +#define src1 r1
> +#define src2 r2
> +#define size r3
> +
> + .align 2
> + @ NOTE: Offset and pitch must be multiple of 16-bytes.
> +function ff_merge8_neon, export=1
> + cmp size, #64
> + blo 2f
> +1:
> + pld [src1, #64]
> + vld1.u8 {q0-q1}, [src1,:128]!
> + pld [src2, #64]
> + vld1.u8 {q8-q9}, [src2,:128]!
> + vhadd.u8 q0, q0, q8
> + sub size, size, #64
> + vld1.u8 {q2-q3}, [src1,:128]!
> + vhadd.u8 q1, q1, q9
> + vld1.u8 {q10-q11}, [src2,:128]!
> + vhadd.u8 q2, q2, q10
> + cmp size, #64
> + vhadd.u8 q3, q3, q11
> + vst1.u8 {q0-q1}, [dest,:128]!
> + vst1.u8 {q2-q3}, [dest,:128]!
> + bhs 1b
> +2:
> + cmp size, #32
> + blo 3f
> + vld1.u8 {q0-q1}, [src1,:128]!
> + sub size, size, #32
> + vld1.u8 {q8-q9}, [src2,:128]!
> + vhadd.u8 q0, q0, q8
> + vhadd.u8 q1, q1, q9
> + vst1.u8 {q0-q1}, [dest,:128]!
> +3:
> + cmp size, #16
> + it lo
> + bxlo lr
> + vld1.u8 {q0}, [src1,:128]!
> + sub size, size, #16
> + vld1.u8 {q8}, [src2,:128]!
> + vhadd.u8 q0, q0, q8
> + vst1.u8 {q0}, [dest,:128]!
> + bx lr
> +endfunc
> +
> + .align 2
> +function ff_merge16_neon, export=1
> + cmp size, #64
> + blo 2f
> +1:
> + pld [src1, #64]
> + vld1.u16 {q0-q1}, [src1,:128]!
> + pld [src2, #64]
> + vld1.u16 {q8-q9}, [src2,:128]!
> + vhadd.u16 q0, q0, q8
> + sub size, size, #64
> + vld1.u16 {q2-q3}, [src1,:128]!
> + vhadd.u16 q1, q1, q9
> + vld1.u16 {q10-q11}, [src2,:128]!
> + vhadd.u16 q2, q2, q10
> + cmp size, #64
> + vhadd.u16 q3, q3, q11
> + vst1.u16 {q0-q1}, [dest,:128]!
> + vst1.u16 {q2-q3}, [dest,:128]!
> + bhs 1b
> +2:
> + cmp size, #32
> + blo 3f
> + vld1.u16 {q0-q1}, [src1,:128]!
> + sub size, size, #32
> + vld1.u16 {q8-q9}, [src2,:128]!
> + vhadd.u16 q0, q0, q8
> + vhadd.u16 q1, q1, q9
> + vst1.u16 {q0-q1}, [dest,:128]!
> +3:
> + cmp size, #16
> + it lo
> + bxlo lr
> + vld1.u16 {q0}, [src1,:128]!
> + sub size, size, #16
> + vld1.u16 {q8}, [src2,:128]!
> + vhadd.u16 q0, q0, q8
> + vst1.u16 {q0}, [dest,:128]!
> + bx lr
> +endfunc
> \ No newline at end of file
> diff --git a/libavfilter/vf_fastdeint.c b/libavfilter/vf_fastdeint.c
> new file mode 100644
> index 0000000000..5ddd8be392
> --- /dev/null
> +++ b/libavfilter/vf_fastdeint.c
> @@ -0,0 +1,588 @@
> +/*
> + * Copyright (C) 2015 Aman Gupta <aman at tmm1.net>
> + * 2000-2011 VLC authors and VideoLAN
> + *
> + * Author: Sam Hocevar <sam at zoy.org>
> + * Damien Lucas <nitrox at videolan.org>
> + * Laurent Aimar <fenrir at videolan.org>
> + * Sigmund Augdal Helberg <sigmunau at videolan.org>
> + *
> + * These algorithms are derived from the VLC project's
> + * modules/video_filter/deinterlace/algo_basic.c
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/avassert.h"
> +#include "libavutil/cpu.h"
> +#include "libavutil/common.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/timestamp.h"
> +#include "avfilter.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "video.h"
> +
> +enum Mode {
> + MODE_DISCARD,
> + MODE_MEAN,
> + MODE_BLEND,
> + MODE_BOB,
> + MODE_LINEAR,
> + MODE_MAX,
> +};
> +
> +typedef void (*merge_fn)(void *dst, const void *src1, const void *src2, size_t len);
> +
> +typedef struct FastDeintContext {
> + const AVClass *class;
> + merge_fn merge;
> + int merge_size;
> + int merge_aligned;
> + AVFrame *cur, *next;
> + enum Mode mode;
> + int eof;
> +} FastDeintContext;
> +
> +static void merge8_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes)
> +{
> + for (; bytes > 0; bytes--)
> + *dst++ = ( *src1++ + *src2++ ) >> 1;
> +}
> +
> +static void merge16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes)
> +{
> + for (size_t words = bytes / 2; words > 0; words--)
> + *dst++ = ( *src1++ + *src2++ ) >> 1;
> +}
> +
> +/*
> + * Average two rows of 8-bit samples into dst for an arbitrary byte count.
> + *
> + * When s->merge_aligned is set, s->merge is only handed a non-zero multiple
> + * of 16 bytes, starting at the caller's original pointers; whatever is left
> + * over (at most 15 bytes) is finished with merge8_c(). When it is not set,
> + * the whole row is passed to s->merge unchanged.
> + *
> + * s      filter context providing merge and merge_aligned
> + * dst    destination row
> + * src1   first source row
> + * src2   second source row
> + * bytes  number of bytes to merge
> + */
> +static void merge8_unaligned(FastDeintContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes)
> +{
> +    size_t tail = s->merge_aligned ? bytes % 16 : 0;
> +    size_t bulk = bytes - tail;
> +
> +    // Run the bulk first so the pointers given to s->merge are not advanced
> +    // by the remainder (the 32-bit NEON routine uses :128 aligned accesses),
> +    // and skip it entirely when empty: the ARMv6 loop only terminates when
> +    // its counter reaches exactly zero after at least one 16-byte step.
> +    if (bulk > 0)
> +        s->merge(dst, src1, src2, bulk);
> +
> +    // Leftover bytes that do not fill a 16-byte group.
> +    if (tail > 0)
> +        merge8_c(dst + bulk, src1 + bulk, src2 + bulk, tail);
> +}
> +
> +/*
> + * Average two rows of 16-bit samples into dst for an arbitrary byte count.
> + *
> + * When s->merge_aligned is set, s->merge is only handed a non-zero multiple
> + * of 16 bytes, starting at the caller's original pointers; the leftover
> + * bytes are finished with merge16_c(). When it is not set, the whole row is
> + * passed to s->merge unchanged.
> + *
> + * s      filter context providing merge and merge_aligned
> + * dst    destination row
> + * src1   first source row
> + * src2   second source row
> + * bytes  number of bytes (not samples) to merge
> + */
> +static void merge16_unaligned(FastDeintContext *s, uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes)
> +{
> +    size_t tail = s->merge_aligned ? bytes % 16 : 0;
> +    size_t bulk = bytes - tail;
> +    // bulk is a multiple of 16 bytes, so this division is exact.
> +    size_t bulk_words = bulk / 2;
> +
> +    // Run the bulk first from the original pointers and with the reduced
> +    // length, so s->merge neither sees pointers shifted by the remainder
> +    // nor runs past the end of the row; skip it when there is nothing to do
> +    // because the ARMv6 loop never terminates for a zero length.
> +    if (bulk > 0)
> +        s->merge(dst, src1, src2, bulk);
> +
> +    // merge16_c() takes a length in bytes, like every other merge function.
> +    if (tail > 0)
> +        merge16_c(dst + bulk_words, src1 + bulk_words, src2 + bulk_words, tail);
> +}
> +
> +static void merge_unaligned(FastDeintContext *s, void *dst, const void *src1, const void *src2, size_t bytes)
> +{
> + if (s->merge_size == 16)
> + merge16_unaligned(s, dst, src1, src2, bytes);
> + else
> + merge8_unaligned(s, dst, src1, src2, bytes);
> +}
> +
> +#if HAVE_SSE2_INLINE && defined(__x86_64__)
No inline asm. This code needs to be ported to nasm syntax.
Also, no arch-specific code should be present in arch-agnostic source
files, beyond calls to init() functions.
> +static void merge8_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes)
> +{
> + for(; bytes > 0 && ((uintptr_t)src1 & 15); bytes--)
> + *dst++ = ( *src1++ + *src2++ ) >> 1;
> +
> + for (; bytes >= 16; bytes -= 16) {
> + __asm__ __volatile__( "movdqu %2,%%xmm1;"
> + "pavgb %1, %%xmm1;"
> + "movdqu %%xmm1, %0" :"=m" (*dst):
> + "m" (*src1),
> + "m" (*src2) : "xmm1" );
> + dst += 16;
> + src1 += 16;
> + src2 += 16;
> + }
> +
> + if (bytes > 0) {
> + merge8_c(dst, src1, src2, bytes);
> + }
> +}
> +static void merge16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes)
> +{
> + size_t words = bytes / 2;
> +
> + for(; words > 0 && ((uintptr_t)src1 & 15); words--)
> + *dst++ = ( *src1++ + *src2++ ) >> 1;
> +
> + for (; words >= 8; words -= 8) {
> + __asm__ __volatile__( "movdqu %2,%%xmm1;"
> + "pavgw %1, %%xmm1;"
> + "movdqu %%xmm1, %0" :"=m" (*dst):
> + "m" (*src1),
> + "m" (*src2) : "xmm1" );
> + dst += 8;
> + src1 += 8;
> + src2 += 8;
> + }
> +
> + if (words > 0) {
> + merge16_c(dst, src1, src2, words * 2);
> + }
> +}
> +#define merge8 merge8_sse2
> +#define merge16 merge16_sse2
> +#else
> +#define merge8 merge8_c
> +#define merge16 merge16_c
> +#endif
> +
> +static void render_image_single(FastDeintContext *s, AVFrame *out, AVFrame *frame)
> +{
> + int i, planes_nb = 0;
> + enum Mode mode = s->mode;
> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format);
> +
> + for (i = 0; i < desc->nb_components; i++)
> + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
> +
> + for (i = 0; i < planes_nb; i++) {
> + int height, bwidth;
> + int dst_linesize, src_linesize;
> + const uint8_t *src;
> + uint8_t *dst;
> +
> + bwidth = av_image_get_linesize(out->format, out->width, i);
> + if (bwidth < 0) {
> + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n");
> + return;
> + }
> +
> + height = out->height;
> + if (i == 1 || i == 2) {
> + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h);
> + }
> +
> + src = frame->data[i];
> + dst = out->data[i];
> + dst_linesize = out->linesize[i];
> + src_linesize = frame->linesize[i];
> +
> + if (mode == MODE_BLEND) {
> + // Copy first line
> + memcpy(dst, src, bwidth);
> + dst += dst_linesize;
> + height--;
> + }
> +
> + // Merge remaining lines
> + for (; height > 0; height--) {
> + if (mode == MODE_DISCARD)
> + memcpy(dst, src, bwidth);
> + else
> + merge_unaligned(s, dst, src, src + src_linesize, bwidth);
> + dst += dst_linesize;
> + src += src_linesize;
> + if (mode == MODE_MEAN || mode == MODE_DISCARD) {
> + src += src_linesize;
> + height--;
> + }
> + }
> + }
> + if (mode != MODE_DISCARD)
> + emms_c();
> +}
> +
> +static void render_image_doubler(FastDeintContext *s, AVFrame *out, AVFrame *frame, int field)
> +{
> + int i, planes_nb = 0;
> + enum Mode mode = s->mode;
> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format);
> +
> + for (i = 0; i < desc->nb_components; i++)
> + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
> +
> + for (i = 0; i < planes_nb; i++) {
> + int height, bwidth;
> + int dst_linesize, src_linesize;
> + const uint8_t *src;
> + uint8_t *dst;
> +
> + bwidth = av_image_get_linesize(out->format, out->width, i);
> + if (bwidth < 0) {
> + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n");
> + return;
> + }
> + height = out->height;
> + if (i == 1 || i == 2) {
> + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h);
> + }
> +
> + src = frame->data[i];
> + dst = out->data[i];
> + src_linesize = frame->linesize[i];
> + dst_linesize = out->linesize[i];
> +
> + // For BOTTOM field we need to add the first line
> + if (field == 1) {
> + memcpy(dst, src, bwidth);
> + dst += dst_linesize;
> + src += src_linesize;
> + height--;
> + }
> +
> + height -= 2;
> +
> + for (; height > 0; height-=2) {
> + memcpy(dst, src, bwidth);
> + dst += dst_linesize;
> +
> + if (mode == MODE_LINEAR)
> + merge_unaligned(s, dst, src, src + 2 * src_linesize, bwidth);
> + else
> + memcpy(dst, src, bwidth);
> + dst += dst_linesize;
> +
> + src += src_linesize * 2;
> + }
> +
> + memcpy(dst, src, bwidth);
> +
> + // For TOP field we need to add the last line
> + if (field == 0)
> + {
> + dst += dst_linesize;
> + src += src_linesize;
> + memcpy(dst, src, bwidth);
> + }
> + }
> + if (mode == MODE_LINEAR)
> + emms_c();
> +}
> +
> +static int filter_frame_single(AVFilterLink *link, AVFrame *frame)
> +{
> + AVFilterContext *ctx = link->dst;
> + AVFrame *out;
> + FastDeintContext *s = ctx->priv;
> +
> + if (!frame->interlaced_frame) {
> + return ff_filter_frame(ctx->outputs[0], frame);
> + }
> +
> + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h);
> + if (!out) {
> + av_frame_free(&frame);
> + return AVERROR(ENOMEM);
> + }
> +
> + av_frame_copy_props(out, frame);
> + out->interlaced_frame = 0;
> + render_image_single(s, out, frame);
> +
> + av_frame_free(&frame);
> + return ff_filter_frame(ctx->outputs[0], out);
> +}
> +
> +static AVFrame *copy_frame(AVFilterLink *link, AVFrame *frame)
> +{
> + AVFilterContext *ctx = link->dst;
> + AVFrame *out;
> +
> + if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX)
> + out = av_frame_alloc();
> + else
> + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h);
> +
> + if (!out)
> + return NULL;
> +
> + av_frame_copy_props(out, frame);
> + return out;
> +}
> +
> +static int filter_frame_double(AVFilterLink *link, AVFrame *in)
> +{
> + AVFilterContext *ctx = link->dst;
> + FastDeintContext *s = ctx->priv;
> + AVFrame *frame, *out, *out2;
> + int tff, ret;
> +
> + s->cur = s->next;
> + s->next = in;
> +
> + if (!s->cur) {
> + return 0;
> + }
> +
> + frame = s->cur;
> +
> + if (!frame->interlaced_frame) {
> + if (frame->pts != AV_NOPTS_VALUE)
> + frame->pts *= 2;
> + s->cur = NULL;
> + return ff_filter_frame(ctx->outputs[0], frame);
> + }
> +
> + tff = frame->top_field_first;
> + out = copy_frame(link, frame);
> + if (!out) {
> + av_frame_free(&frame);
> + s->cur = NULL;
> + return AVERROR(ENOMEM);
> + }
> +
> + out->interlaced_frame = 0;
> + if (out->pts != AV_NOPTS_VALUE)
> + out->pts = out->pts * 2;
> + render_image_doubler(s, out, frame, !tff);
> +
> + ret = ff_filter_frame(ctx->outputs[0], out);
> + if (ret < 0) {
> + av_frame_free(&frame);
> + s->cur = NULL;
> + return ret;
> + }
> +
> + out2 = copy_frame(link, frame);
> + if (!out2) {
> + av_frame_free(&frame);
> + s->cur = NULL;
> + return AVERROR(ENOMEM);
> + }
> +
> + out2->interlaced_frame = 0;
> + av_frame_remove_side_data(out2, AV_FRAME_DATA_A53_CC);
> + if (out2->pts != AV_NOPTS_VALUE) {
> + out2->pts = frame->pts + s->next->pts;
> + }
> + render_image_doubler(s, out2, frame, tff);
> +
> + av_frame_free(&frame);
> + s->cur = NULL;
> +
> + return ff_filter_frame(ctx->outputs[0], out2);
> +}
> +
> +static int filter_frame(AVFilterLink *link, AVFrame *frame)
> +{
> + AVFilterContext *ctx = link->dst;
> + FastDeintContext *s = ctx->priv;
> +
> + av_assert0(frame);
> +
> + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) {
> + return filter_frame_double(link, frame);
> + } else {
> + return filter_frame_single(link, frame);
> + }
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> + FastDeintContext *s = ctx->priv;
> + av_frame_free(&s->cur);
> + av_frame_free(&s->next);
> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> + static const enum AVPixelFormat pix_fmts[] = {
> + AV_PIX_FMT_YUV420P,
> + AV_PIX_FMT_YUV422P,
> + AV_PIX_FMT_YUV444P,
> + AV_PIX_FMT_YUV410P,
> + AV_PIX_FMT_YUV411P,
> + AV_PIX_FMT_GRAY8,
> + AV_PIX_FMT_YUVJ420P,
> + AV_PIX_FMT_YUVJ422P,
> + AV_PIX_FMT_YUVJ444P,
> + AV_PIX_FMT_GRAY16,
> + AV_PIX_FMT_YUV440P,
> + AV_PIX_FMT_YUVJ440P,
> + AV_PIX_FMT_YUV420P9,
> + AV_PIX_FMT_YUV422P9,
> + AV_PIX_FMT_YUV444P9,
> + AV_PIX_FMT_YUV420P10,
> + AV_PIX_FMT_YUV422P10,
> + AV_PIX_FMT_YUV444P10,
> + AV_PIX_FMT_YUV420P12,
> + AV_PIX_FMT_YUV422P12,
> + AV_PIX_FMT_YUV444P12,
> + AV_PIX_FMT_YUV420P14,
> + AV_PIX_FMT_YUV422P14,
> + AV_PIX_FMT_YUV444P14,
> + AV_PIX_FMT_YUV420P16,
> + AV_PIX_FMT_YUV422P16,
> + AV_PIX_FMT_YUV444P16,
> + AV_PIX_FMT_YUVA420P,
> + AV_PIX_FMT_YUVA422P,
> + AV_PIX_FMT_YUVA444P,
> + AV_PIX_FMT_GBRP,
> + AV_PIX_FMT_GBRP9,
> + AV_PIX_FMT_GBRP10,
> + AV_PIX_FMT_GBRP12,
> + AV_PIX_FMT_GBRP14,
> + AV_PIX_FMT_GBRP16,
> + AV_PIX_FMT_GBRAP,
> + AV_PIX_FMT_NONE
> + };
> +
> + AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
> + if (!fmts_list)
> + return AVERROR(ENOMEM);
> + return ff_set_common_formats(ctx, fmts_list);
> +}
> +
> +#if ARCH_ARM
> +#include "libavutil/arm/cpu.h"
> +#endif
> +#if ARCH_AARCH64
> +#include "libavutil/aarch64/cpu.h"
> +#endif
> +#if ARCH_AARCH64 || ARCH_ARM
> +void ff_merge8_neon(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes);
> +void ff_merge16_neon(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes);
> +void ff_merge8_armv6(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes);
> +void ff_merge16_armv6(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes);
> +#endif
> +
> +static int config_props(AVFilterLink *link)
> +{
> + AVFilterContext *ctx = link->src;
> + FastDeintContext *s = ctx->priv;
> + const AVPixFmtDescriptor *pix;
> +#if ARCH_AARCH64 || ARCH_ARM
> + int cpu_flags = av_get_cpu_flags();
> +#endif
> +
> + link->w = link->src->inputs[0]->w;
> + link->h = link->src->inputs[0]->h;
> + link->time_base = link->src->inputs[0]->time_base;
> + link->frame_rate = link->src->inputs[0]->frame_rate;
> + link->sample_aspect_ratio = link->src->inputs[0]->sample_aspect_ratio;
> +
> + if (s->mode == MODE_MEAN || s->mode == MODE_DISCARD) {
> + link->h /= 2;
> + link->sample_aspect_ratio = av_mul_q(link->sample_aspect_ratio, av_make_q(1, 2));
> + }
> + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) {
> + link->time_base = av_mul_q(link->time_base, av_make_q(1, 2));
> + link->frame_rate = av_mul_q(link->frame_rate, av_make_q(2, 1));
> + }
> +
> + pix = av_pix_fmt_desc_get(link->format);
> + s->merge_size = (pix->comp[0].depth > 8) ? 16 : 8;
> + s->merge = s->merge_size == 16 ? (merge_fn)merge16 : (merge_fn)merge8;
> +
> +#if ARCH_ARM
> + if (have_armv6(cpu_flags)) {
> + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_armv6 : (merge_fn)ff_merge8_armv6;
> + s->merge_aligned = 1;
> + }
> +#endif
> +#if ARCH_AARCH64 || ARCH_ARM
> + if (have_neon(cpu_flags)) {
> + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_neon : (merge_fn)ff_merge8_neon;
> + s->merge_aligned = 1;
> + }
> +#endif
As I mentioned above, this kind of initialization and any function
prototypes should be added to init files in the respective folders.
Here you should only call the init() functions, which will set the above.
See how other filters do it, e.g. tinterlace.
> +
> + return 0;
> +}
> +
> +static int request_frame(AVFilterLink *link)
> +{
> + AVFilterContext *ctx = link->src;
> + FastDeintContext *s = ctx->priv;
> + int ret;
> +
> + if (s->eof)
> + return AVERROR_EOF;
> +
> + ret = ff_request_frame(ctx->inputs[0]);
> +
> + if (ret == AVERROR_EOF && s->cur) {
> + AVFrame *next = av_frame_clone(s->next);
> + if (!next)
> + return AVERROR(ENOMEM);
> +
> + next->pts = s->next->pts * 2 - s->cur->pts;
> + filter_frame(ctx->inputs[0], next);
> + s->eof = 1;
> + } else if (ret < 0) {
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +#define OFFSET(x) offsetof(FastDeintContext, x)
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +
> +#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
> +
> +static const AVOption fastdeint_options[] = {
> + { "mode", "specify the deinterlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=MODE_BLEND}, 0, MODE_MAX-1, FLAGS, "mode" },
> + CONST("discard", "discard bottom frame", MODE_DISCARD, "mode"),
> + CONST("mean", "half resolution blender", MODE_MEAN, "mode"),
> + CONST("blend", "full resolution blender", MODE_BLEND, "mode"),
> + CONST("bob", "bob doubler", MODE_BOB, "mode"),
> + CONST("linear", "bob doubler with linear interpolation", MODE_LINEAR, "mode"),
> +
> + { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(fastdeint);
> +
> +static const AVFilterPad fastdeint_inputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .filter_frame = filter_frame,
> + },
> + { NULL }
> +};
> +
> +static const AVFilterPad fastdeint_outputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .config_props = config_props,
> + .request_frame = request_frame
> + },
> + { NULL }
> +};
> +
> +AVFilter ff_vf_fastdeint = {
> + .name = "fastdeint",
> + .description = NULL_IF_CONFIG_SMALL("fast deinterlacing algorithms"),
> + .priv_size = sizeof(FastDeintContext),
> + .priv_class = &fastdeint_class,
> + .uninit = uninit,
> + .query_formats = query_formats,
> + .inputs = fastdeint_inputs,
> + .outputs = fastdeint_outputs,
> +};
>
More information about the ffmpeg-devel
mailing list