[FFmpeg-devel] [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit
Thomas Mundt
tmundt75 at gmail.com
Tue Sep 19 23:35:30 EEST 2017
2017-09-19 17:53 GMT+02:00 James Almer <jamrial at gmail.com>:
> On 9/19/2017 5:02 AM, Thomas Mundt wrote:
> > 2017-09-19 4:09 GMT+02:00 James Almer <jamrial at gmail.com>:
> >
> >> On 9/18/2017 10:41 PM, Thomas Mundt wrote:
> >>> I tried to set up a MIPS compiler for two days on Windows and Linux
> >>> without success.
> >>> Now I am trying it blind. This solution is based on the first
> >>> suggestion James gave me on IRC.
> >>> There might be room for improvement and an alternative solution with
> >>> AV_RL16() / AV_WL16().
> >>> I used av_le2ne16() because it is a no-op on little endian.
> >>>
> >>> Regards,
> >>> Thomas
> >>
> >>> From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 2001
> >>> From: Thomas Mundt <tmundt75 at gmail.com>
> >>> Date: Tue, 19 Sep 2017 00:25:25 +0200
> >>> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12
> bit
> >>>
> >>> Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
> >>> ---
> >>> libavfilter/interlace.h | 5 +-
> >>> libavfilter/tinterlace.h | 5 +-
> >>> libavfilter/vf_interlace.c | 92
> >> ++++++++++++++++++++++----
> >>> libavfilter/vf_tinterlace.c | 73
> ++++++++++++++++++--
> >>> libavfilter/x86/vf_interlace.asm | 80
> >> ++++++++++++++++++++--
> >>> libavfilter/x86/vf_interlace_init.c | 51 ++++++++++----
> >>> libavfilter/x86/vf_tinterlace_init.c | 51 ++++++++++----
> >>> tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
> >>> tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
> >>> tests/ref/fate/filter-pixfmts-tinterlace_pad | 11 +++
> >>> tests/ref/fate/filter-pixfmts-tinterlace_vlpf | 11 +++
> >>> 11 files changed, 345 insertions(+), 56 deletions(-)
> >>>
> >>> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
> >>> index 2101b79..90a0198 100644
> >>> --- a/libavfilter/interlace.h
> >>> +++ b/libavfilter/interlace.h
> >>> @@ -25,9 +25,11 @@
> >>> #ifndef AVFILTER_INTERLACE_H
> >>> #define AVFILTER_INTERLACE_H
> >>>
> >>> +#include "libavutil/bswap.h"
> >>> #include "libavutil/common.h"
> >>> #include "libavutil/imgutils.h"
> >>> #include "libavutil/opt.h"
> >>> +#include "libavutil/pixdesc.h"
> >>>
> >>> #include "avfilter.h"
> >>> #include "formats.h"
> >>> @@ -55,8 +57,9 @@ typedef struct InterlaceContext {
> >>> enum ScanMode scan; // top or bottom field first scanning
> >>> int lowpass; // enable or disable low pass filtering
> >>> AVFrame *cur, *next; // the two frames from which the new one is
> >> obtained
> >>> + const AVPixFmtDescriptor *csp;
> >>> void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const
> >> uint8_t *srcp,
> >>> - ptrdiff_t mref, ptrdiff_t pref);
> >>> + ptrdiff_t mref, ptrdiff_t pref, int
> clip_max);
> >>> } InterlaceContext;
> >>>
> >>> void ff_interlace_init_x86(InterlaceContext *interlace);
> >>> diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
> >>> index cc13a6c..b5c39aa 100644
> >>> --- a/libavfilter/tinterlace.h
> >>> +++ b/libavfilter/tinterlace.h
> >>> @@ -27,7 +27,9 @@
> >>> #ifndef AVFILTER_TINTERLACE_H
> >>> #define AVFILTER_TINTERLACE_H
> >>>
> >>> +#include "libavutil/bswap.h"
> >>> #include "libavutil/opt.h"
> >>> +#include "libavutil/pixdesc.h"
> >>> #include "drawutils.h"
> >>> #include "avfilter.h"
> >>>
> >>> @@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
> >>> int black_linesize[4];
> >>> FFDrawContext draw;
> >>> FFDrawColor color;
> >>> + const AVPixFmtDescriptor *csp;
> >>> void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t
> >> *srcp,
> >>> - ptrdiff_t mref, ptrdiff_t pref);
> >>> + ptrdiff_t mref, ptrdiff_t pref, int
> clip_max);
> >>> } TInterlaceContext;
> >>>
> >>> void ff_tinterlace_init_x86(TInterlaceContext *interlace);
> >>> diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
> >>> index 55bf782..bfba054 100644
> >>> --- a/libavfilter/vf_interlace.c
> >>> +++ b/libavfilter/vf_interlace.c
> >>> @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
> >>> AVFILTER_DEFINE_CLASS(interlace);
> >>>
> >>> static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
> >>> - const uint8_t *srcp,
> >>> - ptrdiff_t mref, ptrdiff_t pref)
> >>> + const uint8_t *srcp, ptrdiff_t mref,
> >>> + ptrdiff_t pref, int clip_max)
> >>> {
> >>> const uint8_t *srcp_above = srcp + mref;
> >>> const uint8_t *srcp_below = srcp + pref;
> >>> @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t
> >> linesize,
> >>> }
> >>> }
> >>>
> >>> +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
> >>> + const uint8_t *src8, ptrdiff_t mref,
> >>> + ptrdiff_t pref, int clip_max)
> >>> +{
> >>> + uint16_t *dstp = (uint16_t *)dst8;
> >>> + const uint16_t *srcp = (const uint16_t *)src8;
> >>> + const uint16_t *srcp_above = srcp + mref / 2;
> >>> + const uint16_t *srcp_below = srcp + pref / 2;
> >>> + int i;
> >>> + for (i = 0; i < linesize; i++) {
> >>> + // this calculation is an integer representation of
> >>> + // '0.5 * current + 0.25 * above + 0.25 * below'
> >>> + // '1 +' is for rounding.
> >>> + dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) +
> av_le2ne16(srcp[i])
>
> You might want to load srcp[i] into a local variable here as well.
>
> >>> + + av_le2ne16(srcp_above[i])
> >>> + + av_le2ne16(srcp_below[i])) >> 2);
> >>
> >> This might work (And Michael will be able to confirm that if
> >> filter-pixfmts-tinterlace_vlpf passes)...
> >>
> >>> + }
> >>> +}
> >>> +
> >>> static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
> >>> - const uint8_t *srcp,
> >>> - ptrdiff_t mref, ptrdiff_t pref)
> >>> + const uint8_t *srcp, ptrdiff_t
> mref,
> >>> + ptrdiff_t pref, int clip_max)
> >>> {
> >>> const uint8_t *srcp_above = srcp + mref;
> >>> const uint8_t *srcp_below = srcp + pref;
> >>> @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t *dstp,
> >> ptrdiff_t linesize,
> >>> }
> >>> }
> >>>
> >>> +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t
> >> linesize,
> >>> + const uint8_t *src8, ptrdiff_t
> >> mref,
> >>> + ptrdiff_t pref, int clip_max)
> >>> +{
> >>> + uint16_t *dstp = (uint16_t *)dst8;
> >>> + const uint16_t *srcp = (const uint16_t *)src8;
> >>> + const uint16_t *srcp_above = srcp + mref / 2;
> >>> + const uint16_t *srcp_below = srcp + pref / 2;
> >>> + const uint16_t *srcp_above2 = srcp + mref;
> >>> + const uint16_t *srcp_below2 = srcp + pref;
> >>> + int i, srcp_x, srcp_ab;
> >>> + for (i = 0; i < linesize; i++) {
> >>> + // this calculation is an integer representation of
> >>> + // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 *
> >> above2 - 0.125 * below2'
> >>> + // '4 +' is for rounding.
> >>> + srcp_x = av_le2ne16(srcp[i]) << 1;
> >>> + srcp_ab = av_le2ne16(srcp_above[i]) +
> av_le2ne16(srcp_below[i]);
> >>> + dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) +
> >> srcp_x + srcp_ab) << 1)
> >>> + - av_le2ne16(srcp_above2[i])
> >>> + - av_le2ne16(srcp_below2[i])) >>
> >> 3, 0, clip_max));
> >>> + // Prevent over-sharpening:
> >>> + // dst must not exceed src when the average of above and below
> >>> + // is less than src. And the other way around.
> >>> + if (srcp_ab > srcp_x) {
> >>> + if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i]))
> >>> + dstp[i] = srcp[i];
> >>> + } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i]))
> >>> + dstp[i] = srcp[i];
> >>
> >> ...but chances are this over-sharpening prevention part will not. You're
> >> loading in native endianness here before storing. You only byteswapped
> >> for the comparison.
> >>
> >> Also, consider using local variables inside the for loop. You're loading
> >> srcp[i] and dstp[i] several times per iteration.
> >>
> >
> > Okay, then I would do:
> > int i, dstp_le, srcp_le, srcp_x, srcp_ab;
>
> These are not pointers, so maybe remove the "p" to avoid confusion.
>
> > for (i = 0; i < linesize; i++) {
> > srcp_le = av_le2ne16(srcp[i]);
> > srcp_x = srcp_le << 1;
> > srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
> > dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1)
> > - av_le2ne16(srcp_above2[i])
> > - av_le2ne16(srcp_below2[i])) >> 3, 0,
> clip_max);
> > if (srcp_ab > srcp_x) {
> > if (dstp_le < srcp_le)
> > dstp[i] = srcp[i];
> > else
> > dstp[i] = av_le2ne16(dstp_le);
> > } else if (dstp_le > srcp_le) {
> > dstp[i] = srcp[i];
> > } else
> > dstp[i] = av_le2ne16(dstp_le);
> > }
> > Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] = srcp[i]; ?
>
> Yes. No need to load srcp[i] again.
>
Okay, thanks!
A patch with your suggestions is attached.
I will send a separate patch that removes the "p" from srcp_x and srcp_ab
in the lowpass_line_complex_c function for consistency.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0003-avfilter-interlace-add-support-for-10-and-12-bit.patch
Type: application/octet-stream
Size: 30922 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20170919/0fc3dfba/attachment.obj>
More information about the ffmpeg-devel
mailing list