[FFmpeg-devel] [PATCH] avfilter: add deinterlace_videotoolbox (yadif implementation using Metal.framework)

Thu Dec 16 04:03:17 EET 2021

From: Aman Karmani <aman at tmm1.net>

deinterlaces CVPixelBuffers, i.e. AV_PIX_FMT_VIDEOTOOLBOX frames

for example, an interlaced mpeg2 file can be decoded by avcodec,
uploaded into a CVPixelBuffer, deinterlaced by metal, and then
encoded to h264 by VideoToolbox as follows:

    ffmpeg \
           -init_hw_device videotoolbox \
           -i interlaced.ts \
           -vf hwupload,deinterlace_videotoolbox=mode=send_field:deint=interlaced \
           -c:v h264_videotoolbox \
           -b:v 2000k \
           -c:a copy \
           -y progressive.ts

(note that uploading AVFrame into CVPixelBuffer via hwupload
 requires 504c60660d3194758823ddd45ceddb86e35d806f)

Signed-off-by: Aman Karmani <aman at tmm1.net>
---
 .gitignore                                    |   3 +
 configure                                     |   9 +-
 ffbuild/common.mak                            |   9 +
 libavfilter/Makefile                          |   2 +
 libavfilter/allfilters.c                      |   1 +
 libavfilter/vf_deinterlace_videotoolbox.m     | 457 ++++++++++++++++++
 libavfilter/vf_deinterlace_videotoolbox.metal | 269 +++++++++++
 7 files changed, 749 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_deinterlace_videotoolbox.m
 create mode 100644 libavfilter/vf_deinterlace_videotoolbox.metal

diff --git a/.gitignore b/.gitignore
index 9ed24b542e..1a5bb29ad5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,9 @@
 *.swp
 *.ver
 *.version
+*.metal.air
+*.metallib
+*.metallib.c
 *.ptx
 *.ptx.c
 *.ptx.gz
diff --git a/configure b/configure
index a7593ec2db..8635574d00 100755
--- a/configure
+++ b/configure
@@ -309,6 +309,7 @@ External library support:
                            if openssl, gnutls or libtls is not used [no]
   --enable-mediacodec      enable Android MediaCodec support [no]
   --enable-mediafoundation enable encoding via MediaFoundation [auto]
+  --disable-metal          disable Apple Metal framework [autodetect]
   --enable-libmysofa       enable libmysofa, needed for sofalizer filter [no]
   --enable-openal          enable OpenAL 1.1 capture support [no]
   --enable-opencl          enable OpenCL processing [no]
@@ -382,6 +383,7 @@ Toolchain options:
   --dep-cc=DEPCC           use dependency generator DEPCC [$cc_default]
   --nvcc=NVCC              use Nvidia CUDA compiler NVCC or clang [$nvcc_default]
   --ld=LD                  use linker LD [$ld_default]
+  --metalcc=METALCC        use metal compiler METALCC [$metalcc_default]
   --pkg-config=PKGCONFIG   use pkg-config tool PKGCONFIG [$pkg_config_default]
   --pkg-config-flags=FLAGS pass additional flags to pkgconf []
   --ranlib=RANLIB          use ranlib RANLIB [$ranlib_default]
@@ -2560,6 +2562,7 @@ CMDLINE_SET="
     ln_s
     logfile
     malloc_prefix
+    metalcc
     nm
     optflags
     nvcc
@@ -3610,6 +3613,7 @@ cover_rect_filter_deps="avcodec avformat gpl"
 cropdetect_filter_deps="gpl"
 deinterlace_qsv_filter_deps="libmfx"
 deinterlace_vaapi_filter_deps="vaapi"
+deinterlace_videotoolbox_filter_deps="metal corevideo videotoolbox"
 delogo_filter_deps="gpl"
 denoise_vaapi_filter_deps="vaapi"
 derain_filter_select="dnn"
@@ -3828,6 +3832,7 @@ host_cc_default="gcc"
 doxygen_default="doxygen"
 install="install"
 ln_s_default="ln -s -f"
+metalcc_default="xcrun metal"
 nm_default="nm -g"
 pkg_config_default=pkg-config
 ranlib_default="ranlib"
@@ -4428,7 +4433,7 @@ if enabled cuda_nvcc; then
 fi
 
 set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \
-    target_exec x86asmexe
+    target_exec x86asmexe metalcc
 enabled cross_compile || host_cc_default=$cc
 set_default host_cc
 
@@ -6316,6 +6321,7 @@ check_apple_framework CoreFoundation
 check_apple_framework CoreMedia
 check_apple_framework CoreVideo
 check_apple_framework CoreAudio
+check_apple_framework Metal
 
 enabled avfoundation && {
     disable coregraphics applicationservices
@@ -7606,6 +7612,7 @@ ARFLAGS=$arflags
 AR_O=$ar_o
 AR_CMD=$ar
 NM_CMD=$nm
+METALCC=$metalcc
 RANLIB=$ranlib
 STRIP=$strip
 STRIPTYPE=$striptype
diff --git a/ffbuild/common.mak b/ffbuild/common.mak
index 268ae61154..af62c9f3a7 100644
--- a/ffbuild/common.mak
+++ b/ffbuild/common.mak
@@ -104,6 +104,15 @@ COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS)
 $(BIN2CEXE): ffbuild/bin2c_host.o
 	$(HOSTLD) $(HOSTLDFLAGS) $(HOSTLD_O) $^ $(HOSTEXTRALIBS)
 
+%.metal.air: %.metal
+	$(METALCC) $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<) -o $@
+
+%.metallib: %.metal.air
+	$(METALCC)lib --split-module-without-linking $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<) -o $@
+
+%.metallib.c: %.metallib $(BIN2CEXE)
+	$(BIN2C) $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<) $@ $(subst .,_,$(basename $(notdir $@)))
+
 %.ptx: %.cu $(SRC_PATH)/compat/cuda/cuda_runtime.h
 	$(COMPILE_NVCC)
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 2fe495df28..79a5a427ac 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -241,6 +241,8 @@ OBJS-$(CONFIG_DEFLATE_FILTER)                += vf_neighbor.o
 OBJS-$(CONFIG_DEFLICKER_FILTER)              += vf_deflicker.o
 OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER)        += vf_deinterlace_qsv.o
 OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER)      += vf_deinterlace_vaapi.o vaapi_vpp.o
+OBJS-$(CONFIG_DEINTERLACE_VIDEOTOOLBOX_FILTER) += vf_deinterlace_videotoolbox.o vf_deinterlace_videotoolbox.metallib.o \
+                                                yadif_common.o
 OBJS-$(CONFIG_DEJUDDER_FILTER)               += vf_dejudder.o
 OBJS-$(CONFIG_DELOGO_FILTER)                 += vf_delogo.o
 OBJS-$(CONFIG_DENOISE_VAAPI_FILTER)          += vf_misc_vaapi.o vaapi_vpp.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index ec57a2c49c..2ed3deb7dd 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -228,6 +228,7 @@ extern const AVFilter ff_vf_deflate;
 extern const AVFilter ff_vf_deflicker;
 extern const AVFilter ff_vf_deinterlace_qsv;
 extern const AVFilter ff_vf_deinterlace_vaapi;
+extern const AVFilter ff_vf_deinterlace_videotoolbox;
 extern const AVFilter ff_vf_dejudder;
 extern const AVFilter ff_vf_delogo;
 extern const AVFilter ff_vf_denoise_vaapi;
diff --git a/libavfilter/vf_deinterlace_videotoolbox.m b/libavfilter/vf_deinterlace_videotoolbox.m
new file mode 100644
index 0000000000..0253d1d355
--- /dev/null
+++ b/libavfilter/vf_deinterlace_videotoolbox.m
@@ -0,0 +1,457 @@
+/*
+ * Copyright (C) 2018 Philip Langdale <philipl at overt.org>
+ *               2020 Aman Karmani <aman at tmm1.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "internal.h"
+#include "yadif.h"
+#include <libavutil/avassert.h>
+#include <libavutil/hwcontext.h>
+
+#import <CoreVideo/CoreVideo.h>
+#import <Metal/Metal.h>
+
+extern char ff_vf_deinterlace_videotoolbox_metallib_data[];
+extern unsigned int ff_vf_deinterlace_videotoolbox_metallib_len;
+
+typedef struct DeintVTContext {
+    YADIFContext yadif;
+
+    AVBufferRef       *device_ref;
+    AVBufferRef       *input_frames_ref;
+    AVHWFramesContext *input_frames;
+
+    id<MTLDevice> mtlDevice;
+    id<MTLLibrary> mtlLibrary;
+    id<MTLCommandQueue> mtlQueue;
+    id<MTLComputePipelineState> mtlPipeline;
+    id<MTLFunction> mtlFunction;
+    id<MTLBuffer> mtlParamsBuffer;
+
+    CVMetalTextureCacheRef textureCache;
+} DeintVTContext;
+
+struct mtlYadifParams {
+    uint channels;
+    uint parity;
+    uint tff;
+    bool is_second_field;
+    bool skip_spatial_check;
+    int field_mode;
+};
+
+static void call_kernel(AVFilterContext *ctx,
+                        id<MTLTexture> dst,
+                        id<MTLTexture> prev,
+                        id<MTLTexture> cur,
+                        id<MTLTexture> next,
+                        int channels,
+                        int parity,
+                        int tff)
+{
+    DeintVTContext *s = ctx->priv;
+    id<MTLCommandBuffer> buffer = s->mtlQueue.commandBuffer;
+    id<MTLComputeCommandEncoder> encoder = buffer.computeCommandEncoder;
+    struct mtlYadifParams *params = (struct mtlYadifParams *)s->mtlParamsBuffer.contents;
+    *params = (struct mtlYadifParams){
+        .channels = channels,
+        .parity = parity,
+        .tff = tff,
+        .is_second_field = !(parity ^ tff),
+        .skip_spatial_check = s->yadif.mode&2,
+        .field_mode = s->yadif.current_field
+    };
+
+    [encoder setComputePipelineState:s->mtlPipeline];
+    [encoder setTexture:dst  atIndex:0];
+    [encoder setTexture:prev atIndex:1];
+    [encoder setTexture:cur  atIndex:2];
+    [encoder setTexture:next atIndex:3];
+    [encoder setBuffer:s->mtlParamsBuffer offset:0 atIndex:4];
+
+    NSUInteger w = s->mtlPipeline.threadExecutionWidth;
+    NSUInteger h = s->mtlPipeline.maxTotalThreadsPerThreadgroup / w;
+    MTLSize threadsPerThreadgroup = MTLSizeMake(w, h, 1);
+    BOOL fallback = YES;
+    if (@available(macOS 10.15, iOS 11, tvOS 14.5, *)) {
+        if ([s->mtlDevice supportsFamily:MTLGPUFamilyCommon3]) {
+            MTLSize threadsPerGrid = MTLSizeMake(dst.width, dst.height, 1);
+            [encoder dispatchThreads:threadsPerGrid threadsPerThreadgroup:threadsPerThreadgroup];
+            fallback = NO;
+        }
+    }
+    if (fallback) {
+        MTLSize threadgroups = MTLSizeMake((dst.width + w - 1) / w,
+                                           (dst.height + h - 1) / h,
+                                           1);
+        [encoder dispatchThreadgroups:threadgroups threadsPerThreadgroup:threadsPerThreadgroup];
+    }
+
+    [encoder endEncoding];
+
+    [buffer commit];
+    [buffer waitUntilCompleted];
+
+    [encoder release];
+    encoder = nil;
+    [buffer release];
+    buffer = nil;
+}
+
+static CVMetalTextureRef pixbuf_to_texture(AVFilterContext *ctx,
+                                           CVPixelBufferRef pixbuf,
+                                           int plane,
+                                           MTLPixelFormat format)
+{
+    DeintVTContext *s = ctx->priv;
+    CVMetalTextureRef tex = NULL;
+    CVReturn ret;
+
+    ret = CVMetalTextureCacheCreateTextureFromImage(
+        NULL,
+        s->textureCache,
+        pixbuf,
+        NULL,
+        format,
+        CVPixelBufferGetWidthOfPlane(pixbuf, plane),
+        CVPixelBufferGetHeightOfPlane(pixbuf, plane),
+        plane,
+        &tex
+    );
+    if (ret != kCVReturnSuccess) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTexture from image: %d\n", ret);
+        return NULL;
+    }
+
+    return tex;
+}
+
+static void filter(AVFilterContext *ctx, AVFrame *dst,
+                   int parity, int tff)
+{
+    DeintVTContext *s = ctx->priv;
+    YADIFContext *y = &s->yadif;
+    int i;
+
+    for (i = 0; i < y->csp->nb_components; i++) {
+        int pixel_size, channels;
+        const AVComponentDescriptor *comp = &y->csp->comp[i];
+        CVMetalTextureRef prev, cur, next, dest;
+        id<MTLTexture> tex_prev, tex_cur, tex_next, tex_dest;
+        MTLPixelFormat format;
+
+        if (comp->plane < i) {
+            // We process planes as a whole, so don't reprocess
+            // them for additional components
+            continue;
+        }
+
+        pixel_size = (comp->depth + comp->shift) / 8;
+        channels = comp->step / pixel_size;
+        if (pixel_size > 2 || channels > 2) {
+            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
+            goto exit;
+        }
+        switch (pixel_size) {
+        case 1:
+            format = channels == 1 ? MTLPixelFormatR8Unorm : MTLPixelFormatRG8Unorm;
+            break;
+        case 2:
+            format = channels == 1 ? MTLPixelFormatR16Unorm : MTLPixelFormatRG16Unorm;
+            break;
+        default:
+            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
+            goto exit;
+        }
+        av_log(ctx, AV_LOG_TRACE,
+               "Deinterlacing plane %d: pixel_size: %d channels: %d\n",
+               comp->plane, pixel_size, channels);
+
+        prev = pixbuf_to_texture(ctx, (CVPixelBufferRef)y->prev->data[3], i, format);
+        cur  = pixbuf_to_texture(ctx, (CVPixelBufferRef)y->cur->data[3], i, format);
+        next = pixbuf_to_texture(ctx, (CVPixelBufferRef)y->next->data[3], i, format);
+        dest = pixbuf_to_texture(ctx, (CVPixelBufferRef)dst->data[3], i, format);
+
+        tex_prev = CVMetalTextureGetTexture(prev);
+        tex_cur  = CVMetalTextureGetTexture(cur);
+        tex_next = CVMetalTextureGetTexture(next);
+        tex_dest = CVMetalTextureGetTexture(dest);
+
+        call_kernel(ctx, tex_dest, tex_prev, tex_cur, tex_next,
+                         channels, parity, tff);
+
+        CFRelease(prev);
+        CFRelease(cur);
+        CFRelease(next);
+        CFRelease(dest);
+    }
+
+    CVBufferPropagateAttachments((CVPixelBufferRef)y->cur->data[3], (CVPixelBufferRef)dst->data[3]);
+
+    if (y->current_field == YADIF_FIELD_END) {
+        y->current_field = YADIF_FIELD_NORMAL;
+    }
+
+exit:
+    return;
+}
+
+static av_cold int deint_videotoolbox_init(AVFilterContext *ctx)
+{
+    DeintVTContext *s = ctx->priv;
+    NSError *err = nil;
+    CVReturn ret;
+
+    s->mtlDevice = MTLCreateSystemDefaultDevice();
+    if (!s->mtlDevice) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n", s->mtlDevice.name.UTF8String);
+
+    dispatch_data_t libData = dispatch_data_create(
+        ff_vf_deinterlace_videotoolbox_metallib_data,
+        ff_vf_deinterlace_videotoolbox_metallib_len,
+        nil,
+        nil);
+    s->mtlLibrary = [s->mtlDevice
+        newLibraryWithData:libData
+        error:&err];
+    dispatch_release(libData);
+    libData = nil;
+    if (err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to load Metal library: %s\n", err.description.UTF8String);
+        return AVERROR_EXTERNAL;
+    }
+    s->mtlFunction = [s->mtlLibrary newFunctionWithName:@"deint"];
+
+    s->mtlQueue = s->mtlDevice.newCommandQueue;
+    if (!s->mtlQueue) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command queue!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    s->mtlPipeline = [s->mtlDevice
+        newComputePipelineStateWithFunction:s->mtlFunction
+        error:&err];
+    if (err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String);
+        return AVERROR_EXTERNAL;
+    }
+
+    s->mtlParamsBuffer = [s->mtlDevice
+        newBufferWithLength:sizeof(struct mtlYadifParams)
+        options:MTLResourceStorageModeShared];
+    if (!s->mtlParamsBuffer) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal buffer for parameters\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = CVMetalTextureCacheCreate(
+        NULL,
+        NULL,
+        s->mtlDevice,
+        NULL,
+        &s->textureCache
+    );
+    if (ret != kCVReturnSuccess) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTextureCache: %d\n", ret);
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+static av_cold void deint_videotoolbox_uninit(AVFilterContext *ctx)
+{
+    DeintVTContext *s = ctx->priv;
+    YADIFContext *y = &s->yadif;
+
+    av_frame_free(&y->prev);
+    av_frame_free(&y->cur);
+    av_frame_free(&y->next);
+
+    av_buffer_unref(&s->device_ref);
+    av_buffer_unref(&s->input_frames_ref);
+    s->input_frames = NULL;
+
+    [s->mtlParamsBuffer release];
+    [s->mtlFunction release];
+    [s->mtlPipeline release];
+    [s->mtlQueue release];
+    [s->mtlLibrary release];
+    [s->mtlDevice release];
+
+    s->mtlParamsBuffer = nil;
+    s->mtlFunction = nil;
+    s->mtlPipeline = nil;
+    s->mtlQueue = nil;
+    s->mtlLibrary = nil;
+    s->mtlDevice = nil;
+
+    if (s->textureCache) {
+        CFRelease(s->textureCache);
+        s->textureCache = NULL;
+    }
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    DeintVTContext *s  = ctx->priv;
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is "
+               "required to associate the processing device.\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
+    if (!s->input_frames_ref) {
+        av_log(ctx, AV_LOG_ERROR, "A input frames reference create "
+               "failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data;
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *link)
+{
+    AVHWFramesContext *output_frames;
+    AVFilterContext *ctx = link->src;
+    DeintVTContext *s = ctx->priv;
+    YADIFContext *y = &s->yadif;
+    int ret = 0;
+
+    av_assert0(s->input_frames);
+    s->device_ref = av_buffer_ref(s->input_frames->device_ref);
+    if (!s->device_ref) {
+        av_log(ctx, AV_LOG_ERROR, "A device reference create "
+               "failed.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref);
+    if (!link->hw_frames_ctx) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context "
+               "for output.\n");
+        ret = AVERROR(ENOMEM);
+        goto exit;
+    }
+
+    output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
+
+    output_frames->format    = AV_PIX_FMT_VIDEOTOOLBOX;
+    output_frames->sw_format = s->input_frames->sw_format;
+    output_frames->width     = ctx->inputs[0]->w;
+    output_frames->height    = ctx->inputs[0]->h;
+
+    ret = ff_filter_init_hw_frames(ctx, link, 10);
+    if (ret < 0)
+        goto exit;
+
+    ret = av_hwframe_ctx_init(link->hw_frames_ctx);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to initialise VideoToolbox frame "
+               "context for output: %d\n", ret);
+        goto exit;
+    }
+
+    link->time_base.num = ctx->inputs[0]->time_base.num;
+    link->time_base.den = ctx->inputs[0]->time_base.den * 2;
+    link->w             = ctx->inputs[0]->w;
+    link->h             = ctx->inputs[0]->h;
+
+    if(y->mode & 1)
+        link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
+                                    (AVRational){2, 1});
+
+    if (link->w < 3 || link->h < 3) {
+        av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or lines is not supported\n");
+        ret = AVERROR(EINVAL);
+        goto exit;
+    }
+
+    y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
+    y->filter = filter;
+
+exit:
+    return ret;
+}
+
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
+
+static const AVOption deinterlace_videotoolbox_options[] = {
+    #define OFFSET(x) offsetof(YADIFContext, x)
+    { "mode",   "specify the interlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FRAME}, 0, 3, FLAGS, "mode"},
+    CONST("send_frame",           "send one frame for each frame",                                     YADIF_MODE_SEND_FRAME,           "mode"),
+    CONST("send_field",           "send one frame for each field",                                     YADIF_MODE_SEND_FIELD,           "mode"),
+    CONST("send_frame_nospatial", "send one frame for each frame, but skip spatial interlacing check", YADIF_MODE_SEND_FRAME_NOSPATIAL, "mode"),
+    CONST("send_field_nospatial", "send one frame for each field, but skip spatial interlacing check", YADIF_MODE_SEND_FIELD_NOSPATIAL, "mode"),
+
+    { "parity", "specify the assumed picture field parity", OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1, FLAGS, "parity" },
+    CONST("tff",  "assume top field first",    YADIF_PARITY_TFF,  "parity"),
+    CONST("bff",  "assume bottom field first", YADIF_PARITY_BFF,  "parity"),
+    CONST("auto", "auto detect parity",        YADIF_PARITY_AUTO, "parity"),
+
+    { "deint", "specify which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, "deint" },
+    CONST("all",        "deinterlace all frames",                       YADIF_DEINT_ALL,        "deint"),
+    CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED, "deint"),
+    #undef OFFSET
+
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(deinterlace_videotoolbox);
+
+static const AVFilterPad deint_videotoolbox_inputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .filter_frame  = ff_yadif_filter_frame,
+        .config_props  = config_input,
+    },
+};
+
+static const AVFilterPad deint_videotoolbox_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = ff_yadif_request_frame,
+        .config_props  = config_output,
+    },
+};
+
+AVFilter ff_vf_deinterlace_videotoolbox = {
+    .name           = "deinterlace_videotoolbox",
+    .description    = NULL_IF_CONFIG_SMALL("Deinterlace VideoToolbox frames with Metal compute"),
+    .priv_size      = sizeof(DeintVTContext),
+    .priv_class     = &deinterlace_videotoolbox_class,
+    .init           = deint_videotoolbox_init,
+    .uninit         = deint_videotoolbox_uninit,
+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX),
+    FILTER_INPUTS(deint_videotoolbox_inputs),
+    FILTER_OUTPUTS(deint_videotoolbox_outputs),
+    .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
diff --git a/libavfilter/vf_deinterlace_videotoolbox.metal b/libavfilter/vf_deinterlace_videotoolbox.metal
new file mode 100644
index 0000000000..50783f2ffe
--- /dev/null
+++ b/libavfilter/vf_deinterlace_videotoolbox.metal
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2018 Philip Langdale <philipl at overt.org>
+ *               2020 Aman Karmani <aman at tmm1.net>
+ *               2020 Stefan Dyulgerov <stefan.dyulgerov at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <metal_stdlib>
+#include <metal_integer>
+#include <metal_texture>
+
+using namespace metal;
+
+/*
+ * Parameters
+ */
+
+struct deintParams {
+    uint channels;
+    uint parity;
+    uint tff;
+    bool is_second_field;
+    bool skip_spatial_check;
+    int field_mode;
+};
+
+/*
+ * Texture access helpers
+ */
+
+#define accesstype access::sample
+const sampler s(coord::pixel);
+
+template <typename T>
+T tex2D(texture2d<float, access::sample> tex, uint x, uint y)
+{
+    return tex.sample(s, float2(x, y)).x;
+}
+
+template <>
+float2 tex2D<float2>(texture2d<float, access::sample> tex, uint x, uint y)
+{
+    return tex.sample(s, float2(x, y)).xy;
+}
+
+template <typename T>
+T tex2D(texture2d<float, access::read> tex, uint x, uint y)
+{
+    return tex.read(uint2(x, y)).x;
+}
+
+template <>
+float2 tex2D<float2>(texture2d<float, access::read> tex, uint x, uint y)
+{
+    return tex.read(uint2(x, y)).xy;
+}
+
+/*
+ * YADIF helpers
+ */
+
+template<typename T>
+T spatial_predictor(T a, T b, T c, T d, T e, T f, T g,
+                    T h, T i, T j, T k, T l, T m, T n)
+{
+    T spatial_pred = (d + k)/2;
+    T spatial_score = abs(c - j) + abs(d - k) + abs(e - l);
+
+    T score = abs(b - k) + abs(c - l) + abs(d - m);
+    if (score < spatial_score) {
+        spatial_pred = (c + l)/2;
+        spatial_score = score;
+        score = abs(a - l) + abs(b - m) + abs(c - n);
+        if (score < spatial_score) {
+            spatial_pred = (b + m)/2;
+            spatial_score = score;
+        }
+    }
+    score = abs(d - i) + abs(e - j) + abs(f - k);
+    if (score < spatial_score) {
+        spatial_pred = (e + j)/2;
+        spatial_score = score;
+        score = abs(e - h) + abs(f - i) + abs(g - j);
+        if (score < spatial_score) {
+            spatial_pred = (f + i)/2;
+            spatial_score = score;
+        }
+    }
+    return spatial_pred;
+}
+
+template<typename T>
+T temporal_predictor(T A, T B, T C, T D, T E, T F,
+                     T G, T H, T I, T J, T K, T L,
+                     T spatial_pred, bool skip_check)
+{
+    T p0 = (C + H) / 2;
+    T p1 = F;
+    T p2 = (D + I) / 2;
+    T p3 = G;
+    T p4 = (E + J) / 2;
+
+    T tdiff0 = abs(D - I);
+    T tdiff1 = (abs(A - F) + abs(B - G)) / 2;
+    T tdiff2 = (abs(K - F) + abs(G - L)) / 2;
+
+    T diff = max3(tdiff0, tdiff1, tdiff2);
+
+    if (!skip_check) {
+        T maxi = max3(p2 - p3, p2 - p1, min(p0 - p1, p4 - p3));
+        T mini = min3(p2 - p3, p2 - p1, max(p0 - p1, p4 - p3));
+        diff = max3(diff, mini, -maxi);
+    }
+
+    return clamp(spatial_pred, p2 - diff, p2 + diff);
+}
+
+#define T float2
+template <>
+T spatial_predictor<T>(T a, T b, T c, T d, T e, T f, T g,
+                       T h, T i, T j, T k, T l, T m, T n)
+{
+    return T(
+        spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x,
+                          h.x, i.x, j.x, k.x, l.x, m.x, n.x),
+        spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y,
+                          h.y, i.y, j.y, k.y, l.y, m.y, n.y)
+    );
+}
+
+template <>
+T temporal_predictor<T>(T A, T B, T C, T D, T E, T F,
+                        T G, T H, T I, T J, T K, T L,
+                        T spatial_pred, bool skip_check)
+{
+    return T(
+        temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x,
+                           G.x, H.x, I.x, J.x, K.x, L.x,
+                           spatial_pred.x, skip_check),
+        temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y,
+                           G.y, H.y, I.y, J.y, K.y, L.y,
+                           spatial_pred.y, skip_check)
+    );
+}
+#undef T
+
+/*
+ * YADIF compute
+ */
+
+template <typename T>
+T yadif_compute_spatial(
+    texture2d<float, accesstype> cur,
+    uint2 pos)
+{
+    // Calculate spatial prediction
+    T a = tex2D<T>(cur, pos.x - 3, pos.y - 1);
+    T b = tex2D<T>(cur, pos.x - 2, pos.y - 1);
+    T c = tex2D<T>(cur, pos.x - 1, pos.y - 1);
+    T d = tex2D<T>(cur, pos.x - 0, pos.y - 1);
+    T e = tex2D<T>(cur, pos.x + 1, pos.y - 1);
+    T f = tex2D<T>(cur, pos.x + 2, pos.y - 1);
+    T g = tex2D<T>(cur, pos.x + 3, pos.y - 1);
+
+    T h = tex2D<T>(cur, pos.x - 3, pos.y + 1);
+    T i = tex2D<T>(cur, pos.x - 2, pos.y + 1);
+    T j = tex2D<T>(cur, pos.x - 1, pos.y + 1);
+    T k = tex2D<T>(cur, pos.x - 0, pos.y + 1);
+    T l = tex2D<T>(cur, pos.x + 1, pos.y + 1);
+    T m = tex2D<T>(cur, pos.x + 2, pos.y + 1);
+    T n = tex2D<T>(cur, pos.x + 3, pos.y + 1);
+
+    return spatial_predictor(a, b, c, d, e, f, g,
+                             h, i, j, k, l, m, n);
+}
+
+template <typename T>
+T yadif_compute_temporal(
+    texture2d<float, accesstype> cur,
+    texture2d<float, accesstype> prev2,
+    texture2d<float, accesstype> prev1,
+    texture2d<float, accesstype> next1,
+    texture2d<float, accesstype> next2,
+    T spatial_pred,
+    bool skip_spatial_check,
+    uint2 pos)
+{
+    // Calculate temporal prediction
+    T A = tex2D<T>(prev2, pos.x, pos.y - 1);
+    T B = tex2D<T>(prev2, pos.x, pos.y + 1);
+    T C = tex2D<T>(prev1, pos.x, pos.y - 2);
+    T D = tex2D<T>(prev1, pos.x, pos.y + 0);
+    T E = tex2D<T>(prev1, pos.x, pos.y + 2);
+    T F = tex2D<T>(cur,   pos.x, pos.y - 1);
+    T G = tex2D<T>(cur,   pos.x, pos.y + 1);
+    T H = tex2D<T>(next1, pos.x, pos.y - 2);
+    T I = tex2D<T>(next1, pos.x, pos.y + 0);
+    T J = tex2D<T>(next1, pos.x, pos.y + 2);
+    T K = tex2D<T>(next2, pos.x, pos.y - 1);
+    T L = tex2D<T>(next2, pos.x, pos.y + 1);
+
+    return temporal_predictor(A, B, C, D, E, F, G, H, I, J, K, L,
+                              spatial_pred, skip_spatial_check);
+}
+
+template <typename T>
+T yadif(
+    texture2d<float, access::write> dst,
+    texture2d<float, accesstype> prev,
+    texture2d<float, accesstype> cur,
+    texture2d<float, accesstype> next,
+    constant deintParams& params,
+    uint2 pos)
+{
+    T spatial_pred = yadif_compute_spatial<T>(cur, pos);
+
+    if (params.is_second_field) {
+        return yadif_compute_temporal(cur, prev, cur, next, next, spatial_pred, params.skip_spatial_check, pos);
+    } else {
+        return yadif_compute_temporal(cur, prev, prev, cur, next, spatial_pred, params.skip_spatial_check, pos);
+    }
+}
+
+/*
+ * Kernel dispatch
+ */
+
+kernel void deint(
+    texture2d<float, access::write> dst [[texture(0)]],
+    texture2d<float, accesstype> prev [[texture(1)]],
+    texture2d<float, accesstype> cur  [[texture(2)]],
+    texture2d<float, accesstype> next [[texture(3)]],
+    constant deintParams& params [[buffer(4)]],
+    uint2 pos [[thread_position_in_grid]])
+{
+    if ((pos.x >= dst.get_width()) ||
+        (pos.y >= dst.get_height())) {
+        return;
+    }
+
+    // Don't modify the primary field
+    if (pos.y % 2 == params.parity) {
+        float4 in = cur.read(pos);
+        dst.write(in, pos);
+        return;
+    }
+
+    float2 pred;
+    if (params.channels == 1)
+        pred = float2(yadif<float>(dst, prev, cur, next, params, pos));
+    else
+        pred = yadif<float2>(dst, prev, cur, next, params, pos);
+    dst.write(pred.xyyy, pos);
+}
-- 
2.33.0