[FFmpeg-devel] [PATCH v4 5/5] avfilter: add vf_yadif_videotoolbox

Fri Dec 17 23:38:22 EET 2021

On Fri, 17 Dec 2021 12:04:18 -0800
Aman Karmani <ffmpeg at tmm1.net> wrote:

> From: Aman Karmani <aman at tmm1.net>
> 
> deinterlaces CVPixelBuffers, i.e. AV_PIX_FMT_VIDEOTOOLBOX frames
> 
> for example, an interlaced mpeg2 video can be decoded by avcodec,
> uploaded into a CVPixelBuffer, deinterlaced by Metal, and then
> encoded to h264 by VideoToolbox as follows:
> 
>     ffmpeg \
>            -init_hw_device videotoolbox \
>            -i interlaced.ts \
>            -vf hwupload,yadif_videotoolbox \
>            -c:v h264_videotoolbox \
>            -b:v 2000k \
>            -c:a copy \
>            -y progressive.ts
> 
> (note that uploading AVFrame into CVPixelBuffer via hwupload
>  requires 504c60660d3194758823ddd45ceddb86e35d806f)
> 
> this work is sponsored by Fancy Bits LLC
> 
> Reviewed-by: Ridley Combs <rcombs at rcombs.me>
> Signed-off-by: Aman Karmani <aman at tmm1.net>
> ---
>  configure                                     |   1 +
>  libavfilter/Makefile                          |   4 +
>  libavfilter/allfilters.c                      |   1 +
>  libavfilter/metal/vf_yadif_videotoolbox.metal | 269 ++++++++++++
>  libavfilter/vf_yadif_videotoolbox.m           | 406
> ++++++++++++++++++ 5 files changed, 681 insertions(+)
>  create mode 100644 libavfilter/metal/vf_yadif_videotoolbox.metal
>  create mode 100644 libavfilter/vf_yadif_videotoolbox.m
> 
> diff --git a/configure b/configure
> index 32a39f5f5b..d8b07c8e00 100755
> --- a/configure
> +++ b/configure
> @@ -3748,6 +3748,7 @@ vpp_qsv_filter_select="qsvvpp"
>  xfade_opencl_filter_deps="opencl"
>  yadif_cuda_filter_deps="ffnvcodec"
>  yadif_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
> +yadif_videotoolbox_filter_deps="metal corevideo videotoolbox"
>  
>  # examples
>  avio_list_dir_deps="avformat avutil"
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 2fe495df28..9a061ba3c8 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -519,6 +519,10 @@ OBJS-$(CONFIG_XSTACK_FILTER)                 +=
> vf_stack.o framesync.o OBJS-$(CONFIG_YADIF_FILTER)
> += vf_yadif.o yadif_common.o OBJS-$(CONFIG_YADIF_CUDA_FILTER)
>     += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \ yadif_common.o
> cuda/load_helper.o +OBJS-$(CONFIG_YADIF_VIDEOTOOLBOX_FILTER)     +=
> vf_yadif_videotoolbox.o \
> +
> metal/vf_yadif_videotoolbox.metallib.o \
> +                                                metal/utils.o \
> +                                                yadif_common.o
>  OBJS-$(CONFIG_YAEPBLUR_FILTER)               += vf_yaepblur.o
>  OBJS-$(CONFIG_ZMQ_FILTER)                    += f_zmq.o
>  OBJS-$(CONFIG_ZOOMPAN_FILTER)                += vf_zoompan.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index ec57a2c49c..26f1c73505 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -496,6 +496,7 @@ extern const AVFilter ff_vf_xmedian;
>  extern const AVFilter ff_vf_xstack;
>  extern const AVFilter ff_vf_yadif;
>  extern const AVFilter ff_vf_yadif_cuda;
> +extern const AVFilter ff_vf_yadif_videotoolbox;
>  extern const AVFilter ff_vf_yaepblur;
>  extern const AVFilter ff_vf_zmq;
>  extern const AVFilter ff_vf_zoompan;
> diff --git a/libavfilter/metal/vf_yadif_videotoolbox.metal
> b/libavfilter/metal/vf_yadif_videotoolbox.metal new file mode 100644
> index 0000000000..50783f2ffe
> --- /dev/null
> +++ b/libavfilter/metal/vf_yadif_videotoolbox.metal
> @@ -0,0 +1,269 @@
> +/*
> + * Copyright (C) 2018 Philip Langdale <philipl at overt.org>
> + *               2020 Aman Karmani <aman at tmm1.net>
> + *               2020 Stefan Dyulgerov <stefan.dyulgerov at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#include <metal_stdlib>
> +#include <metal_integer>
> +#include <metal_texture>
> +
> +using namespace metal;
> +
> +/*
> + * Parameters
> + */
> +
> +struct deintParams {
> +    uint channels;
> +    uint parity;
> +    uint tff;
> +    bool is_second_field;
> +    bool skip_spatial_check;
> +    int field_mode;
> +};
> +
> +/*
> + * Texture access helpers
> + */
> +
> +#define accesstype access::sample
> +const sampler s(coord::pixel);
> +
> +template <typename T>
> +T tex2D(texture2d<float, access::sample> tex, uint x, uint y)
> +{
> +    return tex.sample(s, float2(x, y)).x;
> +}
> +
> +template <>
> +float2 tex2D<float2>(texture2d<float, access::sample> tex, uint x,
> uint y) +{
> +    return tex.sample(s, float2(x, y)).xy;
> +}
> +
> +template <typename T>
> +T tex2D(texture2d<float, access::read> tex, uint x, uint y)
> +{
> +    return tex.read(uint2(x, y)).x;
> +}
> +
> +template <>
> +float2 tex2D<float2>(texture2d<float, access::read> tex, uint x,
> uint y) +{
> +    return tex.read(uint2(x, y)).xy;
> +}
> +
> +/*
> + * YADIF helpers
> + */
> +
> +template<typename T>
> +T spatial_predictor(T a, T b, T c, T d, T e, T f, T g,
> +                    T h, T i, T j, T k, T l, T m, T n)
> +{
> +    T spatial_pred = (d + k)/2;
> +    T spatial_score = abs(c - j) + abs(d - k) + abs(e - l);
> +
> +    T score = abs(b - k) + abs(c - l) + abs(d - m);
> +    if (score < spatial_score) {
> +        spatial_pred = (c + l)/2;
> +        spatial_score = score;
> +        score = abs(a - l) + abs(b - m) + abs(c - n);
> +        if (score < spatial_score) {
> +            spatial_pred = (b + m)/2;
> +            spatial_score = score;
> +        }
> +    }
> +    score = abs(d - i) + abs(e - j) + abs(f - k);
> +    if (score < spatial_score) {
> +        spatial_pred = (e + j)/2;
> +        spatial_score = score;
> +        score = abs(e - h) + abs(f - i) + abs(g - j);
> +        if (score < spatial_score) {
> +            spatial_pred = (f + i)/2;
> +            spatial_score = score;
> +        }
> +    }
> +    return spatial_pred;
> +}
> +
> +template<typename T>
> +T temporal_predictor(T A, T B, T C, T D, T E, T F,
> +                     T G, T H, T I, T J, T K, T L,
> +                     T spatial_pred, bool skip_check)
> +{
> +    T p0 = (C + H) / 2;
> +    T p1 = F;
> +    T p2 = (D + I) / 2;
> +    T p3 = G;
> +    T p4 = (E + J) / 2;
> +
> +    T tdiff0 = abs(D - I);
> +    T tdiff1 = (abs(A - F) + abs(B - G)) / 2;
> +    T tdiff2 = (abs(K - F) + abs(G - L)) / 2;
> +
> +    T diff = max3(tdiff0, tdiff1, tdiff2);
> +
> +    if (!skip_check) {
> +        T maxi = max3(p2 - p3, p2 - p1, min(p0 - p1, p4 - p3));
> +        T mini = min3(p2 - p3, p2 - p1, max(p0 - p1, p4 - p3));
> +        diff = max3(diff, mini, -maxi);
> +    }
> +
> +    return clamp(spatial_pred, p2 - diff, p2 + diff);
> +}
> +
> +#define T float2
> +template <>
> +T spatial_predictor<T>(T a, T b, T c, T d, T e, T f, T g,
> +                       T h, T i, T j, T k, T l, T m, T n)
> +{
> +    return T(
> +        spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x,
> +                          h.x, i.x, j.x, k.x, l.x, m.x, n.x),
> +        spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y,
> +                          h.y, i.y, j.y, k.y, l.y, m.y, n.y)
> +    );
> +}
> +
> +template <>
> +T temporal_predictor<T>(T A, T B, T C, T D, T E, T F,
> +                        T G, T H, T I, T J, T K, T L,
> +                        T spatial_pred, bool skip_check)
> +{
> +    return T(
> +        temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x,
> +                           G.x, H.x, I.x, J.x, K.x, L.x,
> +                           spatial_pred.x, skip_check),
> +        temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y,
> +                           G.y, H.y, I.y, J.y, K.y, L.y,
> +                           spatial_pred.y, skip_check)
> +    );
> +}
> +#undef T
> +
> +/*
> + * YADIF compute
> + */
> +
> +template <typename T>
> +T yadif_compute_spatial(
> +    texture2d<float, accesstype> cur,
> +    uint2 pos)
> +{
> +    // Calculate spatial prediction
> +    T a = tex2D<T>(cur, pos.x - 3, pos.y - 1);
> +    T b = tex2D<T>(cur, pos.x - 2, pos.y - 1);
> +    T c = tex2D<T>(cur, pos.x - 1, pos.y - 1);
> +    T d = tex2D<T>(cur, pos.x - 0, pos.y - 1);
> +    T e = tex2D<T>(cur, pos.x + 1, pos.y - 1);
> +    T f = tex2D<T>(cur, pos.x + 2, pos.y - 1);
> +    T g = tex2D<T>(cur, pos.x + 3, pos.y - 1);
> +
> +    T h = tex2D<T>(cur, pos.x - 3, pos.y + 1);
> +    T i = tex2D<T>(cur, pos.x - 2, pos.y + 1);
> +    T j = tex2D<T>(cur, pos.x - 1, pos.y + 1);
> +    T k = tex2D<T>(cur, pos.x - 0, pos.y + 1);
> +    T l = tex2D<T>(cur, pos.x + 1, pos.y + 1);
> +    T m = tex2D<T>(cur, pos.x + 2, pos.y + 1);
> +    T n = tex2D<T>(cur, pos.x + 3, pos.y + 1);
> +
> +    return spatial_predictor(a, b, c, d, e, f, g,
> +                             h, i, j, k, l, m, n);
> +}
> +
> +template <typename T>
> +T yadif_compute_temporal(
> +    texture2d<float, accesstype> cur,
> +    texture2d<float, accesstype> prev2,
> +    texture2d<float, accesstype> prev1,
> +    texture2d<float, accesstype> next1,
> +    texture2d<float, accesstype> next2,
> +    T spatial_pred,
> +    bool skip_spatial_check,
> +    uint2 pos)
> +{
> +    // Calculate temporal prediction
> +    T A = tex2D<T>(prev2, pos.x, pos.y - 1);
> +    T B = tex2D<T>(prev2, pos.x, pos.y + 1);
> +    T C = tex2D<T>(prev1, pos.x, pos.y - 2);
> +    T D = tex2D<T>(prev1, pos.x, pos.y + 0);
> +    T E = tex2D<T>(prev1, pos.x, pos.y + 2);
> +    T F = tex2D<T>(cur,   pos.x, pos.y - 1);
> +    T G = tex2D<T>(cur,   pos.x, pos.y + 1);
> +    T H = tex2D<T>(next1, pos.x, pos.y - 2);
> +    T I = tex2D<T>(next1, pos.x, pos.y + 0);
> +    T J = tex2D<T>(next1, pos.x, pos.y + 2);
> +    T K = tex2D<T>(next2, pos.x, pos.y - 1);
> +    T L = tex2D<T>(next2, pos.x, pos.y + 1);
> +
> +    return temporal_predictor(A, B, C, D, E, F, G, H, I, J, K, L,
> +                              spatial_pred, skip_spatial_check);
> +}
> +
> +template <typename T>
> +T yadif(
> +    texture2d<float, access::write> dst,
> +    texture2d<float, accesstype> prev,
> +    texture2d<float, accesstype> cur,
> +    texture2d<float, accesstype> next,
> +    constant deintParams& params,
> +    uint2 pos)
> +{
> +    T spatial_pred = yadif_compute_spatial<T>(cur, pos);
> +
> +    if (params.is_second_field) {
> +        return yadif_compute_temporal(cur, prev, cur, next, next,
> spatial_pred, params.skip_spatial_check, pos);
> +    } else {
> +        return yadif_compute_temporal(cur, prev, prev, cur, next,
> spatial_pred, params.skip_spatial_check, pos);
> +    }
> +}
> +
> +/*
> + * Kernel dispatch
> + */
> +
> +kernel void deint(
> +    texture2d<float, access::write> dst [[texture(0)]],
> +    texture2d<float, accesstype> prev [[texture(1)]],
> +    texture2d<float, accesstype> cur  [[texture(2)]],
> +    texture2d<float, accesstype> next [[texture(3)]],
> +    constant deintParams& params [[buffer(4)]],
> +    uint2 pos [[thread_position_in_grid]])
> +{
> +    if ((pos.x >= dst.get_width()) ||
> +        (pos.y >= dst.get_height())) {
> +        return;
> +    }
> +
> +    // Don't modify the primary field
> +    if (pos.y % 2 == params.parity) {
> +        float4 in = cur.read(pos);
> +        dst.write(in, pos);
> +        return;
> +    }
> +
> +    float2 pred;
> +    if (params.channels == 1)
> +        pred = float2(yadif<float>(dst, prev, cur, next, params,
> pos));
> +    else
> +        pred = yadif<float2>(dst, prev, cur, next, params, pos);
> +    dst.write(pred.xyyy, pos);
> +}
> diff --git a/libavfilter/vf_yadif_videotoolbox.m
> b/libavfilter/vf_yadif_videotoolbox.m new file mode 100644
> index 0000000000..af83a73e89
> --- /dev/null
> +++ b/libavfilter/vf_yadif_videotoolbox.m
> @@ -0,0 +1,406 @@
> +/*
> + * Copyright (C) 2018 Philip Langdale <philipl at overt.org>
> + *               2020 Aman Karmani <aman at tmm1.net>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#include "internal.h"
> +#include "yadif.h"
> +#include <libavutil/avassert.h>
> +#include <libavutil/hwcontext.h>
> +#include <libavutil/objc.h>
> +#include <libavfilter/metal/utils.h>
> +
> +extern char ff_vf_yadif_videotoolbox_metallib_data[];
> +extern unsigned int ff_vf_yadif_videotoolbox_metallib_len;
> +
> +typedef struct YADIFVTContext {
> +    YADIFContext yadif;
> +
> +    AVBufferRef       *device_ref;
> +    AVBufferRef       *input_frames_ref;
> +    AVHWFramesContext *input_frames;
> +
> +    id<MTLDevice> mtlDevice;
> +    id<MTLLibrary> mtlLibrary;
> +    id<MTLCommandQueue> mtlQueue;
> +    id<MTLComputePipelineState> mtlPipeline;
> +    id<MTLFunction> mtlFunction;
> +    id<MTLBuffer> mtlParamsBuffer;
> +
> +    CVMetalTextureCacheRef textureCache;
> +} YADIFVTContext;
> +
> +struct mtlYadifParams {
> +    uint channels;
> +    uint parity;
> +    uint tff;
> +    bool is_second_field;
> +    bool skip_spatial_check;
> +    int field_mode;
> +};
> +
> +static void call_kernel(AVFilterContext *ctx,
> +                        id<MTLTexture> dst,
> +                        id<MTLTexture> prev,
> +                        id<MTLTexture> cur,
> +                        id<MTLTexture> next,
> +                        int channels,
> +                        int parity,
> +                        int tff)
> +{
> +    YADIFVTContext *s = ctx->priv;
> +    id<MTLCommandBuffer> buffer = s->mtlQueue.commandBuffer;
> +    id<MTLComputeCommandEncoder> encoder =
> buffer.computeCommandEncoder;
> +    struct mtlYadifParams *params = (struct mtlYadifParams
> *)s->mtlParamsBuffer.contents;
> +    *params = (struct mtlYadifParams){
> +        .channels = channels,
> +        .parity = parity,
> +        .tff = tff,
> +        .is_second_field = !(parity ^ tff),
> +        .skip_spatial_check = s->yadif.mode&2,
> +        .field_mode = s->yadif.current_field
> +    };
> +
> +    [encoder setTexture:dst  atIndex:0];
> +    [encoder setTexture:prev atIndex:1];
> +    [encoder setTexture:cur  atIndex:2];
> +    [encoder setTexture:next atIndex:3];
> +    [encoder setBuffer:s->mtlParamsBuffer offset:0 atIndex:4];
> +    ff_metal_compute_encoder_dispatch(s->mtlDevice, s->mtlPipeline,
> encoder, dst.width, dst.height);
> +    [encoder endEncoding];
> +
> +    [buffer commit];
> +    [buffer waitUntilCompleted];
> +
> +    ff_objc_release(&encoder);
> +    ff_objc_release(&buffer);
> +}
> +
> +static void filter(AVFilterContext *ctx, AVFrame *dst,
> +                   int parity, int tff)
> +{
> +    YADIFVTContext *s = ctx->priv;
> +    YADIFContext *y = &s->yadif;
> +    int i;
> +
> +    for (i = 0; i < y->csp->nb_components; i++) {
> +        int pixel_size, channels;
> +        const AVComponentDescriptor *comp = &y->csp->comp[i];
> +        CVMetalTextureRef prev, cur, next, dest;
> +        id<MTLTexture> tex_prev, tex_cur, tex_next, tex_dest;
> +        MTLPixelFormat format;
> +
> +        if (comp->plane < i) {
> +            // We process planes as a whole, so don't reprocess
> +            // them for additional components
> +            continue;
> +        }
> +
> +        pixel_size = (comp->depth + comp->shift) / 8;
> +        channels = comp->step / pixel_size;
> +        if (pixel_size > 2 || channels > 2) {
> +            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format:
> %s\n", y->csp->name);
> +            goto exit;
> +        }
> +        switch (pixel_size) {
> +        case 1:
> +            format = channels == 1 ? MTLPixelFormatR8Unorm :
> MTLPixelFormatRG8Unorm;
> +            break;
> +        case 2:
> +            format = channels == 1 ? MTLPixelFormatR16Unorm :
> MTLPixelFormatRG16Unorm;
> +            break;
> +        default:
> +            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format:
> %s\n", y->csp->name);
> +            goto exit;
> +        }
> +        av_log(ctx, AV_LOG_TRACE,
> +               "Deinterlacing plane %d: pixel_size: %d channels:
> %d\n",
> +               comp->plane, pixel_size, channels);
> +
> +        prev = ff_metal_texture_from_pixbuf(ctx, s->textureCache,
> (CVPixelBufferRef)y->prev->data[3], i, format);
> +        cur  = ff_metal_texture_from_pixbuf(ctx, s->textureCache,
> (CVPixelBufferRef)y->cur->data[3], i, format);
> +        next = ff_metal_texture_from_pixbuf(ctx, s->textureCache,
> (CVPixelBufferRef)y->next->data[3], i, format);
> +        dest = ff_metal_texture_from_pixbuf(ctx, s->textureCache,
> (CVPixelBufferRef)dst->data[3], i, format); +
> +        tex_prev = CVMetalTextureGetTexture(prev);
> +        tex_cur  = CVMetalTextureGetTexture(cur);
> +        tex_next = CVMetalTextureGetTexture(next);
> +        tex_dest = CVMetalTextureGetTexture(dest);
> +
> +        call_kernel(ctx, tex_dest, tex_prev, tex_cur, tex_next,
> +                         channels, parity, tff);
> +
> +        CFRelease(prev);
> +        CFRelease(cur);
> +        CFRelease(next);
> +        CFRelease(dest);
> +    }
> +
> +    CVBufferPropagateAttachments((CVPixelBufferRef)y->cur->data[3],
> (CVPixelBufferRef)dst->data[3]); +
> +    if (y->current_field == YADIF_FIELD_END) {
> +        y->current_field = YADIF_FIELD_NORMAL;
> +    }
> +
> +exit:
> +    return;
> +}
> +
> +static av_cold void yadif_videotoolbox_uninit(AVFilterContext *ctx)
> +{
> +    YADIFVTContext *s = ctx->priv;
> +    YADIFContext *y = &s->yadif;
> +
> +    av_frame_free(&y->prev);
> +    av_frame_free(&y->cur);
> +    av_frame_free(&y->next);
> +
> +    av_buffer_unref(&s->device_ref);
> +    av_buffer_unref(&s->input_frames_ref);
> +    s->input_frames = NULL;
> +
> +    ff_objc_release(&s->mtlParamsBuffer);
> +    ff_objc_release(&s->mtlFunction);
> +    ff_objc_release(&s->mtlPipeline);
> +    ff_objc_release(&s->mtlQueue);
> +    ff_objc_release(&s->mtlLibrary);
> +    ff_objc_release(&s->mtlDevice);
> +
> +    if (s->textureCache) {
> +        CFRelease(s->textureCache);
> +        s->textureCache = NULL;
> +    }
> +}
> +
> +static av_cold int yadif_videotoolbox_init(AVFilterContext *ctx)
> +{
> +    YADIFVTContext *s = ctx->priv;
> +    NSError *err = nil;
> +    CVReturn ret;
> +
> +    s->mtlDevice = MTLCreateSystemDefaultDevice();
> +    if (!s->mtlDevice) {
> +        av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n");
> +        goto fail;
> +    }
> +
> +    av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n",
> s->mtlDevice.name.UTF8String); +
> +    dispatch_data_t libData = dispatch_data_create(
> +        ff_vf_yadif_videotoolbox_metallib_data,
> +        ff_vf_yadif_videotoolbox_metallib_len,
> +        nil,
> +        nil);
> +    s->mtlLibrary = [s->mtlDevice newLibraryWithData:libData
> error:&err];
> +    dispatch_release(libData);
> +    libData = nil;
> +    if (err) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to load Metal library:
> %s\n", err.description.UTF8String);
> +        goto fail;
> +    }
> +
> +    s->mtlFunction = [s->mtlLibrary newFunctionWithName:@"deint"];
> +    if (!s->mtlFunction) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal
> function!\n");
> +        goto fail;
> +    }
> +
> +    s->mtlQueue = s->mtlDevice.newCommandQueue;
> +    if (!s->mtlQueue) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command
> queue!\n");
> +        goto fail;
> +    }
> +
> +    s->mtlPipeline = [s->mtlDevice
> +        newComputePipelineStateWithFunction:s->mtlFunction
> +        error:&err];
> +    if (err) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute
> pipeline: %s\n", err.description.UTF8String);
> +        goto fail;
> +    }
> +
> +    s->mtlParamsBuffer = [s->mtlDevice
> +        newBufferWithLength:sizeof(struct mtlYadifParams)
> +        options:MTLResourceStorageModeShared];
> +    if (!s->mtlParamsBuffer) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal buffer for
> parameters\n");
> +        goto fail;
> +    }
> +
> +    ret = CVMetalTextureCacheCreate(
> +        NULL,
> +        NULL,
> +        s->mtlDevice,
> +        NULL,
> +        &s->textureCache
> +    );
> +    if (ret != kCVReturnSuccess) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create
> CVMetalTextureCache: %d\n", ret);
> +        goto fail;
> +    }
> +
> +    return 0;
> +fail:
> +    yadif_videotoolbox_uninit(ctx);
> +    return AVERROR_EXTERNAL;
> +}
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    YADIFVTContext *s = ctx->priv;
> +
> +    if (!inlink->hw_frames_ctx) {
> +        av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is "
> +               "required to associate the processing device.\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
> +    if (!s->input_frames_ref) {
> +        av_log(ctx, AV_LOG_ERROR, "A input frames reference create "
> +               "failed.\n");
> +        return AVERROR(ENOMEM);
> +    }
> +    s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data;
> +
> +    return 0;
> +}
> +
> +static int config_output(AVFilterLink *link)
> +{
> +    AVHWFramesContext *output_frames;
> +    AVFilterContext *ctx = link->src;
> +    YADIFVTContext *s = ctx->priv;
> +    YADIFContext *y = &s->yadif;
> +    int ret = 0;
> +
> +    av_assert0(s->input_frames);
> +    s->device_ref = av_buffer_ref(s->input_frames->device_ref);
> +    if (!s->device_ref) {
> +        av_log(ctx, AV_LOG_ERROR, "A device reference create "
> +               "failed.\n");
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref);
> +    if (!link->hw_frames_ctx) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context
> "
> +               "for output.\n");
> +        ret = AVERROR(ENOMEM);
> +        goto exit;
> +    }
> +
> +    output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
> +
> +    output_frames->format    = AV_PIX_FMT_VIDEOTOOLBOX;
> +    output_frames->sw_format = s->input_frames->sw_format;
> +    output_frames->width     = ctx->inputs[0]->w;
> +    output_frames->height    = ctx->inputs[0]->h;
> +
> +    ret = ff_filter_init_hw_frames(ctx, link, 10);
> +    if (ret < 0)
> +        goto exit;
> +
> +    ret = av_hwframe_ctx_init(link->hw_frames_ctx);
> +    if (ret < 0) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to initialise VideoToolbox
> frame "
> +               "context for output: %d\n", ret);
> +        goto exit;
> +    }
> +
> +    link->time_base.num = ctx->inputs[0]->time_base.num;
> +    link->time_base.den = ctx->inputs[0]->time_base.den * 2;
> +    link->w             = ctx->inputs[0]->w;
> +    link->h             = ctx->inputs[0]->h;
> +
> +    if(y->mode & 1)
> +        link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
> +                                    (AVRational){2, 1});
> +
> +    if (link->w < 3 || link->h < 3) {
> +        av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or
> lines is not supported\n");
> +        ret = AVERROR(EINVAL);
> +        goto exit;
> +    }
> +
> +    y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
> +    y->filter = filter;
> +
> +exit:
> +    return ret;
> +}
> +
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +#define CONST(name, help, val, unit) { name, help, 0,
> AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit } +
> +static const AVOption yadif_videotoolbox_options[] = {
> +    #define OFFSET(x) offsetof(YADIFContext, x)
> +    { "mode",   "specify the interlacing mode", OFFSET(mode),
> AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FRAME}, 0, 3, FLAGS, "mode"},
> +    CONST("send_frame",           "send one frame for each frame",
>                                   YADIF_MODE_SEND_FRAME,
> "mode"),
> +    CONST("send_field",           "send one frame for each field",
>                                   YADIF_MODE_SEND_FIELD,
> "mode"),
> +    CONST("send_frame_nospatial", "send one frame for each frame,
> but skip spatial interlacing check", YADIF_MODE_SEND_FRAME_NOSPATIAL,
> "mode"),
> +    CONST("send_field_nospatial", "send one frame for each field,
> but skip spatial interlacing check", YADIF_MODE_SEND_FIELD_NOSPATIAL,
> "mode"), +
> +    { "parity", "specify the assumed picture field parity",
> OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1,
> FLAGS, "parity" },
> +    CONST("tff",  "assume top field first",    YADIF_PARITY_TFF,
> "parity"),
> +    CONST("bff",  "assume bottom field first", YADIF_PARITY_BFF,
> "parity"),
> +    CONST("auto", "auto detect parity",        YADIF_PARITY_AUTO,
> "parity"), +
> +    { "deint", "specify which frames to deinterlace", OFFSET(deint),
> AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, "deint" },
> +    CONST("all",        "deinterlace all frames",
>    YADIF_DEINT_ALL,        "deint"),
> +    CONST("interlaced", "only deinterlace frames marked as
> interlaced", YADIF_DEINT_INTERLACED, "deint"),
> +    #undef OFFSET
> +
> +    { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(yadif_videotoolbox);
> +
> +static const AVFilterPad yadif_videotoolbox_inputs[] = {
> +    {
> +        .name          = "default",
> +        .type          = AVMEDIA_TYPE_VIDEO,
> +        .filter_frame  = ff_yadif_filter_frame,
> +        .config_props  = config_input,
> +    },
> +};
> +
> +static const AVFilterPad yadif_videotoolbox_outputs[] = {
> +    {
> +        .name          = "default",
> +        .type          = AVMEDIA_TYPE_VIDEO,
> +        .request_frame = ff_yadif_request_frame,
> +        .config_props  = config_output,
> +    },
> +};
> +
> +AVFilter ff_vf_yadif_videotoolbox = {
> +    .name           = "yadif_videotoolbox",
> +    .description    = NULL_IF_CONFIG_SMALL("YADIF for VideoToolbox
> frames using Metal compute"),
> +    .priv_size      = sizeof(YADIFVTContext),
> +    .priv_class     = &yadif_videotoolbox_class,
> +    .init           = yadif_videotoolbox_init,
> +    .uninit         = yadif_videotoolbox_uninit,
> +    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX),
> +    FILTER_INPUTS(yadif_videotoolbox_inputs),
> +    FILTER_OUTPUTS(yadif_videotoolbox_outputs),
> +    .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};

LGTM for the general part. I'll take your work that the metal specific
parts work as intended.

--phil