[FFmpeg-devel] [PATCH v2] avfilter/vf_libvmaf: Add metadata propagation support

Yigithan Yigit yigithanyigitdevel at gmail.com
Thu Sep 12 03:41:06 EEST 2024


Hi,

Thanks for the feedback!


> On Aug 30, 2024, at 7:44 AM, Kyle Swanson <k at ylo.ph> wrote:
> 
> Hi,
> 
> 
> On Mon, Aug 26, 2024 at 10:51 AM Yigithan Yigit
> <yigithanyigitdevel at gmail.com <mailto:yigithanyigitdevel at gmail.com>> wrote:
>> 
>> ---
>> libavfilter/vf_libvmaf.c | 328 ++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 326 insertions(+), 2 deletions(-)
>> 
>> diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c
>> index f655092b20..e6707aff53 100644
>> --- a/libavfilter/vf_libvmaf.c
>> +++ b/libavfilter/vf_libvmaf.c
>> @@ -27,8 +27,11 @@
>> #include "config_components.h"
>> 
>> #include <libvmaf.h>
>> +#include <libvmaf/version.h>
>> 
>> #include "libavutil/avstring.h"
>> +#include "libavutil/dict.h"
>> +#include "libavutil/frame.h"
>> #include "libavutil/mem.h"
>> #include "libavutil/opt.h"
>> #include "libavutil/pixdesc.h"
>> @@ -46,6 +49,31 @@
>> #include "libavutil/hwcontext_cuda_internal.h"
>> #endif
>> 
>> +#define VMAF_VERSION_INT_VER(major, minor, patch) \
>> +    ((major) * 10000 + (minor) * 100 + (patch))
>> +
>> +#if VMAF_VERSION_INT_VER(VMAF_API_VERSION_MAJOR, VMAF_API_VERSION_MINOR, VMAF_API_VERSION_PATCH) > VMAF_VERSION_INT_VER(3, 0, 0)
>> +#define CONFIG_LIBVMAF_METADATA_ENABLED 1
>> +#else
>> +#define CONFIG_LIBVMAF_METADATA_ENABLED 0
>> +#endif
> 
> You should be able to check pkg_cfg and set this
> CONFIG_LIBVMAF_METADATA_ENABLED define from the configure script.

Fixed locally.

> 
>> +
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +#include <stdatomic.h>
>> +
>> +typedef struct FrameList {
>> +    AVFrame *frame;
>> +    unsigned frame_number;
>> +    unsigned propagated_handlers_cnt;
>> +    struct FrameList *next;
>> +} FrameList;
>> +
>> +typedef struct CallbackStruct {
>> +    struct LIBVMAFContext *s;
>> +    FrameList *frame_list;
>> +} CallbackStruct;
>> +#endif
>> +
>> typedef struct LIBVMAFContext {
>>     const AVClass *class;
>>     FFFrameSync fs;
>> @@ -56,8 +84,19 @@ typedef struct LIBVMAFContext {
>>     int n_subsample;
>>     char *model_cfg;
>>     char *feature_cfg;
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +    char *metadata_feature_cfg;
>> +    struct {
>> +        VmafMetadataConfiguration *metadata_cfgs;
>> +        unsigned metadata_cfg_cnt;
>> +    } metadata_cfg_list;
>> +    CallbackStruct *cb;
>> +    atomic_uint outlink_eof;
>> +    atomic_uint eof_frame;
>> +#endif
>>     VmafContext *vmaf;
>>     VmafModel **model;
>> +    int flushed;
>>     unsigned model_cnt;
>>     unsigned frame_cnt;
>>     unsigned bpc;
>> @@ -77,6 +116,9 @@ static const AVOption libvmaf_options[] =3D {
>>     {"n_subsample", "Set interval for frame subsampling used when comput=
> ing vmaf.",     OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=3D1}, 1, UINT_M=
> AX, FLAGS},
>>     {"model",  "Set the model to be used for computing vmaf.",          =
>                OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str=3D"version=3Dv=
> maf_v0.6.1"}, 0, 1, FLAGS},
>>     {"feature",  "Set the feature to be used for computing vmaf.",      =
>                OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=3DNULL}, 0, =
> 1, FLAGS},
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +    {"metadata_handler",  "Set the feature to be propagated as metadata.",              OFFSET(metadata_feature_cfg), AV_OPT_TYPE_STRING, {.str="name=vmaf"}, 0, 1, FLAGS},
> 
> Would be better to make this option a bool. When true, propagate all
> registered features and models. You can read the names during init,
> they should be available inside `parse_models()` and
> `parse_features()`.

Yes, but we designed the vmaf API for individual metrics. Unfortunately, using an identifier doesn’t work, and as far as I know there is no API for accessing individual features by identifier. However, I made a small patch for that, which makes the API suitable for a more generic use case:

https://github.com/Netflix/vmaf/pull/1387

> 
>> +#endif
>>     { NULL }
>> };
>> 
>> @@ -105,6 +147,123 @@ static enum VmafPixelFormat pix_fmt_map(enum AVPixe=
> lFormat av_pix_fmt)
>>     }
>> }
>> 
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +static int add_to_frame_list(FrameList **head, AVFrame *frame, unsigned =
> frame_number)
>> +{
>> +    FrameList *new_frame =3D av_malloc(sizeof(FrameList));
>> +    if (!new_frame)
>> +        return AVERROR(ENOMEM);
>> +
>> +    new_frame->frame =3D frame;
>> +    new_frame->frame_number =3D frame_number;
>> +    new_frame->propagated_handlers_cnt =3D 0;
>> +    new_frame->next =3D NULL;
>> +
>> +    if (*head =3D=3D NULL) {
>> +        *head =3D new_frame;
>> +    } else {
>> +        FrameList *current =3D *head;
>> +        while (current->next !=3D NULL) {
>> +            current =3D current->next;
>> +        }
>> +        current->next =3D new_frame;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int remove_from_frame_list(FrameList **frame_list, unsigned frame=
> _number)
>> +{
>> +    FrameList *cur =3D *frame_list;
>> +    FrameList *prev =3D NULL;
>> +
>> +    while (cur) {
>> +        if (cur->frame_number =3D=3D frame_number) {
>> +            if (prev)
>> +                prev->next =3D cur->next;
>> +            else
>> +                *frame_list =3D cur->next;
>> +            av_free(cur);
>> +            return 0;
>> +        }
>> +        prev =3D cur;
>> +        cur =3D cur->next;
>> +    }
>> +
>> +    return AVERROR(EINVAL);
>> +}
>> +
>> +static int free_frame_list(FrameList **frame_list)
>> +{
>> +    FrameList *cur =3D *frame_list;
>> +    while (cur) {
>> +        FrameList *next =3D cur->next;
>> +        av_frame_free(&cur->frame);
>> +        av_free(cur);
>> +        cur =3D next;
>> +    }
>> +    *frame_list =3D NULL;
>> +    return 0;
>> +}
>> +
>> +static FrameList* get_frame_from_frame_list(FrameList *frame_list,
>> +                                          unsigned frame_number)
>> +{
>> +    FrameList *cur =3D frame_list;
>> +    while (cur) {
>> +        if (cur->frame_number =3D=3D frame_number)
>> +            return cur;
>> +        cur =3D cur->next;
>> +    }
>> +    return NULL;
>> +}
>> +
> 
> Would be great if we didn't need to invent a data structure here. I
> guess av_fifo is no good here because metadata callbacks are not
> guaranteed to come in order?

Yes, metadata callbacks do not arrive in order. I looked through libavutil and couldn’t find a data structure for this use case. I am open to suggestions if we would prefer not to use this data structure.

> 
>> +static void set_meta(void *data, VmafMetadata *metadata)
>> +{
>> +    int err =3D 0;
>> +    FrameList *current_frame =3D NULL;
>> +    CallbackStruct *cb =3D data;
>> +    char value[128], key[128];
>> +    snprintf(value, sizeof(value), "%0.2f", metadata->score);
>> +    snprintf(key, sizeof(key), "%s.%d", metadata->feature_name, metadata=
> ->picture_index);
>> +
>> +    current_frame =3D get_frame_from_frame_list(cb->frame_list, metadata=
> ->picture_index);
>> +    if (!current_frame) {
>> +        av_log(NULL, AV_LOG_ERROR, "could not find frame with index: %d\=
> n",
>> +               metadata->picture_index);
>> +        return;
>> +    }
>> +
>> +    err =3D av_dict_set(&current_frame->frame->metadata, key, value, 0);
>> +    if (err < 0)
>> +        av_log(NULL, AV_LOG_ERROR, "could not set metadata: %s\n", key);
>> +
>> +    current_frame->propagated_handlers_cnt++;
>> +
>> +    if (current_frame->propagated_handlers_cnt =3D=3D cb->s->metadata_cf=
> g_list.metadata_cfg_cnt) {
>> +        FrameList *cur =3D cb->frame_list;
>> +        // This code block allows to send frames monotonically
>> +        while(cur && cur->frame_number <=3D metadata->picture_index) {
>> +            if (cur->propagated_handlers_cnt =3D=3D cb->s->metadata_cfg_=
> list.metadata_cfg_cnt) {
>> +                FrameList *next;
>> +                // Check outlink is closed
>> +                if (!cb->s->outlink_eof) {
>> +                    av_log(cb->s->fs.parent, AV_LOG_DEBUG, "VMAF feature=
> : %d, score: %f\n", cur->frame_number, metadata->score);
>> +                    cb->s->eof_frame =3D cur->frame_number;
>> +                    if(ff_filter_frame(cb->s->fs.parent->outputs[0], cur=
> ->frame))
>> +                        return;
>> +                }
>> +                next =3D cur->next;
>> +                remove_from_frame_list(&cb->frame_list, cur->frame_numbe=
> r);
>> +                cur =3D next;
>> +            }
>> +            else
>> +                break;
>> +        }
>> +    }
>> +}
>> +#endif
>> +
>> static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bp=
> c)
>> {
>>     const int bytes_per_value =3D bpc > 8 ? 2 : 1;
>> @@ -160,13 +319,28 @@ static int do_vmaf(FFFrameSync *fs)
>>         return AVERROR(ENOMEM);
>>     }
>> 
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +    err =3D add_to_frame_list(&s->cb->frame_list, dist, s->frame_cnt);
>> +    if (err) {
>> +        av_log(s, AV_LOG_ERROR, "problem during add_to_frame_list.\n");
>> +        return AVERROR(ENOMEM);
>> +    }
>> +#endif
>> +
>>     err =3D vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cn=
> t++);
>>     if (err) {
>>         av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
>>         return AVERROR(EINVAL);
>>     }
>> 
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +    if (s->metadata_cfg_list.metadata_cfg_cnt)
>> +        return 0;
>> +    else
>> +        return ff_filter_frame(ctx->outputs[0], dist);
>> +#else
>>     return ff_filter_frame(ctx->outputs[0], dist);
>> +#endif
>> }
>> 
>> static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt)
>> @@ -408,6 +582,83 @@ exit:
>>     return err;
>> }
>> 
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +static int parse_metadata_handlers(AVFilterContext *ctx)
>> +{
>> +    LIBVMAFContext *s =3D ctx->priv;
>> +    AVDictionary **dict;
>> +    unsigned dict_cnt;
>> +    int err =3D 0;
>> +
>> +    if (!s->metadata_feature_cfg)
>> +        return 0;
>> +
>> +    dict_cnt =3D 0;
>> +    dict =3D delimited_dict_parse(s->metadata_feature_cfg, &dict_cnt);
>> +    if (!dict) {
>> +        av_log(ctx, AV_LOG_ERROR,
>> +               "could not parse metadata feature config: %s\n",
>> +               s->metadata_feature_cfg);
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    for (unsigned i =3D 0; i < dict_cnt; i++) {
>> +        VmafMetadataConfiguration *metadata_cfg =3D av_calloc(1, sizeof(=
> *metadata_cfg));
>> +        const AVDictionaryEntry *e =3D NULL;
>> +        char *feature_name =3D NULL;
>> +
>> +        while (e =3D av_dict_iterate(dict[i], e)) {
>> +            if (!strcmp(e->key, "name")) {
>> +                metadata_cfg->feature_name =3D av_strdup(e->value);
>> +                continue;
>> +            }
>> +        }
>> +
>> +        metadata_cfg->data =3D s->cb;
>> +        metadata_cfg->callback =3D &set_meta;
>> +
>> +        err =3D vmaf_register_metadata_handler(s->vmaf, *metadata_cfg);
>> +        if (err) {
>> +            av_log(ctx, AV_LOG_ERROR,
>> +                   "problem during vmaf_register_metadata_handler: %s\n"=
> ,
>> +                   feature_name);
>> +            goto exit;
>> +        }
>> +
>> +        s->metadata_cfg_list.metadata_cfgs =3D av_realloc(s->metadata_cf=
> g_list.metadata_cfgs,
>> +                                             (s->metadata_cfg_list.metad=
> ata_cfg_cnt + 1) *
>> +                                             sizeof(*s->metadata_cfg_lis=
> t.metadata_cfgs));
>> +        if (!s->metadata_cfg_list.metadata_cfgs) {
>> +            err =3D AVERROR(ENOMEM);
>> +            goto exit;
>> +        }
>> +
>> +        s->metadata_cfg_list.metadata_cfgs[s->metadata_cfg_list.metadata=
> _cfg_cnt++] =3D *metadata_cfg;
>> +    }
>> +
>> +exit:
>> +    for (unsigned i =3D 0; i < dict_cnt; i++) {
>> +        if (dict[i])
>> +            av_dict_free(&dict[i]);
>> +    }
>> +    av_free(dict);
>> +    return err;
>> +}
>> +
>> +static int init_metadata(AVFilterContext *ctx)
>> +{
>> +    LIBVMAFContext *s =3D ctx->priv;
>> +
>> +    s->cb =3D av_calloc(1, sizeof(CallbackStruct));
>> +    if (!s->cb)
>> +        return AVERROR(ENOMEM);
>> +
>> +    s->cb->s =3D s;
>> +
>> +    return 0;
>> +}
>> +#endif
>> +
>> static enum VmafLogLevel log_level_map(int log_level)
>> {
>>     switch (log_level) {
>> @@ -441,6 +692,16 @@ static av_cold int init(AVFilterContext *ctx)
>>     if (err)
>>         return AVERROR(EINVAL);
>> 
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +    err =3D init_metadata(ctx);
>> +    if (err)
>> +        return err;
>> +
>> +    err =3D parse_metadata_handlers(ctx);
>> +    if (err)
>> +        return err;
>> +#endif
>> +
>>     err =3D parse_models(ctx);
>>     if (err)
>>         return err;
>> @@ -518,6 +779,38 @@ static int config_output(AVFilterLink *outlink)
>> static int activate(AVFilterContext *ctx)
>> {
>>     LIBVMAFContext *s =3D ctx->priv;
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +    // There are 2 cases for metadata propagation:
>> +    // 1. Where the case that outlink closes
>> +    // 2. Where inlink closes
>> +    // Case 1:
>> +    //   In this case we need check outlink somehow for the status in every iteration.
>> +    //   If outlink is not wanting frame anymore, we need to proceed with uninit with setting inlink.
>> +    //   But nature of multithreading setting eof inside the activate call can make sync issues and
>> +    //   can lead to extra propagated frames. Atomic variables are used to avoid this.
>> +    // Case 2:
>> +    //   This case relatively easy to handle. Because of calculation of vmaf score takes time
>> +    //   So `do_vmaf` buffers many of frames before sending to outlink that causes
>> +    //   premature close of outlink.
>> +    //   Checking inlink status is enough and if inlink == eof flushing vmaf is enough for this.
>> +    int64_t pts;
>> +    int status, ret =3D 0;
>> +
>> +    if (ff_outlink_get_status(ctx->outputs[0]))
>> +        s->outlink_eof =3D 1;
>> +
>> +    if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts) &&
>> +        ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) {
>> +        if (!s->flushed) {
>> +            ret =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
>> +            if (ret)
>> +                av_log(ctx, AV_LOG_ERROR,
>> +                       "problem flushing libvmaf context.\n");
>> +            else
>> +                s->flushed =3D 1;
>> +        }
>> +    }
>> +#endif
>>     return ff_framesync_activate(&s->fs);
>> }
>> 
>> @@ -556,21 +849,52 @@ static av_cold void uninit(AVFilterContext *ctx)
>>     LIBVMAFContext *s =3D ctx->priv;
>>     int err =3D 0;
>> 
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +    if (!s->outlink_eof)
>> +        s->outlink_eof =3D 1;
>> +#endif
>> +
>>     ff_framesync_uninit(&s->fs);
>> 
>>     if (!s->frame_cnt)
>>         goto clean_up;
>> 
>> -    err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
>> +    if (!s->flushed) {
>> +        err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
>> +        if (err) {
>> +            av_log(ctx, AV_LOG_ERROR,
>> +                   "problem flushing libvmaf context.\n");
>> +        } else
>> +            s->flushed =3D 1;
>> +    }
>> +
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +    if (s->metadata_cfg_list.metadata_cfgs) {
>> +        for (unsigned i =3D 0; i < s->metadata_cfg_list.metadata_cfg_cnt=
> ; i++) {
>> +            av_free(s->metadata_cfg_list.metadata_cfgs[i].feature_name);
>> +        }
>> +        av_free(s->metadata_cfg_list.metadata_cfgs);
>> +    }
>> +
>> +    err =3D free_frame_list(&s->cb->frame_list);
>>     if (err) {
>>         av_log(ctx, AV_LOG_ERROR,
>> -               "problem flushing libvmaf context.\n");
>> +               "problem freeing frame list.\n");
>>     }
>> +#endif
>> 
>>     for (unsigned i =3D 0; i < s->model_cnt; i++) {
>>         double vmaf_score;
>> +
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +        err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(=
> s->pool),
>> +                                &vmaf_score, 0, s->eof_frame);
>> +        av_log(ctx, AV_LOG_DEBUG, "frame: %d frame_cnt %d\n", s->eof_fra=
> me, s->frame_cnt - 1);
>> +#else
>>         err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(=
> s->pool),
>>                                 &vmaf_score, 0, s->frame_cnt - 1);
>> +#endif
>> +
>>         if (err) {
>>             av_log(ctx, AV_LOG_ERROR,
>>                    "problem getting pooled vmaf score.\n");
>> --
>> 2.45.2
>> 
> 
> Thanks,
> Kyle

Thanks,
Yigithan



More information about the ffmpeg-devel mailing list