[FFmpeg-devel] [PATCH v2] avfilter/vf_libvmaf: Add metadata propagation support
Yigithan Yigit
yigithanyigitdevel at gmail.com
Thu Sep 12 03:41:06 EEST 2024
Hi,
Thanks for the feedback!
> On Aug 30, 2024, at 7:44 AM, Kyle Swanson <k at ylo.ph> wrote:
>
> Hi,
>
>
> On Mon, Aug 26, 2024 at 10:51 AM Yigithan Yigit
> <yigithanyigitdevel at gmail.com <mailto:yigithanyigitdevel at gmail.com>> wrote:
>>
>> ---
>> libavfilter/vf_libvmaf.c | 328 ++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 326 insertions(+), 2 deletions(-)
>>
>> diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c
>> index f655092b20..e6707aff53 100644
>> --- a/libavfilter/vf_libvmaf.c
>> +++ b/libavfilter/vf_libvmaf.c
>> @@ -27,8 +27,11 @@
>> #include "config_components.h"
>>
>> #include <libvmaf.h>
>> +#include <libvmaf/version.h>
>>
>> #include "libavutil/avstring.h"
>> +#include "libavutil/dict.h"
>> +#include "libavutil/frame.h"
>> #include "libavutil/mem.h"
>> #include "libavutil/opt.h"
>> #include "libavutil/pixdesc.h"
>> @@ -46,6 +49,31 @@
>> #include "libavutil/hwcontext_cuda_internal.h"
>> #endif
>>
>> +#define VMAF_VERSION_INT_VER(major, minor, patch) \
>> + ((major) * 10000 + (minor) * 100 + (patch))
>> +
>> +#if VMAF_VERSION_INT_VER(VMAF_API_VERSION_MAJOR, VMAF_API_VERSION_MINOR,=
> VMAF_API_VERSION_PATCH) > VMAF_VERSION_INT_VER(3, 0, 0)
>> +#define CONFIG_LIBVMAF_METADATA_ENABLED 1
>> +#else
>> +#define CONFIG_LIBVMAF_METADATA_ENABLED 0
>> +#endif
>
> You should be able to check pkg_cfg and set this
> CONFIG_LIBVMAF_METADATA_ENABLED define from the configure script.
Fixed locally.
>
>> +
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +#include <stdatomic.h>
>> +
>> +typedef struct FrameList {
>> + AVFrame *frame;
>> + unsigned frame_number;
>> + unsigned propagated_handlers_cnt;
>> + struct FrameList *next;
>> +} FrameList;
>> +
>> +typedef struct CallbackStruct {
>> + struct LIBVMAFContext *s;
>> + FrameList *frame_list;
>> +} CallbackStruct;
>> +#endif
>> +
>> typedef struct LIBVMAFContext {
>> const AVClass *class;
>> FFFrameSync fs;
>> @@ -56,8 +84,19 @@ typedef struct LIBVMAFContext {
>> int n_subsample;
>> char *model_cfg;
>> char *feature_cfg;
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> + char *metadata_feature_cfg;
>> + struct {
>> + VmafMetadataConfiguration *metadata_cfgs;
>> + unsigned metadata_cfg_cnt;
>> + } metadata_cfg_list;
>> + CallbackStruct *cb;
>> + atomic_uint outlink_eof;
>> + atomic_uint eof_frame;
>> +#endif
>> VmafContext *vmaf;
>> VmafModel **model;
>> + int flushed;
>> unsigned model_cnt;
>> unsigned frame_cnt;
>> unsigned bpc;
>> @@ -77,6 +116,9 @@ static const AVOption libvmaf_options[] =3D {
>> {"n_subsample", "Set interval for frame subsampling used when comput=
> ing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=3D1}, 1, UINT_M=
> AX, FLAGS},
>> {"model", "Set the model to be used for computing vmaf.", =
> OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str=3D"version=3Dv=
> maf_v0.6.1"}, 0, 1, FLAGS},
>> {"feature", "Set the feature to be used for computing vmaf.", =
> OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=3DNULL}, 0, =
> 1, FLAGS},
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> + {"metadata_handler", "Set the feature to be propagated as metadata.=
> ", OFFSET(metadata_feature_cfg), AV_OPT_TYPE_STRING, {.str=3D"=
> name=3Dvmaf"}, 0, 1, FLAGS},
>
> Would be better to make this option a bool. When true, propagate all
> registered features and models. You can read the names during init,
> they should be available inside `parse_models()` and
> `parse_features()`.
Yes, but we designed the vmaf API for individual metrics. Unfortunately, using an identifier doesn’t work, and as far as I know there is no API for accessing individual features by identifier. However, I made a small patch for that, which enables a more generic use case for the API.
https://github.com/Netflix/vmaf/pull/1387
>
>> +#endif
>> { NULL }
>> };
>>
>> @@ -105,6 +147,123 @@ static enum VmafPixelFormat pix_fmt_map(enum AVPixe=
> lFormat av_pix_fmt)
>> }
>> }
>>
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +static int add_to_frame_list(FrameList **head, AVFrame *frame, unsigned =
> frame_number)
>> +{
>> + FrameList *new_frame =3D av_malloc(sizeof(FrameList));
>> + if (!new_frame)
>> + return AVERROR(ENOMEM);
>> +
>> + new_frame->frame =3D frame;
>> + new_frame->frame_number =3D frame_number;
>> + new_frame->propagated_handlers_cnt =3D 0;
>> + new_frame->next =3D NULL;
>> +
>> + if (*head =3D=3D NULL) {
>> + *head =3D new_frame;
>> + } else {
>> + FrameList *current =3D *head;
>> + while (current->next !=3D NULL) {
>> + current =3D current->next;
>> + }
>> + current->next =3D new_frame;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int remove_from_frame_list(FrameList **frame_list, unsigned frame=
> _number)
>> +{
>> + FrameList *cur =3D *frame_list;
>> + FrameList *prev =3D NULL;
>> +
>> + while (cur) {
>> + if (cur->frame_number =3D=3D frame_number) {
>> + if (prev)
>> + prev->next =3D cur->next;
>> + else
>> + *frame_list =3D cur->next;
>> + av_free(cur);
>> + return 0;
>> + }
>> + prev =3D cur;
>> + cur =3D cur->next;
>> + }
>> +
>> + return AVERROR(EINVAL);
>> +}
>> +
>> +static int free_frame_list(FrameList **frame_list)
>> +{
>> + FrameList *cur =3D *frame_list;
>> + while (cur) {
>> + FrameList *next =3D cur->next;
>> + av_frame_free(&cur->frame);
>> + av_free(cur);
>> + cur =3D next;
>> + }
>> + *frame_list =3D NULL;
>> + return 0;
>> +}
>> +
>> +static FrameList* get_frame_from_frame_list(FrameList *frame_list,
>> + unsigned frame_number)
>> +{
>> + FrameList *cur =3D frame_list;
>> + while (cur) {
>> + if (cur->frame_number =3D=3D frame_number)
>> + return cur;
>> + cur =3D cur->next;
>> + }
>> + return NULL;
>> +}
>> +
>
> Would be great if we didn't need to invent a data structure here. I
> guess av_fifo is no good here because metadata callbacks are not
> guaranteed to come in order?
Yes, metadata callbacks do not arrive in order. I looked through libavutil and couldn’t find a data structure for this use case. I am open to suggestions if we would prefer not to use this data structure.
>
>> +static void set_meta(void *data, VmafMetadata *metadata)
>> +{
>> + int err =3D 0;
>> + FrameList *current_frame =3D NULL;
>> + CallbackStruct *cb =3D data;
>> + char value[128], key[128];
>> + snprintf(value, sizeof(value), "%0.2f", metadata->score);
>> + snprintf(key, sizeof(key), "%s.%d", metadata->feature_name, metadata=
> ->picture_index);
>> +
>> + current_frame =3D get_frame_from_frame_list(cb->frame_list, metadata=
> ->picture_index);
>> + if (!current_frame) {
>> + av_log(NULL, AV_LOG_ERROR, "could not find frame with index: %d\=
> n",
>> + metadata->picture_index);
>> + return;
>> + }
>> +
>> + err =3D av_dict_set(&current_frame->frame->metadata, key, value, 0);
>> + if (err < 0)
>> + av_log(NULL, AV_LOG_ERROR, "could not set metadata: %s\n", key);
>> +
>> + current_frame->propagated_handlers_cnt++;
>> +
>> + if (current_frame->propagated_handlers_cnt =3D=3D cb->s->metadata_cf=
> g_list.metadata_cfg_cnt) {
>> + FrameList *cur =3D cb->frame_list;
>> + // This code block allows to send frames monotonically
>> + while(cur && cur->frame_number <=3D metadata->picture_index) {
>> + if (cur->propagated_handlers_cnt =3D=3D cb->s->metadata_cfg_=
> list.metadata_cfg_cnt) {
>> + FrameList *next;
>> + // Check outlink is closed
>> + if (!cb->s->outlink_eof) {
>> + av_log(cb->s->fs.parent, AV_LOG_DEBUG, "VMAF feature=
> : %d, score: %f\n", cur->frame_number, metadata->score);
>> + cb->s->eof_frame =3D cur->frame_number;
>> + if(ff_filter_frame(cb->s->fs.parent->outputs[0], cur=
> ->frame))
>> + return;
>> + }
>> + next =3D cur->next;
>> + remove_from_frame_list(&cb->frame_list, cur->frame_numbe=
> r);
>> + cur =3D next;
>> + }
>> + else
>> + break;
>> + }
>> + }
>> +}
>> +#endif
>> +
>> static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bp=
> c)
>> {
>> const int bytes_per_value =3D bpc > 8 ? 2 : 1;
>> @@ -160,13 +319,28 @@ static int do_vmaf(FFFrameSync *fs)
>> return AVERROR(ENOMEM);
>> }
>>
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> + err =3D add_to_frame_list(&s->cb->frame_list, dist, s->frame_cnt);
>> + if (err) {
>> + av_log(s, AV_LOG_ERROR, "problem during add_to_frame_list.\n");
>> + return AVERROR(ENOMEM);
>> + }
>> +#endif
>> +
>> err =3D vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cn=
> t++);
>> if (err) {
>> av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
>> return AVERROR(EINVAL);
>> }
>>
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> + if (s->metadata_cfg_list.metadata_cfg_cnt)
>> + return 0;
>> + else
>> + return ff_filter_frame(ctx->outputs[0], dist);
>> +#else
>> return ff_filter_frame(ctx->outputs[0], dist);
>> +#endif
>> }
>>
>> static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt)
>> @@ -408,6 +582,83 @@ exit:
>> return err;
>> }
>>
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> +static int parse_metadata_handlers(AVFilterContext *ctx)
>> +{
>> + LIBVMAFContext *s =3D ctx->priv;
>> + AVDictionary **dict;
>> + unsigned dict_cnt;
>> + int err =3D 0;
>> +
>> + if (!s->metadata_feature_cfg)
>> + return 0;
>> +
>> + dict_cnt =3D 0;
>> + dict =3D delimited_dict_parse(s->metadata_feature_cfg, &dict_cnt);
>> + if (!dict) {
>> + av_log(ctx, AV_LOG_ERROR,
>> + "could not parse metadata feature config: %s\n",
>> + s->metadata_feature_cfg);
>> + return AVERROR(EINVAL);
>> + }
>> +
>> + for (unsigned i =3D 0; i < dict_cnt; i++) {
>> + VmafMetadataConfiguration *metadata_cfg =3D av_calloc(1, sizeof(=
> *metadata_cfg));
>> + const AVDictionaryEntry *e =3D NULL;
>> + char *feature_name =3D NULL;
>> +
>> + while (e =3D av_dict_iterate(dict[i], e)) {
>> + if (!strcmp(e->key, "name")) {
>> + metadata_cfg->feature_name =3D av_strdup(e->value);
>> + continue;
>> + }
>> + }
>> +
>> + metadata_cfg->data =3D s->cb;
>> + metadata_cfg->callback =3D &set_meta;
>> +
>> + err =3D vmaf_register_metadata_handler(s->vmaf, *metadata_cfg);
>> + if (err) {
>> + av_log(ctx, AV_LOG_ERROR,
>> + "problem during vmaf_register_metadata_handler: %s\n"=
> ,
>> + feature_name);
>> + goto exit;
>> + }
>> +
>> + s->metadata_cfg_list.metadata_cfgs =3D av_realloc(s->metadata_cf=
> g_list.metadata_cfgs,
>> + (s->metadata_cfg_list.metad=
> ata_cfg_cnt + 1) *
>> + sizeof(*s->metadata_cfg_lis=
> t.metadata_cfgs));
>> + if (!s->metadata_cfg_list.metadata_cfgs) {
>> + err =3D AVERROR(ENOMEM);
>> + goto exit;
>> + }
>> +
>> + s->metadata_cfg_list.metadata_cfgs[s->metadata_cfg_list.metadata=
> _cfg_cnt++] =3D *metadata_cfg;
>> + }
>> +
>> +exit:
>> + for (unsigned i =3D 0; i < dict_cnt; i++) {
>> + if (dict[i])
>> + av_dict_free(&dict[i]);
>> + }
>> + av_free(dict);
>> + return err;
>> +}
>> +
>> +static int init_metadata(AVFilterContext *ctx)
>> +{
>> + LIBVMAFContext *s =3D ctx->priv;
>> +
>> + s->cb =3D av_calloc(1, sizeof(CallbackStruct));
>> + if (!s->cb)
>> + return AVERROR(ENOMEM);
>> +
>> + s->cb->s =3D s;
>> +
>> + return 0;
>> +}
>> +#endif
>> +
>> static enum VmafLogLevel log_level_map(int log_level)
>> {
>> switch (log_level) {
>> @@ -441,6 +692,16 @@ static av_cold int init(AVFilterContext *ctx)
>> if (err)
>> return AVERROR(EINVAL);
>>
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> + err =3D init_metadata(ctx);
>> + if (err)
>> + return err;
>> +
>> + err =3D parse_metadata_handlers(ctx);
>> + if (err)
>> + return err;
>> +#endif
>> +
>> err =3D parse_models(ctx);
>> if (err)
>> return err;
>> @@ -518,6 +779,38 @@ static int config_output(AVFilterLink *outlink)
>> static int activate(AVFilterContext *ctx)
>> {
>> LIBVMAFContext *s =3D ctx->priv;
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> + // There are 2 cases for metadata propagation:
>> + // 1. Where the case that outlink closes
>> + // 2. Where inlink closes
>> + // Case 1:
>> + // In this case we need check outlink somehow for the status in ev=
> ery iteration.
>> + // If outlink is not wanting frame anymore, we need to proceed wit=
> h uninit with setting inlink.
>> + // But nature of multithreading settting eof inside the activate c=
> all can make sync issues and
>> + // can lead to extra propagated frames. Atomic variables are used =
> to avoid this.
>> + // Case 2:
>> + // This case relatively easy to handle. Because of calculation of =
> vmaf score takes time
>> + // So `do_vmaf` buffers many of frames before sending to outlink t=
> hat causes
>> + // premature close of outlink.
>> + // Checking inlink status is enough and if inlink =3D=3D eof flush=
> ing vmaf is enough for this.
>> + int64_t pts;
>> + int status, ret =3D 0;
>> +
>> + if (ff_outlink_get_status(ctx->outputs[0]))
>> + s->outlink_eof =3D 1;
>> +
>> + if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts) &&
>> + ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) {
>> + if (!s->flushed) {
>> + ret =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
>> + if (ret)
>> + av_log(ctx, AV_LOG_ERROR,
>> + "problem flushing libvmaf context.\n");
>> + else
>> + s->flushed =3D 1;
>> + }
>> + }
>> +#endif
>> return ff_framesync_activate(&s->fs);
>> }
>>
>> @@ -556,21 +849,52 @@ static av_cold void uninit(AVFilterContext *ctx)
>> LIBVMAFContext *s =3D ctx->priv;
>> int err =3D 0;
>>
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> + if (!s->outlink_eof)
>> + s->outlink_eof =3D 1;
>> +#endif
>> +
>> ff_framesync_uninit(&s->fs);
>>
>> if (!s->frame_cnt)
>> goto clean_up;
>>
>> - err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
>> + if (!s->flushed) {
>> + err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
>> + if (err) {
>> + av_log(ctx, AV_LOG_ERROR,
>> + "problem flushing libvmaf context.\n");
>> + } else
>> + s->flushed =3D 1;
>> + }
>> +
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> + if (s->metadata_cfg_list.metadata_cfgs) {
>> + for (unsigned i =3D 0; i < s->metadata_cfg_list.metadata_cfg_cnt=
> ; i++) {
>> + av_free(s->metadata_cfg_list.metadata_cfgs[i].feature_name);
>> + }
>> + av_free(s->metadata_cfg_list.metadata_cfgs);
>> + }
>> +
>> + err =3D free_frame_list(&s->cb->frame_list);
>> if (err) {
>> av_log(ctx, AV_LOG_ERROR,
>> - "problem flushing libvmaf context.\n");
>> + "problem freeing frame list.\n");
>> }
>> +#endif
>>
>> for (unsigned i =3D 0; i < s->model_cnt; i++) {
>> double vmaf_score;
>> +
>> +#if CONFIG_LIBVMAF_METADATA_ENABLED
>> + err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(=
> s->pool),
>> + &vmaf_score, 0, s->eof_frame);
>> + av_log(ctx, AV_LOG_DEBUG, "frame: %d frame_cnt %d\n", s->eof_fra=
> me, s->frame_cnt - 1);
>> +#else
>> err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(=
> s->pool),
>> &vmaf_score, 0, s->frame_cnt - 1);
>> +#endif
>> +
>> if (err) {
>> av_log(ctx, AV_LOG_ERROR,
>> "problem getting pooled vmaf score.\n");
>> --
>> 2.45.2
>>
>
> Thanks,
> Kyle
Thanks,
Yigithan
More information about the ffmpeg-devel
mailing list