[FFmpeg-devel] [PATCH] avfilter: add (a)segment filters

Mon Aug 9 12:13:42 EEST 2021

Paul B Mahol (12021-08-02):
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
>  doc/filters.texi         |  31 ++++
>  libavfilter/Makefile     |   2 +
>  libavfilter/allfilters.c |   2 +
>  libavfilter/f_segment.c  | 328 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 363 insertions(+)
>  create mode 100644 libavfilter/f_segment.c
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 66c0f87e47..fa72eeed8b 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -25659,6 +25659,37 @@ A processing speed faster than what is possible without these filters cannot
>  be achieved.
>  @end table
>  
> +@section segment, asegment
> +
> +Split single input stream into multiple streams.

Please add:

"This filter does the opposite of concat."

The idea is to make sure the documentation contains as many key words as
possible so that users will find what they are looking for even if they do not
think with the same words as us.

> +
> +@code{segment} works on video frames, @code{asegment} on audio samples.
> +
> +This filter accepts the following options:
> +
> +@table @option
> +@item durations

> +Durations of input at which to split input. Each duration point is split by '|'.

"Durations of the output segments, separated by '|'.
The first segment will run from the beginning of the input stream.
The last segment will run until the end of the input stream."

But the code is inconsistent with this: it uses timestamps, not
durations. That needs to be fixed: see below.

> +
> +@item frames, samples

> +Exact frame/sample at which to do separations of input video/audio stream. Each point
> +is split by '|'.

"Exact frame/sample count to split the segments."

> +@end table
> +
> +@subsection Examples
> +
> +@itemize
> +
> +@item
> +Split input audio stream into three output audio streams, starting at start of input audio stream
> +and storing that in 1st output audio stream, then following at 60th second and storing that in 2nd
> +output audio stream, and last after 120th second of input audio stream store in 3rd output audio stream:
> +@example
> +asegment=durations="60 | 120"
> +@end example
> +
> +@end itemize
> +
>  @anchor{select}
>  @section select, aselect
>  
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 49c0c8342b..102ce7beff 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -78,6 +78,7 @@ OBJS-$(CONFIG_AREALTIME_FILTER)              += f_realtime.o
>  OBJS-$(CONFIG_ARESAMPLE_FILTER)              += af_aresample.o
>  OBJS-$(CONFIG_AREVERSE_FILTER)               += f_reverse.o
>  OBJS-$(CONFIG_ARNNDN_FILTER)                 += af_arnndn.o
> +OBJS-$(CONFIG_ASEGMENT_FILTER)               += f_segment.o
>  OBJS-$(CONFIG_ASELECT_FILTER)                += f_select.o
>  OBJS-$(CONFIG_ASENDCMD_FILTER)               += f_sendcmd.o
>  OBJS-$(CONFIG_ASETNSAMPLES_FILTER)           += af_asetnsamples.o
> @@ -404,6 +405,7 @@ OBJS-$(CONFIG_SCALE_VULKAN_FILTER)           += vf_scale_vulkan.o vulkan.o
>  OBJS-$(CONFIG_SCALE2REF_FILTER)              += vf_scale.o scale_eval.o
>  OBJS-$(CONFIG_SCDET_FILTER)                  += vf_scdet.o
>  OBJS-$(CONFIG_SCROLL_FILTER)                 += vf_scroll.o
> +OBJS-$(CONFIG_SEGMENT_FILTER)                += f_segment.o
>  OBJS-$(CONFIG_SELECT_FILTER)                 += f_select.o
>  OBJS-$(CONFIG_SELECTIVECOLOR_FILTER)         += vf_selectivecolor.o
>  OBJS-$(CONFIG_SENDCMD_FILTER)                += f_sendcmd.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index ae74f9c891..73040d2824 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -71,6 +71,7 @@ extern const AVFilter ff_af_arealtime;
>  extern const AVFilter ff_af_aresample;
>  extern const AVFilter ff_af_areverse;
>  extern const AVFilter ff_af_arnndn;
> +extern const AVFilter ff_af_asegment;
>  extern const AVFilter ff_af_aselect;
>  extern const AVFilter ff_af_asendcmd;
>  extern const AVFilter ff_af_asetnsamples;
> @@ -385,6 +386,7 @@ extern const AVFilter ff_vf_scale_vulkan;
>  extern const AVFilter ff_vf_scale2ref;
>  extern const AVFilter ff_vf_scdet;
>  extern const AVFilter ff_vf_scroll;
> +extern const AVFilter ff_vf_segment;
>  extern const AVFilter ff_vf_select;
>  extern const AVFilter ff_vf_selectivecolor;
>  extern const AVFilter ff_vf_sendcmd;
> diff --git a/libavfilter/f_segment.c b/libavfilter/f_segment.c
> new file mode 100644
> index 0000000000..aecadc8224
> --- /dev/null
> +++ b/libavfilter/f_segment.c
> @@ -0,0 +1,328 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <stdint.h>
> +
> +#include "libavutil/avstring.h"
> +#include "libavutil/channel_layout.h"
> +#include "libavutil/common.h"
> +#include "libavutil/log.h"
> +#include "libavutil/mathematics.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/parseutils.h"
> +#include "libavutil/samplefmt.h"
> +
> +#include "audio.h"
> +#include "avfilter.h"
> +#include "filters.h"
> +#include "internal.h"
> +
> +typedef struct SegmentContext {
> +    const AVClass *class;
> +

> +    char *durations_str;

timestamps_str

> +    char *points_str;

> +    int use_durations;

use_timestamps, here and everywhere.

> +
> +    int current_point;
> +    int nb_points;
> +
> +    int64_t *points;
> +
> +    int64_t current_sample;
> +
> +    int64_t start_pts;
> +} SegmentContext;
> +
> +static void count_points(char *item_str, int *nb_items)
> +{
> +    char *p;
> +
> +    if (!item_str)
> +        return;
> +
> +    *nb_items = 1;
> +    for (p = item_str; *p; p++) {
> +        if (*p == '|')
> +            (*nb_items)++;
> +    }
> +}
> +
> +static int parse_points(AVFilterContext *ctx, char *item_str, int nb_points, int64_t *points, int use_durations)
> +{
> +    char *arg, *p = item_str;
> +    char *saveptr = NULL;
> +    int ret = 0;

Allow users to specify durations and lengths as well as timestamps:

    int64_t cur = 0, ref;

> +
> +    for (int i = 0; i < nb_points; i++) {
> +        if (!(arg = av_strtok(p, "|", &saveptr)))
> +            return AVERROR(EINVAL);
> +
> +        p = NULL;

      ref = 0;
      if (*arg == '+') {
	  ref = cur;
	  arg++;
      }

> +
> +        if (use_durations) {
> +            ret = av_parse_time(&points[i], arg, 1);
> +        } else {

> +            if (sscanf(arg, "%"PRId64, &points[i]) != 1)

SCNd64, not PRId64: sscanf() needs the SCN* macros; the PRI* ones are for printf().

This does not test if there is junk at the end, but we need not care about that.

> +                ret = AVERROR(EINVAL);
> +        }
> +
> +        if (ret < 0) {
> +            av_log(ctx, AV_LOG_ERROR, "Invalid splits supplied: %s\n", arg);
> +            return AVERROR(EINVAL);
> +        }

    points[i] += ref;

> +    }
> +
> +    return 0;
> +}
> +
> +static av_cold int init(AVFilterContext *ctx, enum AVMediaType type)
> +{
> +    SegmentContext *s = ctx->priv;
> +    int ret;
> +
> +    s->start_pts = AV_NOPTS_VALUE;
> +
> +    if (s->durations_str)
> +        count_points(s->durations_str, &s->nb_points);
> +    else
> +        count_points(s->points_str, &s->nb_points);
> +    s->nb_points++;
> +
> +    s->points = av_calloc(s->nb_points, sizeof(*s->points));
> +    if (!s->points)
> +        return AVERROR(ENOMEM);
> +
> +    s->use_durations = s->durations_str != NULL;
> +    ret = parse_points(ctx, s->use_durations ? s->durations_str : s->points_str, s->nb_points - 1, s->points, s->use_durations);

You are duplicating the selection logic. Better:

    char *split_str;

    if (s->timestamps_str && s->points_str) {
	av_log(ctx, AV_LOG_ERROR, "Both timestamps and counts supplied.\n");
	return AVERROR(EINVAL);
    } else if (s->timestamps_str) {
	s->use_timestamps = 1;
	split_str = s->timestamps_str;
    } else if (s->points_str) {
	split_str = s->points_str;
    } else {
	av_log(ctx, AV_LOG_ERROR, "Neither timestamps nor counts supplied.\n");
	return AVERROR(EINVAL);
    }

> +    if (ret < 0)
> +        return ret;
> +
> +    s->points[s->nb_points - 1] = INT64_MAX;
> +
> +    for (int i = 0; i < s->nb_points; i++) {
> +        AVFilterPad pad = { 0 };
> +
> +        pad.type = type;
> +        pad.name = av_asprintf("output%d", i);
> +        if (!pad.name)
> +            return AVERROR(ENOMEM);
> +
> +        if ((ret = ff_insert_outpad(ctx, i, &pad)) < 0) {
> +            av_freep(&pad.name);
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    SegmentContext *s = ctx->priv;
> +    AVRational tb = inlink->time_base;
> +

> +    if (!s->use_durations)
> +        return 0;

I think it would be clearer if the rescaling logic was inside the if rather
than using an early return.

> +
> +    for (int i = 0; i < s->nb_points - 1; i++) {

> +        int64_t pts = av_rescale_q(s->points[i], AV_TIME_BASE_Q, tb);
> +
> +        s->points[i] = pts;

The intermediate variable and empty line are not very useful IMHO.

> +    }
> +
> +    return 0;
> +}
> +
> +static int activate(AVFilterContext *ctx)
> +{
> +    AVFilterLink *inlink = ctx->inputs[0];
> +    SegmentContext *s = ctx->priv;
> +    AVFrame *frame = NULL;
> +    int ret, status;
> +    int64_t pts;
> +
> +    for (int i = s->current_point; i < s->nb_points; i++) {
> +        FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[i], ctx);
> +    }
> +
> +    if (s->use_durations) {

> +        if ((ret = ff_inlink_consume_frame(inlink, &frame)) > 0) {

For audio, the split timestamp could be in the middle of the frame, so you need
to use ff_inlink_consume_samples() here too.

> +            if (s->start_pts == AV_NOPTS_VALUE)
> +                s->start_pts = frame->pts;

This is wrong: the initial timestamp of a stream is relevant for the user,
otherwise earlier parts of the code would have negated it.

> +
> +            while (frame->pts - s->start_pts >= s->points[s->current_point]) {
> +                ff_outlink_set_status(ctx->outputs[s->current_point], AVERROR_EOF, frame->pts);
> +                s->current_point++;
> +
> +                if (s->current_point >= s->nb_points) {
> +                    av_frame_free(&frame);

> +                    return AVERROR(EINVAL);

Since the last point is INT64_MAX, if you remove the start_pts logic this can
never happen.

> +                }
> +            }
> +
> +            ret = ff_filter_frame(ctx->outputs[s->current_point], frame);
> +        }
> +    } else {
> +        switch (inlink->type) {
> +        case AVMEDIA_TYPE_VIDEO:
> +            if ((ret = ff_inlink_consume_frame(inlink, &frame)) > 0) {
> +                if (inlink->frame_count_out - 1 >= s->points[s->current_point]) {
> +                    ff_outlink_set_status(ctx->outputs[s->current_point], AVERROR_EOF, frame->pts);
> +                    s->current_point++;
> +                }
> +
> +                if (s->current_point >= s->nb_points) {
> +                    av_frame_free(&frame);
> +                    return AVERROR(EINVAL);
> +                }
> +
> +                ret = ff_filter_frame(ctx->outputs[s->current_point], frame);
> +            }
> +            break;
> +        case AVMEDIA_TYPE_AUDIO:
> +            if ((ret = ff_inlink_consume_samples(inlink, 1,
> +                                                 FFMIN(s->points[s->current_point] - s->current_sample, INT_MAX),
> +                                                 &frame)) > 0) {
> +                s->current_sample += frame->nb_samples;
> +
> +                if (s->current_sample >= s->points[s->current_point]) {
> +                    ff_outlink_set_status(ctx->outputs[s->current_point], AVERROR_EOF, frame->pts);
> +                    s->current_point++;
> +                }
> +
> +                if (s->current_point >= s->nb_points) {
> +                    av_frame_free(&frame);
> +                    return AVERROR(EINVAL);
> +                }
> +
> +                ret = ff_filter_frame(ctx->outputs[s->current_point], frame);
> +            }
> +            break;
> +        }

There is a lot of duplicated code here.

Merge the consuming:

	case AUDIO:
	    max_samples = s->use_timestamps ? ... : ...;
	    ret = ff_inlink_consume_samples(...);
	case VIDEO:
	    ret = ff_inlink_consume_frame(...);

	if (ret < 0)
	    ...

Then merge the splitting by moving the test in a separate function:

	while (current_segment_finished(s)) {
	    ff_outlink_set_status(...);
	    s->current_point++;
	}
	ret = ff_filter_frame(...);

> +    }
> +
> +    if (ret < 0) {
> +        return ret;
> +    } else if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> +        for (int i = s->current_point; i < s->nb_points; i++)
> +            ff_outlink_set_status(ctx->outputs[i], status, pts);
> +        return 0;
> +    } else {
> +        for (int i = s->current_point; i < s->nb_points; i++) {
> +            if (ff_outlink_frame_wanted(ctx->outputs[i]))
> +                ff_inlink_request_frame(inlink);
> +        }
> +        return 0;
> +    }
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    SegmentContext *s = ctx->priv;
> +
> +    av_freep(&s->points);
> +
> +    for (unsigned i = 0; i < ctx->nb_outputs; i++)
> +        av_freep(&ctx->output_pads[i].name);
> +}
> +
> +#define OFFSET(x) offsetof(SegmentContext, x)
> +#define COMMON_OPTS \
> +    { "durations", "durations of input at which to split input", OFFSET(durations_str),  AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, \
> +
> +#if CONFIG_SEGMENT_FILTER
> +
> +static av_cold int video_init(AVFilterContext *ctx)
> +{
> +    return init(ctx, AVMEDIA_TYPE_VIDEO);
> +}
> +
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
> +static const AVOption segment_options[] = {
> +    { "frames", "frames at which to split input", OFFSET(points_str), AV_OPT_TYPE_STRING,  { .str = "25" }, 0, 0, FLAGS },
> +    COMMON_OPTS
> +    { NULL }
> +};
> +#undef FLAGS
> +
> +AVFILTER_DEFINE_CLASS(segment);
> +
> +static const AVFilterPad segment_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = config_input,
> +    },
> +    { NULL }
> +};
> +
> +const AVFilter ff_vf_segment = {
> +    .name        = "segment",
> +    .description = NULL_IF_CONFIG_SMALL("Segment video stream."),
> +    .init        = video_init,
> +    .uninit      = uninit,
> +    .priv_size   = sizeof(SegmentContext),
> +    .priv_class  = &segment_class,
> +    .activate    = activate,
> +    .inputs      = segment_inputs,
> +    .outputs     = NULL,
> +    .flags       = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
> +};
> +#endif // CONFIG_SEGMENT_FILTER
> +
> +#if CONFIG_ASEGMENT_FILTER
> +
> +static av_cold int audio_init(AVFilterContext *ctx)
> +{
> +    return init(ctx, AVMEDIA_TYPE_AUDIO);
> +}
> +
> +#define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
> +static const AVOption asegment_options[] = {
> +    { "samples", "samples at which to split input", OFFSET(points_str), AV_OPT_TYPE_STRING,  { .str = "44100" }, 0, 0, FLAGS },
> +    COMMON_OPTS
> +    { NULL }
> +};
> +#undef FLAGS
> +
> +AVFILTER_DEFINE_CLASS(asegment);
> +
> +static const AVFilterPad asegment_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_AUDIO,
> +        .config_props = config_input,
> +    },
> +    { NULL }
> +};
> +
> +const AVFilter ff_af_asegment = {
> +    .name        = "asegment",
> +    .description = NULL_IF_CONFIG_SMALL("Segment audio stream."),
> +    .init        = audio_init,
> +    .uninit      = uninit,
> +    .priv_size   = sizeof(SegmentContext),
> +    .priv_class  = &asegment_class,
> +    .activate    = activate,
> +    .inputs      = asegment_inputs,
> +    .outputs     = NULL,
> +    .flags       = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
> +};
> +#endif // CONFIG_ASEGMENT_FILTER

Regards,

-- 
  Nicolas George
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20210809/8c367e1e/attachment.sig>