[FFmpeg-devel] [PATCH v4] avcodec/libvpxenc: add VP9 temporal scalability encoding option
Wonkap Jang
wonkap at google.com
Thu Jan 16 20:31:21 EET 2020
Hi James,
On Thu, Jan 16, 2020 at 10:28 AM Wonkap Jang <wonkap at google.com> wrote:
> This commit reuses the VP8 configuration options to enable
> temporal scalability for VP9. It also adds a way to select one of
> three preset temporal structures (refer to the documentation for
> more detail) that can be used in offline encoding.
> ---
> doc/encoders.texi | 18 ++-
> libavcodec/libvpxenc.c | 252 +++++++++++++++++++++++++++++++++++++----
> 2 files changed, 244 insertions(+), 26 deletions(-)
>
> diff --git a/doc/encoders.texi b/doc/encoders.texi
> index 61e674cf96..88429aed4c 100644
> --- a/doc/encoders.texi
> +++ b/doc/encoders.texi
> @@ -1885,8 +1885,6 @@ Enable error resiliency features.
> Increase sharpness at the expense of lower PSNR.
> The valid range is [0, 7].
>
> -@item VP8-specific options
> -@table @option
> @item ts-parameters
> Sets the temporal scalability configuration using a :-separated list of
> key=value pairs. For example, to specify temporal scalability parameters
> @@ -1894,7 +1892,7 @@ with @code{ffmpeg}:
> @example
> ffmpeg -i INPUT -c:v libvpx -ts-parameters ts_number_layers=3:\
> ts_target_bitrate=250,500,1000:ts_rate_decimator=4,2,1:\
> -ts_periodicity=4:ts_layer_id=0,2,1,2 OUTPUT
> +ts_periodicity=4:ts_layer_id=0,2,1,2:ts_layering_mode=3 OUTPUT
> @end example
> Below is a brief explanation of each of the parameters, please
> refer to @code{struct vpx_codec_enc_cfg} in @code{vpx/vpx_encoder.h} for
> more
> @@ -1911,6 +1909,20 @@ Frame rate decimation factor for each temporal
> layer.
> Length of the sequence defining frame temporal layer membership.
> @item ts_layer_id
> Template defining the membership of frames to temporal layers.
> +@item ts_layering_mode
> +(optional) Selects the temporal structure from a set of pre-defined temporal layering modes.
> +The following modes are currently supported:
> +@table @option
> +@item 0
> +No temporal layering flags are provided internally;
> +the encoder relies on flags passed in via metadata in AVFrame.
> +@item 2
> +Two temporal layers. 0-1...
> +@item 3
> +Three temporal layers. 0-2-1-2...; with single reference frame.
> +@item 4
> +Same as option "3", except there is a dependency between
> +the two temporal layer 2 frames within the temporal period.
> @end table
> @end table
>
> diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
> index 0b8a070304..2497f83938 100644
> --- a/libavcodec/libvpxenc.c
> +++ b/libavcodec/libvpxenc.c
> @@ -100,7 +100,9 @@ typedef struct VPxEncoderContext {
> int rc_undershoot_pct;
> int rc_overshoot_pct;
>
> - AVDictionary *vp8_ts_parameters;
> + AVDictionary *vpx_ts_parameters;
> + int *ts_layer_flags;
> + int current_temporal_idx;
>
> // VP9-only
> int lossless;
> @@ -137,6 +139,7 @@ static const char *const ctlidstr[] = {
> [VP8E_SET_CQ_LEVEL] = "VP8E_SET_CQ_LEVEL",
> [VP8E_SET_MAX_INTRA_BITRATE_PCT] = "VP8E_SET_MAX_INTRA_BITRATE_PCT",
> [VP8E_SET_SHARPNESS] = "VP8E_SET_SHARPNESS",
> + [VP8E_SET_TEMPORAL_LAYER_ID] = "VP8E_SET_TEMPORAL_LAYER_ID",
> #if CONFIG_LIBVPX_VP9_ENCODER
> [VP9E_SET_LOSSLESS] = "VP9E_SET_LOSSLESS",
> [VP9E_SET_TILE_COLUMNS] = "VP9E_SET_TILE_COLUMNS",
> @@ -144,6 +147,11 @@ static const char *const ctlidstr[] = {
> [VP9E_SET_FRAME_PARALLEL_DECODING] =
> "VP9E_SET_FRAME_PARALLEL_DECODING",
> [VP9E_SET_AQ_MODE] = "VP9E_SET_AQ_MODE",
> [VP9E_SET_COLOR_SPACE] = "VP9E_SET_COLOR_SPACE",
> + [VP9E_SET_SVC_LAYER_ID] = "VP9E_SET_SVC_LAYER_ID",
> +#if VPX_ENCODER_ABI_VERSION >= 12
> + [VP9E_SET_SVC_PARAMETERS] = "VP9E_SET_SVC_PARAMETERS",
> +#endif
> + [VP9E_SET_SVC] = "VP9E_SET_SVC",
> #if VPX_ENCODER_ABI_VERSION >= 11
> [VP9E_SET_COLOR_RANGE] = "VP9E_SET_COLOR_RANGE",
> #endif
> @@ -221,10 +229,22 @@ static av_cold void dump_enc_cfg(AVCodecContext
> *avctx,
> width, "rc_overshoot_pct:", cfg->rc_overshoot_pct);
> av_log(avctx, level, "temporal layering settings\n"
> " %*s%u\n", width, "ts_number_layers:",
> cfg->ts_number_layers);
> - av_log(avctx, level,
> - "\n %*s", width, "ts_target_bitrate:");
> - for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> - av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
> + if (avctx->codec_id == AV_CODEC_ID_VP8) {
> + av_log(avctx, level,
> + "\n %*s", width, "ts_target_bitrate:");
> + for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> + av_log(avctx, level,
> + "%u ", cfg->ts_target_bitrate[i]);
> + }
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> + if (avctx->codec_id == AV_CODEC_ID_VP9) {
> + av_log(avctx, level,
> + "\n %*s", width, "layer_target_bitrate:")
> + for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> + av_log(avctx, level,
> + "%u ", cfg->layer_target_bitrate[i]);
> + }
> +#endif
> av_log(avctx, level, "\n");
> av_log(avctx, level,
> "\n %*s", width, "ts_rate_decimator:");
> @@ -346,6 +366,8 @@ static av_cold int vpx_free(AVCodecContext *avctx)
> }
> #endif
>
> + av_freep(&ctx->ts_layer_flags);
> +
> vpx_codec_destroy(&ctx->encoder);
> if (ctx->is_alpha) {
> vpx_codec_destroy(&ctx->encoder_alpha);
> @@ -370,23 +392,151 @@ static void vp8_ts_parse_int_array(int *dest, char
> *value, size_t value_len, int
> }
> }
>
> -static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char
> *key, char *value)
> +static void set_temporal_layer_pattern(int layering_mode,
> + vpx_codec_enc_cfg_t *cfg,
> + int *layer_flags,
> + int *flag_periodicity)
> +{
> + switch (layering_mode) {
> + case 2: {
> + /**
> + * 2-layers, 2-frame period.
> + */
> + int ids[2] = { 0, 1 };
> + cfg->ts_periodicity = 2;
> + *flag_periodicity = 2;
> + cfg->ts_number_layers = 2;
> + cfg->ts_rate_decimator[0] = 2;
> + cfg->ts_rate_decimator[1] = 1;
> + memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> + layer_flags[0] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> + layer_flags[1] =
> + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF |
> + VP8_EFLAG_NO_UPD_LAST |
> + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF;
> + break;
> + }
> + case 3: {
> + /**
> + * 3-layers structure with one reference frame.
> + * This works the same as temporal_layering_mode 3.
> + *
> + * 3-layers, 4-frame period.
> + */
> + int ids[4] = { 0, 2, 1, 2 };
> + cfg->ts_periodicity = 4;
> + *flag_periodicity = 4;
> + cfg->ts_number_layers = 3;
> + cfg->ts_rate_decimator[0] = 4;
> + cfg->ts_rate_decimator[1] = 2;
> + cfg->ts_rate_decimator[2] = 1;
> + memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> + /**
> + * 0=L, 1=GF, 2=ARF,
> + * Intra-layer prediction disabled.
> + */
> + layer_flags[0] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> + layer_flags[1] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
> + VP8_EFLAG_NO_UPD_ARF;
> + layer_flags[2] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
> + layer_flags[3] =
> + VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
> + VP8_EFLAG_NO_UPD_ARF;
> + break;
> + }
> + case 4: {
> + /**
> + * 3-layers structure.
> + * added dependency between the two TL2 frames (on top of case 3).
> + * 3-layers, 4-frame period.
> + */
> + int ids[4] = { 0, 2, 1, 2 };
> + cfg->ts_periodicity = 4;
> + *flag_periodicity = 4;
> + cfg->ts_number_layers = 3;
> + cfg->ts_rate_decimator[0] = 4;
> + cfg->ts_rate_decimator[1] = 2;
> + cfg->ts_rate_decimator[2] = 1;
> + memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> + /**
> + * 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
> + */
> + layer_flags[0] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> + layer_flags[1] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
> + layer_flags[2] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
> + layer_flags[3] =
> + VP8_EFLAG_NO_REF_LAST |
> + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
> + VP8_EFLAG_NO_UPD_ARF;
> + break;
> + }
> + default:
> + /**
> + * do not change the layer_flags or the flag_periodicity in this case;
> + * the code might be using externally supplied flags.
> + */
> + break;
> +
> + }
> +}
> +
> +static int vpx_ts_param_parse(VPxContext *ctx, struct vpx_codec_enc_cfg
> *enccfg,
> + char *key, char *value, enum AVCodecID
> codec_id)
> {
> size_t value_len = strlen(value);
> + int ts_layering_mode = 0;
>
> if (!value_len)
> return -1;
>
> if (!strcmp(key, "ts_number_layers"))
> enccfg->ts_number_layers = strtoul(value, &value, 10);
> - else if (!strcmp(key, "ts_target_bitrate"))
> - vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value,
> value_len, VPX_TS_MAX_LAYERS);
> - else if (!strcmp(key, "ts_rate_decimator"))
> - vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len,
> VPX_TS_MAX_LAYERS);
> - else if (!strcmp(key, "ts_periodicity"))
> + else if (!strcmp(key, "ts_target_bitrate")) {
> + if (codec_id == AV_CODEC_ID_VP8)
> + vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value,
> value_len, VPX_TS_MAX_LAYERS);
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> + if (codec_id == AV_CODEC_ID_VP9)
> + vp8_ts_parse_int_array(enccfg->layer_target_bitrate, value,
> value_len, VPX_TS_MAX_LAYERS);
> +#endif
> + } else if (!strcmp(key, "ts_rate_decimator")) {
> + vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value,
> value_len, VPX_TS_MAX_LAYERS);
> + } else if (!strcmp(key, "ts_periodicity")) {
> enccfg->ts_periodicity = strtoul(value, &value, 10);
> - else if (!strcmp(key, "ts_layer_id"))
> + } else if (!strcmp(key, "ts_layer_id")) {
> vp8_ts_parse_int_array(enccfg->ts_layer_id, value, value_len,
> VPX_TS_MAX_PERIODICITY);
> + } else if (!strcmp(key, "ts_layering_mode")) {
> + /* option for pre-defined temporal structures in function
> set_temporal_layer_pattern. */
> + ts_layering_mode = strtoul(value, &value, 4);
> + }
> +
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> + enccfg->temporal_layering_mode = 1; // only bypass mode is supported
> for now.
> + enccfg->ss_number_layers = 1; // TODO: add spatial scalability
> support.
> +#endif
> + if (ts_layering_mode) {
> + // make sure ts_layering_mode comes at the end of the ts-parameters string
> + // to ensure that the correct configuration is applied.
> + ctx->ts_layer_flags = av_malloc(sizeof(*ctx->ts_layer_flags) *
> VPX_TS_MAX_PERIODICITY);
> + set_temporal_layer_pattern(ts_layering_mode, enccfg,
> ctx->ts_layer_flags, &enccfg->ts_periodicity);
> + }
>
> return 0;
> }
> @@ -590,7 +740,9 @@ static av_cold int vpx_init(AVCodecContext *avctx,
> vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420;
> #if CONFIG_LIBVPX_VP9_ENCODER
> vpx_codec_caps_t codec_caps = vpx_codec_get_caps(iface);
> + vpx_svc_extra_cfg_t svc_params;
> #endif
> + AVDictionaryEntry* en = NULL;
>
> av_log(avctx, AV_LOG_INFO, "%s\n", vpx_codec_version_str());
> av_log(avctx, AV_LOG_VERBOSE, "%s\n", vpx_codec_build_config());
> @@ -648,6 +800,9 @@ static av_cold int vpx_init(AVCodecContext *avctx,
> if (avctx->bit_rate) {
> enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1,
> 1000,
> AV_ROUND_NEAR_INF);
> +#if CONFIG_LIBVPX_VP9_ENCODER
> + enccfg.ss_target_bitrate[0] = enccfg.rc_target_bitrate;
> +#endif
> } else {
> // Set bitrate to default value. Also sets CRF to default if
> needed.
> set_vpx_defaults(avctx, &enccfg);
> @@ -757,14 +912,11 @@ FF_ENABLE_DEPRECATION_WARNINGS
>
> enccfg.g_error_resilient = ctx->error_resilient || ctx->flags &
> VP8F_ERROR_RESILIENT;
>
> - if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8) {
> - AVDictionaryEntry* en = NULL;
> - while ((en = av_dict_get(ctx->vp8_ts_parameters, "", en,
> AV_DICT_IGNORE_SUFFIX))) {
> - if (vp8_ts_param_parse(&enccfg, en->key, en->value) < 0)
> - av_log(avctx, AV_LOG_WARNING,
> - "Error parsing option '%s = %s'.\n",
> - en->key, en->value);
> - }
> + while ((en = av_dict_get(ctx->vpx_ts_parameters, "", en,
> AV_DICT_IGNORE_SUFFIX))) {
> + if (vpx_ts_param_parse(ctx, &enccfg, en->key, en->value,
> avctx->codec_id) < 0)
> + av_log(avctx, AV_LOG_WARNING,
> + "Error parsing option '%s = %s'.\n",
> + en->key, en->value);
> }
>
> dump_enc_cfg(avctx, &enccfg);
> @@ -774,7 +926,21 @@ FF_ENABLE_DEPRECATION_WARNINGS
> log_encoder_error(avctx, "Failed to initialize encoder");
> return AVERROR(EINVAL);
> }
> -
> +#if CONFIG_LIBVPX_VP9_ENCODER
> + if (avctx->codec_id == AV_CODEC_ID_VP9 && enccfg.ts_number_layers >
> 1) {
> + memset(&svc_params, 0, sizeof(svc_params));
> + for (int i = 0; i < enccfg.ts_number_layers; ++i) {
> + svc_params.max_quantizers[i] = enccfg.rc_max_quantizer;
> + svc_params.min_quantizers[i] = enccfg.rc_min_quantizer;
> + }
> + svc_params.scaling_factor_num[0] = enccfg.g_h;
> + svc_params.scaling_factor_den[0] = enccfg.g_h;
> +#if VPX_ENCODER_ABI_VERSION >= 12
> + codecctl_int(avctx, VP9E_SET_SVC, 1);
> + codecctl_intp(avctx, VP9E_SET_SVC_PARAMETERS, (int *)&svc_params);
> +#endif
> + }
> +#endif
> if (ctx->is_alpha) {
> enccfg_alpha = enccfg;
> res = vpx_codec_enc_init(&ctx->encoder_alpha, iface,
> &enccfg_alpha, flags);
> @@ -1321,6 +1487,9 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> int64_t timestamp = 0;
> int res, coded_size;
> vpx_enc_frame_flags_t flags = 0;
> + const struct vpx_codec_enc_cfg *enccfg = ctx->encoder.config.enc;
> + vpx_svc_layer_id_t layer_id;
> + int layer_id_valid = 0;
>
> if (frame) {
> const AVFrameSideData *sd = av_frame_get_side_data(frame,
> AV_FRAME_DATA_REGIONS_OF_INTEREST);
> @@ -1368,6 +1537,42 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> }
> }
>
> + // this is for encoding with preset temporal layering patterns
> defined in
> + // set_temporal_layer_pattern function.
> + if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
> + if (flags & VPX_EFLAG_FORCE_KF) {
> + // keyframe, reset temporal layering.
> + ctx->current_temporal_idx = 0;
> + flags = VPX_EFLAG_FORCE_KF;
> + } else {
> + flags = 0;
> + }
> +
> + /* get the flags from the temporal layer configuration. */
> + flags |= ctx->ts_layer_flags[ctx->current_temporal_idx];
> +
> + memset(&layer_id, 0, sizeof(layer_id));
> +#if VPX_ENCODER_ABI_VERSION >= 12
> + layer_id.spatial_layer_id = 0;
> +#endif
> + layer_id.temporal_layer_id =
> enccfg->ts_layer_id[ctx->current_temporal_idx];
> +#ifdef VPX_CTRL_VP9E_SET_MAX_INTER_BITRATE_PCT
> + layer_id.temporal_layer_id_per_spatial[0] =
> layer_id.temporal_layer_id;
> +#endif
> + layer_id_valid = 1;
> + }
> +
> + if (layer_id_valid) {
> + if (avctx->codec_id == AV_CODEC_ID_VP8) {
> + codecctl_int(avctx, VP8E_SET_TEMPORAL_LAYER_ID,
> layer_id.temporal_layer_id);
> + }
> +#if CONFIG_LIBVPX_VP9_ENCODER && VPX_ENCODER_ABI_VERSION >= 12
> + else if (avctx->codec_id == AV_CODEC_ID_VP9) {
> + codecctl_intp(avctx, VP9E_SET_SVC_LAYER_ID, (int *)&layer_id);
> + }
> +#endif
> + }
> +
> res = vpx_codec_encode(&ctx->encoder, rawimg, timestamp,
> avctx->ticks_per_frame, flags, ctx->deadline);
> if (res != VPX_CODEC_OK) {
> @@ -1397,6 +1602,8 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> }
> av_base64_encode(avctx->stats_out, b64_size,
> ctx->twopass_stats.buf,
> ctx->twopass_stats.sz);
> + } else if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
> + ctx->current_temporal_idx = (ctx->current_temporal_idx + 1) %
> enccfg->ts_periodicity;
> }
>
> *got_packet = !!coded_size;
> @@ -1435,6 +1642,7 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> { "noise-sensitivity", "Noise sensitivity",
> OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE}, \
> { "undershoot-pct", "Datarate undershoot (min) target (%)",
> OFFSET(rc_undershoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 100, VE }, \
> { "overshoot-pct", "Datarate overshoot (max) target (%)",
> OFFSET(rc_overshoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, \
> + { "ts-parameters", "Temporal scaling configuration using a
> :-separated list of key=value parameters", OFFSET(vpx_ts_parameters),
> AV_OPT_TYPE_DICT, {.str=NULL}, 0, 0, VE}, \
>
> #define LEGACY_OPTIONS \
> {"speed", "", offsetof(VPxContext, cpu_used), AV_OPT_TYPE_INT, {.i64
> = 1}, -16, 16, VE}, \
> @@ -1454,8 +1662,6 @@ static const AVOption vp8_options[] = {
> { "auto-alt-ref", "Enable use of alternate reference "
> "frames (2-pass only)",
> OFFSET(auto_alt_ref), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 2, VE},
> { "cpu-used", "Quality/Speed ratio modifier",
> OFFSET(cpu_used), AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE},
> - { "ts-parameters", "Temporal scaling configuration using a "
> - ":-separated list of key=value parameters",
> OFFSET(vp8_ts_parameters), AV_OPT_TYPE_DICT, {.str=NULL}, 0, 0, VE},
> LEGACY_OPTIONS
> { NULL }
> };
> --
> 2.25.0.rc1.283.g88dfdc4193-goog
>
>
I have made the changes you requested. Please take a look.
Wonkap
More information about the ffmpeg-devel
mailing list