[FFmpeg-devel] [PATCH v2 1/2] avcodec/libvpxenc: add VP9 temporal scalability encoding option
Wonkap Jang
wonkap at google.com
Wed Dec 18 01:05:44 EET 2019
This commit reuses the VP8 temporal scalability configuration options
to enable temporal scalability for VP9 as well. It also adds a way to
select one of three preset temporal layering structures (refer to the
documentation for more detail) that can be used in offline encoding.
---
libavcodec/libvpxenc.c | 237 +++++++++++++++++++++++++++++++++++++----
1 file changed, 219 insertions(+), 18 deletions(-)
diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index 721a052641..7612551a56 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -100,7 +100,9 @@ typedef struct VPxEncoderContext {
int rc_undershoot_pct;
int rc_overshoot_pct;
- char *vp8_ts_parameters;
+ char *vpx_ts_parameters;
+ int *ts_layer_flags;
+ int current_temporal_idx;
// VP9-only
int lossless;
@@ -137,6 +139,7 @@ static const char *const ctlidstr[] = {
[VP8E_SET_CQ_LEVEL] = "VP8E_SET_CQ_LEVEL",
[VP8E_SET_MAX_INTRA_BITRATE_PCT] = "VP8E_SET_MAX_INTRA_BITRATE_PCT",
[VP8E_SET_SHARPNESS] = "VP8E_SET_SHARPNESS",
+ [VP8E_SET_TEMPORAL_LAYER_ID] = "VP8E_SET_TEMPORAL_LAYER_ID",
#if CONFIG_LIBVPX_VP9_ENCODER
[VP9E_SET_LOSSLESS] = "VP9E_SET_LOSSLESS",
[VP9E_SET_TILE_COLUMNS] = "VP9E_SET_TILE_COLUMNS",
@@ -144,6 +147,9 @@ static const char *const ctlidstr[] = {
[VP9E_SET_FRAME_PARALLEL_DECODING] =
"VP9E_SET_FRAME_PARALLEL_DECODING",
[VP9E_SET_AQ_MODE] = "VP9E_SET_AQ_MODE",
[VP9E_SET_COLOR_SPACE] = "VP9E_SET_COLOR_SPACE",
+ [VP9E_SET_SVC_LAYER_ID] = "VP9E_SET_SVC_LAYER_ID",
+ [VP9E_SET_SVC_PARAMETERS] = "VP9E_SET_SVC_PARAMETERS",
+ [VP9E_SET_SVC] = "VP9E_SET_SVC",
#if VPX_ENCODER_ABI_VERSION >= 11
[VP9E_SET_COLOR_RANGE] = "VP9E_SET_COLOR_RANGE",
#endif
@@ -223,8 +229,16 @@ static av_cold void dump_enc_cfg(AVCodecContext *avctx,
" %*s%u\n", width, "ts_number_layers:", cfg->ts_number_layers);
av_log(avctx, level,
"\n %*s", width, "ts_target_bitrate:");
- for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
- av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
+ if (avctx->codec_id == AV_CODEC_ID_VP8) {
+ for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+ av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
+ }
+#if CONFIG_LIBVPX_VP9_ENCODER
+ if (avctx->codec_id == AV_CODEC_ID_VP9) {
+ for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+ av_log(avctx, level, "%u ", cfg->layer_target_bitrate[i]);
+ }
+#endif
av_log(avctx, level, "\n");
av_log(avctx, level,
"\n %*s", width, "ts_rate_decimator:");
@@ -346,6 +360,8 @@ static av_cold int vpx_free(AVCodecContext *avctx)
}
#endif
+ av_freep(&ctx->ts_layer_flags);
+
vpx_codec_destroy(&ctx->encoder);
if (ctx->is_alpha) {
vpx_codec_destroy(&ctx->encoder_alpha);
@@ -370,23 +386,154 @@ static void vp8_ts_parse_int_array(int *dest, char
*value, size_t value_len, int
}
}
-static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char *key,
char *value)
+/**
+ * Configure one of the predefined temporal layering structures.
+ *
+ * For a known mode this fills the temporal scalability fields of cfg
+ * (ts_number_layers, ts_periodicity, ts_rate_decimator, ts_layer_id),
+ * stores the pattern length in *flag_periodicity and writes one set of
+ * encode flags per frame of the pattern into layer_flags.
+ *
+ * Modes:
+ *   2 - 2 layers, 2-frame period.
+ *   3 - 3 layers, 4-frame period, one reference frame.
+ *   4 - 3 layers, 4-frame period, like mode 3 with an added dependency
+ *       between the two TL2 frames.
+ *
+ * Any other mode leaves cfg, layer_flags and *flag_periodicity untouched;
+ * the caller may be relying on externally supplied flags in that case.
+ *
+ * @param layering_mode    preset selector, see above
+ * @param cfg              encoder configuration to update
+ * @param layer_flags      receives the per-frame flags; up to 4 entries
+ *                         are written
+ * @param flag_periodicity receives the number of entries written
+ */
+static void set_temporal_layer_pattern(int layering_mode,
+                                       vpx_codec_enc_cfg_t *cfg,
+                                       int *layer_flags,
+                                       int *flag_periodicity)
+{
+    /* Temporal layer of each frame within one period. */
+    static const int two_layer_ids[2]   = { 0, 1 };
+    static const int three_layer_ids[4] = { 0, 2, 1, 2 };
+
+    /* 0=L, 1=GF, 2=ARF: flag groups shared by all presets. */
+    const int no_ref_gf_arf = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
+    const int no_upd_gf_arf = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+
+    if (layering_mode == 2) {
+        cfg->ts_number_layers     = 2;
+        cfg->ts_periodicity       = 2;
+        *flag_periodicity         = 2;
+        cfg->ts_rate_decimator[0] = 2;
+        cfg->ts_rate_decimator[1] = 1;
+        memcpy(cfg->ts_layer_id, two_layer_ids, sizeof(two_layer_ids));
+
+        layer_flags[0] = no_ref_gf_arf | no_upd_gf_arf;
+        layer_flags[1] = no_ref_gf_arf | no_upd_gf_arf |
+                         VP8_EFLAG_NO_UPD_LAST;
+        return;
+    }
+
+    /* Unknown mode: keep whatever the caller has set up. */
+    if (layering_mode != 3 && layering_mode != 4)
+        return;
+
+    /* Modes 3 and 4 share the layer layout and differ only in the flags
+     * of the two TL2 frames (pattern positions 1 and 3). */
+    cfg->ts_number_layers     = 3;
+    cfg->ts_periodicity       = 4;
+    *flag_periodicity         = 4;
+    cfg->ts_rate_decimator[0] = 4;
+    cfg->ts_rate_decimator[1] = 2;
+    cfg->ts_rate_decimator[2] = 1;
+    memcpy(cfg->ts_layer_id, three_layer_ids, sizeof(three_layer_ids));
+
+    layer_flags[0] = no_ref_gf_arf | no_upd_gf_arf;
+    layer_flags[2] = no_ref_gf_arf |
+                     VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+
+    if (layering_mode == 3) {
+        layer_flags[1] = no_ref_gf_arf | no_upd_gf_arf |
+                         VP8_EFLAG_NO_UPD_LAST;
+        layer_flags[3] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
+                         VP8_EFLAG_NO_UPD_LAST | no_upd_gf_arf;
+    } else {
+        layer_flags[1] = no_ref_gf_arf |
+                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+        layer_flags[3] = VP8_EFLAG_NO_REF_LAST |
+                         VP8_EFLAG_NO_UPD_LAST | no_upd_gf_arf;
+    }
+}
+
+/**
+ * Apply one key=value pair from the ts-parameters option string to the
+ * encoder configuration.
+ *
+ * Recognized keys: ts_number_layers, ts_target_bitrate, ts_rate_decimator,
+ * ts_periodicity, ts_layer_id and ts_layering_mode. Other keys are
+ * silently ignored.
+ *
+ * When built with the VP9 encoder, every call also forces
+ * temporal_layering_mode to 1 and ss_number_layers to 1.
+ *
+ * @param ctx      encoder context; receives the per-frame flag table when
+ *                 a ts_layering_mode preset is requested
+ * @param enccfg   libvpx encoder configuration to update
+ * @param key      option name
+ * @param value    option value as a string
+ * @param codec_id selects where ts_target_bitrate is stored:
+ *                 ts_target_bitrate for VP8, layer_target_bitrate for VP9
+ * @return 0 on success, -1 if value is empty, AVERROR(ENOMEM) if the flag
+ *         table cannot be allocated
+ */
+static int vpx_ts_param_parse(VPxContext *ctx,
+                              struct vpx_codec_enc_cfg *enccfg,
+                              char *key, char *value,
+                              const enum AVCodecID codec_id)
 {
     size_t value_len = strlen(value);
+    int ts_layering_mode = 0;
     if (!value_len)
         return -1;
     if (!strcmp(key, "ts_number_layers"))
         enccfg->ts_number_layers = strtoul(value, &value, 10);
-    else if (!strcmp(key, "ts_target_bitrate"))
-        vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value, value_len, VPX_TS_MAX_LAYERS);
-    else if (!strcmp(key, "ts_rate_decimator"))
-        vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len, VPX_TS_MAX_LAYERS);
-    else if (!strcmp(key, "ts_periodicity"))
+    else if (!strcmp(key, "ts_target_bitrate")) {
+        if (codec_id == AV_CODEC_ID_VP8)
+            vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value,
+                                   value_len, VPX_TS_MAX_LAYERS);
+#if CONFIG_LIBVPX_VP9_ENCODER
+        if (codec_id == AV_CODEC_ID_VP9)
+            vp8_ts_parse_int_array(enccfg->layer_target_bitrate, value,
+                                   value_len, VPX_TS_MAX_LAYERS);
+#endif
+    } else if (!strcmp(key, "ts_rate_decimator")) {
+        vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value,
+                               value_len, VPX_TS_MAX_LAYERS);
+    } else if (!strcmp(key, "ts_periodicity")) {
         enccfg->ts_periodicity = strtoul(value, &value, 10);
-    else if (!strcmp(key, "ts_layer_id"))
+    } else if (!strcmp(key, "ts_layer_id")) {
         vp8_ts_parse_int_array(enccfg->ts_layer_id, value, value_len, VPX_TS_MAX_PERIODICITY);
+    } else if (!strcmp(key, "ts_layering_mode")) {
+        /* Selects one of the predefined temporal structures in
+         * set_temporal_layer_pattern(). Parse as decimal: with any base
+         * below 5 the digit '4' is rejected and mode 4 can never be
+         * selected. */
+        ts_layering_mode = strtoul(value, &value, 10);
+    }
+
+#if CONFIG_LIBVPX_VP9_ENCODER
+    /* Only bypass mode is supported for now. */
+    enccfg->temporal_layering_mode = 1;
+    /* Keep spatial scalability off; not supported yet. */
+    enccfg->ss_number_layers = 1;
+#endif
+    if (ts_layering_mode) {
+        /* ts_layering_mode should come last in the ts-parameters string:
+         * a preset overwrites the layer count, periodicity, rate
+         * decimators and layer ids configured by the other keys. */
+
+        /* set_temporal_layer_pattern() takes an int pointer while
+         * ts_periodicity is unsigned, so hand it a local instead of a
+         * mismatched pointer. The function stores the same value in
+         * enccfg->ts_periodicity itself, so nothing is copied back. */
+        int flag_periodicity = enccfg->ts_periodicity;
+
+        /* Drop any table left by an earlier call, and zero the new one so
+         * that an unknown mode does not leave uninitialized flags. */
+        av_freep(&ctx->ts_layer_flags);
+        ctx->ts_layer_flags = av_mallocz(VPX_TS_MAX_PERIODICITY *
+                                         sizeof(*ctx->ts_layer_flags));
+        if (!ctx->ts_layer_flags)
+            return AVERROR(ENOMEM);
+        set_temporal_layer_pattern(ts_layering_mode, enccfg,
+                                   ctx->ts_layer_flags, &flag_periodicity);
+    }
     return 0;
 }
@@ -590,6 +737,7 @@ static av_cold int vpx_init(AVCodecContext *avctx,
vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420;
#if CONFIG_LIBVPX_VP9_ENCODER
vpx_codec_caps_t codec_caps = vpx_codec_get_caps(iface);
+ vpx_svc_extra_cfg_t svc_params;
#endif
av_log(avctx, AV_LOG_INFO, "%s\n", vpx_codec_version_str());
@@ -611,8 +759,8 @@ static av_cold int vpx_init(AVCodecContext *avctx,
}
#endif
- if(!avctx->bit_rate)
- if(avctx->rc_max_rate || avctx->rc_buffer_size ||
avctx->rc_initial_buffer_occupancy) {
+ if (!avctx->bit_rate)
+ if (avctx->rc_max_rate || avctx->rc_buffer_size ||
avctx->rc_initial_buffer_occupancy) {
av_log( avctx, AV_LOG_ERROR, "Rate control parameters set
without a bitrate\n");
return AVERROR(EINVAL);
}
@@ -648,6 +796,9 @@ static av_cold int vpx_init(AVCodecContext *avctx,
if (avctx->bit_rate) {
enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1, 1000,
AV_ROUND_NEAR_INF);
+#if CONFIG_LIBVPX_VP9_ENCODER
+ enccfg.ss_target_bitrate[0] = enccfg.rc_target_bitrate;
+#endif
} else {
// Set bitrate to default value. Also sets CRF to default if
needed.
set_vpx_defaults(avctx, &enccfg);
@@ -757,13 +908,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
enccfg.g_error_resilient = ctx->error_resilient || ctx->flags &
VP8F_ERROR_RESILIENT;
- if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8 &&
ctx->vp8_ts_parameters) {
+ if (ctx->vpx_ts_parameters) {
AVDictionary *dict = NULL;
AVDictionaryEntry* en = NULL;
- if (!av_dict_parse_string(&dict, ctx->vp8_ts_parameters, "=", ":",
0)) {
+ if (!av_dict_parse_string(&dict, ctx->vpx_ts_parameters, "=", ":",
0)) {
while ((en = av_dict_get(dict, "", en,
AV_DICT_IGNORE_SUFFIX))) {
- if (vp8_ts_param_parse(&enccfg, en->key, en->value) < 0)
+ if (vpx_ts_param_parse(ctx, &enccfg, en->key, en->value,
avctx->codec_id) < 0)
av_log(avctx, AV_LOG_WARNING,
"Error parsing option '%s = %s'.\n",
en->key, en->value);
@@ -780,7 +931,21 @@ FF_ENABLE_DEPRECATION_WARNINGS
log_encoder_error(avctx, "Failed to initialize encoder");
return AVERROR(EINVAL);
}
-
+#if CONFIG_LIBVPX_VP9_ENCODER
+ if (avctx->codec_id == AV_CODEC_ID_VP9 && enccfg.ts_number_layers > 1)
{
+ memset(&svc_params, 0, sizeof(svc_params));
+ for (int i = 0; i < enccfg.ts_number_layers; ++i) {
+ svc_params.max_quantizers[i] = enccfg.rc_max_quantizer;
+ svc_params.min_quantizers[i] = enccfg.rc_min_quantizer;
+ }
+ svc_params.scaling_factor_num[0] = enccfg.g_h;
+ svc_params.scaling_factor_den[0] = enccfg.g_h;
+#if VPX_ENCODER_ABI_VERSION >= 12
+ codecctl_int(avctx, VP9E_SET_SVC, 1);
+ codecctl_intp(avctx, VP9E_SET_SVC_PARAMETERS, (int *) &svc_params);
+#endif
+ }
+#endif
if (ctx->is_alpha) {
enccfg_alpha = enccfg;
res = vpx_codec_enc_init(&ctx->encoder_alpha, iface,
&enccfg_alpha, flags);
@@ -1328,6 +1493,9 @@ static int vpx_encode(AVCodecContext *avctx, AVPacket
*pkt,
int64_t timestamp = 0;
int res, coded_size;
vpx_enc_frame_flags_t flags = 0;
+ const struct vpx_codec_enc_cfg *enccfg = ctx->encoder.config.enc;
+ vpx_svc_layer_id_t layer_id;
+ int layer_id_valid = 0;
if (frame) {
const AVFrameSideData *sd = av_frame_get_side_data(frame,
AV_FRAME_DATA_REGIONS_OF_INTEREST);
@@ -1375,6 +1543,38 @@ static int vpx_encode(AVCodecContext *avctx,
AVPacket *pkt,
}
}
+ // this is for encoding with preset temporal layering patterns defined
in
+ // set_temporal_layer_pattern function.
+ if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
+ if (flags & VPX_EFLAG_FORCE_KF) {
+ // keyframe, reset temporal layering.
+ ctx->current_temporal_idx = 0;
+ flags = VPX_EFLAG_FORCE_KF;
+ } else {
+ flags = 0;
+ }
+
+ /* get the flags from the temporal layer configuration. */
+ flags |= ctx->ts_layer_flags[ctx->current_temporal_idx];
+
+ memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
+ layer_id.spatial_layer_id = 0;
+ layer_id.temporal_layer_id =
enccfg->ts_layer_id[ctx->current_temporal_idx];
+ layer_id.temporal_layer_id_per_spatial[0] =
layer_id.temporal_layer_id;
+ layer_id_valid = 1;
+ }
+
+ if (layer_id_valid) {
+ if (avctx->codec_id == AV_CODEC_ID_VP8) {
+ codecctl_int(avctx, VP8E_SET_TEMPORAL_LAYER_ID,
layer_id.temporal_layer_id);
+ }
+#if CONFIG_LIBVPX_VP9_ENCODER
+ else if (avctx->codec_id == AV_CODEC_ID_VP9) {
+ codecctl_intp(avctx, VP9E_SET_SVC_LAYER_ID, (int *) &layer_id);
+ }
+#endif
+ }
+
res = vpx_codec_encode(&ctx->encoder, rawimg, timestamp,
avctx->ticks_per_frame, flags, ctx->deadline);
if (res != VPX_CODEC_OK) {
@@ -1404,6 +1604,8 @@ static int vpx_encode(AVCodecContext *avctx, AVPacket
*pkt,
}
av_base64_encode(avctx->stats_out, b64_size,
ctx->twopass_stats.buf,
ctx->twopass_stats.sz);
+ } else if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
+ ctx->current_temporal_idx = (ctx->current_temporal_idx + 1) %
enccfg->ts_periodicity;
}
*got_packet = !!coded_size;
@@ -1442,6 +1644,7 @@ static int vpx_encode(AVCodecContext *avctx, AVPacket
*pkt,
{ "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity),
AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE}, \
{ "undershoot-pct", "Datarate undershoot (min) target (%)",
OFFSET(rc_undershoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 100, VE }, \
{ "overshoot-pct", "Datarate overshoot (max) target (%)",
OFFSET(rc_overshoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, \
+ { "ts-parameters", "Temporal scaling configuration using a
:-separated list of key=value parameters", OFFSET(vpx_ts_parameters),
AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, \
#define LEGACY_OPTIONS \
{"speed", "", offsetof(VPxContext, cpu_used), AV_OPT_TYPE_INT, {.i64 =
1}, -16, 16, VE}, \
@@ -1461,8 +1664,6 @@ static const AVOption vp8_options[] = {
{ "auto-alt-ref", "Enable use of alternate reference "
"frames (2-pass only)",
OFFSET(auto_alt_ref), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 2, VE},
{ "cpu-used", "Quality/Speed ratio modifier",
OFFSET(cpu_used), AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE},
- { "ts-parameters", "Temporal scaling configuration using a "
- ":-separated list of key=value parameters",
OFFSET(vp8_ts_parameters), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
LEGACY_OPTIONS
{ NULL }
};
--
2.24.1.735.g03f4e72817-goog
More information about the ffmpeg-devel
mailing list