[FFmpeg-devel] [PATCH] avcodec/mpeg12dec: extract only one type of CC substream
Stefano Sabatini
stefasab at gmail.com
Mon Mar 11 18:27:17 EET 2024
On date Sunday 2024-03-10 19:18:56 -0500, Marth64 wrote:
> In MPEG-2 user data, there can be different types of Closed Captions
> formats embedded (A53, SCTE-20, or DVD). The current behavior of the
> CC extraction code in the MPEG-2 decoder is to not be aware of
> multiple formats if multiple exist, therefore allowing one format
> to overwrite the other during the extraction process since the CC
> extraction shares one output buffer for the normalized bytes.
>
> This causes sources that have two CC formats to produce flawed output.
> There exist real-world samples which contain both A53 and SCTE-20 captions
> in the same MPEG-2 stream, and that manifest this problem. Example of symptom:
> THANK YOU (expected) --> THTHANANK K YOYOUU (actual)
>
> The solution is to pick only the first CC substream observed with valid bytes,
> and ignore the other types. Additionally, provide an option for users
> to manually "force" a type in the event that this matters for a particular source.
>
> In tandem, I am working on an improvement to src_movie to allow passing decoder
> options (as src_movie via lavfi is the "de facto" way to extract CCs right now).
> This way, users can realize the newly added option.
>
> Signed-off-by: Marth64 <marth64 at proxyid.net>
> ---
> libavcodec/mpeg12dec.c | 49 +++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 46 insertions(+), 3 deletions(-)
>
> diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
> index 3a2f17e508..a42e1c661f 100644
> --- a/libavcodec/mpeg12dec.c
> +++ b/libavcodec/mpeg12dec.c
> @@ -62,6 +62,13 @@
>
> #define A53_MAX_CC_COUNT 2000
>
> +enum Mpeg2ClosedCaptionsFormat {
> + CC_FORMAT_AUTO,
> + CC_FORMAT_A53_PART4,
> + CC_FORMAT_SCTE20,
> + CC_FORMAT_DVD
> +};
> +
> typedef struct Mpeg1Context {
> MpegEncContext mpeg_enc_ctx;
> int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
> @@ -70,6 +77,7 @@ typedef struct Mpeg1Context {
> AVStereo3D stereo3d;
> int has_stereo3d;
> AVBufferRef *a53_buf_ref;
> + enum Mpeg2ClosedCaptionsFormat cc_format;
> uint8_t afd;
> int has_afd;
> int slice_count;
> @@ -1908,7 +1916,8 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
> {
> Mpeg1Context *s1 = avctx->priv_data;
>
> - if (buf_size >= 6 &&
> + if ((!s1->cc_format || s1->cc_format == CC_FORMAT_A53_PART4) &&
> + buf_size >= 6 &&
> p[0] == 'G' && p[1] == 'A' && p[2] == '9' && p[3] == '4' &&
> p[4] == 3 && (p[5] & 0x40)) {
> /* extract A53 Part 4 CC data */
> @@ -1927,9 +1936,15 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
> memcpy(s1->a53_buf_ref->data + old_size, p + 7, cc_count * UINT64_C(3));
>
> avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
> +
> + if (!s1->cc_format) {
> + s1->cc_format = CC_FORMAT_A53_PART4;
> + av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is A53 format\n");
> + }
> }
> return 1;
> - } else if (buf_size >= 2 &&
> + } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_SCTE20) &&
> + buf_size >= 2 &&
> p[0] == 0x03 && (p[1]&0x7f) == 0x01) {
> /* extract SCTE-20 CC data */
> GetBitContext gb;
> @@ -1974,9 +1989,15 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
> }
> }
> avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
> +
> + if (!s1->cc_format) {
> + s1->cc_format = CC_FORMAT_SCTE20;
> + av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is SCTE-20 format\n");
> + }
> }
> return 1;
> - } else if (buf_size >= 11 &&
> + } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_DVD) &&
> + buf_size >= 11 &&
> p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) {
> /* extract DVD CC data
> *
> @@ -2034,6 +2055,11 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
> }
> }
> avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
> +
> + if (!s1->cc_format) {
> + s1->cc_format = CC_FORMAT_DVD;
> + av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is DVD format\n");
> + }
nit: probably this might be factorized, either with a function or a
macro
> }
> return 1;
> }
> @@ -2598,11 +2624,28 @@ const FFCodec ff_mpeg1video_decoder = {
> },
> };
>
> +#define M2V_OFFSET(x) offsetof(Mpeg1Context, x)
> +#define M2V_PARAM AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
> +
> +static const AVOption mpeg2video_options[] = {
> + { "cc_format", "extract a specific Closed Captions format (0=auto)", M2V_OFFSET(cc_format), AV_OPT_TYPE_INT, { .i64 = 0 }, CC_FORMAT_AUTO, CC_FORMAT_DVD, M2V_PARAM },
> + { NULL }
ideally this should be documented in the doc, but it is missing for
this one so probably it's not needed
[...]
Patch looks good to me, but let's wait a few days in case there are
more comments.
More information about the ffmpeg-devel
mailing list