[FFmpeg-devel] [PATCH] avcodec/mpeg12dec: extract only one type of CC substream

Mon Mar 11 18:27:17 EET 2024

On date Sunday 2024-03-10 19:18:56 -0500, Marth64 wrote:
> In MPEG-2 user data, there can be different types of Closed Captions
> formats embedded (A53, SCTE-20, or DVD). The current behavior of the
> CC extraction code in the MPEG-2 decoder is to not be aware of
> multiple formats if multiple exist, therefore allowing one format
> to overwrite the other during the extraction process since the CC
> extraction shares one output buffer for the normalized bytes.
> 
> This causes sources that have two CC formats to produce flawed output.
> There exist real-world samples which contain both A53 and SCTE-20 captions
> in the same MPEG-2 stream, and that manifest this problem. Example of symptom:
> THANK YOU (expected) --> THTHANANK K YOYOUU (actual)
> 
> The solution is to pick only the first CC substream observed with valid bytes,
> and ignore the other types. Additionally, provide an option for users
> to manually "force" a type in the event that this matters for a particular source.
> 
> In tandem, I am working on an improvement to src_movie to allow passing decoder
> options (as src_movie via lavfi is the "de facto" way to extract CCs right now).
> This way, users can realize the newly added option.
> 
> Signed-off-by: Marth64 <marth64 at proxyid.net>
> ---
>  libavcodec/mpeg12dec.c | 49 +++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 46 insertions(+), 3 deletions(-)
> 
> diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
> index 3a2f17e508..a42e1c661f 100644
> --- a/libavcodec/mpeg12dec.c
> +++ b/libavcodec/mpeg12dec.c
> @@ -62,6 +62,13 @@
>  
>  #define A53_MAX_CC_COUNT 2000
>  
> +enum Mpeg2ClosedCaptionsFormat {
> +    CC_FORMAT_AUTO,
> +    CC_FORMAT_A53_PART4,
> +    CC_FORMAT_SCTE20,
> +    CC_FORMAT_DVD
> +};
> +
>  typedef struct Mpeg1Context {
>      MpegEncContext mpeg_enc_ctx;
>      int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
> @@ -70,6 +77,7 @@ typedef struct Mpeg1Context {
>      AVStereo3D stereo3d;
>      int has_stereo3d;
>      AVBufferRef *a53_buf_ref;
> +    enum Mpeg2ClosedCaptionsFormat cc_format;
>      uint8_t afd;
>      int has_afd;
>      int slice_count;
> @@ -1908,7 +1916,8 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
>  {
>      Mpeg1Context *s1 = avctx->priv_data;
>  
> -    if (buf_size >= 6 &&
> +    if ((!s1->cc_format || s1->cc_format == CC_FORMAT_A53_PART4) &&
> +        buf_size >= 6 &&
>          p[0] == 'G' && p[1] == 'A' && p[2] == '9' && p[3] == '4' &&
>          p[4] == 3 && (p[5] & 0x40)) {
>          /* extract A53 Part 4 CC data */
> @@ -1927,9 +1936,15 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
>                  memcpy(s1->a53_buf_ref->data + old_size, p + 7, cc_count * UINT64_C(3));
>  
>              avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
> +
> +            if (!s1->cc_format) {
> +                s1->cc_format = CC_FORMAT_A53_PART4;
> +                av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is A53 format\n");
> +            }
>          }
>          return 1;
> -    } else if (buf_size >= 2 &&
> +    } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_SCTE20) &&
> +               buf_size >= 2 &&
>                 p[0] == 0x03 && (p[1]&0x7f) == 0x01) {
>          /* extract SCTE-20 CC data */
>          GetBitContext gb;
> @@ -1974,9 +1989,15 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
>                  }
>              }
>              avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
> +
> +            if (!s1->cc_format) {
> +                s1->cc_format = CC_FORMAT_SCTE20;
> +                av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is SCTE-20 format\n");
> +            }
>          }
>          return 1;
> -    } else if (buf_size >= 11 &&
> +    } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_DVD) &&
> +               buf_size >= 11 &&
>                 p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) {
>          /* extract DVD CC data
>           *
> @@ -2034,6 +2055,11 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
>                  }
>              }

>              avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;

> +
> +            if (!s1->cc_format) {
> +                s1->cc_format = CC_FORMAT_DVD;
> +                av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is DVD format\n");
> +            }

nit: probably this might be factorized, either with a function or a
macro

>          }
>          return 1;
>      }
> @@ -2598,11 +2624,28 @@ const FFCodec ff_mpeg1video_decoder = {
>                             },
>  };
>  
> +#define M2V_OFFSET(x) offsetof(Mpeg1Context, x)
> +#define M2V_PARAM     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
> +

> +static const AVOption mpeg2video_options[] = {
> +    { "cc_format", "extract a specific Closed Captions format (0=auto)", M2V_OFFSET(cc_format), AV_OPT_TYPE_INT, { .i64 = 0 }, CC_FORMAT_AUTO, CC_FORMAT_DVD, M2V_PARAM },
> +    { NULL }

ideally this should be documented in the doc, but it is missing for
this one so probably it's not needed

[...]

Patch looks good to me, but let's wait a few days in case there are
more comments.