[FFmpeg-devel] [PATCH] Closed caption support for cuviddec with ff_parse_a53_cc preserving a53 data
Dhanish Vijayan
dhanishvijayan at gmail.com
Tue Apr 13 17:35:10 EEST 2021
---
libavcodec/cuviddec.c | 183 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 183 insertions(+)
diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index ec57afdefe..9967cb4c94 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -31,6 +31,8 @@
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
+#include "get_bits.h"
+#include "atsc_a53.h"
#include "avcodec.h"
#include "decode.h"
#include "hwconfig.h"
@@ -46,6 +48,9 @@
#define CUVID_HAS_AV1_SUPPORT
#endif
+#define MAX_FRAME_COUNT 25
+#define A53_QUEUE_SIZE (MAX_FRAME_COUNT + 8)
+
typedef struct CuvidContext
{
AVClass *avclass;
@@ -89,6 +94,11 @@ typedef struct CuvidContext
cudaVideoCodec codec_type;
cudaVideoChromaFormat chroma_format;
+ AVBufferRef* a53_caption;
+ int a53_caption_size;
+ uint8_t* a53_caption_queue[A53_QUEUE_SIZE];
+ int a53_caption_size_queue[A53_QUEUE_SIZE];
+
CUVIDDECODECAPS caps8, caps10, caps12;
CUVIDPARSERPARAMS cuparseinfo;
@@ -103,6 +113,8 @@ typedef struct CuvidParsedFrame
CUVIDPARSERDISPINFO dispinfo;
int second_field;
int is_deinterlacing;
+ uint8_t* a53_caption;
+ int a53_caption_size;
} CuvidParsedFrame;
#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
@@ -338,6 +350,24 @@ static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic
ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
+ if (ctx->a53_caption)
+ {
+
+ if (picparams->CurrPicIdx >= A53_QUEUE_SIZE)
+ {
+ av_log(avctx, AV_LOG_WARNING, "CurrPicIdx too big: %d\n", picparams->CurrPicIdx);
+ av_freep(&ctx->a53_caption);
+ }
+ else
+ {
+ int pos = picparams->CurrPicIdx;
+ av_freep(&ctx->a53_caption_queue[pos]);
+ ctx->a53_caption_queue[pos] = ctx->a53_caption;
+ ctx->a53_caption_size_queue[pos] = ctx->a53_caption_size;
+ ctx->a53_caption = NULL;
+ }
+ }
+
ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
if (ctx->internal_error < 0)
return 0;
@@ -350,6 +380,20 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
AVCodecContext *avctx = opaque;
CuvidContext *ctx = avctx->priv_data;
CuvidParsedFrame parsed_frame = { { 0 } };
+ uint8_t* a53_caption = NULL;
+ int a53_caption_size = 0;
+
+ if (dispinfo->picture_index >= A53_QUEUE_SIZE)
+ {
+ av_log(avctx, AV_LOG_WARNING, "picture_index too big: %d\n", dispinfo->picture_index);
+ }
+ else
+ {
+ int pos = dispinfo->picture_index;
+ a53_caption = ctx->a53_caption_queue[pos];
+ a53_caption_size = ctx->a53_caption_size_queue[pos];
+ ctx->a53_caption_queue[pos] = NULL;
+ }
parsed_frame.dispinfo = *dispinfo;
ctx->internal_error = 0;
@@ -358,11 +402,17 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence;
if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
+ parsed_frame.a53_caption = a53_caption;
+ parsed_frame.a53_caption_size = a53_caption_size;
av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
} else {
parsed_frame.is_deinterlacing = 1;
+ parsed_frame.a53_caption = a53_caption;
+ parsed_frame.a53_caption_size = a53_caption_size;
av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
if (!ctx->drop_second_field) {
+ parsed_frame.a53_caption = NULL;
+ parsed_frame.a53_caption_size = 0;
parsed_frame.second_field = 1;
av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
}
@@ -382,6 +432,121 @@ static int cuvid_is_buffer_full(AVCodecContext *avctx)
return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
}
+/**
+ * Scan an MPEG-2 packet for user_data sections (start code 0x000001B2) and
+ * keep the A/53 closed-caption bytes of the last "GA94" section found.
+ *
+ * On success ctx->a53_caption points to an av_malloc()ed copy of the raw
+ * cc_data triplets (3 bytes per caption) and ctx->a53_caption_size holds its
+ * length in bytes. The buffer is a plain byte array that the rest of this file
+ * releases with av_freep(); it is not a reference-counted AVBufferRef.
+ *
+ * @param avctx    codec context, used for logging only
+ * @param ctx      decoder private context receiving the caption data
+ * @param p        start of the packet payload
+ * @param buf_size size of the packet payload in bytes
+ */
+static void cuvid_mpeg_parse_a53(AVCodecContext *avctx, CuvidContext *ctx, const uint8_t* p, int buf_size)
+{
+    const uint8_t *buf_end = p + buf_size;
+
+    while (p < buf_end) {
+        GetBitContext gb_payload;
+        AVBufferRef *cc_buf = NULL;
+        uint32_t start_code = -1;
+        uint32_t user_identifier;
+        int payload_size, cc_count;
+
+        p = avpriv_find_start_code(p, buf_end, &start_code);
+        if (start_code > 0x1ff)
+            break;
+        if (start_code != 0x1b2)
+            continue;
+
+        /* Everything up to the end of the packet is offered to the parser;
+         * at least the 32-bit user_identifier must be present. */
+        payload_size = buf_end - p;
+        if (payload_size < 4)
+            continue;
+
+        /* init_get_bits8() takes a size in bytes (init_get_bits() wants bits). */
+        if (init_get_bits8(&gb_payload, p, payload_size) < 0)
+            continue;
+
+        user_identifier = get_bits_long(&gb_payload, 32);
+        switch (user_identifier) {
+        case MKBETAG('D', 'T', 'G', '1'):       // afd_data
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "Not implemented ITU-T T35 SEI message (atsc user_identifier = 0x%04x)\n",
+                   user_identifier);
+            break;
+        case MKBETAG('G', 'A', '9', '4'):       // closed captions
+            /* The identifier occupies the first 4 bytes, so both the pointer
+             * and the size handed to the parser are moved past it. */
+            cc_count = ff_parse_a53_cc(&cc_buf, p + 4, payload_size - 4);
+            if (cc_count > 0) {
+                void *copy;
+
+                /* Drop any caption found earlier in this packet; last one wins. */
+                av_freep(&ctx->a53_caption);
+                ctx->a53_caption_size = 0;
+
+                copy = av_malloc(cc_count * 3);
+                if (copy) {
+                    memcpy(copy, cc_buf->data, cc_count * 3);
+                    ctx->a53_caption      = copy;
+                    ctx->a53_caption_size = cc_count * 3;
+                }
+            }
+            /* The parser allocated cc_buf; release it on every path. */
+            av_buffer_unref(&cc_buf);
+            break;
+        default:
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "Unsupported User Data Registered ITU-T T35 SEI message (atsc user_identifier = 0x%04x)\n",
+                   user_identifier);
+            break;
+        }
+    }
+}
+
+/**
+ * Scan an H.264 Annex B packet for SEI NAL units (start code 0x00000106) whose
+ * first message is of payload type 4 (user data registered by ITU-T T.35) and
+ * keep the A/53 closed-caption bytes of the last "GA94" payload found.
+ *
+ * On success ctx->a53_caption points to an av_malloc()ed copy of the raw
+ * cc_data triplets (3 bytes per caption) and ctx->a53_caption_size holds its
+ * length in bytes. The buffer is a plain byte array that the rest of this file
+ * releases with av_freep(); it is not a reference-counted AVBufferRef.
+ *
+ * NOTE(review): only the first SEI message of each NAL unit is inspected and
+ * the bytes are read straight from the packet, i.e. emulation prevention
+ * bytes are not removed before parsing.
+ *
+ * @param avctx    codec context, used for logging only
+ * @param ctx      decoder private context receiving the caption data
+ * @param p        start of the packet payload
+ * @param buf_size size of the packet payload in bytes
+ */
+static void cuvid_h264_parse_a53(AVCodecContext *avctx, CuvidContext *ctx, const uint8_t* p, int buf_size)
+{
+    const uint8_t *buf_end = p + buf_size;
+
+    while (p < buf_end) {
+        GetBitContext gb_payload;
+        AVBufferRef *cc_buf = NULL;
+        uint32_t start_code = -1;
+        uint32_t user_identifier;
+        int size, skip, cc_count;
+
+        p = avpriv_find_start_code(p, buf_end, &start_code);
+        if (start_code > 0x1ff)
+            break;
+        if (start_code != 0x106)
+            continue;
+
+        /* SEI payload type: only type 4 is of interest. */
+        buf_size = buf_end - p;
+        if (buf_size < 1 || p[0] != 4)
+            continue;
+        p++;
+        buf_size--;
+
+        /* SEI payload size: a run of 0xFF bytes plus one terminating byte,
+         * all summed up. */
+        size = 0;
+        while (buf_size > 0) {
+            uint8_t b = *p++;
+
+            buf_size--;
+            size += b;
+            if (b != 0xFF)
+                break;
+        }
+        if (buf_size <= 0 || buf_size < size)
+            continue;
+        if (size < 7)
+            continue;
+
+        /* Skip the T.35 header: country code (plus an extension byte when the
+         * country code is 0xFF) followed by a 2-byte provider code. */
+        skip  = p[0] == 0xFF ? 4 : 3;
+        p    += skip;
+        size -= skip;
+
+        /* The 32-bit user_identifier must still fit inside the payload. */
+        if (size < 4)
+            continue;
+
+        /* Bound all further parsing by what is left of the SEI payload.
+         * init_get_bits8() takes a size in bytes (init_get_bits() wants bits). */
+        if (init_get_bits8(&gb_payload, p, size) < 0)
+            continue;
+
+        user_identifier = get_bits_long(&gb_payload, 32);
+        switch (user_identifier) {
+        case MKBETAG('G', 'A', '9', '4'):       // closed captions
+            /* The identifier occupies the first 4 bytes, so both the pointer
+             * and the size handed to the parser are moved past it. */
+            cc_count = ff_parse_a53_cc(&cc_buf, p + 4, size - 4);
+            if (cc_count > 0) {
+                void *copy;
+
+                /* Drop any caption found earlier in this packet; last one wins. */
+                av_freep(&ctx->a53_caption);
+                ctx->a53_caption_size = 0;
+
+                copy = av_malloc(cc_count * 3);
+                if (copy) {
+                    memcpy(copy, cc_buf->data, cc_count * 3);
+                    ctx->a53_caption      = copy;
+                    ctx->a53_caption_size = cc_count * 3;
+                }
+            }
+            /* The parser allocated cc_buf; release it on every path. */
+            av_buffer_unref(&cc_buf);
+            break;
+        default:
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "Unsupported User Data Registered ITU-T T35 SEI message (atsc user_identifier = 0x%04x)\n",
+                   user_identifier);
+            break;
+        }
+    }
+}
+
static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
{
CuvidContext *ctx = avctx->priv_data;
@@ -424,6 +589,15 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
+ // assume there is one frame delay (the parser outputs previous picture once it sees new frame data)
+ av_freep(&ctx->a53_caption);
+ if (avpkt && avpkt->size) {
+ if (ctx->cuparseinfo.CodecType == cudaVideoCodec_MPEG2)
+ cuvid_mpeg_parse_a53(avctx, ctx, avpkt->data, avpkt->size);
+ else if (ctx->cuparseinfo.CodecType == cudaVideoCodec_H264){}
+ cuvid_h264_parse_a53(avctx, ctx, avpkt->data, avpkt->size);
+ }
+
if (ret < 0)
goto error;
@@ -627,6 +801,15 @@ FF_ENABLE_DEPRECATION_WARNINGS
if (frame->interlaced_frame)
frame->top_field_first = parsed_frame.dispinfo.top_field_first;
+
+ if (parsed_frame.a53_caption)
+ {
+ AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_A53_CC, parsed_frame.a53_caption_size);
+ if (sd)
+ memcpy(sd->data, parsed_frame.a53_caption, parsed_frame.a53_caption_size);
+ av_freep(&parsed_frame.a53_caption);
+ avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+ }
} else if (ctx->decoder_flushing) {
ret = AVERROR_EOF;
} else {
--
2.25.1
More information about the ffmpeg-devel
mailing list