[FFmpeg-devel] [PATCH 2/2] Adding closed caption decoder
Anshul
anshul.ffmpeg at gmail.com
Fri Dec 12 11:53:57 CET 2014
On 12/06/2014 04:24 PM, Anshul wrote:
> On 12/06/2014 04:22 PM, Anshul wrote:
>> On 12/05/2014 07:56 PM, Nicolas George wrote:
>>> Hi. I had time to look at the code with some more details. Comments are
>>> below.
>>>
>>>> >From 31f69ccfb45247a7cc203084a931b8523284aa13 Mon Sep 17 00:00:00 2001
>>>> From: Anshul Maheshwari <anshul.ffmpeg at gmail.com>
>>>> Date: Wed, 3 Dec 2014 23:37:22 +0530
>>>> Subject: [PATCH 2/2] Adding Closed caption Decoder
>>>>
>>>> ---
>>>> libavcodec/Makefile | 1 +
>>>> libavcodec/allcodecs.c | 1 +
>>>> libavcodec/ccaption_dec.c | 318 ++++++++++++++++++++++++++++++++++++++++++++++
>>>> 3 files changed, 320 insertions(+)
>>>> create mode 100644 libavcodec/ccaption_dec.c
>>>>
>>>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>>>> index fa0f53d..bbc516d 100644
>>>> --- a/libavcodec/Makefile
>>>> +++ b/libavcodec/Makefile
>>>> @@ -173,6 +173,7 @@ OBJS-$(CONFIG_BRENDER_PIX_DECODER) += brenderpix.o
>>>> OBJS-$(CONFIG_C93_DECODER) += c93.o
>>>> OBJS-$(CONFIG_CAVS_DECODER) += cavs.o cavsdec.o cavsdsp.o \
>>>> cavsdata.o mpeg12data.o
>>>> +OBJS-$(CONFIG_CCAPTION_DECODER) += ccaption_dec.o
>>>> OBJS-$(CONFIG_CDGRAPHICS_DECODER) += cdgraphics.o
>>>> OBJS-$(CONFIG_CDXL_DECODER) += cdxl.o
>>>> OBJS-$(CONFIG_CINEPAK_DECODER) += cinepak.o
>>>> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
>>>> index 0d39d33..8c07388 100644
>>>> --- a/libavcodec/allcodecs.c
>>>> +++ b/libavcodec/allcodecs.c
>>>> @@ -480,6 +480,7 @@ void avcodec_register_all(void)
>>>> /* subtitles */
>>>> REGISTER_ENCDEC (SSA, ssa);
>>>> REGISTER_ENCDEC (ASS, ass);
>>>> + REGISTER_DECODER(CCAPTION, ccaption);
>>>> REGISTER_ENCDEC (DVBSUB, dvbsub);
>>>> REGISTER_ENCDEC (DVDSUB, dvdsub);
>>>> REGISTER_DECODER(JACOSUB, jacosub);
>>>> diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
>>>> new file mode 100644
>>>> index 0000000..0a7dfd8
>>>> --- /dev/null
>>>> +++ b/libavcodec/ccaption_dec.c
>>>> @@ -0,0 +1,318 @@
>>>> +/*
>>>> + * Closed Caption Decoding
>>>> + * Copyright (c) 2014 Anshul Maheshwari
>>>> + *
>>>> + * This file is part of FFmpeg.
>>>> + *
>>>> + * FFmpeg is free software; you can redistribute it and/or
>>>> + * modify it under the terms of the GNU Lesser General Public
>>>> + * License as published by the Free Software Foundation; either
>>>> + * version 2.1 of the License, or (at your option) any later version.
>>>> + *
>>>> + * FFmpeg is distributed in the hope that it will be useful,
>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>>> + * Lesser General Public License for more details.
>>>> + *
>>>> + * You should have received a copy of the GNU Lesser General Public
>>>> + * License along with FFmpeg; if not, write to the Free Software
>>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>>>> + */
>>>> +
>>>> +#include "avcodec.h"
>>>> +#include "ass.h"
>>>> +
>>>> +#define SCREEN_ROWS 15
>>>> +#define SCREEN_COLUMNS 32
>>>> +
>>>> +#define SET_FLAG(var, val) ( var |= ( 1 << (val) ) )
>>>> +#define UNSET_FLAG(var, val) ( var &= ~( 1 << (val)) )
>>>> +#define CHECK_FLAG(var, val) ( (var) & (1 << (val) ) )
>>>> +
>>>> +enum cc_mode {
>>>> + CCMODE_POPON,
>>>> + CCMODE_PAINTON,
>>>> + CCMODE_ROLLUP_2,
>>>> + CCMODE_ROLLUP_3,
>>>> + CCMODE_ROLLUP_4,
>>>> + CCMODE_TEXT,
>>>> +};
>>>> +
>>>> +struct Screen {
>>>> + uint8_t characters[SCREEN_ROWS][SCREEN_COLUMNS+1];
>>> Maybe add a comment about the +1?
>> done
>>>> + /*
>>>> + * row used flag will be 0 when none in use other wise it will have its
>>>> + * corrosponding bit high.
>>> Language nit. I suggest: "Bitmask of used rows; if a bit is not set, the
>>> corresponding row is not used."
>> done
>>>> + * for setting row 1 use row | (1 >> 1)
>>>> + * for setting row 15 use row | (1 >> 15)
>>> Are you sure that is ">>" and not "<<"? And is it a good idea to number
>>> starting from 1?
>> It was just commented in wrong way, corrected comment.
>>>> + */
>>>> + int16_t row_used;
>>>> +};
>>>> +
>>>> +
>>>> +typedef struct CCaptionSubContext {
>>>> + int parity_table[256];
>>>> + int row_cnt;
>>>> + struct Screen screen[2];
>>>> + int active_screen;
>>>> + int cursor_row;
>>>> + int cursor_column;
>>>> + AVBPrint buffer;
>>>> + /* erase display memory */
>>>> + int edm;
>>> It is used only a handful of times: I suggest a more meaningful name instead
>>> of a comment: "erase_disp_mem" for example.
>> done
>>>> + int rollup;
>>>> + enum cc_mode mode;
>>>> + int64_t start_time;
>>>> + /* visible screen time */
>>>> + int64_t startv_time;
>>> Is the v a typo?
>> no it was written to express v = visible, (visible screen start time)
>>>> + int64_t end_time;
>>>> + char prev_cmd[2];
>>> The code uses various types for these values: char, unsigned char, uint8_t.
>>> I suggest to stick with uint8_t if it works.
>> changed unsigned char to uint8_t
>>>> +}CCaptionSubContext;
>>>> +
>>>> +static void build_parity_table(int *parity_table)
>>>> +{
>>>> + unsigned int byte;
>>> Inconsistent indentation.
>> corrected
>>>> + int parity_v;
>>>> + for (byte = 0; byte <= 127; byte++) {
>>>> + parity_v = av_popcount(byte & 0x7f) & 1;
>>> The & 0x7f is redundant.
>> removed
>>>> + parity_table[byte] = parity_v;
>>>> + parity_table[byte | 0x80] = !parity_v;
>>>> + }
>>>> +}
>>>> +
>>>> +static av_cold int init_decoder(AVCodecContext *avctx)
>>>> +{
>>>> +
>>>> + CCaptionSubContext *ctx = avctx->priv_data;
>>>> +
>>>> + build_parity_table(ctx->parity_table);
>>>> + ctx->row_cnt = 0;
>>>> + av_bprint_init(&ctx->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
>>>> + ctx->edm = 0;
>>>> + /* taking by default roll up to 2 */
>>>> + ctx->rollup = 2;
>>>> + /* set active screen as 0 */
>>>> + ctx->active_screen = 0;
>>>> + memset(ctx->prev_cmd,0,2);
>>>> + ff_ass_subtitle_header_default(avctx);
>>> All the "= 0" and the memset are unnecessary.
>> removed
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static av_cold int close_decoder(AVCodecContext *avctx)
>>>> +{
>>>> + CCaptionSubContext *ctx = avctx->priv_data;
>>>> + av_bprint_finalize( &ctx->buffer, NULL);
>>>> + return 0;
>>>> +}
>>>> +
>>>> +/**
>>>> + * This function after validating parity bit, also remove it from data pair.
>>>> + */
>>>> +static int validate_cc_data_pair (unsigned char *cc_data_pair, int *parity_table)
>>>> +{
>>>> + unsigned char cc_valid = (*cc_data_pair & 4) >>2;
>>>> + unsigned char cc_type = *cc_data_pair & 3;
>>>> +
>>>> + if (!cc_valid)
>>>> + return -1;
>>>> +
>>>> + // if EIA-608 data then verify parity.
>>>> + if (cc_type==0 || cc_type==1) {
>>>> + if (!parity_table[cc_data_pair[2]]) {
>>>> + // If the second byte doesn't pass parity, ignore pair
>>>> + return -1;
>>> Meaningful error codes? AVERROR(EINVAL) or AVERROR_INVALID_DATA? Even when
>>> they are not supposed to, the -1 have a habit of reaching the user. Same
>>> below.
>> done
>>>> + }
>>>> + if (!parity_table[cc_data_pair[1]]) {
>>>> + // The first byte doesn't pass parity, we replace it with a solid blank
>>>> + // and process the pair.
>>>> + cc_data_pair[1]=0x7F;
>>>> + }
>>>> + }
>>>> +
>>>> + //Skip non-data
>>>> + if( (cc_data_pair[0] == 0xFA || cc_data_pair[0] == 0xFC || cc_data_pair[0] == 0xFD )
>>>> + && (cc_data_pair[1] & 0x7F) == 0 && (cc_data_pair[2] & 0x7F) == 0)
>>>> + return -1;
>>>> +
>>>> + //skip 708 data
>>>> + if(cc_type == 3 || cc_type == 2 )
>>>> + return -1;
>>>> +
>>>> + /* remove parity bit */
>>>> + cc_data_pair[1] &= 0x7F;
>>>> + cc_data_pair[2] &= 0x7F;
>>>> +
>>>> +
>>>> + return 0;
>>>> +
>>>> +}
>>>> +static void handle_pac( CCaptionSubContext *ctx, uint8_t hi, uint8_t lo )
>>>> +{
>>>> + static const int row_map[] = {
>>> Could be int8_t to save memory.
>> done
>>>> + 11, -1, 1, 2, 3, 4, 12, 13, 14, 15, 5, 6, 7, 8, 9, 10
>>>> + };
>>>> + const int index = ( (hi<<1) & 0x0e) | ( (lo>>5) & 0x01 );
>>>> +
>>> It looks like the values come from external data with subtle arithmetic, so
>>> I suggest:
>>>
>>> av_assert2((unsigned)index < sizeof(row_map));
>> I am using 3 bit from hi and 1 bit from lo data, so I am using total 4 bit
>> which mean only 0-15 possible. and for unknown value not defined in spec
>> I have kept the value as -1 on that index.
>> so no need to check.
>>>> + if( row_map[index] <= 0 )
>>>> + return;
>>>> +
>>>> + ctx->cursor_row = row_map[index] - 1;
>>>> + ctx->cursor_column = 0;
>>>> +
>>>> +}
>>>> +
>>>> +/**
>>>> + * @param pts it is required to set end time
>>>> + */
>>>> +static void handle_edm(CCaptionSubContext *ctx,int64_t pts)
>>>> +{
>>>> + int i;
>>>> + struct Screen *screen = ctx->screen + ctx->active_screen;
>>>> +
>>>> + ctx->start_time = ctx->startv_time;
>>>> + for( i = 0; screen->row_used && i < SCREEN_ROWS; i++)
>>>> + {
>>>> + if(CHECK_FLAG(screen->row_used,i)) {
>>>> + av_bprint_append_data(&ctx->buffer, screen->characters[i], strlen(screen->characters[i]));
>>>> + av_bprint_append_data(&ctx->buffer, "\\N",2);
>>>> + UNSET_FLAG(screen->row_used, i);
>>>> + }
>>>> + }
>>>> + ctx->startv_time = pts;
>>>> + ctx->edm = 1;
>>>> + ctx->end_time = pts;
>>>> +}
>>>> +static void handle_eoc(CCaptionSubContext *ctx, int64_t pts)
>>>> +{
>>>> + ctx->active_screen = !ctx->active_screen;
>>>> + ctx->startv_time = pts;
>>>> +}
>>>> +static struct Screen *get_writing_screen(CCaptionSubContext *ctx)
>>>> +{
>>>> + switch (ctx->mode) {
>>>> + case CCMODE_POPON:
>>>> + // use Inactive screen
>>>> + return ctx->screen + !ctx->active_screen;
>>>> + case CCMODE_PAINTON:
>>>> + case CCMODE_ROLLUP_2:
>>>> + case CCMODE_ROLLUP_3:
>>>> + case CCMODE_ROLLUP_4:
>>>> + case CCMODE_TEXT:
>>>> + // use active screen
>>>> + return ctx->screen + !ctx->active_screen;
>>>> + }
>>>> + /* It was never an option */
>>>> + return NULL;
>>>> +}
>>>> +static void handle_char(CCaptionSubContext *ctx, char hi, char lo, int64_t pts)
>>>> +{
>>>> + struct Screen *screen = get_writing_screen(ctx);
>>>> + char *row = screen->characters[ctx->cursor_row] + ctx->cursor_column;
>>> Here too: av_assert0((unsigned)ctx->cursor_row < SCREEN_ROWS) and same for
>>> column.
>> We can never get row greater then SCREEN_ROWS, I have not implemented to
>> read column value from pac. so left it as it is.
>>>> +
>>>> + SET_FLAG(screen->row_used,ctx->cursor_row);
>>>> +
>>>> + *row++ = hi;
>>>> + ctx->cursor_column++;
>>>> + if(lo) {
>>>> + *row++ = lo;
>>>> + ctx->cursor_column++;
>>>> + }
>>>> + *row = 0;
>>>> + /* reset prev command since character can repeat */
>>>> + ctx->prev_cmd[0] = 0;
>>>> + ctx->prev_cmd[1] = 0;
>>>> +}
>>>> +static int process_cc608(CCaptionSubContext *ctx, int64_t pts, unsigned char hi, unsigned char lo)
>>>> +{
>>>> +
>>>> +#define COR3(var, with1, with2, with3) ( (var) == (with1) || (var) == (with2) || (var) == (with3) )
>>> It looks like you always use COR3(hi, 0x14, 0x15, 0x1C) exactly: is it on
>>> purpose, a typo, or maybe can you simplify?
>> These are done for purpose, I have to implement some other command which
>> have
>> different values to be compared. so not simplifying it.
>>>> + if ( hi == ctx->prev_cmd[0] && lo == ctx->prev_cmd[1]) {
>>>> + /* ignore redundant command */
>>>> + } else if ( (hi == 0x10 && (lo >= 0x40 || lo <= 0x5f)) ||
>>>> + ( (hi >= 0x11 && hi <= 0x17) && (lo >= 0x40 && lo <= 0x7f) ) ) {
>>>> + handle_pac(ctx, hi, lo);
>>>> + } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x20 ) {
>>>> + /* resume caption loading */
>>>> + ctx->mode = CCMODE_POPON;
>>>> + } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x25 ) {
>>>> + ctx->rollup = 2;
>>>> + } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x26 ) {
>>>> + ctx->rollup = 3;
>>>> + } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x27 ) {
>>>> + ctx->rollup = 4;
>>>> + } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x29 ) {
>>>> + /* resume direct captioning */
>>>> + ctx->mode = CCMODE_PAINTON;
>>>> + } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2C ) {
>>>> + /* erase display memory */
>>>> + handle_edm(ctx, pts);
>>>> + } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2D ) {
>>>> + /* carriage return */
>>>> + ctx->row_cnt++;
>>>> + if(ctx->row_cnt == ctx->rollup) {
>>>> + ctx->row_cnt = 0;
>>>> + handle_edm(ctx, pts);
>>>> + }
>>>> + } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2F ) {
>>>> + /* end of caption */
>>>> + handle_eoc(ctx, pts);
>>>> + } else if (hi>=0x20) {
>>>> + /* Standard characters (always in pairs) */
>>>> + handle_char(ctx, hi, lo, pts);
>>>> + } else {
>>>> + /* Ignoring all other non data code */
>>>> + }
>>>> +
>>>> + /* set prev command */
>>>> + ctx->prev_cmd[0] = hi;
>>>> + ctx->prev_cmd[1] = lo;
>>>> +
>>>> +#undef COR3
>>>> + return 0;
>>>> +
>>>> +}
>>>> +static int decode(AVCodecContext *avctx, void *data, int *got_sub, AVPacket *avpkt)
>>>> +{
>>>> + CCaptionSubContext *ctx = avctx->priv_data;
>>>> + AVSubtitle *sub = data;
>>>> + unsigned char *bptr = avpkt->data;
>>>> + int len = avpkt->size;
>>>> + int ret = 0;
>>>> + int i;
>>>> +
>>>> + for (i = 0; i < len; i += 3) {
>>>> + unsigned char cc_type = *(bptr + i) & 3;
>>>> + if (validate_cc_data_pair( bptr + i, ctx->parity_table ) )
>>>> + continue;
>>>> + /* ignoring data field 1 */
>>>> + if(cc_type == 1)
>>>> + continue;
>>>> + else
>>>> + process_cc608(ctx, avpkt->pts, *(bptr + i + 1), *(bptr + i + 2));
>>>> + }
>>>> + if(ctx->edm && *ctx->buffer.str)
>>>> + {
>>>> + int start_time = av_rescale_q(ctx->start_time, avctx->time_base, (AVRational){ 1, 100 });
>>>> + int end_time = av_rescale_q(ctx->end_time, avctx->time_base, (AVRational){ 1, 100 });
>>>> + ret = ff_ass_add_rect(sub, ctx->buffer.str, start_time, end_time - start_time , 0);
>>>> a
>>> You need to use av_bprint_is_complete() and return AVERROR(ENOMEM) if the
>>> text was truncated.
>> done in handle_edm
>>>> + if (ret < 0)
>>>> + return ret;
>>>> + sub->pts = av_rescale_q(ctx->start_time, avctx->time_base, AV_TIME_BASE_Q);
>>>> + ctx->edm = 0;
>>>> + av_bprint_clear(&ctx->buffer);
>>>> + }
>>>> +
>>>> + *got_sub = sub->num_rects > 0;
>>>> + return 0;
>>>> +}
>>>> +
>>>> +AVCodec ff_ccaption_decoder = {
>>>> + .name = "cc_dec",
>>>> + .long_name = NULL_IF_CONFIG_SMALL("Closed Caption Decoder"),
>>> I suggest to have "EIA-608" and "CEA-708" appear in the name, for people
>>> specifically looking for it: "Closed Caption (EIA-608 / CEA-708) Decoder"
>>> maybe.
>> done
>>>> + .type = AVMEDIA_TYPE_SUBTITLE,
>>>> + .id = AV_CODEC_ID_EIA_608,
>>>> + .priv_data_size = sizeof(CCaptionSubContext),
>>>> + .init = init_decoder,
>>>> + .close = close_decoder,
>>>> + .decode = decode,
>>>> +};
>>> Regards,
>>>
>> I have also removed some error which were causing to print nothing in
>> subccItalic.mpg.
>>
>> Though subtitle is still not italic.
>>
>> I was thinking to put extra features in next iteration of patch.
>>
>> -Anshul
>>
> Attaching patch with those work.
ping
-please comment
Anshul Maheshwari
More information about the ffmpeg-devel
mailing list