[FFmpeg-devel] [FFmpeg-cvslog] Adding Closed caption Support

Clément Bœsch u at pkh.me
Mon Jan 12 11:46:58 CET 2015


On Sun, Jan 11, 2015 at 11:15:35PM +0100, Anshul Maheshwari wrote:
> ffmpeg | branch: master | Anshul Maheshwari <anshul.ffmpeg at gmail.com> | Tue Jan  6 12:41:34 2015 +0530| [4b6262610bd516320b3fca71ef03ff9bf1aa5066] | committer: Michael Niedermayer
> 
> Adding Closed caption Support
> 
> Signed-off-by: Anshul Maheshwari <anshul.ffmpeg at gmail.com>
> 
> To test Closed caption use following command
> ffmpeg -f lavfi -i "movie=input.ts[out0+subcc]" -map s output.srt
> Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

Sorry for the late review, I wasn't available.

Q: How does it compare to libzvbi? Do we still need libzvbi (especially
the text part)?

> 
> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4b6262610bd516320b3fca71ef03ff9bf1aa5066
> ---
> 
>  libavcodec/Makefile       |    1 +
>  libavcodec/allcodecs.c    |    1 +
>  libavcodec/ccaption_dec.c |  529 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 531 insertions(+)
> 

Where is the FATE test for this?

> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 6b85420..80ee389 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -173,6 +173,7 @@ OBJS-$(CONFIG_BRENDER_PIX_DECODER)     += brenderpix.o
>  OBJS-$(CONFIG_C93_DECODER)             += c93.o
>  OBJS-$(CONFIG_CAVS_DECODER)            += cavs.o cavsdec.o cavsdsp.o \
>                                            cavsdata.o mpeg12data.o
> +OBJS-$(CONFIG_CCAPTION_DECODER)        += ccaption_dec.o
>  OBJS-$(CONFIG_CDGRAPHICS_DECODER)      += cdgraphics.o
>  OBJS-$(CONFIG_CDXL_DECODER)            += cdxl.o
>  OBJS-$(CONFIG_CINEPAK_DECODER)         += cinepak.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 512711e..29b45f3 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -481,6 +481,7 @@ void avcodec_register_all(void)
>      /* subtitles */
>      REGISTER_ENCDEC (SSA,               ssa);
>      REGISTER_ENCDEC (ASS,               ass);
> +    REGISTER_DECODER(CCAPTION,          ccaption);
>      REGISTER_ENCDEC (DVBSUB,            dvbsub);
>      REGISTER_ENCDEC (DVDSUB,            dvdsub);
>      REGISTER_DECODER(JACOSUB,           jacosub);
> diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
> new file mode 100644
> index 0000000..a92c609
> --- /dev/null
> +++ b/libavcodec/ccaption_dec.c
> @@ -0,0 +1,529 @@
> +/*
> + * Closed Caption Decoding
> + * Copyright (c) 2015 Anshul Maheshwari
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avcodec.h"
> +#include "ass.h"
> +#include "libavutil/opt.h"
> +

> +#define CHAR_DEBUG

We have av_dlog() for that purpose.

> +#define SCREEN_ROWS 15
> +#define SCREEN_COLUMNS 32
> +
> +#define SET_FLAG(var, val) ( var |= ( 1 << (val) ) )
> +#define UNSET_FLAG(var, val) ( var &=  ~( 1 << (val)) )
> +#define CHECK_FLAG(var, val) ( (var) & (1 << (val) ) )
> +
> +/*
> + * TODO list
> + * 1) handle font and color completely
> + */
> +enum cc_mode {
> +    CCMODE_POPON,
> +    CCMODE_PAINTON,
> +    CCMODE_ROLLUP_2,
> +    CCMODE_ROLLUP_3,
> +    CCMODE_ROLLUP_4,
> +    CCMODE_TEXT,
> +};
> +
> +enum cc_color_code
> +{

The style is broken all over the file, but please at least make it
consistent with the surrounding one.

> +    CCCOL_WHITE,
> +    CCCOL_GREEN,
> +    CCCOL_BLUE,
> +    CCCOL_CYAN,
> +    CCCOL_RED,
> +    CCCOL_YELLOW,
> +    CCCOL_MAGENTA,
> +    CCCOL_USERDEFINED,
> +    CCCOL_BLACK,
> +    CCCOL_TRANSPARENT

Note: you should keep a trailing comma after the last entry of such an enum
to reduce later diffs.

> +};
> +
> +enum cc_font
> +{
> +    CCFONT_REGULAR,
> +    CCFONT_ITALICS,
> +    CCFONT_UNDERLINED,
> +    CCFONT_UNDERLINED_ITALICS
> +};
> +
> +static const unsigned char pac2_attribs[][3] = // Color, font, ident
> +{
> +    { CCCOL_WHITE, CCFONT_REGULAR, 0 },  // 0x40 || 0x60
> +    { CCCOL_WHITE, CCFONT_UNDERLINED, 0 },  // 0x41 || 0x61
> +    { CCCOL_GREEN, CCFONT_REGULAR, 0 },  // 0x42 || 0x62
> +    { CCCOL_GREEN, CCFONT_UNDERLINED, 0 },  // 0x43 || 0x63
> +    { CCCOL_BLUE, CCFONT_REGULAR, 0 },  // 0x44 || 0x64
> +    { CCCOL_BLUE, CCFONT_UNDERLINED, 0 },  // 0x45 || 0x65
> +    { CCCOL_CYAN, CCFONT_REGULAR, 0 },  // 0x46 || 0x66
> +    { CCCOL_CYAN, CCFONT_UNDERLINED, 0 },  // 0x47 || 0x67
> +    { CCCOL_RED, CCFONT_REGULAR, 0 },  // 0x48 || 0x68
> +    { CCCOL_RED, CCFONT_UNDERLINED, 0 },  // 0x49 || 0x69
> +    { CCCOL_YELLOW, CCFONT_REGULAR, 0 },  // 0x4a || 0x6a
> +    { CCCOL_YELLOW, CCFONT_UNDERLINED, 0 },  // 0x4b || 0x6b
> +    { CCCOL_MAGENTA, CCFONT_REGULAR, 0 },  // 0x4c || 0x6c
> +    { CCCOL_MAGENTA, CCFONT_UNDERLINED, 0 },  // 0x4d || 0x6d
> +    { CCCOL_WHITE, CCFONT_ITALICS, 0 },  // 0x4e || 0x6e
> +    { CCCOL_WHITE, CCFONT_UNDERLINED_ITALICS, 0 },  // 0x4f || 0x6f
> +    { CCCOL_WHITE, CCFONT_REGULAR, 0 },  // 0x50 || 0x70
> +    { CCCOL_WHITE, CCFONT_UNDERLINED, 0 },  // 0x51 || 0x71
> +    { CCCOL_WHITE, CCFONT_REGULAR, 4 },  // 0x52 || 0x72
> +    { CCCOL_WHITE, CCFONT_UNDERLINED, 4 },  // 0x53 || 0x73
> +    { CCCOL_WHITE, CCFONT_REGULAR, 8 },  // 0x54 || 0x74
> +    { CCCOL_WHITE, CCFONT_UNDERLINED, 8 },  // 0x55 || 0x75
> +    { CCCOL_WHITE, CCFONT_REGULAR, 12 }, // 0x56 || 0x76
> +    { CCCOL_WHITE, CCFONT_UNDERLINED, 12 }, // 0x57 || 0x77
> +    { CCCOL_WHITE, CCFONT_REGULAR, 16 }, // 0x58 || 0x78
> +    { CCCOL_WHITE, CCFONT_UNDERLINED, 16 }, // 0x59 || 0x79
> +    { CCCOL_WHITE, CCFONT_REGULAR, 20 }, // 0x5a || 0x7a
> +    { CCCOL_WHITE, CCFONT_UNDERLINED, 20 }, // 0x5b || 0x7b
> +    { CCCOL_WHITE, CCFONT_REGULAR, 24 }, // 0x5c || 0x7c
> +    { CCCOL_WHITE, CCFONT_UNDERLINED, 24 }, // 0x5d || 0x7d
> +    { CCCOL_WHITE, CCFONT_REGULAR, 28 }, // 0x5e || 0x7e
> +    { CCCOL_WHITE, CCFONT_UNDERLINED, 28 }  // 0x5f || 0x7f

Vertical alignment would be welcome.

> +    /* total 32 entry */

entries*

Also, you can just make the 32 explicit in the declaration.

> +};
> +/* 0-255 needs 256 space */

spaces*

Please let the code breathe a little with a \n (blank line) here.

> +static const uint8_t parity_table[256] = { 0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           0, 1, 1, 0, 1, 0, 0, 1,
> +                                           1, 0, 0, 1, 0, 1, 1, 0 };
> +struct Screen {
> +    /* +1 is used to compensate null character of string */
> +    uint8_t characters[SCREEN_ROWS][SCREEN_COLUMNS+1];
> +    /*
> +     * Bitmask of used rows; if a bit is not set, the
> +     * corresponding row is not used.

> +     * for setting row 1  use row | (0 << 1)

(0 << 1), huh?

> +     * for setting row 15 use row | (1 << 14)
> +     */
> +    int16_t  row_used;
> +};
> +
> +
> +typedef struct CCaptionSubContext {
> +    AVClass *class;
> +    int row_cnt;
> +    struct Screen screen[2];
> +    int active_screen;
> +    uint8_t cursor_row;
> +    uint8_t cursor_column;
> +    uint8_t cursor_color;
> +    uint8_t cursor_font;
> +    AVBPrint buffer;
> +    int erase_display_memory;
> +    int rollup;
> +    enum  cc_mode mode;
> +    int64_t start_time;
> +    /* visible screen time */
> +    int64_t startv_time;
> +    int64_t end_time;
> +    char prev_cmd[2];
> +    /* buffer to store pkt data */
> +    AVBufferRef *pktbuf;
> +}CCaptionSubContext;
> +
> +
> +static av_cold int init_decoder(AVCodecContext *avctx)
> +{
> +    int ret;
> +    CCaptionSubContext *ctx = avctx->priv_data;
> +
> +    av_bprint_init(&ctx->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
> +    /* taking by default roll up to 2 */
> +    ctx->rollup = 2;

> +    ret = ff_ass_subtitle_header_default(avctx);
> +    /* allocate pkt buffer */
> +    ctx->pktbuf = av_buffer_alloc(128);
> +    if( !ctx->pktbuf) {
> +        ret = AVERROR(ENOMEM);
> +    }
> +
> +
> +    return ret;

This logic is fragile with respect to future changes (the ret from
ff_ass_subtitle_header_default() will probably end up being overwritten).

> +}
> +
> +static av_cold int close_decoder(AVCodecContext *avctx)
> +{
> +    CCaptionSubContext *ctx = avctx->priv_data;
> +    av_bprint_finalize( &ctx->buffer, NULL);
> +    av_buffer_unref(&ctx->pktbuf);
> +    return 0;
> +}
> +/**
> + * @param ctx closed caption context just to print log
> + */
> +static int write_char (CCaptionSubContext *ctx, char *row,uint8_t col, char ch)
> +{
> +    if(col < SCREEN_COLUMNS) {
> +        row[col] = ch;
> +        return 0;
> +    }
> +    /* We have extra space at end only for null character */
> +    else if ( col == SCREEN_COLUMNS && ch == 0) {
> +        row[col] = ch;
> +        return 0;
> +    }
> +    else {

> +        av_log(ctx, AV_LOG_WARNING,"Data Ignored since exciding screen width\n");

"exciding" doesn't sound like an English word (did you mean "exceeding"?)

> +        return AVERROR_INVALIDDATA;
> +    }
> +}
> +/**
> + * This function after validating parity bit, also remove it from data pair.
> + * The first byte doesn't pass parity, we replace it with a solid blank
> + * and process the pair.
> + * If the second byte doesn't pass parity, it returns INVALIDDATA
> + * user can ignore the whole pair and pass the other pair.
> + */
> +static int validate_cc_data_pair (uint8_t *cc_data_pair)
> +{
> +    uint8_t cc_valid = (*cc_data_pair & 4) >>2;
> +    uint8_t cc_type = *cc_data_pair & 3;
> +
> +    if (!cc_valid)
> +        return AVERROR_INVALIDDATA;
> +
> +    // if EIA-608 data then verify parity.
> +    if (cc_type==0 || cc_type==1) {
> +        if (!parity_table[cc_data_pair[2]]) {
> +            return AVERROR_INVALIDDATA;
> +        }
> +        if (!parity_table[cc_data_pair[1]]) {
> +            cc_data_pair[1]=0x7F;
> +        }
> +    }
> +
> +    //Skip non-data
> +    if( (cc_data_pair[0] == 0xFA || cc_data_pair[0] == 0xFC || cc_data_pair[0] == 0xFD )
> +         && (cc_data_pair[1] & 0x7F) == 0 && (cc_data_pair[2] & 0x7F) == 0)
> +        return AVERROR_PATCHWELCOME;

please add an av_log to explain what the patch should be about

> +
> +    //skip 708 data
> +    if(cc_type == 3 || cc_type == 2 )
> +        return AVERROR_PATCHWELCOME;

ditto; "unsupported cc_type %d" I suppose?

> +/**
> + * @param pts it is required to set end time
> + */
> +static int handle_edm(CCaptionSubContext *ctx,int64_t pts)
> +{
> +    int i;
> +    int ret = 0;
> +    struct Screen *screen = ctx->screen + ctx->active_screen;
> +
> +    ctx->start_time = ctx->startv_time;
> +    for( i = 0; screen->row_used && i < SCREEN_ROWS; i++)
> +    {
> +        if(CHECK_FLAG(screen->row_used,i)) {
> +            char *str = screen->characters[i];
> +            /* skip space */
> +            while (*str == ' ')
> +                str++;

> +            av_bprint_append_data(&ctx->buffer, str, strlen(str));
> +            av_bprint_append_data(&ctx->buffer, "\\N",2);

av_bprintf(&ctx->buffer, "%s\\N", str);

> +            UNSET_FLAG(screen->row_used, i);
> +            ret = av_bprint_is_complete(&ctx->buffer);
> +            if( ret == 0) {
> +                ret = AVERROR(ENOMEM);
> +                break;
> +            }
> +        }
> +
> +    }
> +    ctx->startv_time = pts;
> +    ctx->erase_display_memory = 1;
> +    ctx->end_time = pts;
> +    return ret;
> +}
[...]
> +static const AVOption options[] = {
> +    {NULL}
> +};

What's the point of this?

> +static const AVClass ccaption_dec_class = {
> +    .class_name = "Closed caption Decoder",
> +    .item_name  = av_default_item_name,
> +    .option     = options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +AVCodec ff_ccaption_decoder = {
> +    .name           = "cc_dec",
> +    .long_name      = NULL_IF_CONFIG_SMALL("Closed Caption (EIA-608 / CEA-708) Decoder"),
> +    .type           = AVMEDIA_TYPE_SUBTITLE,
> +    .id             = AV_CODEC_ID_EIA_608,
> +    .priv_data_size = sizeof(CCaptionSubContext),
> +    .init           = init_decoder,
> +    .close          = close_decoder,
> +    .decode         = decode,
> +    .priv_class     = &ccaption_dec_class,
> +};
> 

-- 
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20150112/383dd586/attachment.asc>


More information about the ffmpeg-devel mailing list