[FFmpeg-devel] [PATCH v6 1/4] avformat/rcwtdec: add RCWT Closed Captions demuxer

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Wed Mar 20 16:14:49 EET 2024


Marth64:
> Raw Captions With Time (RCWT) is a format native to ccextractor, a commonly
> used open source tool for processing 608/708 Closed Captions (CC) sources.
> RCWT can be used to archive the original CC bitstream. The muxer was added
> in January 2024. In this commit, add the demuxer.
> 
> One can now demux RCWT files for rendering in ccaption_dec or interoperate
> with ccextractor (which produces RCWT). Using the muxer/demuxer combination,
> the CC bits can be kept for further processing or rendering with either tool.
> This can be an effective approach to backup original CC presentations.
> 
> Prior to this, the next best solution was FFmpeg's SCC muxer, but SCC itself
> is not compatible with ccextractor (which is a de facto OSS CC processing tool)
> and it is a proprietary format.
> 
> Tests will follow.
> 
> Signed-off-by: Marth64 <marth64 at proxyid.net>
> ---
>  Changelog                |   2 +-
>  libavformat/Makefile     |   1 +
>  libavformat/allformats.c |   1 +
>  libavformat/rcwtdec.c    | 150 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 153 insertions(+), 1 deletion(-)
>  create mode 100644 libavformat/rcwtdec.c
> 
> diff --git a/Changelog b/Changelog
> index e3ca52430c..abe5f79b85 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -19,7 +19,7 @@ version <next>:
>  - lavu/eval: introduce randomi() function in expressions
>  - VVC decoder
>  - fsync filter
> -- Raw Captions with Time (RCWT) closed caption muxer
> +- RCWT (Raw Captions with Time) Closed Captions muxer and demuxer
>  - ffmpeg CLI -bsf option may now be used for input as well as output
>  - ffmpeg CLI options may now be used as -/opt <path>, which is equivalent
>    to -opt <contents of file <path>>
> diff --git a/libavformat/Makefile b/libavformat/Makefile
> index 94a949f555..a6de720d8c 100644
> --- a/libavformat/Makefile
> +++ b/libavformat/Makefile
> @@ -493,6 +493,7 @@ OBJS-$(CONFIG_QOA_DEMUXER)               += qoadec.o
>  OBJS-$(CONFIG_R3D_DEMUXER)               += r3d.o
>  OBJS-$(CONFIG_RAWVIDEO_DEMUXER)          += rawvideodec.o
>  OBJS-$(CONFIG_RAWVIDEO_MUXER)            += rawenc.o
> +OBJS-$(CONFIG_RCWT_DEMUXER)              += rcwtdec.o subtitles.o
>  OBJS-$(CONFIG_RCWT_MUXER)                += rcwtenc.o subtitles.o
>  OBJS-$(CONFIG_REALTEXT_DEMUXER)          += realtextdec.o subtitles.o
>  OBJS-$(CONFIG_REDSPARK_DEMUXER)          += redspark.o
> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> index e15d0fa6d7..3140018f8d 100644
> --- a/libavformat/allformats.c
> +++ b/libavformat/allformats.c
> @@ -391,6 +391,7 @@ extern const FFInputFormat  ff_qoa_demuxer;
>  extern const FFInputFormat  ff_r3d_demuxer;
>  extern const FFInputFormat  ff_rawvideo_demuxer;
>  extern const FFOutputFormat ff_rawvideo_muxer;
> +extern const FFInputFormat  ff_rcwt_demuxer;
>  extern const FFOutputFormat ff_rcwt_muxer;
>  extern const FFInputFormat  ff_realtext_demuxer;
>  extern const FFInputFormat  ff_redspark_demuxer;
> diff --git a/libavformat/rcwtdec.c b/libavformat/rcwtdec.c
> new file mode 100644
> index 0000000000..0d9eba025a
> --- /dev/null
> +++ b/libavformat/rcwtdec.c
> @@ -0,0 +1,150 @@
> +/*
> + * RCWT (Raw Captions With Time) demuxer
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/*
> + * RCWT (Raw Captions With Time) is a format native to ccextractor, a commonly
> + * used open source tool for processing 608/708 Closed Captions (CC) sources.
> + *
> + * This demuxer implements the specification as of March 2024, which has
> + * been stable and unchanged since April 2014.
> + *
> + * A free specification of RCWT can be found here:
> + * @url{https://github.com/CCExtractor/ccextractor/blob/master/docs/BINARY_FILE_FORMAT.TXT}
> + */
> +
> +#include "avformat.h"
> +#include "demux.h"
> +#include "internal.h"
> +#include "subtitles.h"
> +#include "libavutil/intreadwrite.h"
> +
> +#define RCWT_CLUSTER_MAX_BLOCKS             65535
> +#define RCWT_BLOCK_SIZE                     3
> +#define RCWT_HEADER_SIZE                    11
> +
> +typedef struct RCWTContext {
> +    FFDemuxSubtitlesQueue q;
> +} RCWTContext;
> +
> +static int rcwt_read_header(AVFormatContext *avf)
> +{
> +    RCWTContext *rcwt = avf->priv_data;
> +
> +    AVPacket      *sub = NULL;
> +    AVStream      *st;
> +    uint8_t       header[RCWT_HEADER_SIZE] = {0};

Pointless zeroing.

> +    int           nb_bytes = 0;
> +
> +    /* validate the header */
> +    nb_bytes = avio_read(avf->pb, header, RCWT_HEADER_SIZE);

Better to use ffio_read_size() here; it already treats a short read as an error.

> +    if (nb_bytes != RCWT_HEADER_SIZE) {
> +        av_log(avf, AV_LOG_ERROR, "Header does not have the expected size "
> +                                  "(expected=%d actual=%d)\n",
> +                                  RCWT_HEADER_SIZE, nb_bytes);

Pointless log message.

> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    if (AV_RB16(header + 6) != 0x0001) {
> +        av_log(avf, AV_LOG_ERROR, "RCWT format version is not compatible "
> +                                  "(only version 0.001 is known)\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    av_log(avf, AV_LOG_DEBUG, "RCWT writer application: %02X version: %02x\n",
> +                              header[3], header[5]);
> +
> +    /* setup AVStream */
> +    st = avformat_new_stream(avf, NULL);
> +    if (!st)
> +        return AVERROR(ENOMEM);
> +
> +    st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
> +    st->codecpar->codec_id   = AV_CODEC_ID_EIA_608;
> +
> +    avpriv_set_pts_info(st, 64, 1, 1000);
> +
> +    /* demux */
> +    while (!avio_feof(avf->pb)) {
> +        int64_t       cluster_pts       = AV_NOPTS_VALUE;
> +        int           cluster_nb_blocks = 0;
> +        int           cluster_size      = 0;
> +        uint8_t       *cluster_buf;
> +
> +        cluster_pts       = avio_rl64(avf->pb);
> +        cluster_nb_blocks = avio_rl16(avf->pb);

Why are you initializing these variables with values that are
immediately overwritten?

> +        if (cluster_nb_blocks == 0)
> +            continue;
> +
> +        cluster_size      = cluster_nb_blocks * RCWT_BLOCK_SIZE;
> +        cluster_buf       = av_malloc(cluster_size);
> +        if (!cluster_buf)
> +            return AVERROR(ENOMEM);
> +
> +        nb_bytes          = avio_read(avf->pb, cluster_buf, cluster_size);
> +        if (nb_bytes < 0)
> +            return nb_bytes;

Leak of cluster_buf

> +
> +        if (nb_bytes != cluster_size) {
> +            av_freep(&cluster_buf);
> +            av_log(avf, AV_LOG_ERROR, "Cluster does not have the expected size "
> +                                      "(expected=%d actual=%d pos=%ld)\n",

Wrong printf specifier: avio_tell() returns int64_t, so this needs "%"PRId64 rather than "%ld".

> +                                      cluster_size, nb_bytes, avio_tell(avf->pb));

Also: pointless log message.

> +            return AVERROR_INVALIDDATA;
> +        }
> +
> +        sub = ff_subtitles_queue_insert(&rcwt->q, cluster_buf, cluster_size, 0);
> +        if (!sub) {
> +            av_freep(&cluster_buf);
> +            return AVERROR(ENOMEM);
> +        }
> +
> +        sub->pos = avio_tell(avf->pb);

This will use the position after the data, not the position before the data.

> +        sub->pts = cluster_pts;
> +
> +        av_freep(&cluster_buf);
> +        cluster_buf = NULL;

This is already done by av_freep; also: your whole approach has an
unnecessary alloc+memcpy per packet. It would be better if you extended
ff_subtitles_queue_insert() to allow inserting NULL buffers with size
zero and then read the packet via av_get_packet(). This would also avoid
having this cluster_buf pointer which needs to be manually freed on
every error path.

> +    }
> +
> +    ff_subtitles_queue_finalize(avf, &rcwt->q);
> +
> +    return 0;
> +}
> +
> +static int rcwt_probe(const AVProbeData *p)
> +{
> +    return p->buf_size > RCWT_HEADER_SIZE   &&
> +           AV_RB16(p->buf) == 0xCCCC        &&
> +           AV_RB8(p->buf + 2) == 0xED       &&
> +           AV_RB16(p->buf + 6) == 0x0001    ? 50 : 0;
> +}
> +
> +const FFInputFormat ff_rcwt_demuxer = {
> +    .p.name         = "rcwt",
> +    .p.long_name    = NULL_IF_CONFIG_SMALL("RCWT (Raw Captions With Time)"),
> +    .p.extensions   = "bin",
> +    .p.flags        = AVFMT_TS_DISCONT,
> +    .priv_data_size = sizeof(RCWTContext),
> +    .flags_internal = FF_FMT_INIT_CLEANUP,
> +    .read_probe     = rcwt_probe,
> +    .read_header    = rcwt_read_header,
> +    .read_packet    = ff_subtitles_read_packet,
> +    .read_seek2     = ff_subtitles_read_seek,
> +    .read_close     = ff_subtitles_read_close
> +};



More information about the ffmpeg-devel mailing list