[FFmpeg-devel] [PATCH] avformat: Implement subtitle charenc guessing
Rodger Combs
rodger.combs at gmail.com
Fri Dec 12 07:05:27 CET 2014
This also moves general charenc conversion from avcodec to avformat;
the version in avcodec is left, but renamed; I'm not sure if that's
the optimal solution.
The documentation could probably use some improvements, and a few more
options could be added to ENCA.
This very simply prefers libguess over ENCA, and ENCA over uchardet, but
will fall back on a less-preferred guess if something decodes wrong, and will
drop illegal sequences in iconv if all else fails.
It'd be possible to have ffmpeg.c present a UI if multiple guesses are
returned, and other library consumers could do the same.
---
configure | 15 +++
libavcodec/options_table.h | 2 +-
libavformat/aqtitledec.c | 2 +
libavformat/assdec.c | 2 +
libavformat/avformat.h | 50 +++++++++
libavformat/jacosubdec.c | 2 +
libavformat/microdvddec.c | 2 +
libavformat/mpl2dec.c | 2 +
libavformat/mpsubdec.c | 2 +
libavformat/options_table.h | 7 ++
libavformat/pjsdec.c | 2 +
libavformat/realtextdec.c | 2 +
libavformat/samidec.c | 2 +
libavformat/srtdec.c | 2 +
libavformat/stldec.c | 2 +
libavformat/subtitles.c | 262 +++++++++++++++++++++++++++++++++++++++++++-
libavformat/subtitles.h | 1 +
libavformat/subviewer1dec.c | 2 +
libavformat/subviewerdec.c | 2 +
libavformat/utils.c | 2 +
libavformat/vplayerdec.c | 2 +
libavformat/webvttdec.c | 2 +
22 files changed, 365 insertions(+), 4 deletions(-)
diff --git a/configure b/configure
index e2e3619..a5a9f9b 100755
--- a/configure
+++ b/configure
@@ -199,6 +199,9 @@ External library support:
--enable-gnutls enable gnutls, needed for https support
if openssl is not used [no]
--disable-iconv disable iconv [autodetect]
+ --disable-libguess disable libguess [autodetect]
+ --disable-uchardet disable universalchardet [autodetect]
+ --enable-enca enable enca [no]
--enable-ladspa enable LADSPA audio filtering [no]
--enable-libaacplus enable AAC+ encoding via libaacplus [no]
--enable-libass enable libass subtitles rendering,
@@ -1342,6 +1345,9 @@ EXTERNAL_LIBRARY_LIST="
frei0r
gnutls
iconv
+ libguess
+ uchardet
+ enca
ladspa
libaacplus
libass
@@ -4358,6 +4364,7 @@ die_license_disabled gpl libxavs
die_license_disabled gpl libxvid
die_license_disabled gpl libzvbi
die_license_disabled gpl x11grab
+die_license_disabled gpl enca
die_license_disabled nonfree libaacplus
die_license_disabled nonfree libfaac
@@ -5117,6 +5124,14 @@ enabled vdpau && enabled xlib &&
# Funny iconv installations are not unusual, so check it after all flags have been set
disabled iconv || check_func_headers iconv.h iconv || check_lib2 iconv.h iconv -liconv || disable iconv
+# The charset detectors are only useful when iconv can do the actual recoding,
+# so force them off when iconv is unavailable. Note that "a || b || c && d" is
+# parsed as "((a || b) || c) && d" by the shell, hence the explicit grouping.
+if disabled iconv; then
+    disable libguess
+    disable uchardet
+fi
+# Autodetect: start from "disabled" and only enable when pkg-config finds it.
+disabled libguess || { disable libguess;
+    check_pkg_config libguess libguess.h libguess_determine_encoding && require_pkg_config libguess libguess.h libguess_determine_encoding && enable libguess;
+}
+disabled uchardet || { disable uchardet;
+    check_pkg_config uchardet uchardet.h uchardet_new && require_pkg_config uchardet uchardet.h uchardet_new && enable uchardet;
+}
+# enca is opt-in: only probe for it (and only fail) when it was requested.
+enabled enca && { check_func_headers enca.h enca_analyse || check_lib2 enca.h enca_analyse -lenca || die "ERROR: enca not found"; }
+
enabled debug && add_cflags -g"$debuglevel" && add_asflags -g"$debuglevel"
# add some useful compiler flags if supported
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 1d5b078..93b3105 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -472,7 +472,7 @@ static const AVOption avcodec_options[] = {
{"ka", "Karaoke", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, INT_MAX, A|D, "request_sample_fmt"},
{"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
-{"sub_charenc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D},
+{"sub_charenc_lavc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D},
{"sub_charenc_mode", "set input text subtitles character encoding mode", OFFSET(sub_charenc_mode), AV_OPT_TYPE_FLAGS, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, -1, INT_MAX, S|D, "sub_charenc_mode"},
{"do_nothing", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
{"auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
diff --git a/libavformat/aqtitledec.c b/libavformat/aqtitledec.c
index 9508766..65aa3e4 100644
--- a/libavformat/aqtitledec.c
+++ b/libavformat/aqtitledec.c
@@ -55,6 +55,8 @@ static int aqt_read_header(AVFormatContext *s)
int64_t pos = 0, frame = AV_NOPTS_VALUE;
AVPacket *sub = NULL;
+ aqt->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, aqt->frame_rate.den, aqt->frame_rate.num);
diff --git a/libavformat/assdec.c b/libavformat/assdec.c
index c62e76f..958792b 100644
--- a/libavformat/assdec.c
+++ b/libavformat/assdec.c
@@ -114,6 +114,8 @@ static int ass_read_header(AVFormatContext *s)
FFTextReader tr;
ff_text_init_avio(s, &tr, s->pb);
+ ass->q.avctx = s;
+
st = avformat_new_stream(s, NULL);
if (!st)
return AVERROR(ENOMEM);
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 2e54ed1..8c5fa7e 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1755,6 +1755,56 @@ typedef struct AVFormatContext {
* - demuxing: Set by user.
*/
uint8_t *dump_separator;
+
+ /**
+ * Character encoding of a subtitle file
+ * - encoding: unused
+ * - decoding: Set by user via AVOptions; may be changed after initialization
+ */
+ char *sub_charenc;
+
+ /**
+ * Array of guesses for the character encoding
+ * - encoding: unused
+ * - decoding: Set by demuxer
+ */
+ int nb_sub_charenc_guesses;
+ char **sub_charenc_guesses;
+
+ /**
+ * Language to pass to libguess for charenc detection.
+ * - encoding: unused
+ * - decoding: Set by user via AVOptions (NO direct access)
+ */
+ char *libguess_language;
+
+ /**
+ * Language to pass to libenca for charenc detection.
+ * - encoding: unused
+ * - decoding: Set by user via AVOptions (NO direct access)
+ */
+ char *enca_language;
+
+ /**
+ * Threshold parameter for libenca charenc detection.
+ * - encoding: unused
+ * - decoding: Set by user via AVOptions (NO direct access)
+ */
+ double enca_threshold;
+
+ /**
+ * Whether or not to check for multibyte charsets in libenca.
+ * - encoding: unused
+ * - decoding: Set by user via AVOptions (NO direct access)
+ */
+ int enca_multibyte;
+
+ /**
+ * Whether or not to let libenca return an ambiguous result.
+ * - encoding: unused
+ * - decoding: Set by user via AVOptions (NO direct access)
+ */
+ int enca_ambiguity;
} AVFormatContext;
int av_format_get_probe_score(const AVFormatContext *s);
diff --git a/libavformat/jacosubdec.c b/libavformat/jacosubdec.c
index 1ca0055..fa332fa 100644
--- a/libavformat/jacosubdec.c
+++ b/libavformat/jacosubdec.c
@@ -170,6 +170,8 @@ static int jacosub_read_header(AVFormatContext *s)
st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
st->codec->codec_id = AV_CODEC_ID_JACOSUB;
+ jacosub->q.avctx = s;
+
jacosub->timeres = 30;
av_bprint_init(&header, 1024+FF_INPUT_BUFFER_PADDING_SIZE, 4096);
diff --git a/libavformat/microdvddec.c b/libavformat/microdvddec.c
index ce3433c..5c3b48c 100644
--- a/libavformat/microdvddec.c
+++ b/libavformat/microdvddec.c
@@ -85,6 +85,8 @@ static int microdvd_read_header(AVFormatContext *s)
char line_buf[MAX_LINESIZE];
int has_real_fps = 0;
+ microdvd->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
diff --git a/libavformat/mpl2dec.c b/libavformat/mpl2dec.c
index 260b7be..fa431c3 100644
--- a/libavformat/mpl2dec.c
+++ b/libavformat/mpl2dec.c
@@ -77,6 +77,8 @@ static int mpl2_read_header(AVFormatContext *s)
AVStream *st = avformat_new_stream(s, NULL);
int res = 0;
+ mpl2->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 10);
diff --git a/libavformat/mpsubdec.c b/libavformat/mpsubdec.c
index eddc594..7bb08f9 100644
--- a/libavformat/mpsubdec.c
+++ b/libavformat/mpsubdec.c
@@ -61,6 +61,8 @@ static int mpsub_read_header(AVFormatContext *s)
float multiplier = 100.0;
float current_pts = 0;
+ mpsub->q.avctx = s;
+
av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
while (!avio_feof(s->pb)) {
diff --git a/libavformat/options_table.h b/libavformat/options_table.h
index 40f1e0a..741bfb2 100644
--- a/libavformat/options_table.h
+++ b/libavformat/options_table.h
@@ -22,6 +22,7 @@
#define AVFORMAT_OPTIONS_TABLE_H
#include <limits.h>
+#include <float.h> /* DBL_MAX */
#include "libavutil/opt.h"
#include "avformat.h"
@@ -99,6 +100,12 @@ static const AVOption avformat_options[] = {
{"dump_separator", "set information dump field separator", OFFSET(dump_separator), AV_OPT_TYPE_STRING, {.str = ", "}, CHAR_MIN, CHAR_MAX, D|E},
{"codec_whitelist", "List of decoders that are allowed to be used", OFFSET(codec_whitelist), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, D },
{"format_whitelist", "List of demuxers that are allowed to be used", OFFSET(format_whitelist), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, D },
+{"sub_charenc", "subtitle character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, D },
+{"libguess_language", "Language parameter for libguess charenc detection", OFFSET(libguess_language), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, D },
+{"enca_language", "Language parameter for enca charenc detection", OFFSET(enca_language), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, D },
+{"enca_threshold", "Threshold parameter for enca charenc detection", OFFSET(enca_threshold), AV_OPT_TYPE_DOUBLE, { .dbl = 1.38 }, 1.0, DBL_MAX, D },
+{"enca_multibyte", "Whether or not to allow enca to guess multibyte charsets", OFFSET(enca_multibyte), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, D },
+{"enca_ambiguity", "Whether or not to allow enca to return ambiguous results", OFFSET(enca_ambiguity), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, D },
{NULL},
};
diff --git a/libavformat/pjsdec.c b/libavformat/pjsdec.c
index 5129b70..252e9d9 100644
--- a/libavformat/pjsdec.c
+++ b/libavformat/pjsdec.c
@@ -67,6 +67,8 @@ static int pjs_read_header(AVFormatContext *s)
AVStream *st = avformat_new_stream(s, NULL);
int res = 0;
+ pjs->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 10);
diff --git a/libavformat/realtextdec.c b/libavformat/realtextdec.c
index fff85d6..d20f0c5 100644
--- a/libavformat/realtextdec.c
+++ b/libavformat/realtextdec.c
@@ -67,6 +67,8 @@ static int realtext_read_header(AVFormatContext *s)
FFTextReader tr;
ff_text_init_avio(s, &tr, s->pb);
+ rt->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 100);
diff --git a/libavformat/samidec.c b/libavformat/samidec.c
index 948e1ed..968f506 100644
--- a/libavformat/samidec.c
+++ b/libavformat/samidec.c
@@ -56,6 +56,8 @@ static int sami_read_header(AVFormatContext *s)
FFTextReader tr;
ff_text_init_avio(s, &tr, s->pb);
+ sami->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 1000);
diff --git a/libavformat/srtdec.c b/libavformat/srtdec.c
index b35e50f..3187490 100644
--- a/libavformat/srtdec.c
+++ b/libavformat/srtdec.c
@@ -89,6 +89,8 @@ static int srt_read_header(AVFormatContext *s)
FFTextReader tr;
ff_text_init_avio(s, &tr, s->pb);
+ srt->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 1000);
diff --git a/libavformat/stldec.c b/libavformat/stldec.c
index b84c7e9..5d96737 100644
--- a/libavformat/stldec.c
+++ b/libavformat/stldec.c
@@ -74,6 +74,8 @@ static int stl_read_header(AVFormatContext *s)
STLContext *stl = s->priv_data;
AVStream *st = avformat_new_stream(s, NULL);
+ stl->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 100);
diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c
index 67624fc..e953080 100644
--- a/libavformat/subtitles.c
+++ b/libavformat/subtitles.c
@@ -21,9 +21,23 @@
#include "avformat.h"
#include "subtitles.h"
#include "avio_internal.h"
+#include "internal.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
+#if CONFIG_ICONV
+# include <iconv.h>
+#endif
+#if CONFIG_LIBGUESS
+# include <libguess.h>
+#endif
+#if CONFIG_ENCA
+# include <enca.h>
+#endif
+#if CONFIG_UCHARDET
+# include <uchardet.h>
+#endif
+
void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb)
{
int i;
@@ -166,26 +180,268 @@ static int cmp_pkt_sub_pos_ts(const void *a, const void *b)
return s1->pos > s2->pos ? 1 : -1;
}
+/**
+ * Add a character encoding guess to an AVFormatContext's list
+ *
+ * @param avctx the context to add to
+ * @param enc the encoding name to add
+ *
+ * A copy is added, so the original string should be free()d if necessary.
+ * If the same encoding name is already present, it isn't added again.
+ * If NULL or an empty string is passed, it's not added.
+ * On allocation failure the list is left exactly as it was.
+ */
+static void add_charenc(AVFormatContext *avctx, const char *enc)
+{
+    char *copy;
+    int i;
+
+    if (!enc || !enc[0])
+        return;
+
+    for (i = 0; i < avctx->nb_sub_charenc_guesses; i++)
+        if (!strcmp(avctx->sub_charenc_guesses[i], enc))
+            return;
+
+    copy = av_strdup(enc);
+    if (!copy)
+        return;
+
+    // Use the _nofree variant: on failure it keeps the existing array (and
+    // thus the strings it owns) intact, so only the new copy has to be
+    // released here.
+    if (av_dynarray_add_nofree(&avctx->sub_charenc_guesses,
+                               &avctx->nb_sub_charenc_guesses, copy) < 0)
+        av_free(copy);
+}
+
+/**
+ * Finish an FFDemuxSubtitlesQueue and prepare it for reading
+ *
+ * @param q the queue to finish
+ *
+ * This sorts packets by position and/or timestamp and adjusts durations for
+ * formats that don't set them.
+ * If the queue's AVFormatContext has sub_charenc set to "auto", it also
+ * concatenates the payload of every event into a scratch buffer, runs each
+ * available charenc detector on it (libguess, then ENCA, then uchardet),
+ * collects the results in sub_charenc_guesses, and replaces sub_charenc with
+ * the first guess (or leaves it NULL when no detector produced one).
+ */
 void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q)
 {
     int i;
+    char *charenc_buf = NULL;
+    unsigned int charenc_buf_size = 0; // av_fast_realloc() takes unsigned *
+    int charenc_buf_len = 0;
+    AVFormatContext *avctx = q->avctx;
+    // Whether or not we're doing charenc detection here
+    int detection = avctx && avctx->sub_charenc &&
+                    !strcmp(avctx->sub_charenc, "auto");
     qsort(q->subs, q->nb_subs, sizeof(*q->subs),
           q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos
                                      : cmp_pkt_sub_pos_ts);
-    for (i = 0; i < q->nb_subs; i++)
+    for (i = 0; i < q->nb_subs; i++) {
         if (q->subs[i].duration == -1 && i < q->nb_subs - 1)
             q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts;
+
+        // Append this event to the detection buffer; events that are empty,
+        // would overflow the length counter, or can't be allocated for are
+        // simply left out of the sample.
+        if (detection && q->subs[i].size > 0 &&
+            q->subs[i].size <= INT_MAX - charenc_buf_len) {
+            char *newbuf = av_fast_realloc(charenc_buf, &charenc_buf_size,
+                                           charenc_buf_len + q->subs[i].size);
+            if (!newbuf)
+                continue;
+
+            charenc_buf = newbuf;
+
+            memcpy(charenc_buf + charenc_buf_len, q->subs[i].data,
+                   q->subs[i].size);
+            charenc_buf_len += q->subs[i].size;
+        }
+    }
+
+    if (detection) {
+        // Only consult the detectors when there is something to analyse.
+        if (charenc_buf_len > 0) {
+#if CONFIG_LIBGUESS
+            if (avctx->libguess_language) {
+                const char *enc =
+                    libguess_determine_encoding(charenc_buf,
+                                                charenc_buf_len,
+                                                avctx->libguess_language);
+                av_log(avctx, AV_LOG_INFO, "libguess selected: %s\n",
+                       enc ? enc : "(none)");
+                add_charenc(avctx, enc);
+            }
+#endif
+#if CONFIG_ENCA
+            if (avctx->enca_language) {
+                EncaAnalyser an = enca_analyser_alloc(avctx->enca_language);
+                if (an) {
+                    EncaEncoding enc;
+                    const char *str;
+                    enca_set_threshold(an, avctx->enca_threshold);
+                    enca_set_multibyte(an, avctx->enca_multibyte);
+                    enca_set_ambiguity(an, avctx->enca_ambiguity);
+                    enca_set_garbage_test(an, 1);
+
+                    enc = enca_analyse_const(an,
+                                             (const unsigned char *)charenc_buf,
+                                             charenc_buf_len);
+
+                    str = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);
+                    av_log(avctx, AV_LOG_INFO, "ENCA selected: %s\n",
+                           str ? str : "(none)");
+                    if (enca_charset_is_known(enc.charset))
+                        add_charenc(avctx, str);
+
+                    enca_analyser_free(an);
+                } else {
+                    av_log(avctx, AV_LOG_ERROR, "ENCA allocation failed\n");
+                }
+            }
+#endif
+#if CONFIG_UCHARDET
+            {
+                uchardet_t det = uchardet_new();
+                if (det) {
+                    const char *enc;
+                    uchardet_handle_data(det, charenc_buf, charenc_buf_len);
+                    uchardet_data_end(det);
+                    enc = uchardet_get_charset(det);
+                    av_log(avctx, AV_LOG_INFO, "uchardet selected: %s\n",
+                           enc ? enc : "(none)");
+                    // Copy before uchardet_delete(): add_charenc() duplicates
+                    // the string, so the detector may be destroyed afterwards.
+                    add_charenc(avctx, enc);
+                    uchardet_delete(det);
+                }
+            }
+#endif
+        }
+
+        // Replace the "auto" placeholder with the preferred guess, if any.
+        av_freep(&avctx->sub_charenc);
+
+        if (avctx->nb_sub_charenc_guesses)
+            avctx->sub_charenc = av_strdup(avctx->sub_charenc_guesses[0]);
+    }
+
+    // The concatenated sample is only needed by the detectors.
+    av_free(charenc_buf);
+}
+
+#define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
+/**
+ * Convert an AVPacket from one character encoding to UTF-8, using the
+ * selected encoding from an AVFormatContext and falling back on other encoding
+ * guesses if necessary.
+ *
+ * @param avctx the AVFormatContext whose character encodings we'll use
+ * @param outpkt the AVPacket to write to
+ * @param inpkt the AVPacket to read from
+ * @return 0 on success, a negative AVERROR code on failure; on failure
+ *         outpkt owns no data
+ *
+ * If avctx is NULL, no sub_charenc is set, or the packet is empty, outpkt is
+ * a plain copy of inpkt.
+ * Otherwise this first tries the AVFormatContext's sub_charenc, then falls
+ * back on its sub_charenc_guesses. If none decodes successfully, it tries
+ * sub_charenc again, but instructs iconv to keep chugging on illegal
+ * sequences (only where the iconv implementation provides iconvctl()).
+ * If the packet is successfully recoded with an encoding other than the
+ * sub_charenc, then sub_charenc is changed to the working encoding.
+ */
+static int recode_subtitle(AVFormatContext *avctx,
+                           AVPacket *outpkt, const AVPacket *inpkt)
+{
+#if CONFIG_ICONV
+    AVPacket tmp;
+    int ret;
+    int i;
+#endif
+
+    // Start from a full copy so that timestamps, flags and side data are
+    // carried over; only the payload is swapped if a conversion succeeds.
+    if (av_copy_packet(outpkt, inpkt))
+        return AVERROR(ENOMEM);
+
+    if (!avctx || !avctx->sub_charenc || inpkt->size == 0)
+        return 0;
+
+#if CONFIG_ICONV
+    if (inpkt->size >= INT_MAX / UTF8_MAX_BYTES - FF_INPUT_BUFFER_PADDING_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n");
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    // Allocate a dummy packet that holds the conversion output
+    ret = av_new_packet(&tmp, inpkt->size * UTF8_MAX_BYTES);
+    if (ret < 0)
+        goto fail;
+    ret = AVERROR(EINVAL);
+
+    for (i = -1; i <= avctx->nb_sub_charenc_guesses; i++) {
+        // If this is our last attempt, skip illegal sequences.
+        int discard_illegal = (i == avctx->nb_sub_charenc_guesses);
+        // The encoding we're going to try. We use sub_charenc first, then try
+        // our array of guesses, then try sub_charenc again with illegal
+        // sequences discarded.
+        const char *encoding = (i == -1 || discard_illegal)
+                             ? avctx->sub_charenc
+                             : avctx->sub_charenc_guesses[i];
+        // iconv() advances all four of these, so every attempt must start
+        // from a fresh set or it would resume in the middle of the packet.
+        char  *inb  = (char *)inpkt->data;
+        size_t inl  = inpkt->size;
+        char  *outb = (char *)tmp.data;
+        size_t outl = tmp.size;
+        iconv_t cd  = iconv_open("UTF-8", encoding);
+
+        if (cd == (iconv_t)-1) {
+            av_log(avctx, AV_LOG_WARNING, "Invalid character encoding: %s\n",
+                   encoding);
+            ret = AVERROR(EINVAL);
+            continue;
+        }
+
+#ifdef ICONV_SET_DISCARD_ILSEQ
+        iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard_illegal);
+#else
+        // iconvctl() is a GNU libiconv extension. Without it the lenient
+        // pass would just repeat the strict first attempt, so skip it.
+        if (discard_illegal) {
+            iconv_close(cd);
+            break;
+        }
+#endif
+
+        // Try to run a conversion.
+        if (iconv(cd, &inb, &inl, &outb, &outl) != (size_t)-1 &&
+            iconv(cd, NULL, NULL, &outb, &outl) != (size_t)-1 &&
+            outl < (size_t)tmp.size && inl == 0) {
+            // Success, save the new encoding and get out.
+            if (discard_illegal) {
+                av_log(avctx, AV_LOG_WARNING, "Needed to discard illegal "
+                       "sequences while recoding subtitle event \"%s\" from %s "
+                       "to UTF-8\n", inpkt->data, avctx->sub_charenc);
+            } else if (i >= 0) {
+                char *working = av_strdup(encoding);
+                av_log(avctx, AV_LOG_INFO, "Switching character encoding "
+                       "from %s to %s\n", avctx->sub_charenc, encoding);
+                // Keep the old setting if the copy can't be allocated, so
+                // later packets are still recoded.
+                if (working) {
+                    av_free(avctx->sub_charenc);
+                    avctx->sub_charenc = working;
+                }
+            }
+            iconv_close(cd);
+
+            // Remove and zero extra buffer space that iconv didn't end up using
+            tmp.size -= outl;
+            memset(tmp.data + tmp.size, 0, outl);
+
+            // Drop the copied payload and hand the converted one to outpkt.
+            av_buffer_unref(&outpkt->buf);
+            outpkt->buf  = tmp.buf;
+            outpkt->data = tmp.data;
+            outpkt->size = tmp.size;
+            return 0;
+        }
+
+        ret = FFMIN(AVERROR(errno), -1);
+        iconv_close(cd);
+    }
+
+    av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
+           "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc);
+    av_free_packet(&tmp);
+
+fail:
+    // Don't hand a half-built packet back to the caller.
+    av_free_packet(outpkt);
+    return ret;
+#else
+    av_log(avctx, AV_LOG_ERROR, "requesting subtitles recoding without iconv\n");
+    av_free_packet(outpkt);
+    return AVERROR(EINVAL);
+#endif
 }
int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt)
{
AVPacket *sub = q->subs + q->current_sub_idx;
+ int ret;
if (q->current_sub_idx == q->nb_subs)
return AVERROR_EOF;
- if (av_copy_packet(pkt, sub) < 0) {
- return AVERROR(ENOMEM);
+ if ((ret = recode_subtitle(q->avctx, pkt, sub)) < 0) {
+ return ret;
}
pkt->dts = pkt->pts;
diff --git a/libavformat/subtitles.h b/libavformat/subtitles.h
index eb719ea..69ced11 100644
--- a/libavformat/subtitles.h
+++ b/libavformat/subtitles.h
@@ -100,6 +100,7 @@ int ff_text_peek_r8(FFTextReader *r);
void ff_text_read(FFTextReader *r, char *buf, size_t size);
typedef struct {
+ AVFormatContext *avctx; ///< AVFormat context; used for charenc parameters
AVPacket *subs; ///< array of subtitles packets
int nb_subs; ///< number of subtitles packets
int allocated_size; ///< allocated size for subs
diff --git a/libavformat/subviewer1dec.c b/libavformat/subviewer1dec.c
index 6b38533..35303ce 100644
--- a/libavformat/subviewer1dec.c
+++ b/libavformat/subviewer1dec.c
@@ -47,6 +47,8 @@ static int subviewer1_read_header(AVFormatContext *s)
SubViewer1Context *subviewer1 = s->priv_data;
AVStream *st = avformat_new_stream(s, NULL);
+ subviewer1->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 1);
diff --git a/libavformat/subviewerdec.c b/libavformat/subviewerdec.c
index f1b0fdf..1197a0c 100644
--- a/libavformat/subviewerdec.c
+++ b/libavformat/subviewerdec.c
@@ -76,6 +76,8 @@ static int subviewer_read_header(AVFormatContext *s)
int duration = -1;
AVPacket *sub = NULL;
+ subviewer->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 100);
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 5a2a72d..052b2fc 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -3597,6 +3597,8 @@ void avformat_free_context(AVFormatContext *s)
av_dict_free(&s->metadata);
av_freep(&s->streams);
av_freep(&s->internal);
+ while (s->nb_sub_charenc_guesses--)
+ av_freep(&s->sub_charenc_guesses[s->nb_sub_charenc_guesses]);
flush_packet_queue(s);
av_free(s);
}
diff --git a/libavformat/vplayerdec.c b/libavformat/vplayerdec.c
index 619ccfd..7cd3363 100644
--- a/libavformat/vplayerdec.c
+++ b/libavformat/vplayerdec.c
@@ -59,6 +59,8 @@ static int vplayer_read_header(AVFormatContext *s)
VPlayerContext *vplayer = s->priv_data;
AVStream *st = avformat_new_stream(s, NULL);
+ vplayer->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 100);
diff --git a/libavformat/webvttdec.c b/libavformat/webvttdec.c
index e457e8f..4d82cca 100644
--- a/libavformat/webvttdec.c
+++ b/libavformat/webvttdec.c
@@ -64,6 +64,8 @@ static int webvtt_read_header(AVFormatContext *s)
int res = 0;
AVStream *st = avformat_new_stream(s, NULL);
+ webvtt->q.avctx = s;
+
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 64, 1, 1000);
--
1.9.1
More information about the ffmpeg-devel
mailing list