[FFmpeg-devel] [PATCH 1/1] libavcodec/movtextdec.c: fixing decoding for UTF-8 (ticket 6021)
Erik BrĂ¥then Solem
erikbsolem at hotmail.com
Sat Dec 17 05:39:02 EET 2016
Character offsets were interpreted as byte offsets, resulting in misplaced
styling tags where multibyte characters were involved. The entire subtitle
stream would even be rendered invalid if such a misplaced tag happened to
split a multibyte character. This patch fixes this for UTF-8; UTF-16 was and
still is broken. These are the only supported encodings according to the spec.
---
libavcodec/movtextdec.c | 95 +++++++++++++++++++++++++++----------------------
1 file changed, 53 insertions(+), 42 deletions(-)
diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c
index 7b5b161..6e1ff73 100644
--- a/libavcodec/movtextdec.c
+++ b/libavcodec/movtextdec.c
@@ -328,6 +328,7 @@ static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
int i = 0;
int j = 0;
int text_pos = 0;
+ int text_pos_chars = 0;
if (text < text_end && m->box_flags & TWRP_BOX) {
if (m->w.wrap_flag == 1) {
@@ -338,50 +339,59 @@ static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
}
while (text < text_end) {
- if (m->box_flags & STYL_BOX) {
- for (i = 0; i < m->style_entries; i++) {
- if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
- av_bprintf(buf, "{\\r}");
+ if ((*text & 0xC0) != 0x80) { // Boxes never split multibyte characters
+ if (m->box_flags & STYL_BOX) {
+ for (i = 0; i < m->style_entries; i++) {
+ if (m->s[i]->style_flag &&
+ text_pos_chars == m->s[i]->style_end)
+ {
+ av_bprintf(buf, "{\\r}");
+ }
}
- }
- for (i = 0; i < m->style_entries; i++) {
- if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) {
- if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
- av_bprintf(buf, "{\\b1}");
- if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
- av_bprintf(buf, "{\\i1}");
- if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
- av_bprintf(buf, "{\\u1}");
- av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
- for (j = 0; j < m->ftab_entries; j++) {
- if (m->s[i]->style_fontID == m->ftab[j]->fontID)
- av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
+ for (i = 0; i < m->style_entries; i++) {
+ if (m->s[i]->style_flag
+ && text_pos_chars == m->s[i]->style_start)
+ {
+ if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
+ av_bprintf(buf, "{\\b1}");
+ if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
+ av_bprintf(buf, "{\\i1}");
+ if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
+ av_bprintf(buf, "{\\u1}");
+ /* (No need to print font style if equal to default?) */
+ av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
+ for (j = 0; j < m->ftab_entries; j++) {
+ if (m->s[i]->style_fontID == m->ftab[j]->fontID)
+ av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
+ }
}
}
}
- }
- if (m->box_flags & HLIT_BOX) {
- if (text_pos == m->h.hlit_start) {
- /* If hclr box is present, set the secondary color to the color
- * specified. Otherwise, set primary color to white and secondary
- * color to black. These colors will come from TextSampleModifier
- * boxes in future and inverse video technique for highlight will
- * be implemented.
- */
- if (m->box_flags & HCLR_BOX) {
- av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2],
- m->c.hlit_color[1], m->c.hlit_color[0]);
- } else {
- av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
+ if (m->box_flags & HLIT_BOX) {
+ if (text_pos_chars == m->h.hlit_start) {
+ /* If hclr box is present, set the secondary color to the
+ * color specified. Otherwise, set primary color to white
+ * and secondary color to black. These colors will come from
+ * TextSampleModifier boxes in future and inverse video
+ * technique for highlight will be implemented.
+ */
+ if (m->box_flags & HCLR_BOX) {
+ av_bprintf(buf, "{\\2c&H%02x%02x%02x&}",
+ m->c.hlit_color[2], m->c.hlit_color[1],
+ m->c.hlit_color[0]);
+ } else {
+ av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
+ }
}
- }
- if (text_pos == m->h.hlit_end) {
- if (m->box_flags & HCLR_BOX) {
- av_bprintf(buf, "{\\2c&H000000&}");
- } else {
- av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
+ if (text_pos_chars == m->h.hlit_end) {
+ if (m->box_flags & HCLR_BOX) {
+ av_bprintf(buf, "{\\2c&H000000&}");
+ } else {
+ av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
+ }
}
}
+ text_pos_chars++;
}
switch (*text) {
@@ -412,10 +422,10 @@ static int mov_text_init(AVCodecContext *avctx) {
MovTextContext *m = avctx->priv_data;
ret = mov_text_tx3g(avctx, m);
if (ret == 0) {
- return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize, m->d.color,
- m->d.back_color, m->d.bold, m->d.italic,
- m->d.underline, ASS_DEFAULT_BORDERSTYLE,
- m->d.alignment);
+ return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize,
+ m->d.color, m->d.back_color, m->d.bold,
+ m->d.italic, m->d.underline,
+ ASS_DEFAULT_BORDERSTYLE, m->d.alignment);
} else
return ff_ass_subtitle_header_default(avctx);
}
@@ -491,7 +501,8 @@ static int mov_text_decode_frame(AVCodecContext *avctx,
for (size_t i = 0; i < box_count; i++) {
if (tsmb_type == box_types[i].type) {
- if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
+ if (m->tracksize + m->size_var +
+ ox_types[i].base_size > avpkt->size)
break;
ret_tsmb = box_types[i].decode(tsmb, m, avpkt);
if (ret_tsmb == -1)
--
1.9.5 (Apple Git-50.3)
More information about the ffmpeg-devel
mailing list