[FFmpeg-devel] [PATCH v2 2/6] avformat/srtdec: UTF-16 support

wm4 nfxjfg at googlemail.com
Thu Sep 4 22:40:23 CEST 2014


---
ff_subtitles_read_line() now returns an error if a 0 byte is
encountered. This is faster and could be considered more correct.
It also handles UTF recode errors better.
---
 libavformat/srtdec.c    | 25 +++++++++++++-----------
 libavformat/subtitles.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++--
 libavformat/subtitles.h | 36 +++++++++++++++++++++++++++++++---
 3 files changed, 96 insertions(+), 16 deletions(-)

diff --git a/libavformat/srtdec.c b/libavformat/srtdec.c
index 53182cd..b63d344 100644
--- a/libavformat/srtdec.c
+++ b/libavformat/srtdec.c
@@ -31,20 +31,21 @@ typedef struct {
 
 static int srt_probe(AVProbeData *p)
 {
-    const unsigned char *ptr = p->buf;
     int i, v, num = 0;
+    FFTextReader tr;
 
-    if (AV_RB24(ptr) == 0xEFBBBF)
-        ptr += 3;  /* skip UTF-8 BOM */
+    ff_text_init_buf(&tr, p->buf, p->buf_size);
 
-    while (*ptr == '\r' || *ptr == '\n')
-        ptr++;
+    while (ff_text_peek_r8(&tr) == '\r' || ff_text_peek_r8(&tr) == '\n')
+        ff_text_r8(&tr);
     for (i=0; i<2; i++) {
+        char buf[128];
+        if (ff_subtitles_read_line(&tr, buf, sizeof(buf)) < 0)
+            break;
         if ((num == i || num + 1 == i)
-            && sscanf(ptr, "%*d:%*2d:%*2d%*1[,.]%*3d --> %*d:%*2d:%*2d%*1[,.]%3d", &v) == 1)
+            && sscanf(buf, "%*d:%*2d:%*2d%*1[,.]%*3d --> %*d:%*2d:%*2d%*1[,.]%3d", &v) == 1)
             return AVPROBE_SCORE_MAX;
-        num = atoi(ptr);
-        ptr += ff_subtitles_next_line(ptr);
+        num = atoi(buf);
     }
     return 0;
 }
@@ -79,6 +80,8 @@ static int srt_read_header(AVFormatContext *s)
     AVBPrint buf;
     AVStream *st = avformat_new_stream(s, NULL);
     int res = 0;
+    FFTextReader tr;
+    ff_text_init_avio(&tr, s->pb);
 
     if (!st)
         return AVERROR(ENOMEM);
@@ -88,11 +91,11 @@ static int srt_read_header(AVFormatContext *s)
 
     av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
 
-    while (!avio_feof(s->pb)) {
-        ff_subtitles_read_chunk(s->pb, &buf);
+    while (!ff_text_eof(&tr)) {
+        ff_subtitles_read_text_chunk(&tr, &buf);
 
         if (buf.len) {
-            int64_t pos = avio_tell(s->pb);
+            int64_t pos = ff_text_pos(&tr);
             int64_t pts;
             int duration;
             const char *ptr = buf.str;
diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c
index cebd453..05d52c3 100644
--- a/libavformat/subtitles.c
+++ b/libavformat/subtitles.c
@@ -86,6 +86,23 @@ void ff_text_read(FFTextReader *r, char *buf, size_t size)
         *buf++ = ff_text_r8(r);
 }
 
+int ff_text_eof(FFTextReader *r)
+{
+    return r->buf_pos < r->buf_len ? 0 : (!r->pb || avio_feof(r->pb));
+}
+
+int ff_text_peek_r8(FFTextReader *r)
+{
+    int byte;
+    if (r->buf_pos < r->buf_len)
+        return r->buf[r->buf_pos];
+    byte = ff_text_r8(r); /* nothing buffered: fetch a byte, then re-buffer it */
+    r->buf[0]  = byte;
+    r->buf_len = 1;
+    r->buf_pos = 0;
+    return byte;
+}
+
 AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
                                     const uint8_t *event, int len, int merge)
 {
@@ -303,7 +320,7 @@ static inline int is_eol(char c)
     return c == '\r' || c == '\n';
 }
 
-void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
+void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf)
 {
     char eol_buf[5], last_was_cr = 0;
     int n = 0, i = 0, nb_eol = 0;
@@ -311,7 +328,7 @@ void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
     av_bprint_clear(buf);
 
     for (;;) {
-        char c = avio_r8(pb);
+        char c = ff_text_r8(tr);
 
         if (!c)
             break;
@@ -344,3 +361,33 @@ void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
         n++;
     }
 }
+
+void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
+{
+    /* Wrap pb in a throwaway reader: type 0 and an empty byte buffer,
+     * set up by hand instead of through ff_text_init_avio(). */
+    FFTextReader tr = { .pb = pb };
+
+    ff_subtitles_read_text_chunk(&tr, buf);
+}
+
+ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size)
+{
+    size_t cur = 0;
+    if (!size)
+        return 0;
+    /* Terminate first so an empty line still leaves a valid string in buf. */
+    for (buf[0] = '\0'; cur + 1 < size; buf[cur] = '\0') {
+        unsigned char c = ff_text_r8(tr);
+        if (!c)
+            return AVERROR_INVALIDDATA;
+        if (c == '\r' || c == '\n')
+            break;
+        buf[cur++] = c;
+    }
+    if (ff_text_peek_r8(tr) == '\r')
+        ff_text_r8(tr);
+    if (ff_text_peek_r8(tr) == '\n')
+        ff_text_r8(tr);
+    return cur;
+}
diff --git a/libavformat/subtitles.h b/libavformat/subtitles.h
index 317e40a..d70c9ef 100644
--- a/libavformat/subtitles.h
+++ b/libavformat/subtitles.h
@@ -22,6 +22,7 @@
 #define AVFORMAT_SUBTITLES_H
 
 #include <stdint.h>
+#include <stddef.h>
 #include "avformat.h"
 #include "libavutil/bprint.h"
 
@@ -82,6 +83,18 @@ int64_t ff_text_pos(FFTextReader *r);
 int ff_text_r8(FFTextReader *r);
 
 /**
+ * Return non-zero if EOF was reached.
+ */
+int ff_text_eof(FFTextReader *r);
+
+/**
+ * Like ff_text_r8(), but don't remove the byte from the buffer.
+ *
+ * @note ff_text_eof might incorrectly return false after calling this at EOF
+ */
+int ff_text_peek_r8(FFTextReader *r);
+
+/**
  * Read the given number of bytes (in UTF-8). On error or EOF, \0 bytes are
  * written.
  */
@@ -145,19 +158,24 @@ int ff_smil_extract_next_chunk(AVIOContext *pb, AVBPrint *buf, char *c);
 const char *ff_smil_get_attr_ptr(const char *s, const char *attr);
 
 /**
- * @brief Read a subtitles chunk.
+ * @brief Same as ff_subtitles_read_text_chunk(), but read from an AVIOContext.
+ */
+void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf);
+
+/**
+ * @brief Read a subtitles chunk from FFTextReader.
  *
  * A chunk is defined by a multiline "event", ending with a second line break.
  * The trailing line breaks are trimmed. CRLF are supported.
  * Example: "foo\r\nbar\r\n\r\nnext" will print "foo\r\nbar" into buf, and pb
  * will focus on the 'n' of the "next" string.
  *
- * @param pb  I/O context
+ * @param tr  I/O context
  * @param buf an initialized buf where the chunk is written
  *
  * @note buf is cleared before writing into it.
  */
-void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf);
+void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf);
 
 /**
  * Get the number of characters to increment to jump to the next line, or to
@@ -178,4 +196,16 @@ static av_always_inline int ff_subtitles_next_line(const char *ptr)
     return n;
 }
 
+/**
+ * Read a line of text. Discards line ending characters.
+ * The function handles the following line break schemes:
+ * LF, CRLF (MS), or standalone CR (old MacOS).
+ *
+ * Returns the number of bytes written to buf, not counting the terminating 0
+ * that is always appended when size is non-zero (as snprintf does).
+ *
+ * @note returns a negative error code if a \0 byte is found
+ */
+ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size);
+
 #endif /* AVFORMAT_SUBTITLES_H */
-- 
2.1.0



More information about the ffmpeg-devel mailing list