[FFmpeg-devel] [PATCH 2/2] refactored samidec

wm4 nfxjfg at googlemail.com
Fri Apr 10 09:50:37 CEST 2015


On Thu,  9 Apr 2015 23:54:23 -0700
Yayoi <yayoi.ukai at gmail.com> wrote:

> ---
>  libavcodec/Makefile        |  2 +-
>  libavcodec/htmlsubtitles.c |  6 ++++++
>  libavcodec/samidec.c       | 52 ++++++++++++++++------------------------------
>  3 files changed, 25 insertions(+), 35 deletions(-)
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 8384458..8e780ad 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -422,7 +422,7 @@ OBJS-$(CONFIG_RV20_DECODER)            += rv10.o
>  OBJS-$(CONFIG_RV20_ENCODER)            += rv20enc.o
>  OBJS-$(CONFIG_RV30_DECODER)            += rv30.o rv34.o rv30dsp.o rv34dsp.o
>  OBJS-$(CONFIG_RV40_DECODER)            += rv40.o rv34.o rv34dsp.o rv40dsp.o
> -OBJS-$(CONFIG_SAMI_DECODER)            += samidec.o ass.o
> +OBJS-$(CONFIG_SAMI_DECODER)            += samidec.o ass.o htmlsubtitles.o
>  OBJS-$(CONFIG_S302M_DECODER)           += s302m.o
>  OBJS-$(CONFIG_S302M_ENCODER)           += s302menc.o
>  OBJS-$(CONFIG_SANM_DECODER)            += sanm.o
> diff --git a/libavcodec/htmlsubtitles.c b/libavcodec/htmlsubtitles.c
> index 7eeec98..54a9707 100644
> --- a/libavcodec/htmlsubtitles.c
> +++ b/libavcodec/htmlsubtitles.c
> @@ -93,6 +93,11 @@ void ff_htmlmarkup_to_ass(AVCodecContext *avctx, AVBPrint *dst, const char *in)
>                  av_bprint_chars(dst, *in, 1);
>              break;
>          case '<':
> +            if (!av_strncasecmp(in, "<BR", 3)){
> +                    av_bprintf(dst, "\\N");
> +                    in += 4;
> +            }
> +
>              tag_close = in[1] == '/';
>              len = 0;
>              if (sscanf(in+tag_close+1, "%127[^>]>%n", buffer, &len) >= 1 && len > 0) {
> @@ -175,5 +180,6 @@ void ff_htmlmarkup_to_ass(AVCodecContext *avctx, AVBPrint *dst, const char *in)
>      while (dst->len >= 2 && !strncmp(&dst->str[dst->len - 2], "\\N", 2))
>          dst->len -= 2;
>      dst->str[dst->len] = 0;
> +
>      rstrip_spaces_buf(dst);
>  }
> \ No newline at end of file
> diff --git a/libavcodec/samidec.c b/libavcodec/samidec.c
> index 47850e2..df84bd5 100644
> --- a/libavcodec/samidec.c
> +++ b/libavcodec/samidec.c
> @@ -28,6 +28,13 @@
>  #include "libavutil/avstring.h"
>  #include "libavutil/bprint.h"
>  
> +
> +#include "libavutil/common.h"
> +#include "libavutil/intreadwrite.h"
> +#include "libavutil/parseutils.h"
> +#include "avcodec.h"
> +#include "htmlsubtitles.h"
> +
>  typedef struct {
>      AVBPrint source;
>      AVBPrint content;
> @@ -40,28 +47,27 @@ static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
>      int ret = 0;
>      char *tag = NULL;
>      char *dupsrc = av_strdup(src);
> -    char *p = dupsrc;
> +    char *in = dupsrc;
>  
>      av_bprint_clear(&sami->content);
>      for (;;) {
>          char *saveptr = NULL;
> -        int prev_chr_is_space = 0;
>          AVBPrint *dst = &sami->content;
>  
>          /* parse & extract paragraph tag */
> -        p = av_stristr(p, "<P");
> -        if (!p)
> +        in = av_stristr(in, "<P");
> +        if (!in)
>              break;
> -        if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
> -            p++;
> +        if (in[2] != '>' && !av_isspace(in[2])) { // avoid confusion with tags such as <PRE>
> +            in++;
>              continue;
>          }
>          if (dst->len) // add a separator with the previous paragraph if there was one
>              av_bprintf(dst, "\\N");
> -        tag = av_strtok(p, ">", &saveptr);
> +        tag = av_strtok(in, ">", &saveptr);
>          if (!tag || !saveptr)
>              break;
> -        p = saveptr;
> +        in = saveptr;
>  
>          /* check if the current paragraph is the "source" (speaker name) */
>          if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
> @@ -70,36 +76,14 @@ static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
>          }
>  
>          /* if empty event -> skip subtitle */
> -        while (av_isspace(*p))
> -            p++;
> -        if (!strncmp(p, "&nbsp;", 6)) {
> +        while (av_isspace(*in))
> +            in++;
> +        if (!strncmp(in, "&nbsp;", 6)) {
>              ret = -1;
>              goto end;
>          }
>  
> -        /* extract the text, stripping most of the tags */
> -        while (*p) {
> -            if (*p == '<') {
> -                if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
> -                    break;
> -                if (!av_strncasecmp(p, "<BR", 3))
> -                    av_bprintf(dst, "\\N");
> -                p++;
> -                while (*p && *p != '>')
> -                    p++;
> -                if (!*p)
> -                    break;
> -                if (*p == '>')
> -                    p++;
> -                continue;
> -            }
> -            if (!av_isspace(*p))
> -                av_bprint_chars(dst, *p, 1);
> -            else if (!prev_chr_is_space)
> -                av_bprint_chars(dst, ' ', 1);
> -            prev_chr_is_space = av_isspace(*p);
> -            p++;
> -        }
> +        ff_htmlmarkup_to_ass(avctx, dst, in);
>      }
>  
>      av_bprint_clear(&sami->full);

So... what exactly does this do? Use the same parser for srt and sami?
How does this work out? Aren't they different formats? (Such things
should be explained in the commit message.)


More information about the ffmpeg-devel mailing list