[FFmpeg-devel] [PATCH v2] avformat/oggdec: Add page CRC verification

Mattias Wadman mattias.wadman at gmail.com
Tue Apr 28 14:15:09 EEST 2020


On Tue, Apr 28, 2020 at 1:06 PM Mattias Wadman <mattias.wadman at gmail.com> wrote:
>
> Fixes seek issue with ogg files that have segment data that happens to be
> encoded as a "OggS" page syncword. Very unlikely but seems to happen.
>
> Have been observed to happen with ffmpeg+libvorbis and oggenc encoding
> to 441khz stereo at 160kbps.
> ---
>  libavformat/oggdec.c | 136 +++++++++++++++++++++++++++++--------------
>  1 file changed, 92 insertions(+), 44 deletions(-)
>
> diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
> index 95190589ab..0213f1fa12 100644
> --- a/libavformat/oggdec.c
> +++ b/libavformat/oggdec.c
> @@ -31,6 +31,8 @@
>  #include <stdio.h>
>  #include "libavutil/avassert.h"
>  #include "libavutil/intreadwrite.h"
> +#include "libavutil/crc.h"
> +#include "libavcodec/bytestream.h"
>  #include "oggdec.h"
>  #include "avformat.h"
>  #include "internal.h"
> @@ -205,32 +207,30 @@ static const struct ogg_codec *ogg_find_codec(uint8_t *buf, int size)
>   * situation where a new audio stream spawn (identified with a new serial) and
>   * must replace the previous one (track switch).
>   */
> -static int ogg_replace_stream(AVFormatContext *s, uint32_t serial, int nsegs)
> +static int ogg_replace_stream(AVFormatContext *s, uint32_t serial, uint8_t *segmentsdata, int size)
>  {
>      struct ogg *ogg = s->priv_data;
>      struct ogg_stream *os;
>      const struct ogg_codec *codec;
>      int i = 0;
> +    int magiclen = 8;
>
> -    if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
> -        uint8_t magic[8];
> -        int64_t pos = avio_tell(s->pb);
> -        avio_skip(s->pb, nsegs);
> -        if (avio_read(s->pb, magic, sizeof(magic)) != sizeof(magic))
> -            return AVERROR_INVALIDDATA;
> -        avio_seek(s->pb, pos, SEEK_SET);
> -        codec = ogg_find_codec(magic, sizeof(magic));
> -        if (!codec) {
> -            av_log(s, AV_LOG_ERROR, "Cannot identify new stream\n");
> -            return AVERROR_INVALIDDATA;
> -        }
> -        for (i = 0; i < ogg->nstreams; i++) {
> -            if (ogg->streams[i].codec == codec)
> -                break;
> -        }
> -        if (i >= ogg->nstreams)
> -            return ogg_new_stream(s, serial);
> -    } else if (ogg->nstreams != 1) {
> +    if (size < magiclen)
> +        return AVERROR_INVALIDDATA;
> +
> +    codec = ogg_find_codec(segmentsdata, magiclen);
> +    if (!codec) {
> +        av_log(s, AV_LOG_ERROR, "Cannot identify new stream\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +    for (i = 0; i < ogg->nstreams; i++) {
> +        if (ogg->streams[i].codec == codec)
> +            break;
> +    }
> +    if (i >= ogg->nstreams)
> +        return ogg_new_stream(s, serial);
> +
> +    if (ogg->nstreams != 1) {
>          avpriv_report_missing_feature(s, "Changing stream parameters in multistream ogg");
>          return AVERROR_PATCHWELCOME;
>      }

Not sure what the correct behaviour should be here now. Currently this
check is only done if
the io context is not seekable. But now ogg_replace_stream does need
to seek as it has all data.

> @@ -339,14 +339,24 @@ static int ogg_read_page(AVFormatContext *s, int *sid)
>      AVIOContext *bc = s->pb;
>      struct ogg *ogg = s->priv_data;
>      struct ogg_stream *os;
> -    int ret, i = 0;
> -    int flags, nsegs;
> +    int ret, i;
> +    int version, flags, nsegs;
>      uint64_t gp;
>      uint32_t serial;
> +    uint32_t crc;
>      int size, idx;
>      uint8_t sync[4];
> -    int sp = 0;
> -
> +    uint8_t header[23];
> +    GetByteContext headergb;
> +    uint8_t segments[255];
> +    uint8_t *segmentsdata;
> +    int sp;
> +    const AVCRC *crc_table;
> +    uint32_t ccrc;
> +
> +again:
> +    sp = 0;
> +    i = 0;
>      ret = avio_read(bc, sync, 4);
>      if (ret < 4)
>          return ret < 0 ? ret : AVERROR_EOF;
> @@ -378,47 +388,87 @@ static int ogg_read_page(AVFormatContext *s, int *sid)
>          return AVERROR_INVALIDDATA;
>      }
>
> -    if (avio_r8(bc) != 0) {      /* version */
> -        av_log (s, AV_LOG_ERROR, "ogg page, unsupported version\n");
> +    ret = avio_read(bc, header, sizeof(header));
> +    if (ret < sizeof(header))
> +        return AVERROR_INVALIDDATA;
> +    nsegs = header[22];
> +    ret = avio_read(bc, segments, nsegs);
> +    if (ret < nsegs)
> +        return AVERROR_INVALIDDATA;
> +    size = 0;
> +    for (i = 0; i < nsegs; i++)
> +        size += segments[i];
> +
> +    bytestream2_init(&headergb, header, sizeof(header));
> +    version = bytestream2_get_byte(&headergb);
> +    flags   = bytestream2_get_byte(&headergb);
> +    gp      = bytestream2_get_le64(&headergb);
> +    serial  = bytestream2_get_le32(&headergb);
> +    bytestream2_skip(&headergb, 4); // seq le32
> +    crc     = bytestream2_get_le32(&headergb);
> +
> +    segmentsdata = av_malloc(size);
> +    if (!segmentsdata)
> +        return AVERROR(ENOMEM);
> +    ret = avio_read(bc, segmentsdata, size);
> +    if (ret < size) {
> +        av_freep(&segmentsdata);

Lots if av_freep(&segmentsdata); now. Maybe should refactor to use
RETURN_ERROR macro?

>          return AVERROR_INVALIDDATA;
>      }
>
> -    flags  = avio_r8(bc);
> -    gp     = avio_rl64(bc);
> -    serial = avio_rl32(bc);
> -    avio_skip(bc, 8); /* seq, crc */
> -    nsegs  = avio_r8(bc);
> +    // Reset CRC in header to zero and calculate for whole page
> +    crc_table = av_crc_get_table(AV_CRC_32_IEEE);
> +    memset(&header[18], 0, 4);
> +    ccrc = 0;
> +    ccrc = av_crc(crc_table, ccrc, "OggS", 4);
> +    ccrc = av_crc(crc_table, ccrc, header, sizeof(header));
> +    ccrc = av_crc(crc_table, ccrc, segments, nsegs);
> +    ccrc = av_crc(crc_table, ccrc, segmentsdata, size);
> +    // Default AV_CRC_32_IEEE table is BE
> +    ccrc = av_bswap32(ccrc);
> +
> +    if (ccrc != crc) {
> +        av_log(s, AV_LOG_TRACE, "ogg page, invalid checksum %x != %x\n", ccrc, crc);
> +        if (s->error_recognition & AV_EF_CRCCHECK) {
> +            av_freep(&segmentsdata);
> +            // TODO: smarter use of read data?
> +            goto again;
> +        }
> +    }
>
> -    if (avio_feof(bc))
> -        return AVERROR_EOF;
> +    if (version != 0) {
> +        av_log(s, AV_LOG_ERROR, "ogg page, unsupported version %d\n", version);
> +        av_freep(&segmentsdata);
> +        return AVERROR_INVALIDDATA;
> +    }
>
>      idx = ogg_find_stream(ogg, serial);
>      if (idx < 0) {
>          if (data_packets_seen(ogg))
> -            idx = ogg_replace_stream(s, serial, nsegs);
> +            idx = ogg_replace_stream(s, serial, segmentsdata, size);
>          else
>              idx = ogg_new_stream(s, serial);
>
>          if (idx < 0) {
>              av_log(s, AV_LOG_ERROR, "failed to create or replace stream\n");
> +            av_freep(&segmentsdata);
>              return idx;
>          }
>      }
>
>      os = ogg->streams + idx;
>      ogg->page_pos =
> -    os->page_pos = avio_tell(bc) - 27;
> +    os->page_pos = avio_tell(bc) - 27 - size - nsegs;;

Thinking maybe should store page pos earlier and skip all the
arithmetic here? .. now i see a double ";" hmm

>
>      if (os->psize > 0) {
>          ret = ogg_new_buf(ogg, idx);
> -        if (ret < 0)
> +        if (ret < 0) {
> +            av_freep(&segmentsdata);
>              return ret;
> +        }
>      }
>
> -    ret = avio_read(bc, os->segments, nsegs);
> -    if (ret < nsegs)
> -        return ret < 0 ? ret : AVERROR_EOF;
> -
> +    memcpy(os->segments, segments, nsegs);
>      os->nsegs = nsegs;
>      os->segp  = 0;
>
> @@ -456,10 +506,8 @@ static int ogg_read_page(AVFormatContext *s, int *sid)
>          os->buf = nb;
>      }
>
> -    ret = avio_read(bc, os->buf + os->bufpos, size);
> -    if (ret < size)
> -        return ret < 0 ? ret : AVERROR_EOF;
> -
> +    memcpy(os->buf + os->bufpos, segmentsdata, size);
> +    av_freep(&segmentsdata);
>      os->bufpos += size;
>      os->granule = gp;
>      os->flags   = flags;
> --
> 2.18.0
>


More information about the ffmpeg-devel mailing list