[FFmpeg-devel] [PATCH 1/2] avcodec/s302m: enable non-PCM decoding

Thu Jan 25 06:59:49 EET 2024

Gyan Doshi:
> Set up framework for non-PCM decoding in-place and
> add support for Dolby-E decoding.
> 
> Useful for direct transcoding of non-PCM audio in live inputs.
> ---
>  configure          |   1 +
>  doc/decoders.texi  |  40 +++
>  libavcodec/s302m.c | 609 +++++++++++++++++++++++++++++++++++++--------
>  3 files changed, 543 insertions(+), 107 deletions(-)
> 
> diff --git a/configure b/configure
> index c8ae0a061d..8db3fa3f4b 100755
> --- a/configure
> +++ b/configure
> @@ -2979,6 +2979,7 @@ rv20_decoder_select="h263_decoder"
>  rv20_encoder_select="h263_encoder"
>  rv30_decoder_select="golomb h264pred h264qpel mpegvideodec rv34dsp"
>  rv40_decoder_select="golomb h264pred h264qpel mpegvideodec rv34dsp"
> +s302m_decoder_select="dolby_e_decoder"
>  screenpresso_decoder_deps="zlib"
>  shorten_decoder_select="bswapdsp"
>  sipr_decoder_select="lsp"
> diff --git a/doc/decoders.texi b/doc/decoders.texi
> index 293c82c2ba..9f85c876bf 100644
> --- a/doc/decoders.texi
> +++ b/doc/decoders.texi
> @@ -347,6 +347,46 @@ configuration. You need to explicitly configure the build with
>  An FFmpeg native decoder for Opus exists, so users can decode Opus
>  without this library.
>  
> +@section s302m
> +
> +SMPTE ST 302 decoder.
> +
> +SMPTE ST 302 is a method for storing AES3 data format within an MPEG Transport
> +Stream. AES3 streams can contain LPCM streams of 2, 4, 6 or 8 channels with a
> +bit depth of 16, 20 or 24-bits at a sample rate of 48 kHz.
> +They can also contain non-PCM codec streams such as AC-3 or Dolby-E.
> +

This sounds like we should add bitstream filters to extract the proper
underlying streams instead.
(I see only two problems with this approach: The BSF API needs to set
the CodecID of the output during init, but at this point no packet has
reached the BSF to determine it. And changing codec IDs mid-stream is
also not supported.)

> +Decoding non-PCM streams directly requires that the necessary stream decoder be
> +present in the build. At present, only Dolby-E decoding is supported.
> +
> +@subsection Options
> +
> +The following options are supported by the s302m decoder.
> +
> +@table @option
> +@item non_pcm_mode @var{mode}
> +Specify how to process non-PCM streams
> +
> +@table @samp
> +@item copy
> +Treat data as if it were LPCM.
> +@item drop
> +Discard the stream.
> +@item decode_copy
> +Decode if possible else treat the same as @code{copy}.
> +@item decode_drop
> +Decode if possible else treat the same as @code{drop}.
> +@end table
> +
> +The default value is @code{decode_drop}. This option does not affect the processing of
> +LPCM streams.
> +
> +@item non_pcm_options @var{options}
> +Set options for non-PCM decoder using a list of key=value pairs separated by ":".
> +Consult the docs for the non-PCM decoder for its options.
> +
> +@end table
> +
>  @c man end AUDIO DECODERS
>  
>  @chapter Subtitles Decoders
> diff --git a/libavcodec/s302m.c b/libavcodec/s302m.c
> index f1b41608f3..d6a75cfa73 100644
> --- a/libavcodec/s302m.c
> +++ b/libavcodec/s302m.c
> @@ -24,21 +24,264 @@
>  #include "libavutil/intreadwrite.h"
>  #include "libavutil/opt.h"
>  #include "libavutil/log.h"
> +#include "libavutil/dict.h"
>  #include "libavutil/reverse.h"
>  #include "avcodec.h"
>  #include "codec_internal.h"
> +#include "get_bits.h"
>  #include "decode.h"
>  
>  #define AES3_HEADER_LEN 4
>  
> +#define NONPCMSYNC_16MARKER      0x4E1F0F8720
> +#define NONPCMSYNC_20MARKER      0x4E1F60F872A0
> +#define NONPCMSYNC_24MARKER      0x7E1F690F872A50
> +
> +#define NONPCMSYNC_16_IN_20MARKER      0x04E1F00F8720
> +#define NONPCMSYNC_20_IN_24MARKER      0x04E1F600F872A0
> +
> +#define IS_NONPCMSYNC_16(state)   ((state & 0xFFFF0FFFF0)     == NONPCMSYNC_16MARKER)
> +#define IS_NONPCMSYNC_20(state)   ((state & 0xFFFFF0FFFFF0)   == NONPCMSYNC_20MARKER)
> +#define IS_NONPCMSYNC_24(state)   ((state & 0xFFFFFF0FFFFFF0) == NONPCMSYNC_24MARKER)
> +
> +#define IS_NONPCMSYNC_16_IN_20(state)   ((state & 0x0FFFF00FFFF0)   == NONPCMSYNC_16_IN_20MARKER)
> +#define IS_NONPCMSYNC_20_IN_24(state)   ((state & 0x0FFFFF00FFFFF0) == NONPCMSYNC_20_IN_24MARKER)
> +
> +#define IS_NONPCMSYNC(bit,state)  ( ((bit == 16) &&  IS_NONPCMSYNC_16(state)) || \
> +                                    ((bit == 20) && (IS_NONPCMSYNC_20(state) || IS_NONPCMSYNC_16_IN_20(state))) || \
> +                                    ((bit == 24) && (IS_NONPCMSYNC_24(state) || IS_NONPCMSYNC_20_IN_24(state))) \
> +                                  )
> +
> +enum non_pcm_modes {
> +    NON_PCM_COPY,
> +    NON_PCM_DROP,
> +    NON_PCM_DEC_ELSE_COPY,
> +    NON_PCM_DEC_ELSE_DROP,
> +};
> +
>  typedef struct S302Context {
>      AVClass *class;
> +
> +    int avctx_props_set;
> +
> +    int channels;
> +    int bits;
> +
>      int non_pcm_mode;
> +    int non_pcm_data_type;
> +    int non_pcm_bits;
> +    int non_pcm_dec;
> +
> +    AVCodecContext *non_pcm_ctx;
> +    AVDictionary   *non_pcm_opts;
> +    AVPacket *packet;
> +    AVFrame  *frame;
>  } S302Context;
>  
> +static av_cold int s302m_init(AVCodecContext *avctx)
> +{
> +    S302Context *s = avctx->priv_data;
> +
> +    s->non_pcm_data_type = -1;
> +
> +    return 0;
> +}
> +
> +static int s302m_non_pcm_inspect(AVCodecContext *avctx, const uint8_t *buf, int buf_size,
> +                                  int *offset, int *length)
> +{
> +    S302Context *s = avctx->priv_data;
> +    GetBitContext gb;
> +    int ret, aes_frm_size, data_type, length_code = 0;
> +    uint64_t state = 0;
> +    uint32_t size;
> +
> +    if (s->channels != 2) {
> +        goto end;
> +    }
> +
> +    ret = init_get_bits8(&gb, buf, buf_size);
> +    if (ret < 0)
> +        return ret;
> +
> +    aes_frm_size = (s->bits + 4) * 2 / 8;
> +    if (buf_size < aes_frm_size * 2)  // not enough to contain data_type & length_code
> +        return AVERROR_INVALIDDATA;
> +
> +    state = get_bits64(&gb, aes_frm_size * 8);
> +
> +    while (!IS_NONPCMSYNC(s->bits,state) && (get_bits_left(&gb) >= 8))
> +        state = (state << 8) | get_bits(&gb, 8);

Reading byte-aligned data with a GetBit context is very suboptimal.

> +
> +    if (IS_NONPCMSYNC(s->bits,state)) {
> +        if (get_bits_left(&gb) < aes_frm_size * 8) {
> +            av_log(avctx, AV_LOG_ERROR, "truncated non-pcm frame detected\n");
> +            return AVERROR_INVALIDDATA;
> +        }
> +
> +        if (s->bits == 16) {
> +            s->non_pcm_bits = 16;
> +        } else if (s->bits == 20) {
> +            if (IS_NONPCMSYNC_16_IN_20(state))
> +                s->non_pcm_bits = 16;
> +            else
> +                s->non_pcm_bits = 20;
> +        } else if (s->bits == 24) {
> +            if (IS_NONPCMSYNC_20_IN_24(state))
> +                s->non_pcm_bits = 20;
> +            else
> +                s->non_pcm_bits = 24;
> +        }
> +
> +        skip_bits(&gb, s->bits - 16);
> +
> +        data_type = ff_reverse[(uint8_t)get_bits(&gb, 5)] >> 3;
> +
> +        if (s->non_pcm_data_type == -1) {
> +            s->non_pcm_data_type = data_type;
> +            av_log(avctx, AV_LOG_INFO, "stream has non-pcm data of type %d with %d-bit words in aes3 payload of size %d bits\n", data_type, s->non_pcm_bits, s->bits);
> +        } else if (s->non_pcm_data_type != data_type) {
> +            av_log(avctx, AV_LOG_DEBUG, "type mismatch of non-pcm type in packet %d vs stream %d. Dropping.\n", data_type, s->non_pcm_data_type);
> +            return AVERROR_INVALIDDATA;
> +        }
> +
> +        if (s->non_pcm_mode == NON_PCM_COPY)
> +            return 0;
> +        else if (s->non_pcm_mode == NON_PCM_DROP)
> +            return AVERROR_INVALIDDATA;
> +
> +        skip_bits(&gb, 15);
> +
> +        size = get_bits(&gb, s->bits);
> +
> +        length_code = ((ff_reverse[(uint8_t)((size & 0xFF)          )] << 16) |
> +                       (ff_reverse[(uint8_t)((size & 0xFF00)   >> 8 )] << 8 ) |
> +                       (ff_reverse[(uint8_t)((size & 0xFF0000) >> 16)]      ) ) >> (24 - s->non_pcm_bits);
> +
> +        skip_bits(&gb, 4);
> +
> +        *offset = get_bits_count(&gb)/8;
> +        *length = length_code;
> +
> +        av_log(avctx, AV_LOG_TRACE, "located non-pcm packet at offset %d length code %d.\n", AES3_HEADER_LEN + *offset, length_code);
> +
> +        return data_type;
> +    }
> +
> +end:
> +    if (s->non_pcm_data_type == -1) {
> +        s->non_pcm_data_type = 32;  // indicate stream should be treated as LPCM
> +        return 0;
> +    } else
> +        return AVERROR_INVALIDDATA;
> +}
> +
> +static int s302m_setup_non_pcm_handling(AVCodecContext *avctx, const uint8_t *buf, int buf_size)
> +{
> +    S302Context *s = avctx->priv_data;
> +    const AVCodec *codec;
> +    enum AVCodecID codec_id;
> +    AVDictionary *dec_opts = NULL;
> +    int ret;
> +
> +    if (s->non_pcm_mode > NON_PCM_DROP) {
> +        switch (s->non_pcm_data_type) {
> +        case 0x1C:
> +            codec_id = AV_CODEC_ID_DOLBY_E;
> +            break;
> +        default:
> +            avpriv_report_missing_feature(avctx, "decode of non-pcm data type %d", s->non_pcm_data_type);
> +            ret = AVERROR_PATCHWELCOME;
> +            goto fail;
> +        }
> +
> +        codec = avcodec_find_decoder(codec_id);
> +        if (!codec) {
> +            ret = AVERROR_DECODER_NOT_FOUND;
> +            goto fail;
> +        }
> +
> +        s->non_pcm_ctx = avcodec_alloc_context3(codec);
> +        if (!s->non_pcm_ctx) {
> +            ret = AVERROR(ENOMEM);
> +            goto fail;
> +        }
> +
> +        av_dict_copy(&dec_opts, s->non_pcm_opts, 0);
> +
> +        ret = avcodec_open2(s->non_pcm_ctx, codec, &dec_opts);
> +        av_dict_free(&dec_opts);
> +        if (ret < 0)
> +            goto fail;
> +
> +        s->packet = av_packet_alloc();
> +        if (!s->packet) {
> +            ret = AVERROR(ENOMEM);
> +            goto fail;
> +        }
> +
> +        s->frame = av_frame_alloc();
> +        if (!s->frame) {
> +            ret = AVERROR(ENOMEM);
> +            goto fail;
> +        }
> +    } else
> +        return 0;
> +
> +    s->non_pcm_dec = 1;
> +    return 0;
> +
> +fail:
> +    avcodec_free_context(&s->non_pcm_ctx);
> +    av_packet_free(&s->packet);
> +    av_frame_free(&s->frame);
> +
> +    if (s->non_pcm_mode == NON_PCM_DEC_ELSE_COPY)
> +        s->non_pcm_mode = NON_PCM_COPY;
> +    else if (s->non_pcm_mode == NON_PCM_DEC_ELSE_DROP)
> +        s->non_pcm_mode = NON_PCM_DROP;
> +
> +    return ret;
> +}
> +
> +static int s302m_get_non_pcm_pkt_size(AVCodecContext *avctx, int buf_size, int offset,
> +                                      int length_code, int *dec_pkt_size)
> +{
> +    S302Context *s = avctx->priv_data;
> +    int nb_words, word_size, aesframe_size, s302m_read_size;
> +
> +    if (offset < 0 || offset >= buf_size)
> +        return AVERROR_INVALIDDATA;
> +
> +    switch (s->non_pcm_data_type) {
> +    case 0x1C:
> +        goto dolby_e;
> +    default:
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +dolby_e:
> +    {
> +    nb_words = length_code / s->non_pcm_bits;
> +    nb_words += nb_words & 1;
> +
> +    word_size = s->non_pcm_bits + 7 >> 3;
> +    aesframe_size = (s->bits + 4) * 2 / 8;  // 2 subframes, each with payload + VUCF bits
> +
> +    *dec_pkt_size = nb_words * word_size;
> +    s302m_read_size = aesframe_size * nb_words/2;
> +
> +    if (offset + s302m_read_size > buf_size)
> +        return AVERROR_INVALIDDATA;
> +
> +    return s302m_read_size;
> +    }
> +}
> +
>  static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf,
>                                      int buf_size)
>  {
> +    S302Context *s = avctx->priv_data;
>      uint32_t h;
>      int frame_size, channels, bits;
>  
> @@ -66,33 +309,15 @@ static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf,
>          return AVERROR_INVALIDDATA;
>      }
>  
> -    /* Set output properties */
> -    avctx->bits_per_raw_sample = bits;
> -    if (bits > 16)
> -        avctx->sample_fmt = AV_SAMPLE_FMT_S32;
> -    else
> -        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
> -
> -    av_channel_layout_uninit(&avctx->ch_layout);
> -    switch(channels) {
> -        case 2:
> -            avctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
> -            break;
> -        case 4:
> -            avctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_QUAD;
> -            break;
> -        case 6:
> -            avctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT1_BACK;
> -            break;
> -        case 8:
> -            av_channel_layout_from_mask(&avctx->ch_layout,
> -                                        AV_CH_LAYOUT_5POINT1_BACK | AV_CH_LAYOUT_STEREO_DOWNMIX);
> -            break;
> -        default:
> -            avctx->ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
> -            avctx->ch_layout.nb_channels = channels;
> -            break;
> -    }
> +    if (!s->channels)
> +        s->channels = channels;
> +    else if (s->channels != channels)
> +        return AVERROR_INVALIDDATA;
> +
> +    if (!s->bits)
> +        s->bits = bits;
> +    else if (s->bits != bits)
> +        return AVERROR_INVALIDDATA;
>  
>      return frame_size;
>  }
> @@ -103,119 +328,286 @@ static int s302m_decode_frame(AVCodecContext *avctx, AVFrame *frame,
>      S302Context *s = avctx->priv_data;
>      const uint8_t *buf = avpkt->data;
>      int buf_size       = avpkt->size;
> -    int block_size, ret, channels;
> -    int i;
> -    int non_pcm_data_type = -1;
> +    int block_size, ret, channels, frame_size;
> +    int non_pcm_offset = -1, non_pcm_length = 0;
> +    int dec_pkt_size = 0;
> +
> +    if (s->non_pcm_mode == NON_PCM_DROP && s->non_pcm_data_type != -1 && s->non_pcm_data_type != 32)
> +        return avpkt->size;
>  
> -    int frame_size = s302m_parse_frame_header(avctx, buf, buf_size);
> +    frame_size = s302m_parse_frame_header(avctx, buf, buf_size);
>      if (frame_size < 0)
>          return frame_size;
>  
>      buf_size -= AES3_HEADER_LEN;
>      buf      += AES3_HEADER_LEN;
>  
> -    /* get output buffer */
> -    block_size = (avctx->bits_per_raw_sample + 4) / 4;
> -    channels = avctx->ch_layout.nb_channels;
> -    frame->nb_samples = 2 * (buf_size / block_size) / channels;
> -    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> -        return ret;
> +    // set non-pcm status if not determined
> +    // else extract offset and length if non-pcm can be decoded
> +    if (s->non_pcm_data_type == -1 || s->non_pcm_dec) {
> +        ret = s302m_non_pcm_inspect(avctx, buf, buf_size, &non_pcm_offset, &non_pcm_length);
> +        if (ret >= 0 && s->non_pcm_data_type != 32 && !s->non_pcm_dec)
> +            ret = s302m_setup_non_pcm_handling(avctx, buf, buf_size);
> +        else if (ret < 0)
> +            return avpkt->size;
> +    }
> +
> +    if (s->non_pcm_data_type == -1)
> +        return AVERROR_INVALIDDATA;  // we should know data type in order to proceed with output
> +
> +    if (!s->non_pcm_dec && !s->avctx_props_set) {
> +        /* Set output properties */
> +        avctx->bits_per_raw_sample = s->non_pcm_bits ? s->non_pcm_bits : s->bits;
> +        if (avctx->bits_per_raw_sample > 16)
> +            avctx->sample_fmt = AV_SAMPLE_FMT_S32;
> +        else
> +            avctx->sample_fmt = AV_SAMPLE_FMT_S16;
>  
> -    avctx->bit_rate = 48000 * channels * (avctx->bits_per_raw_sample + 4) +
> -                      32 * 48000 / frame->nb_samples;
> -    buf_size = (frame->nb_samples * channels / 2) * block_size;
> +        av_channel_layout_uninit(&avctx->ch_layout);
> +        switch(s->channels) {
> +            case 2:
> +                avctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
> +                break;
> +            case 4:
> +                avctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_QUAD;
> +                break;
> +            case 6:
> +                avctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT1_BACK;
> +                break;
> +            case 8:
> +                av_channel_layout_from_mask(&avctx->ch_layout,
> +                                            AV_CH_LAYOUT_5POINT1_BACK | AV_CH_LAYOUT_STEREO_DOWNMIX);
> +                break;
> +            default:
> +                avctx->ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
> +                avctx->ch_layout.nb_channels = channels;
> +                break;
> +        }
> +
> +        avctx->sample_rate = 48000;
> +        s->avctx_props_set = 1;
> +    }
> +
> +    if (s->non_pcm_dec) {
> +        buf_size = s302m_get_non_pcm_pkt_size(avctx, buf_size, non_pcm_offset, non_pcm_length, &dec_pkt_size);
> +        if (buf_size < 0)
> +            return AVERROR_INVALIDDATA;
> +        buf += non_pcm_offset;
> +
> +        if (dec_pkt_size > s->packet->size) {
> +            ret = av_grow_packet(s->packet, dec_pkt_size - s->packet->size);
> +            if (ret < 0)
> +                return ret;
> +        }
> +        ret = av_packet_make_writable(s->packet);
> +        if (ret < 0)
> +            return ret;
> +        memset(s->packet->data, 0, s->packet->size);
> +
> +        ret = av_packet_copy_props(s->packet, avpkt);
> +        if (ret < 0)
> +            return ret;
> +    } else {
> +        /* get output buffer */
> +        block_size = (s->bits + 4) / 4;
> +        channels = s->channels;
> +        frame->nb_samples = 2 * (buf_size / block_size) / channels;
> +        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> +            return ret;
> +
> +        buf_size = (frame->nb_samples * channels / 2) * block_size;
> +
> +        avctx->bit_rate = 48000 * s->channels * ((s->non_pcm_bits ? s->non_pcm_bits : s->bits) + 4) +
> +                          32 * 48000 / frame->nb_samples;
> +    }
> +
> +    if (s->bits == 24) {
> +        uint8_t  *p   = NULL;
> +        uint32_t *f32 = NULL;
> +
> +        if (s->non_pcm_dec)
> +            p = (uint8_t *)s->packet->data;
> +        else
> +            f32 = (uint32_t *)frame->data[0];
>  
> -    if (avctx->bits_per_raw_sample == 24) {
> -        uint32_t *o = (uint32_t *)frame->data[0];
>          for (; buf_size > 6; buf_size -= 7) {
> -            *o++ = ((unsigned)ff_reverse[buf[2]]        << 24) |
> -                   (ff_reverse[buf[1]]        << 16) |
> -                   (ff_reverse[buf[0]]        <<  8);
> -            *o++ = ((unsigned)ff_reverse[buf[6] & 0xf0] << 28) |
> -                   (ff_reverse[buf[5]]        << 20) |
> -                   (ff_reverse[buf[4]]        << 12) |
> -                   (ff_reverse[buf[3] & 0x0f] <<  4);
> +            uint8_t b[6];
> +
> +            b[0] = (ff_reverse[buf[2]       ]      ) ;
> +            b[1] = (ff_reverse[buf[1]       ]      ) ;
> +            b[2] = (ff_reverse[buf[0]       ]      ) ;
> +            b[3] = (ff_reverse[buf[6] & 0xf0] <<  4) |
> +                   (ff_reverse[buf[5] & 0x0f] >>  4) ;
> +            b[4] = (ff_reverse[buf[5] & 0xf0] <<  4) |
> +                   (ff_reverse[buf[4] & 0x0f] >>  4) ;
> +            b[5] = (ff_reverse[buf[4] & 0xf0] <<  4) |
> +                   (ff_reverse[buf[3] & 0x0f] >>  4) ;
> +
> +            if (s->non_pcm_bits == 20) {
> +                b[2] &= 0xf0;
> +                b[5] &= 0xf0;
> +            }
> +
> +            if (s->non_pcm_dec)
> +                for (int i = 0; i < 6; i++)
> +                    *p++ = b[i];
> +            else {
> +                *f32++ = (b[0] << 24) |
> +                         (b[1] << 16) |
> +                         (b[2] <<  8) ;
> +                *f32++ = (b[3] << 24) |
> +                         (b[4] << 16) |
> +                         (b[5] <<  8) ;
> +            }
>              buf += 7;
>          }
> -        o = (uint32_t *)frame->data[0];
> -        if (channels == 2)
> -            for (i=0; i<frame->nb_samples * 2 - 6; i+=2) {
> -                if (o[i] || o[i+1] || o[i+2] || o[i+3])
> -                    break;
> -                if (o[i+4] == 0x96F87200U && o[i+5] == 0xA54E1F00) {
> -                    non_pcm_data_type = (o[i+6] >> 16) & 0x1F;
> -                    break;
> +    } else if (s->bits == 20) {
> +        uint8_t  *p   = NULL;
> +        uint16_t *f16 = NULL;
> +        uint32_t *f32 = NULL;
> +
> +        if (s->non_pcm_dec)
> +            p = (uint8_t *)s->packet->data;
> +        else if (s->non_pcm_bits == 16)
> +            f16 = (uint16_t *)frame->data[0];
> +        else
> +            f32 = (uint32_t *)frame->data[0];
> +
> +        for (; buf_size > 5; buf_size -= 6) {
> +            uint8_t b[6];
> +
> +            b[0] = (ff_reverse[buf[2] & 0xf0] <<  4) |
> +                   (ff_reverse[buf[1] & 0x0f] >>  4) ;
> +            b[1] = (ff_reverse[buf[1] & 0xf0] <<  4) |
> +                   (ff_reverse[buf[0] & 0x0f] >>  4) ;
> +            b[2] = (ff_reverse[buf[0] & 0xf0] <<  4) ;
> +            b[3] = (ff_reverse[buf[5] & 0xf0] <<  4) |
> +                   (ff_reverse[buf[4] & 0x0f] >>  4) ;
> +            b[4] = (ff_reverse[buf[4] & 0xf0] <<  4) |
> +                   (ff_reverse[buf[3] & 0x0f] >>  4) ;
> +            b[5] = (ff_reverse[buf[3] & 0xf0] <<  4) ;
> +
> +            if (s->non_pcm_dec) {
> +                for (int i = 0; i < 6; i++) {
> +                    if (s->non_pcm_bits == 16 && (i % 3 == 2))
> +                        continue;
> +                    *p++ = b[i];
>                  }
> +            } else if (s->non_pcm_bits == 16) {
> +                *f16++ = (b[0] << 8) |
> +                         (b[1]     ) ;
> +                *f16++ = (b[3] << 8) |
> +                         (b[4]     ) ;
> +            } else {
> +                *f32++ = (b[0] << 24) |
> +                         (b[1] << 16) |
> +                         (b[2] <<  8) ;
> +                *f32++ = (b[3] << 24) |
> +                         (b[4] << 16) |
> +                         (b[5] <<  8) ;
>              }
> -    } else if (avctx->bits_per_raw_sample == 20) {
> -        uint32_t *o = (uint32_t *)frame->data[0];
> -        for (; buf_size > 5; buf_size -= 6) {
> -            *o++ = ((unsigned)ff_reverse[buf[2] & 0xf0] << 28) |
> -                   (ff_reverse[buf[1]]        << 20) |
> -                   (ff_reverse[buf[0]]        << 12);
> -            *o++ = ((unsigned)ff_reverse[buf[5] & 0xf0] << 28) |
> -                   (ff_reverse[buf[4]]        << 20) |
> -                   (ff_reverse[buf[3]]        << 12);
>              buf += 6;
>          }
> -        o = (uint32_t *)frame->data[0];
> -        if (channels == 2)
> -            for (i=0; i<frame->nb_samples * 2 - 6; i+=2) {
> -                if (o[i] || o[i+1] || o[i+2] || o[i+3])
> -                    break;
> -                if (o[i+4] == 0x6F872000U && o[i+5] == 0x54E1F000) {
> -                    non_pcm_data_type = (o[i+6] >> 16) & 0x1F;
> -                    break;
> -                }
> -            }
>      } else {
> -        uint16_t *o = (uint16_t *)frame->data[0];
> +        uint8_t  *p   = NULL;
> +        uint16_t *f16 = NULL;
> +
> +        if (s->non_pcm_dec)
> +            p = (uint8_t *)s->packet->data;
> +        else
> +            f16 = (uint16_t *)frame->data[0];
> +
>          for (; buf_size > 4; buf_size -= 5) {
> -            *o++ = (ff_reverse[buf[1]]        <<  8) |
> -                    ff_reverse[buf[0]];
> -            *o++ = (ff_reverse[buf[4] & 0xf0] << 12) |
> -                   (ff_reverse[buf[3]]        <<  4) |
> -                   (ff_reverse[buf[2]]        >>  4);
> +            uint8_t b[4];
> +
> +            b[0] = (ff_reverse[buf[1]       ]      ) ;
> +            b[1] = (ff_reverse[buf[0]       ]      ) ;
> +            b[2] = (ff_reverse[buf[4] & 0xf0] <<  4) |
> +                   (ff_reverse[buf[3] & 0x0f] >>  4) ;
> +            b[3] = (ff_reverse[buf[3] & 0xf0] <<  4) |
> +                   (ff_reverse[buf[2] & 0x0f] >>  4) ;
> +
> +            if (s->non_pcm_dec)
> +                for (int i = 0; i < 4; i++)
> +                    *p++ = b[i];
> +            else {
> +                *f16++ = (b[0] << 8) |
> +                         (b[1]     ) ;

AV_RB16(b)

> +                *f16++ = (b[2] << 8) |
> +                         (b[3]     ) ;
> +            }
>              buf += 5;
>          }
> -        o = (uint16_t *)frame->data[0];
> -        if (channels == 2)
> -            for (i=0; i<frame->nb_samples * 2 - 6; i+=2) {
> -                if (o[i] || o[i+1] || o[i+2] || o[i+3])
> -                    break;
> -                if (o[i+4] == 0xF872U && o[i+5] == 0x4E1F) {
> -                    non_pcm_data_type = (o[i+6] & 0x1F);
> -                    break;
> -                }
> -            }
>      }
>  
> -    if (non_pcm_data_type != -1) {
> -        if (s->non_pcm_mode == 3) {
> -            av_log(avctx, AV_LOG_ERROR,
> -                   "S302 non PCM mode with data type %d not supported\n",
> -                   non_pcm_data_type);
> -            return AVERROR_PATCHWELCOME;
> +    if (s->non_pcm_dec) {
> +        ret = avcodec_send_packet(s->non_pcm_ctx, s->packet);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "error %d submitting non-pcm packet with pts %"PRId64" for decoding\n", ret, s->packet->pts);
> +            return ret;
>          }
> -        if (s->non_pcm_mode & 1) {
> -            return avpkt->size;
> +        ret = avcodec_receive_frame(s->non_pcm_ctx, s->frame);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "error %d receiving non-pcm decoded frame for packet with pts %"PRId64"\n", ret, s->packet->pts);
> +            return ret;
>          }
> -    }
>  
> -    avctx->sample_rate = 48000;
> +        if (!s->avctx_props_set) {
> +            avctx->sample_fmt  = s->non_pcm_ctx->sample_fmt;
> +            avctx->sample_rate = s->non_pcm_ctx->sample_rate;
> +
> +            av_channel_layout_uninit(&avctx->ch_layout);
> +            ret = av_channel_layout_copy(&avctx->ch_layout, &s->non_pcm_ctx->ch_layout);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_ERROR, "error %d when copying channel layout from non-pcm decoder context to parent context.\n", ret);
> +                return ret;
> +            }
> +            s->avctx_props_set = 1;
> +        }
> +
> +        frame->nb_samples = s->frame->nb_samples;
> +        ret = ff_get_buffer(avctx, frame, 0);
> +        if (ret < 0)
> +            return ret;
> +
> +        for (int ch = 0; ch < s->frame->ch_layout.nb_channels; ch++)
> +            memcpy(frame->extended_data[ch], s->frame->extended_data[ch],
> +                   av_get_bytes_per_sample(s->non_pcm_ctx->sample_fmt) * s->frame->nb_samples);

Would you please explain to me why this extra frame s->frame exists at
all? (Is it just the assert due to the missing FrameDecodeData? If so,
then this should be changed instead.)

> +    }
>  
>      *got_frame_ptr = 1;
>  
>      return avpkt->size;
>  }
>  
> +static void s302m_flush(AVCodecContext *avctx)
> +{
> +    S302Context *s = avctx->priv_data;
> +
> +    if (s->non_pcm_dec && s->non_pcm_ctx)
> +        avcodec_flush_buffers(s->non_pcm_ctx);
> +}
> +
> +static av_cold int s302m_close(AVCodecContext *avctx)
> +{
> +    S302Context *s = avctx->priv_data;
> +
> +    avcodec_free_context(&s->non_pcm_ctx);
> +    av_packet_free(&s->packet);
> +    av_frame_free(&s->frame);
> +    av_dict_free(&s->non_pcm_opts);

non_pcm_opts is an av_opt-enabled field and is therefore freed generically.

> +
> +    return 0;
> +}
> +
>  #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_DECODING_PARAM
>  static const AVOption s302m_options[] = {
> -    {"non_pcm_mode", "Chooses what to do with NON-PCM", offsetof(S302Context, non_pcm_mode), AV_OPT_TYPE_INT, {.i64 = 3}, 0, 3, FLAGS, "non_pcm_mode"},
> -    {"copy"        , "Pass NON-PCM through unchanged"     , 0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 3, FLAGS, "non_pcm_mode"},
> -    {"drop"        , "Drop NON-PCM"                       , 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 3, FLAGS, "non_pcm_mode"},
> -    {"decode_copy" , "Decode if possible else passthrough", 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 3, FLAGS, "non_pcm_mode"},
> -    {"decode_drop" , "Decode if possible else drop"       , 0, AV_OPT_TYPE_CONST, {.i64 = 3}, 0, 3, FLAGS, "non_pcm_mode"},
> +    {"non_pcm_mode", "Chooses what to do with NON-PCM", offsetof(S302Context, non_pcm_mode), AV_OPT_TYPE_INT, {.i64 = NON_PCM_DEC_ELSE_DROP}, NON_PCM_COPY, NON_PCM_DEC_ELSE_DROP, FLAGS, "non_pcm_mode"},
> +    {"copy"        , "Pass NON-PCM through unchanged"     , 0, AV_OPT_TYPE_CONST, {.i64 = NON_PCM_COPY},          0, 3, FLAGS, "non_pcm_mode"},
> +    {"drop"        , "Drop NON-PCM"                       , 0, AV_OPT_TYPE_CONST, {.i64 = NON_PCM_DROP},          0, 3, FLAGS, "non_pcm_mode"},
> +    {"decode_copy" , "Decode if possible else passthrough", 0, AV_OPT_TYPE_CONST, {.i64 = NON_PCM_DEC_ELSE_COPY}, 0, 3, FLAGS, "non_pcm_mode"},
> +    {"decode_drop" , "Decode if possible else drop"       , 0, AV_OPT_TYPE_CONST, {.i64 = NON_PCM_DEC_ELSE_DROP}, 0, 3, FLAGS, "non_pcm_mode"},
> +    {"non_pcm_options", "Set options for non-pcm decoder",  offsetof(S302Context, non_pcm_opts), AV_OPT_TYPE_DICT, {.str = NULL}, 0, 0, FLAGS},
>      {NULL}
>  };
>  
> @@ -231,6 +623,9 @@ const FFCodec ff_s302m_decoder = {
>      CODEC_LONG_NAME("SMPTE 302M"),
>      .p.type         = AVMEDIA_TYPE_AUDIO,
>      .p.id           = AV_CODEC_ID_S302M,
> +    .init           = s302m_init,
> +    .close          = s302m_close,
> +    .flush          = s302m_flush,
>      .p.priv_class   = &s302m_class,
>      .priv_data_size = sizeof(S302Context),
>      FF_CODEC_DECODE_CB(s302m_decode_frame),