[FFmpeg-devel] [PATCH 2/2] avformat/mxf: support MCA audio information

Thu Sep 23 01:02:41 EEST 2021

On Fri, 17 Sep 2021, Marc-Antoine Arnaud wrote:

> ---
> libavformat/mxf.h    |   1 +
> libavformat/mxfdec.c | 277 ++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 272 insertions(+), 6 deletions(-)

I guess the questionable part of this patch is the internal reordering of
audio channels. This might or might not be what the user expects.

Admittedly, ffmpeg/libavformat is limited in handling custom audio channel
orderings. Maybe for now the best approach would be to add an option
which can be used to enable channel reordering, but not do it by default?
Or maybe do it by default?

Can you split this into two parts? One part would be everything except the
channel reordering, and the second would be the channel reordering itself.

>
> diff --git a/libavformat/mxf.h b/libavformat/mxf.h
> index fe9c52732c..cddbcb13c9 100644
> --- a/libavformat/mxf.h
> +++ b/libavformat/mxf.h
> @@ -50,6 +50,7 @@ enum MXFMetadataSetType {
>     TaggedValue,
>     TapeDescriptor,
>     AVCSubDescriptor,
> +    MCASubDescriptor,
> };
>
> enum MXFFrameLayout {
> diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
> index c28549f6a9..27893cda76 100644
> --- a/libavformat/mxfdec.c
> +++ b/libavformat/mxfdec.c
> @@ -45,17 +45,21 @@
>  */
>
> #include <inttypes.h>
> +#include <stdbool.h>

ffmpeg tends not to use this, so stick to a simple int with 1/0.

>
> #include "libavutil/aes.h"
> #include "libavutil/avstring.h"
> #include "libavutil/mastering_display_metadata.h"
> #include "libavutil/mathematics.h"
> #include "libavcodec/bytestream.h"
> +#include "libavcodec/internal.h"
> +#include "libavutil/channel_layout.h"
> #include "libavutil/intreadwrite.h"
> #include "libavutil/parseutils.h"
> #include "libavutil/timecode.h"
> #include "libavutil/opt.h"
> #include "avformat.h"
> +#include "avlanguage.h"
> #include "internal.h"
> #include "mxf.h"
>
> @@ -177,6 +181,8 @@ typedef struct {
>     int body_sid;
>     MXFWrappingScheme wrapping;
>     int edit_units_per_packet; /* how many edit units to read at a time (PCM, ClipWrapped) */
> +    bool require_reordering;
> +    int channel_ordering[FF_SANE_NB_CHANNELS];
> } MXFTrack;
>
> typedef struct MXFDescriptor {
> @@ -205,6 +211,8 @@ typedef struct MXFDescriptor {
>     unsigned int vert_subsampling;
>     UID *file_descriptors_refs;
>     int file_descriptors_count;
> +    UID *sub_descriptors_refs;
> +    int sub_descriptors_count;
>     int linked_track_id;
>     uint8_t *extradata;
>     int extradata_size;
> @@ -217,6 +225,15 @@ typedef struct MXFDescriptor {
>     size_t coll_size;
> } MXFDescriptor;
>
> +typedef struct MXFMCASubDescriptor {
> +    MXFMetadataSet meta;
> +    UID uid;
> +    UID mca_link_id;
> +    UID mca_group_link_id;
> +    UID mca_label_dictionnary_id;
> +    char *language;
> +} MXFMCASubDescriptor;
> +
> typedef struct MXFIndexTableSegment {
>     MXFMetadataSet meta;
>     int edit_unit_byte_count;
> @@ -311,6 +328,9 @@ static const uint8_t mxf_system_item_key_cp[]              = { 0x06,0x0e,0x2b,0x
> static const uint8_t mxf_system_item_key_gc[]              = { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x03,0x01,0x14 };
> static const uint8_t mxf_klv_key[]                         = { 0x06,0x0e,0x2b,0x34 };
> static const uint8_t mxf_apple_coll_prefix[]               = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x0e,0x20,0x04,0x01,0x05,0x03,0x01 };
> +static const uint8_t mxf_audio_channel[]                   = { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01 };
> +static const uint8_t mxf_soundfield_group[]                = { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02 };
> +
> /* complete keys to match */
> static const uint8_t mxf_crypto_source_container_ul[]      = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x09,0x06,0x01,0x01,0x02,0x02,0x00,0x00,0x00 };
> static const uint8_t mxf_encrypted_triplet_key[]           = { 0x06,0x0e,0x2b,0x34,0x02,0x04,0x01,0x07,0x0d,0x01,0x03,0x01,0x02,0x7e,0x01,0x00 };
> @@ -323,6 +343,15 @@ static const uint8_t mxf_indirect_value_utf16be[]          = { 0x42,0x01,0x10,0x
> static const uint8_t mxf_apple_coll_max_cll[]              = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x0e,0x20,0x04,0x01,0x05,0x03,0x01,0x01 };
> static const uint8_t mxf_apple_coll_max_fall[]             = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x0e,0x20,0x04,0x01,0x05,0x03,0x01,0x02 };
>
> +static const uint8_t mxf_mca_label_dictionnary_id[]        = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x01,0x00,0x00,0x00 };

Typo: "dictionary", not "dictionnary".

> +static const uint8_t mxf_mca_tag_symbol[]                  = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x02,0x00,0x00,0x00 };
> +static const uint8_t mxf_mca_tag_name[]                    = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x03,0x00,0x00,0x00 };
> +static const uint8_t mxf_mca_link_id[]                     = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x05,0x00,0x00,0x00 };
> +static const uint8_t mxf_soundfield_group_link_id[]        = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x06,0x00,0x00,0x00 };
> +static const uint8_t mxf_mca_rfc5646_spoken_language[]     = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0d,0x03,0x01,0x01,0x02,0x03,0x15,0x00,0x00 };
> +
> +static const uint8_t mxf_sub_descriptor[]                  = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x09,0x06,0x01,0x01,0x04,0x06,0x10,0x00,0x00 };
> +
> static const uint8_t mxf_mastering_display_prefix[13]      = { FF_MXF_MasteringDisplay_PREFIX };
> static const uint8_t mxf_mastering_display_uls[4][16] = {
>     FF_MXF_MasteringDisplayPrimaries,
> @@ -343,6 +372,11 @@ static void mxf_free_metadataset(MXFMetadataSet **ctx, int freectx)
>         av_freep(&((MXFDescriptor *)*ctx)->mastering);
>         av_freep(&((MXFDescriptor *)*ctx)->coll);
>         av_freep(&((MXFDescriptor *)*ctx)->file_descriptors_refs);
> +        av_freep(&((MXFDescriptor *)*ctx)->sub_descriptors_refs);
> +        break;
> +    case MCASubDescriptor:
> +        if (((MXFMCASubDescriptor *)*ctx)->language)
> +            av_freep(&((MXFMCASubDescriptor *)*ctx)->language);
>         break;
>     case Sequence:
>         av_freep(&((MXFSequence *)*ctx)->structural_components_refs);
> @@ -899,6 +933,30 @@ static int mxf_read_strong_ref_array(AVIOContext *pb, UID **refs, int *count)
>     return 0;
> }
>
> +static inline int mxf_read_us_ascii_string(AVIOContext *pb, int size, char** str)
> +{
> +    int ret;
> +    size_t buf_size;
> +
> +    if (size < 0)

if (size < 0 || size > INT_MAX - 1)

> +        return AVERROR(EINVAL);
> +
> +    buf_size = size + 1;
> +    av_free(*str);
> +    *str = av_malloc(buf_size);
> +    if (!*str)
> +        return AVERROR(ENOMEM);
> +
> +    ret = avio_get_str(pb, size, *str, buf_size);
> +
> +    if (ret < 0) {
> +        av_freep(str);
> +        return ret;
> +    }
> +
> +    return ret;
> +}
> +
> static inline int mxf_read_utf16_string(AVIOContext *pb, int size, char** str, int be)
> {
>     int ret;
> @@ -1353,11 +1411,43 @@ static int mxf_read_generic_descriptor(void *arg, AVIOContext *pb, int tag, int
>                 descriptor->coll->MaxFALL = avio_rb16(pb);
>             }
>         }
> +
> +        if (IS_KLV_KEY(uid, mxf_sub_descriptor)) {
> +            mxf_read_strong_ref_array(pb, &descriptor->sub_descriptors_refs, &descriptor->sub_descriptors_count);
> +            break;

Unneeded break.

> +        }
>         break;
>     }
>     return 0;
> }
>
> +static int mxf_read_mca_sub_descriptor(void *arg, AVIOContext *pb, int tag, int size, UID uid, int64_t klv_offset)
> +{
> +    MXFMCASubDescriptor *mca_sub_descriptor = arg;
> +
> +    if (IS_KLV_KEY(uid, mxf_mca_label_dictionnary_id)) {
> +        avio_read(pb, mca_sub_descriptor->mca_label_dictionnary_id, 16);
> +    }
> +    if (IS_KLV_KEY(uid, mxf_mca_link_id)) {
> +        avio_read(pb, mca_sub_descriptor->mca_link_id, 16);
> +    }
> +    if (IS_KLV_KEY(uid, mxf_soundfield_group_link_id)) {
> +        avio_read(pb, mca_sub_descriptor->mca_group_link_id, 16);
> +    }

You don't have to open braces for single-line blocks. ffmpeg tends to
follow this style, so preferably you should too.

> +
> +    if (IS_KLV_KEY(uid, mxf_mca_rfc5646_spoken_language)) {
> +        char *str = NULL;
> +        int ret = 0;
> +
> +        if ((ret = mxf_read_us_ascii_string(pb, size, &str)) < 0) \
> +            return ret;
> +
> +        mca_sub_descriptor->language = str;

Simply

if (IS_KLV_KEY(uid, mxf_mca_rfc5646_spoken_language))
    return mxf_read_us_ascii_string(pb, size, &mca_sub_descriptor->language);

> +    }
> +
> +    return 0;
> +}
> +
> static int mxf_read_indirect_value(void *arg, AVIOContext *pb, int size)
> {
>     MXFTaggedValue *tagged_value = arg;
> @@ -1487,6 +1577,56 @@ static const MXFCodecUL mxf_data_essence_container_uls[] = {
>     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0, AV_CODEC_ID_NONE },
> };
>
> +typedef struct MXFSoundfieldGroupUL {
> +    UID uid;
> +    int64_t id;
> +} MXFSoundfieldGroupUL;
> +
> +static const MXFSoundfieldGroupUL mxf_soundfield_groups[] = {
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x01,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_5POINT1 }, // 5.1
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x02,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_7POINT1 }, // 7.1 DS
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x03,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_7POINT1_WIDE }, // 7.1 SDS
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x04,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_6POINT1 }, // 6.1
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x05,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_MONO }, // 1.0 Monoral
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x01,0x00,0x00,0x00 }, AV_CH_LAYOUT_STEREO }, // Standard stereo
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x02,0x00,0x00,0x00 }, AV_CH_LAYOUT_STEREO }, // Dual mono
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x04,0x00,0x00,0x00 }, AV_CH_LAYOUT_SURROUND }, // 3.0
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x05,0x00,0x00,0x00 }, AV_CH_LAYOUT_4POINT0 }, // 4.0
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x06,0x00,0x00,0x00 }, AV_CH_LAYOUT_5POINT0 }, // 5.0
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x07,0x00,0x00,0x00 }, AV_CH_LAYOUT_6POINT0 }, // 6.0
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x08,0x00,0x00,0x00 }, AV_CH_LAYOUT_7POINT0 }, // 7.0
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x09,0x00,0x00,0x00 }, AV_CH_LAYOUT_STEREO }, // LrRt
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x0a,0x00,0x00,0x00 }, AV_CH_LAYOUT_5POINT1 }, // 5.1 EX
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x0b,0x00,0x00,0x00 }, AV_CH_LAYOUT_MONO }, // Hearing accessibility
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x0c,0x00,0x00,0x00 }, AV_CH_LAYOUT_MONO }, // Visual accessibility
> +    { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0 },
> +};
> +
> +typedef struct MXFChannelOrderingUL {
> +    UID uid;
> +    int index;
> +    enum AVAudioServiceType service_type;
> +} MXFChannelOrderingUL;
> +
> +static const MXFChannelOrderingUL mxf_channel_ordering[] = {
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x01,0x00,0x00,0x00,0x00 }, 0, AV_AUDIO_SERVICE_TYPE_NB }, // Left audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x02,0x00,0x00,0x00,0x00 }, 1, AV_AUDIO_SERVICE_TYPE_NB }, // Right audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x03,0x00,0x00,0x00,0x00 }, 2, AV_AUDIO_SERVICE_TYPE_NB }, // Center audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x04,0x00,0x00,0x00,0x00 }, 5, AV_AUDIO_SERVICE_TYPE_NB }, // LFE audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x05,0x00,0x00,0x00,0x00 }, 3, AV_AUDIO_SERVICE_TYPE_NB }, // Left surround audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x06,0x00,0x00,0x00,0x00 }, 4, AV_AUDIO_SERVICE_TYPE_NB }, // Right surround audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x07,0x00,0x00,0x00,0x00 }, 3, AV_AUDIO_SERVICE_TYPE_NB }, // Left side surround audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x08,0x00,0x00,0x00,0x00 }, 4, AV_AUDIO_SERVICE_TYPE_NB }, // Right side surround audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x09,0x00,0x00,0x00,0x00 }, 6, AV_AUDIO_SERVICE_TYPE_NB }, // Left rear surround audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0a,0x00,0x00,0x00,0x00 }, 7, AV_AUDIO_SERVICE_TYPE_NB }, // Right rear surround audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0b,0x00,0x00,0x00,0x00 }, 6, AV_AUDIO_SERVICE_TYPE_NB }, // Left center audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0c,0x00,0x00,0x00,0x00 }, 7, AV_AUDIO_SERVICE_TYPE_NB }, // Right center audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0d,0x00,0x00,0x00,0x00 }, 6, AV_AUDIO_SERVICE_TYPE_NB }, // Center surround audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0e,0x00,0x00,0x00,0x00 }, 0, AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED }, // Hearing impaired audio channel
> +    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0f,0x00,0x00,0x00,0x00 }, 0, AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED }, // Visually impaired narrative audio channel
> +    { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 }, 0, AV_AUDIO_SERVICE_TYPE_NB },
> +};
> +
> static MXFWrappingScheme mxf_get_wrapping_kind(UID *essence_container_ul)
> {
>     int val;
> @@ -2287,6 +2427,25 @@ static enum AVColorRange mxf_get_color_range(MXFContext *mxf, MXFDescriptor *des
>     return AVCOL_RANGE_UNSPECIFIED;
> }
>
> +static int is_pcm(enum AVCodecID codec_id)
> +{
> +    /* we only care about "normal" PCM codecs until we get samples */
> +    return codec_id >= AV_CODEC_ID_PCM_S16LE && codec_id < AV_CODEC_ID_PCM_S24DAUD;
> +}
> +
> +static void set_language(AVFormatContext *s, const char *rfc5646, AVDictionary **met)
> +{
> +    // language abbr should contain at least 2 chars
> +    if (rfc5646 && strlen(rfc5646) > 1) {
> +        const char primary_tag[3] = { rfc5646[0], rfc5646[1], '\0' }; // ignore country code if any

RFC5646 allows 3 char codes as well, no? So maybe this would be better:

char primary_tag[4] =
   {rfc5646[0], rfc5646[1], rfc5646[2] != '-' ? rfc5646[2] : '\0', '\0'}

> +        const char *iso6392       = ff_convert_lang_to(primary_tag,
> +                                                       AV_LANG_ISO639_2_BIBL);
> +        if (iso6392)
> +            if (av_dict_set(met, "language", iso6392, 0) < 0)
> +                av_log(s, AV_LOG_WARNING, "av_dict_set failed.\n");

return av_dict_set().

In general you don't want to log av_dict_set failure, because it means 
ENOMEM. Preferably it should be propagated back...

> +    }
> +}
> +
> static int mxf_parse_structural_metadata(MXFContext *mxf)
> {
>     MXFPackage *material_package = NULL;
> @@ -2322,7 +2481,9 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
>         const MXFCodecUL *pix_fmt_ul = NULL;
>         AVStream *st;
>         AVTimecode tc;
> +        enum AVAudioServiceType *ast;
>         int flags;
> +        int current_channel;
>
>         if (!(material_track = mxf_resolve_strong_ref(mxf, &material_package->tracks_refs[i], Track))) {
>             av_log(mxf->fc, AV_LOG_ERROR, "could not resolve material track strong ref\n");
> @@ -2681,6 +2842,87 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
>                 st->internal->need_parsing = AVSTREAM_PARSE_FULL;
>             }
>             st->codecpar->bits_per_coded_sample = av_get_bits_per_sample(st->codecpar->codec_id);
> +
> +            current_channel = 0;
> +
> +            if (descriptor->channels >= FF_SANE_NB_CHANNELS) {
> +                av_log(mxf->fc, AV_LOG_ERROR, "max number of channels %s reached\n", FF_SANE_NB_CHANNELS);

%s? FF_SANE_NB_CHANNELS is an integer, so this should be %d.

> +                return AVERROR_INVALIDDATA;
> +            }
> +
> +            for (j = 0; j < descriptor->channels; ++j) {
> +                source_track->channel_ordering[j] = j;
> +            }
> +
> +            for (j = 0; j < descriptor->sub_descriptors_count; j++) {
> +                MXFMCASubDescriptor *mca_sub_descriptor = mxf_resolve_strong_ref(mxf, &descriptor->sub_descriptors_refs[j], MCASubDescriptor);
> +                if (mca_sub_descriptor == NULL) {
> +                    continue;
> +                }
> +
> +                // Soundfield group
> +                if (IS_KLV_KEY(mca_sub_descriptor->mca_label_dictionnary_id, mxf_soundfield_group)) {
> +                    MXFSoundfieldGroupUL* group_ptr = (MXFSoundfieldGroupUL*)&mxf_soundfield_groups[0];
> +
> +                    while (group_ptr->uid[0]) {
> +                        if (IS_KLV_KEY(group_ptr->uid, mca_sub_descriptor->mca_label_dictionnary_id)) {
> +                            st->codecpar->channel_layout = group_ptr->id;
> +                            break;
> +                        }
> +                        group_ptr++;
> +                    }
> +                }
> +
> +                // Audio channel
> +                if (IS_KLV_KEY(mca_sub_descriptor->mca_label_dictionnary_id, mxf_audio_channel)) {
> +                    MXFChannelOrderingUL* channel_ordering_ptr = (MXFChannelOrderingUL*)&mxf_channel_ordering[0];
> +
> +                    while (channel_ordering_ptr->uid[0]) {
> +                        if (IS_KLV_KEY(channel_ordering_ptr->uid, mca_sub_descriptor->mca_label_dictionnary_id)) {
> +                            source_track->channel_ordering[current_channel] = channel_ordering_ptr->index;
> +
> +                            if(channel_ordering_ptr->service_type != AV_AUDIO_SERVICE_TYPE_NB) {
> +                                ast = (enum AVAudioServiceType*)av_stream_new_side_data(st, AV_PKT_DATA_AUDIO_SERVICE_TYPE, sizeof(*ast));
> +                                *ast = channel_ordering_ptr->service_type;
> +                            }
> +
> +                            current_channel += 1;
> +                            break;
> +                        }
> +                        channel_ordering_ptr++;
> +                    }
> +                }
> +
> +                // set language from MCA spoken language information
> +                if (mca_sub_descriptor->language) {
> +                    set_language(mxf->fc, mca_sub_descriptor->language, &st->metadata);
> +                }
> +            }
> +
> +            // check if the mapping is not required
> +            source_track->require_reordering = false;
> +            for (j = 0; j < descriptor->channels; ++j) {
> +                if (source_track->channel_ordering[j] != j) {
> +                    source_track->require_reordering = true;
> +                    break;
> +                }
> +            }
> +
> +            if (source_track->require_reordering && is_pcm(st->codecpar->codec_id)) {
> +                current_channel = 0;
> +                av_log(mxf->fc, AV_LOG_DEBUG, "MCA Audio mapping (");
> +                for(j = 0; j < descriptor->channels; ++j) {
> +                    for(int k = 0; k < descriptor->channels; ++k) {
> +                        if(source_track->channel_ordering[k] == current_channel) {
> +                            av_log(mxf->fc, AV_LOG_DEBUG, "%d -> %d", source_track->channel_ordering[k], k);
> +                            if (current_channel != descriptor->channels - 1)
> +                                av_log(mxf->fc, AV_LOG_DEBUG, ", ");
> +                            current_channel += 1;
> +                        }
> +                    }
> +                }
> +                av_log(mxf->fc, AV_LOG_DEBUG, ")\n");
> +            }
>         } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
>             enum AVMediaType type;
>             container_ul = mxf_get_codec_ul(mxf_data_essence_container_uls, essence_container_ul);
> @@ -2881,6 +3123,8 @@ static const MXFMetadataReadTableEntry mxf_metadata_read_table[] = {
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5c,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* VANC/VBI - SMPTE 436M */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5e,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* MPEG2AudioDescriptor */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x64,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* DC Timed Text Descriptor */
> +    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x6c,0x00 }, mxf_read_mca_sub_descriptor, sizeof(MXFMCASubDescriptor), MCASubDescriptor }, /* Soundfield Group Label Subdescriptor */
> +    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x6b,0x00 }, mxf_read_mca_sub_descriptor, sizeof(MXFMCASubDescriptor), MCASubDescriptor }, /* Audio Channel Label Subdescriptor */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3A,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Static Track */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3B,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Generic Track */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x14,0x00 }, mxf_read_timecode_component, sizeof(MXFTimecodeComponent), TimecodeComponent },
> @@ -3180,12 +3424,6 @@ static void mxf_compute_essence_containers(AVFormatContext *s)
>     }
> }
>
> -static int is_pcm(enum AVCodecID codec_id)
> -{
> -    /* we only care about "normal" PCM codecs until we get samples */
> -    return codec_id >= AV_CODEC_ID_PCM_S16LE && codec_id < AV_CODEC_ID_PCM_S24DAUD;
> -}
> -
> static MXFIndexTable *mxf_find_index_table(MXFContext *mxf, int index_sid)
> {
>     int i;
> @@ -3612,6 +3850,27 @@ static int mxf_set_pts(MXFContext *mxf, AVStream *st, AVPacket *pkt)
>     return 0;
> }
>
> +static void mxf_audio_remapping(int* channel_ordering, uint8_t* data, int size, int sample_size, int channels)
> +{
> +    int sample_offset = channels * sample_size;
> +    int number_of_samples = size / sample_offset;
> +    uint8_t* tmp = av_malloc(sample_offset);

unchecked allocation.

> +    uint8_t* data_ptr = data;
> +
> +    for (int sample = 0; sample < number_of_samples; ++sample) {
> +        memcpy(tmp, data_ptr, sample_offset);
> +
> +        for (int channel = 0; channel < channels; ++channel) {
> +            for (int sample_index = 0; sample_index < sample_size; ++sample_index) {
> +                data_ptr[sample_size * channel_ordering[channel] + sample_index] = tmp[sample_size * channel + sample_index];
> +            }
> +        }
> +
> +        data_ptr += sample_offset;
> +    }
> +    av_free(tmp);
> +}
> +
> static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
> {
>     KLVPacket klv;
> @@ -3726,6 +3985,12 @@ static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
>                 return ret;
>             }
>
> +            // for audio, process audio remapping if MCA label requires it
> +            if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && track->require_reordering) {
> +                int byte_per_sample = st->codecpar->bits_per_coded_sample / 8;
> +                mxf_audio_remapping(track->channel_ordering, pkt->data, pkt->size, byte_per_sample, st->codecpar->channels);
> +            }
> +
>             /* seek for truncated packets */
>             avio_seek(s->pb, klv.next_klv, SEEK_SET);

Regards,
Marton