[FFmpeg-devel] [PATCH v2] avformat: Add support for embedding cover art in Ogg files
Zsolt Vadász
zsolt_vadasz at protonmail.com
Sun Jan 15 21:49:10 EET 2023
Signed-off-by: Zsolt Vadasz <zsolt_vadasz at protonmail.com>
---
libavformat/flac_picture.c | 132 +++++++++++++++++++++++
libavformat/flac_picture.h | 5 +
libavformat/flacenc.c | 90 +---------------
libavformat/oggenc.c | 207 ++++++++++++++++++++++++++++++-------
4 files changed, 308 insertions(+), 126 deletions(-)
diff --git a/libavformat/flac_picture.c b/libavformat/flac_picture.c
index b33fee75b4..30152a2ba9 100644
--- a/libavformat/flac_picture.c
+++ b/libavformat/flac_picture.c
@@ -20,6 +20,9 @@
*/
#include "libavutil/intreadwrite.h"
+#include "libavutil/avstring.h"
+#include "libavutil/base64.h"
+#include "libavutil/pixdesc.h"
#include "libavcodec/bytestream.h"
#include "libavcodec/png.h"
#include "avformat.h"
@@ -188,3 +191,132 @@ fail:
return ret;
}
+
+/**
+ * Serialize an attached picture as a FLAC PICTURE metadata block.
+ *
+ * The picture type is taken from the "comment" tag of the picture stream
+ * (matched case-insensitively against ff_id3v2_picture_types, defaulting to
+ * type 0) and the description from its "title" tag.
+ *
+ * @param s                muxer context; pkt->stream_index selects the
+ *                         picture stream
+ * @param isogg            0: write the block (with its FLAC block header)
+ *                         to s->pb; non-zero: base64-encode the block and
+ *                         add it as a METADATA_BLOCK_PICTURE tag to the
+ *                         metadata of stream audio_stream_idx
+ * @param attached_types   bitmask of picture types written so far; the bit
+ *                         of this picture's type is set on success
+ * @param audio_stream_idx stream receiving the tag, only read when isogg
+ * @param pkt              packet holding the encoded image; it stays owned
+ *                         by the caller
+ * @return 0 on success or when the packet carries no data, a negative
+ *         AVERROR code otherwise
+ */
+int ff_flac_write_picture(struct AVFormatContext *s,
+                          int isogg,
+                          unsigned *attached_types,
+                          int audio_stream_idx, // unused if !isogg
+                          AVPacket *pkt)
+{
+    AVIOContext *pb = s->pb;
+    const AVPixFmtDescriptor *pixdesc;
+    const CodecMime *mime = ff_id3v2_mime_tags;
+    AVDictionaryEntry *e;
+    const char *mimetype = NULL, *desc = "";
+    const AVStream *st = s->streams[pkt->stream_index];
+    int i, mimelen, desclen, type = 0, blocklen, bpp;
+    int64_t total;
+
+    if (!pkt->data)
+        return 0;
+
+    /* the unsigned cast also rejects negative indices */
+    if (isogg && (unsigned)audio_stream_idx >= s->nb_streams) {
+        av_log(s, AV_LOG_ERROR, "Invalid stream index %d for an attached "
+               "picture.\n", audio_stream_idx);
+        return AVERROR(EINVAL);
+    }
+
+    while (mime->id != AV_CODEC_ID_NONE) {
+        if (mime->id == st->codecpar->codec_id) {
+            mimetype = mime->str;
+            break;
+        }
+        mime++;
+    }
+    if (!mimetype) {
+        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
+               "write an attached picture.\n", st->index);
+        return AVERROR(EINVAL);
+    }
+    mimelen = strlen(mimetype);
+
+    /* get the picture type */
+    e = av_dict_get(st->metadata, "comment", NULL, 0);
+    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
+        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
+            type = i;
+            break;
+        }
+    }
+
+    /* only types 1 and 2 (mask 0x6) are limited to a single picture */
+    if ((*attached_types & (1U << type)) & 0x6) {
+        av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]);
+        return AVERROR(EINVAL);
+    }
+
+    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
+                      st->codecpar->width  != 32 ||
+                      st->codecpar->height != 32)) {
+        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* get the description */
+    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
+        desc = e->value;
+
+    /* type, mime length + mime, description length + description, width,
+     * height, depth, colour count, data length + data; summed in 64 bits so
+     * that oversized input cannot wrap around before the limit check */
+    total = (int64_t)4 + 4 + mimelen + 4 + (int64_t)strlen(desc) +
+            4 + 4 + 4 + 4 + 4 + pkt->size;
+    if (total >= 1<<24) {
+        av_log(s, AV_LOG_ERROR, "Picture block too big %"PRId64" >= %d\n", total, 1<<24);
+        return AVERROR(EINVAL);
+    }
+    blocklen = total;
+    desclen  = blocklen - (4 + 4 + mimelen + 4 + 4 + 4 + 4 + 4 + 4 + pkt->size);
+
+    pixdesc = av_pix_fmt_desc_get(st->codecpar->format);
+    bpp     = pixdesc ? av_get_bits_per_pixel(pixdesc) : 0;
+
+    if (!isogg) {
+        avio_w8(pb, 0x06);
+        avio_wb24(pb, blocklen);
+
+        avio_wb32(pb, type);
+
+        avio_wb32(pb, mimelen);
+        avio_write(pb, mimetype, mimelen);
+
+        avio_wb32(pb, desclen);
+        avio_write(pb, desc, desclen);
+
+        avio_wb32(pb, st->codecpar->width);
+        avio_wb32(pb, st->codecpar->height);
+        avio_wb32(pb, bpp);
+        avio_wb32(pb, 0);
+
+        avio_wb32(pb, pkt->size);
+        avio_write(pb, pkt->data, pkt->size);
+    } else {
+        AVStream *audio_stream = s->streams[audio_stream_idx];
+        uint8_t *block, *ptr;
+        char *encoded;
+        int encoded_len, ret;
+
+        block = av_mallocz(blocklen);
+        if (!block)
+            return AVERROR(ENOMEM);
+
+        ptr = block;
+        bytestream_put_be32(&ptr, type);
+
+        bytestream_put_be32(&ptr, mimelen);
+        bytestream_put_buffer(&ptr, mimetype, mimelen);
+
+        bytestream_put_be32(&ptr, desclen);
+        bytestream_put_buffer(&ptr, desc, desclen);
+
+        bytestream_put_be32(&ptr, st->codecpar->width);
+        bytestream_put_be32(&ptr, st->codecpar->height);
+        bytestream_put_be32(&ptr, bpp);
+        bytestream_put_be32(&ptr, 0);
+
+        bytestream_put_be32(&ptr, pkt->size);
+        bytestream_put_buffer(&ptr, pkt->data, pkt->size);
+
+        encoded_len = AV_BASE64_SIZE(blocklen);
+        encoded     = av_mallocz(encoded_len);
+        if (!encoded) {
+            av_free(block);
+            return AVERROR(ENOMEM);
+        }
+        av_base64_encode(encoded, encoded_len, block, blocklen);
+        av_free(block);
+
+        /* MULTIKEY keeps earlier pictures instead of replacing them;
+         * DONT_STRDUP_VAL hands the buffer to the dictionary, which also
+         * releases it if the call fails */
+        ret = av_dict_set(&audio_stream->metadata, "METADATA_BLOCK_PICTURE", encoded,
+                          AV_DICT_MULTIKEY | AV_DICT_DONT_STRDUP_VAL);
+        if (ret < 0)
+            return ret;
+    }
+
+    /* remember the type only once the picture has really been emitted */
+    *attached_types |= 1U << type;
+    return 0;
+}
diff --git a/libavformat/flac_picture.h b/libavformat/flac_picture.h
index db074e531d..efa11aee32 100644
--- a/libavformat/flac_picture.h
+++ b/libavformat/flac_picture.h
@@ -39,5 +39,10 @@
*/
int ff_flac_parse_picture(AVFormatContext *s, uint8_t **buf, int buf_size,
int truncate_workaround);
+/**
+ * Write an attached picture as a FLAC PICTURE metadata block.
+ *
+ * @param s                muxer context; pkt->stream_index selects the
+ *                         stream the picture belongs to
+ * @param isogg            0: the block is written to s->pb;
+ *                         non-zero: the block is base64-encoded and stored
+ *                         as the METADATA_BLOCK_PICTURE tag of stream
+ *                         audio_stream_idx
+ * @param attached_types   bitmask with one bit per picture type, updated
+ *                         with the type of this picture
+ * @param audio_stream_idx stream receiving the tag; unused if !isogg
+ * @param pkt              packet containing the encoded picture
+ * @return 0 on success or if the packet has no data, a negative AVERROR
+ *         code on failure (unknown mimetype, duplicate picture of type 1
+ *         or 2, file icon that is not a 32x32 PNG, block of 1<<24 bytes
+ *         or more)
+ */
+int ff_flac_write_picture(struct AVFormatContext *s,
+ int isogg,
+ unsigned *attached_types,
+ int audio_stream_idx,
+ AVPacket *pkt);
#endif /* AVFORMAT_FLAC_PICTURE_H */
diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c
index d7930f4a6e..ec26113bb2 100644
--- a/libavformat/flacenc.c
+++ b/libavformat/flacenc.c
@@ -32,6 +32,7 @@
#include "internal.h"
#include "version.h"
#include "vorbiscomment.h"
+#include "flac_picture.h"
typedef struct FlacMuxerContext {
@@ -78,94 +79,9 @@ static int flac_write_block_comment(AVIOContext *pb, AVDictionary **m,
return 0;
}
-static int flac_write_picture(struct AVFormatContext *s, AVPacket *pkt)
-{
- FlacMuxerContext *c = s->priv_data;
- AVIOContext *pb = s->pb;
- const AVPixFmtDescriptor *pixdesc;
- const CodecMime *mime = ff_id3v2_mime_tags;
- AVDictionaryEntry *e;
- const char *mimetype = NULL, *desc = "";
- const AVStream *st = s->streams[pkt->stream_index];
- int i, mimelen, desclen, type = 0, blocklen;
-
- if (!pkt->data)
- return 0;
-
- while (mime->id != AV_CODEC_ID_NONE) {
- if (mime->id == st->codecpar->codec_id) {
- mimetype = mime->str;
- break;
- }
- mime++;
- }
- if (!mimetype) {
- av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
- "write an attached picture.\n", st->index);
- return AVERROR(EINVAL);
- }
- mimelen = strlen(mimetype);
-
- /* get the picture type */
- e = av_dict_get(st->metadata, "comment", NULL, 0);
- for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
- if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
- type = i;
- break;
- }
- }
-
- if ((c->attached_types & (1 << type)) & 0x6) {
- av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]);
- return AVERROR(EINVAL);
- }
-
- if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
- st->codecpar->width != 32 ||
- st->codecpar->height != 32)) {
- av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG");
- return AVERROR(EINVAL);
- }
-
- c->attached_types |= (1 << type);
-
- /* get the description */
- if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
- desc = e->value;
- desclen = strlen(desc);
-
- blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size;
- if (blocklen >= 1<<24) {
- av_log(s, AV_LOG_ERROR, "Picture block too big %d >= %d\n", blocklen, 1<<24);
- return AVERROR(EINVAL);
- }
-
- avio_w8(pb, 0x06);
- avio_wb24(pb, blocklen);
-
- avio_wb32(pb, type);
-
- avio_wb32(pb, mimelen);
- avio_write(pb, mimetype, mimelen);
-
- avio_wb32(pb, desclen);
- avio_write(pb, desc, desclen);
-
- avio_wb32(pb, st->codecpar->width);
- avio_wb32(pb, st->codecpar->height);
- if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format)))
- avio_wb32(pb, av_get_bits_per_pixel(pixdesc));
- else
- avio_wb32(pb, 0);
- avio_wb32(pb, 0);
-
- avio_wb32(pb, pkt->size);
- avio_write(pb, pkt->data, pkt->size);
- return 0;
-}
-
static int flac_finish_header(struct AVFormatContext *s)
{
+ FlacMuxerContext *c = s->priv_data;
int i, ret, padding = s->metadata_header_padding;
if (padding < 0)
padding = 8192;
@@ -178,7 +94,7 @@ static int flac_finish_header(struct AVFormatContext *s)
AVPacket *pkt = st->priv_data;
if (!pkt)
continue;
- ret = flac_write_picture(s, pkt);
+ ret = ff_flac_write_picture(s, 0, &c->attached_types, -1, pkt);
av_packet_unref(pkt);
if (ret < 0 && (s->error_recognition & AV_EF_EXPLODE))
return ret;
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index 5003314adb..c604e493f0 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -23,18 +23,27 @@
#include <stdint.h>
+#include "libavcodec/codec_id.h"
+#include "libavutil/avutil.h"
#include "libavutil/crc.h"
+#include "libavutil/log.h"
#include "libavutil/mathematics.h"
#include "libavutil/opt.h"
#include "libavutil/random_seed.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+#include "libavutil/base64.h"
+#include "libavutil/bswap.h"
#include "libavcodec/xiph.h"
#include "libavcodec/bytestream.h"
#include "libavcodec/flac.h"
#include "avformat.h"
+#include "id3v2.h"
#include "avio_internal.h"
#include "internal.h"
#include "version.h"
#include "vorbiscomment.h"
+#include "flac_picture.h"
#define MAX_PAGE_SIZE 65025
@@ -77,6 +86,11 @@ typedef struct OGGContext {
int pref_size; ///< preferred page size (0 => fill all segments)
int64_t pref_duration; ///< preferred page duration (0 => fill all segments)
int serial_offset;
+
+ PacketList queue;
+ int audio_stream_idx;
+ int waiting_pics;
+ unsigned attached_types;
} OGGContext;
#define OFFSET(x) offsetof(OGGContext, x)
@@ -468,12 +482,14 @@ static void ogg_write_pages(AVFormatContext *s, int flush)
ogg->page_list = p;
}
-static int ogg_init(AVFormatContext *s)
+static int ogg_finish_init(AVFormatContext *s)
{
OGGContext *ogg = s->priv_data;
OGGStreamContext *oggstream = NULL;
int i, j;
+ ogg->waiting_pics = 0;
+
if (ogg->pref_size)
av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n");
@@ -481,29 +497,6 @@ static int ogg_init(AVFormatContext *s)
AVStream *st = s->streams[i];
unsigned serial_num = i + ogg->serial_offset;
- if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
- if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
- /* Opus requires a fixed 48kHz clock */
- avpriv_set_pts_info(st, 64, 1, 48000);
- else
- avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
- }
-
- if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
- st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
- st->codecpar->codec_id != AV_CODEC_ID_SPEEX &&
- st->codecpar->codec_id != AV_CODEC_ID_FLAC &&
- st->codecpar->codec_id != AV_CODEC_ID_OPUS &&
- st->codecpar->codec_id != AV_CODEC_ID_VP8) {
- av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
- return AVERROR(EINVAL);
- }
-
- if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
- st->codecpar->codec_id != AV_CODEC_ID_VP8) {
- av_log(s, AV_LOG_ERROR, "No extradata present\n");
- return AVERROR_INVALIDDATA;
- }
oggstream = av_mallocz(sizeof(*oggstream));
if (!oggstream)
return AVERROR(ENOMEM);
@@ -561,10 +554,11 @@ static int ogg_init(AVFormatContext *s)
int header_type = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 3 : 0x81;
int framing_bit = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 1 : 0;
- if (avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
- st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
- (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
- av_log(s, AV_LOG_ERROR, "Extradata corrupted\n");
+ if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC) &&
+ avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
+ st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
+ (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
+ av_log(s, AV_LOG_ERROR, "Extradata corrupted for stream #%d\n", i);
oggstream->header[1] = NULL;
return AVERROR_INVALIDDATA;
}
@@ -601,7 +595,59 @@ static int ogg_init(AVFormatContext *s)
return 0;
}
-static int ogg_write_header(AVFormatContext *s)
+/**
+ * Validate the streams and prepare the muxer.
+ *
+ * Sets the time base of audio streams, counts the attached pictures that
+ * still have to arrive (waiting_pics) and selects the stream whose metadata
+ * will carry them (audio_stream_idx). When no picture is expected the
+ * per-stream setup is completed right away through ogg_finish_init();
+ * otherwise it is postponed until the pictures have been received.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int ogg_init(AVFormatContext *s)
+{
+    OGGContext *ogg = s->priv_data;
+    int i, audio_idx = -1, fallback_idx = -1;
+
+    ogg->waiting_pics   = 0;
+    ogg->attached_types = 0;
+
+    /* the pagesize deprecation warning is issued by ogg_finish_init();
+     * repeating it here would print it twice */
+
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+        const enum AVCodecID id = st->codecpar->codec_id;
+        const int is_picture = st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
+                               (st->disposition & AV_DISPOSITION_ATTACHED_PIC);
+
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            audio_idx = i;
+            if (id == AV_CODEC_ID_OPUS)
+                /* Opus requires a fixed 48kHz clock */
+                avpriv_set_pts_info(st, 64, 1, 48000);
+            else
+                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+        } else if (!is_picture && fallback_idx < 0) {
+            fallback_idx = i;
+        }
+
+        switch (id) {
+        case AV_CODEC_ID_VORBIS:
+        case AV_CODEC_ID_THEORA:
+        case AV_CODEC_ID_SPEEX:
+        case AV_CODEC_ID_FLAC:
+        case AV_CODEC_ID_OPUS:
+        case AV_CODEC_ID_VP8:
+            break;
+        case AV_CODEC_ID_PNG:
+        case AV_CODEC_ID_MJPEG:
+            /* still images are only accepted as attached pictures */
+            if (is_picture)
+                break;
+            /* fall through */
+        default:
+            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
+            return AVERROR(EINVAL);
+        }
+
+        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
+            id != AV_CODEC_ID_VP8 &&
+            id != AV_CODEC_ID_PNG &&
+            id != AV_CODEC_ID_MJPEG) {
+            av_log(s, AV_LOG_ERROR, "No extradata present\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (is_picture)
+            ogg->waiting_pics++;
+    }
+
+    /* prefer an audio stream (the last one, as before) as the carrier of
+     * the picture tags, otherwise the first stream that is not a picture */
+    ogg->audio_stream_idx = audio_idx >= 0 ? audio_idx : fallback_idx;
+
+    if (!ogg->waiting_pics)
+        return ogg_finish_init(s);
+
+    if (ogg->audio_stream_idx < 0) {
+        av_log(s, AV_LOG_ERROR, "Attached pictures need another stream to "
+               "carry them\n");
+        return AVERROR(EINVAL);
+    }
+    return 0;
+}
+
+static int ogg_finish_header(AVFormatContext *s)
{
OGGStreamContext *oggstream = NULL;
int i, j;
@@ -631,6 +677,14 @@ static int ogg_write_header(AVFormatContext *s)
return 0;
}
+/**
+ * Muxer write_header callback.
+ *
+ * While attached pictures are still expected nothing is written here; the
+ * headers are emitted by ogg_queue_flush() instead.
+ */
+static int ogg_write_header(AVFormatContext *s)
+{
+    const OGGContext *ctx = s->priv_data;
+
+    return ctx->waiting_pics ? 0 : ogg_finish_header(s);
+}
+
static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
{
AVStream *st = s->streams[pkt->stream_index];
@@ -683,20 +737,92 @@ static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
return 0;
}
+/**
+ * Complete the postponed initialization, write the headers and then mux
+ * every packet that was buffered while waiting for attached pictures.
+ *
+ * The queue is always emptied. After the first failure the remaining
+ * packets are discarded instead of written.
+ *
+ * @return 0 on success, otherwise the first error that occurred
+ */
+static int ogg_queue_flush(AVFormatContext *s)
+{
+    OGGContext *c = s->priv_data;
+    AVPacket *const pkt = ffformatcontext(s)->pkt;
+    int ret;
+
+    /* the headers can only be written for fully initialized streams */
+    ret = ogg_finish_init(s);
+    if (ret >= 0)
+        ret = ogg_finish_header(s);
+
+    while (c->queue.head) {
+        int err = avpriv_packet_list_get(&c->queue, pkt);
+
+        if (err < 0) {
+            /* cannot make progress; keep the earliest error */
+            if (ret >= 0)
+                ret = err;
+            break;
+        }
+        if (ret >= 0)
+            ret = ogg_write_packet_internal(s, pkt);
+        av_packet_unref(pkt);
+    }
+    return ret;
+}
+
+/**
+ * Muxer write_packet callback.
+ *
+ * Packets of attached-picture streams are turned into picture tags via
+ * ff_flac_write_picture(). Packets of all other streams are muxed
+ * directly, or buffered while pictures are still expected, because the
+ * headers cannot be written before the tags are complete. A NULL or empty
+ * packet requests a flush of the pending pages.
+ *
+ * @return 0 for a consumed packet or a flush that had to be postponed,
+ *         1 after a flush, a negative AVERROR code on failure
+ */
static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt)
{
- int i;
- if (pkt && pkt->size)
- return ogg_write_packet_internal(s, pkt);
- for (i = 0; i < s->nb_streams; i++) {
- OGGStreamContext *oggstream = s->streams[i]->priv_data;
- if (oggstream->page.segments_count)
- ogg_buffer_page(s, oggstream);
- }
- ogg_write_pages(s, 2);
+    OGGContext *c = s->priv_data;
+    int i, ret;
+
+    if (pkt && pkt->size) {
+        AVStream *st = s->streams[pkt->stream_index];
+
+        if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC)) {
+            if (!c->waiting_pics)
+                return ogg_write_packet_internal(s, pkt);
+
+            /* buffer regular packets until we get all the pictures */
+            ret = avpriv_packet_list_put(&c->queue, pkt, NULL, 0);
+            if (ret >= 0)
+                return 0;
+
+            av_log(s, AV_LOG_ERROR, "Out of memory in packet queue; skipping attached pictures\n");
+            c->waiting_pics = 0;
+            ret = ogg_queue_flush(s);
+            if (ret < 0)
+                return ret;
+            return ogg_write_packet_internal(s, pkt);
+        }
+
+        /* pictures arriving after the headers went out cannot be stored */
+        if (!c->waiting_pics)
+            return 0;
+
+        /* warn only once for each stream */
+        if (st->nb_frames == 1) {
+            av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                   " ignoring.\n", pkt->stream_index);
+        }
+        if (st->nb_frames >= 1) {
+            av_log(s, AV_LOG_WARNING, "Attached picture must not have more than one frame.\n");
+            return 0;
+        }
+
+        ret = ff_flac_write_picture(s,
+                                    1,
+                                    &c->attached_types,
+                                    c->audio_stream_idx,
+                                    pkt);
+        if (ret < 0) {
+            av_log(s, AV_LOG_ERROR, "Failed to process attached picture.\n");
+            return ret;
+        }
+        c->waiting_pics--;
+
+        /* the last picture releases the buffered packets */
+        return c->waiting_pics ? 0 : ogg_queue_flush(s);
+    }
+
+    /* flush request: the streams are not set up while pictures are
+     * pending, so there are no pages to write yet */
+    if (c->waiting_pics)
+        return 0;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        OGGStreamContext *oggstream = s->streams[i]->priv_data;
+        if (oggstream->page.segments_count)
+            ogg_buffer_page(s, oggstream);
+    }
+
+    ogg_write_pages(s, 2);
return 1;
}
@@ -734,7 +860,9 @@ static void ogg_free(AVFormatContext *s)
st->codecpar->codec_id == AV_CODEC_ID_VP8) {
av_freep(&oggstream->header[0]);
}
- av_freep(&oggstream->header[1]);
+ if (st->codecpar->codec_id != AV_CODEC_ID_PNG &&
+ st->codecpar->codec_id != AV_CODEC_ID_MJPEG)
+ av_freep(&oggstream->header[1]);
}
while (p) {
@@ -840,6 +968,7 @@ const AVOutputFormat ff_opus_muxer = {
.extensions = "opus",
.priv_data_size = sizeof(OGGContext),
.audio_codec = AV_CODEC_ID_OPUS,
+ .video_codec = AV_CODEC_ID_PNG,
.init = ogg_init,
.write_header = ogg_write_header,
.write_packet = ogg_write_packet,
--
2.39.0
More information about the ffmpeg-devel
mailing list