[FFmpeg-devel] [PATCH] avformat: Add support for embedding cover art in Ogg files
Zsolt Vadász
zsolt_vadasz at protonmail.com
Thu Dec 29 23:05:28 EET 2022
The implementation follows the approach the FLAC muxer uses for attached pictures, so most of that code was reused and adapted.
Signed-off-by: Zsolt Vadasz <zsolt_vadasz at protonmail.com>
---
libavformat/oggenc.c | 293 +++++++++++++++++++++++++++++++++++++------
1 file changed, 254 insertions(+), 39 deletions(-)
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index 5003314adb..bfc51628f2 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -23,14 +23,22 @@
#include <stdint.h>
+#include "libavcodec/codec_id.h"
+#include "libavutil/avutil.h"
#include "libavutil/crc.h"
+#include "libavutil/log.h"
#include "libavutil/mathematics.h"
#include "libavutil/opt.h"
#include "libavutil/random_seed.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+#include "libavutil/base64.h"
+#include "libavutil/bswap.h"
#include "libavcodec/xiph.h"
#include "libavcodec/bytestream.h"
#include "libavcodec/flac.h"
#include "avformat.h"
+#include "id3v2.h"
#include "avio_internal.h"
#include "internal.h"
#include "version.h"
@@ -77,6 +85,10 @@ typedef struct OGGContext {
int pref_size; ///< preferred page size (0 => fill all segments)
int64_t pref_duration; ///< preferred page duration (0 => fill all segments)
int serial_offset;
+
+ PacketList queue;
+ int audio_stream_idx;
+ int attached_pics;
} OGGContext;
#define OFFSET(x) offsetof(OGGContext, x)
@@ -468,12 +480,107 @@ static void ogg_write_pages(AVFormatContext *s, int flush)
ogg->page_list = p;
}
-static int ogg_init(AVFormatContext *s)
+/**
+ * Serialize an attached picture as a base64-encoded picture block and store it
+ * in the METADATA_BLOCK_PICTURE tag of stream c->audio_stream_idx; pkt is
+ * unreferenced once the tag write was attempted.
+ * @return 0 if pkt has no data or the tag was set, a negative AVERROR otherwise
+ */
+static int ogg_attach_pic_to_metadata(AVFormatContext *s, AVPacket *pkt)
+{
+    OGGContext *c = s->priv_data;
+    const AVPixFmtDescriptor *pixdesc;
+    const CodecMime *mime = ff_id3v2_mime_tags;
+    AVDictionaryEntry *e;
+    const char *mimetype = NULL, *desc = "";
+    const AVStream *st = s->streams[pkt->stream_index];
+    AVStream *audio_stream = s->streams[c->audio_stream_idx];
+    unsigned int i, mimelen, desclen, type = 0, blocklen;
+    uint8_t *ptr, *metadata_block_picture = NULL;
+    int encoded_len, ret;
+    char *encoded;
+
+    if (!pkt->data)
+        return 0;
+
+    for (; mime->id != AV_CODEC_ID_NONE; mime++) { /* codec id -> MIME type */
+        if (mime->id == st->codecpar->codec_id) {
+            mimetype = mime->str;
+            break;
+        }
+    }
+    if (!mimetype) {
+        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
+               "write an attached picture.\n", st->index);
+        return AVERROR(EINVAL);
+    }
+    mimelen = strlen(mimetype);
+
+    /* get the picture type; stays 0 when the "comment" tag matches no entry */
+    e = av_dict_get(st->metadata, "comment", NULL, 0);
+    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
+        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
+            type = i;
+            break;
+        }
+    }
+
+    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
+                      st->codecpar->width  != 32 ||
+                      st->codecpar->height != 32)) {
+        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* get the description */
+    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
+        desc = e->value;
+    desclen = strlen(desc);
+
+    blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size;
+    if (blocklen >= 1<<24) {
+        av_log(s, AV_LOG_ERROR, "Picture block too big %u >= %d\n", blocklen, 1<<24);
+        return AVERROR(EINVAL);
+    }
+
+    metadata_block_picture = av_mallocz(blocklen);
+    if (!metadata_block_picture)
+        return AVERROR(ENOMEM);
+    ptr = metadata_block_picture;
+    bytestream_put_be32(&ptr, type);
+    bytestream_put_be32(&ptr, mimelen);
+    bytestream_put_buffer(&ptr, mimetype, mimelen);
+    bytestream_put_be32(&ptr, desclen);
+    bytestream_put_buffer(&ptr, desc, desclen);
+    bytestream_put_be32(&ptr, st->codecpar->width);
+    bytestream_put_be32(&ptr, st->codecpar->height);
+    pixdesc = av_pix_fmt_desc_get(st->codecpar->format);
+    bytestream_put_be32(&ptr, pixdesc ? av_get_bits_per_pixel(pixdesc) : 0);
+    bytestream_put_be32(&ptr, 0); /* palette size in the FLAC picture layout; always 0 here */
+    bytestream_put_be32(&ptr, pkt->size);
+    bytestream_put_buffer(&ptr, pkt->data, pkt->size);
+
+    encoded_len = AV_BASE64_SIZE(blocklen);
+    encoded = av_mallocz(encoded_len);
+    if (encoded)
+        av_base64_encode(encoded, encoded_len, metadata_block_picture, blocklen);
+    av_free(metadata_block_picture); /* the raw block is not needed past this point */
+    if (!encoded)
+        return AVERROR(ENOMEM);
+    ret = av_dict_set(&audio_stream->metadata, "METADATA_BLOCK_PICTURE", encoded, 0);
+    av_free(encoded); /* av_dict_set() was called without DONT_STRDUP, so we still own it */
+    av_packet_unref(pkt);
+    return ret < 0 ? ret : 0;
+}
+
+static int ogg_finish_init(AVFormatContext *s)
{
OGGContext *ogg = s->priv_data;
OGGStreamContext *oggstream = NULL;
int i, j;
+ ogg->attached_pics = 0;
+
if (ogg->pref_size)
av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n");
@@ -481,29 +588,6 @@ static int ogg_init(AVFormatContext *s)
AVStream *st = s->streams[i];
unsigned serial_num = i + ogg->serial_offset;
- if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
- if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
- /* Opus requires a fixed 48kHz clock */
- avpriv_set_pts_info(st, 64, 1, 48000);
- else
- avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
- }
-
- if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
- st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
- st->codecpar->codec_id != AV_CODEC_ID_SPEEX &&
- st->codecpar->codec_id != AV_CODEC_ID_FLAC &&
- st->codecpar->codec_id != AV_CODEC_ID_OPUS &&
- st->codecpar->codec_id != AV_CODEC_ID_VP8) {
- av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
- return AVERROR(EINVAL);
- }
-
- if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
- st->codecpar->codec_id != AV_CODEC_ID_VP8) {
- av_log(s, AV_LOG_ERROR, "No extradata present\n");
- return AVERROR_INVALIDDATA;
- }
oggstream = av_mallocz(sizeof(*oggstream));
if (!oggstream)
return AVERROR(ENOMEM);
@@ -561,10 +645,11 @@ static int ogg_init(AVFormatContext *s)
int header_type = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 3 : 0x81;
int framing_bit = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 1 : 0;
- if (avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
- st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
- (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
- av_log(s, AV_LOG_ERROR, "Extradata corrupted\n");
+ if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC) &&
+ avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
+ st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
+ (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
+ av_log(s, AV_LOG_ERROR, "Extradata corrupted for stream #%d\n", i);
oggstream->header[1] = NULL;
return AVERROR_INVALIDDATA;
}
@@ -601,7 +686,58 @@ static int ogg_init(AVFormatContext *s)
return 0;
}
-static int ogg_write_header(AVFormatContext *s)
+/**
+ * Validate every stream, set audio timebases and count attached pictures.
+ * Stream setup (ogg_finish_init()) is postponed while pictures are pending.
+ * @return 0 or the result of ogg_finish_init() on success, negative AVERROR on error
+ */
+static int ogg_init(AVFormatContext *s)
+{
+    OGGContext *ogg = s->priv_data;
+    int i;
+
+    ogg->attached_pics = 0;
+    if (ogg->pref_size)
+        av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n");
+
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+        const enum AVCodecID id = st->codecpar->codec_id;
+        const int is_pic = st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
+                           (st->disposition & AV_DISPOSITION_ATTACHED_PIC);
+        /* image codecs are only accepted as cover art, not as regular video */
+        const int is_cover = is_pic && (id == AV_CODEC_ID_PNG ||
+                                        id == AV_CODEC_ID_MJPEG);
+
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            ogg->audio_stream_idx = i; /* the last audio stream wins */
+            /* Opus requires a fixed 48kHz clock */
+            avpriv_set_pts_info(st, 64, 1, id == AV_CODEC_ID_OPUS ?
+                                48000 : st->codecpar->sample_rate);
+        }
+
+        if (!is_cover &&
+            id != AV_CODEC_ID_VORBIS && id != AV_CODEC_ID_THEORA &&
+            id != AV_CODEC_ID_SPEEX  && id != AV_CODEC_ID_FLAC   &&
+            id != AV_CODEC_ID_OPUS   && id != AV_CODEC_ID_VP8) {
+            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
+            return AVERROR(EINVAL);
+        }
+
+        if (!is_cover && id != AV_CODEC_ID_VP8 &&
+            (!st->codecpar->extradata || !st->codecpar->extradata_size)) {
+            av_log(s, AV_LOG_ERROR, "No extradata present\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (is_pic)
+            ogg->attached_pics++;
+    }
+
+    /* without cover art nothing has to be buffered: finish setup right away */
+    return ogg->attached_pics ? 0 : ogg_finish_init(s);
+}
+
+static int ogg_finish_header(AVFormatContext *s)
{
OGGStreamContext *oggstream = NULL;
int i, j;
@@ -631,6 +767,14 @@ static int ogg_write_header(AVFormatContext *s)
return 0;
}
+/* Emit the stream headers now unless attached pictures are still pending;
+ * in that case ogg_queue_flush() calls ogg_finish_header() later. */
+static int ogg_write_header(AVFormatContext *s)
+{
+    const OGGContext *muxer = s->priv_data;
+    return muxer->attached_pics ? 0 : ogg_finish_header(s);
+}
+
static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
{
AVStream *st = s->streams[pkt->stream_index];
@@ -683,20 +827,88 @@ static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
return 0;
}
+/* Finish the deferred init and header, then write out the queued packets.
+ * The queue is always drained; the first error stops writing and is returned. */
+static int ogg_queue_flush(AVFormatContext *s)
+{
+    OGGContext *c = s->priv_data;
+    AVPacket *const pkt = ffformatcontext(s)->pkt;
+    int ret = ogg_finish_init(s);
+
+    /* the header must not be attempted on top of a failed stream setup */
+    if (ret >= 0)
+        ret = ogg_finish_header(s);
+
+    while (c->queue.head) {
+        avpriv_packet_list_get(&c->queue, pkt);
+        if (ret >= 0) /* after a failure the remaining packets are only released */
+            ret = ogg_write_packet_internal(s, pkt);
+        av_packet_unref(pkt);
+    }
+    return ret;
+}
+
static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt)
{
- int i;
+ OGGContext *c = s->priv_data;
+ int i, ret;
+
+ if (pkt && pkt->size) { /* packet with payload; a NULL or empty packet takes the else branch at the bottom */
+ if (pkt->stream_index == c->audio_stream_idx) {
+ if (c->attached_pics) {
+ /* hold back packets of the stream at audio_stream_idx until every attached picture has arrived; ogg_queue_flush() writes them later */
+ ret = avpriv_packet_list_put(&c->queue, pkt, NULL, 0);
+ if (ret < 0) {
+ av_log(s, AV_LOG_ERROR, "Out of memory in packet queue; skipping attached pictures\n");
+ c->attached_pics = 0; /* stop waiting for pictures so later packets are written directly */
+ ret = ogg_queue_flush(s);
+ if (ret < 0)
+ return ret;
+ return ogg_write_packet_internal(s, pkt);
+ }
+ } else
+ return ogg_write_packet_internal(s, pkt);
+ } else {
+ AVStream *st = s->streams[pkt->stream_index];
- if (pkt && pkt->size)
- return ogg_write_packet_internal(s, pkt);
+ if (!c->attached_pics ||
+ !(st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+ return 0; /* NOTE(review): this also discards packets of every other stream that is not an attached picture (e.g. a video or second audio stream) - confirm this is intended */
- for (i = 0; i < s->nb_streams; i++) {
- OGGStreamContext *oggstream = s->streams[i]->priv_data;
- if (oggstream->page.segments_count)
- ogg_buffer_page(s, oggstream);
- }
+ /* warn only once for each stream */
+ if (st->nb_frames == 1) {
+ av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+ " ignoring.\n", pkt->stream_index);
+ }
+ if (st->nb_frames >= 1) { /* NOTE(review): unlike the warning above, this one is logged for every extra frame */
+ av_log(s, AV_LOG_WARNING, "Attached picture must not have more than one frame.\n");
+ return 0;
+ }
- ogg_write_pages(s, 2);
+ //st->priv_data = av_packet_clone(pkt);
+ //if (!st->priv_data)
+ // av_log(s, AV_LOG_ERROR, "Out of memory queueing an attached picture; skipping\n");
+ c->attached_pics--; /* counted down before processing, so a picture that fails below is still treated as received */
+ ret = ogg_attach_pic_to_metadata(s, pkt);
+ if (ret < 0) {
+ av_log(s, AV_LOG_ERROR, "Failed to process attached picture.\n");
+ return ret;
+ }
+
+ /* last pending picture handled: finish init/header and write the buffered audio packets */
+ if (!c->attached_pics &&
+ (ret = ogg_queue_flush(s)) < 0)
+ return ret;
+ }
+ } else { /* no payload: buffer every partially filled page and write the pages out */
+ for (i = 0; i < s->nb_streams; i++) {
+ OGGStreamContext *oggstream = s->streams[i]->priv_data;
+ if (oggstream->page.segments_count)
+ ogg_buffer_page(s, oggstream);
+ }
+
+ ogg_write_pages(s, 2);
+ }
return 1;
}
@@ -734,7 +946,9 @@ static void ogg_free(AVFormatContext *s)
st->codecpar->codec_id == AV_CODEC_ID_VP8) {
av_freep(&oggstream->header[0]);
}
- av_freep(&oggstream->header[1]);
+ if (st->codecpar->codec_id != AV_CODEC_ID_PNG &&
+ st->codecpar->codec_id != AV_CODEC_ID_MJPEG)
+ av_freep(&oggstream->header[1]);
}
while (p) {
@@ -840,6 +1054,7 @@ const AVOutputFormat ff_opus_muxer = {
.extensions = "opus",
.priv_data_size = sizeof(OGGContext),
.audio_codec = AV_CODEC_ID_OPUS,
+ .video_codec = AV_CODEC_ID_PNG,
.init = ogg_init,
.write_header = ogg_write_header,
.write_packet = ogg_write_packet,
--
2.34.1
More information about the ffmpeg-devel
mailing list