[FFmpeg-devel] [PATCH] RFC: Set reasonable subtitle dimensions for timed-text in mov/mp4.
Nicolas George
nicolas.george at normalesup.org
Mon Mar 11 20:44:49 CET 2013
Le decadi 20 ventôse, an CCXXI, Philip Langdale a écrit :
> See https://ffmpeg.org/trac/ffmpeg/ticket/1845.
Please remember not to paste an URL and a punctuation directly together.
> It's crazy, but the full spec for timed-text subtitles, requires
> specifying dimensions and positioning for the subtitle rendering
> area, and doing so in pixels, which pretty much means you have to
> know the size of the video stream being shown, so that the subtitles
> can be meaningfully placed over it.
Let me get it straight. The extradata in the header can declare "this
subtitle is meant to be displayed in a 576×72 rectangle at (72, 468)", and
this will look good if displayed on a 720×576 video, but completely out of
place on a 1920×1080 video. Is that it?
Is it possible to specify some more in the extradata: "this subtitle is
meant to be displayed in a 576×72 rectangle at (72, 468) on top of a 720×576
video"?
> Note that, as far as I know, only the Apple QT Player on OSX or
> Windows respects this sizing information.
What happens if the sizing information specifies 0×0?
> - if (track->enc->extradata_size)
> + if (track->enc->extradata_size) {
> + if (track->enc->extradata_size >= 18) {
> + // Rewrite text box dimensions to match video stream.
IMHO, doing so unconditionally is not acceptable.
> + uint8_t *ed = track->enc->extradata;
> + uint16_t width = track->video_width;
> + uint16_t height = track->video_height;
> + height /= 10;
Why do you need to copy this into local variables?
> + ed[14] = height >> 8;
> + ed[15] = height & 0xFF;
> + ed[16] = width >> 8;
> + ed[17] = width & 0xFF;
We have macros to store integers in a buffer with a specified endianness.
> + }
> avio_write(pb, track->enc->extradata, track->enc->extradata_size);
> + }
>
> return update_size(pb, pos);
> }
> @@ -1633,7 +1645,9 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVTrack *track, AVStream *st)
> AVDictionaryEntry *rot = av_dict_get(st->metadata, "rotate", NULL, 0);
> rotation = (rot && rot->value) ? atoi(rot->value) : 0;
> }
> - if (rotation == 90) {
> + if (track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE) {
> + write_matrix(pb, 1, 0, 0, 1, 0, (track->video_height * 9) / 10);
> + } else if (rotation == 90) {
Looks unrelated.
> write_matrix(pb, 0, 1, -1, 0, track->enc->height, 0);
> } else if (rotation == 180) {
> write_matrix(pb, -1, 0, 0, -1, track->enc->width, track->enc->height);
> @@ -1643,8 +1657,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVTrack *track, AVStream *st)
> write_matrix(pb, 1, 0, 0, 1, 0, 0);
> }
> /* Track width and height, for visual only */
> - if(st && (track->enc->codec_type == AVMEDIA_TYPE_VIDEO ||
> - track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
> + if(st && (track->enc->codec_type == AVMEDIA_TYPE_VIDEO)) {
> if(track->mode == MODE_MOV) {
> avio_wb32(pb, track->enc->width << 16);
> avio_wb32(pb, track->height << 16);
> @@ -1655,6 +1668,9 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVTrack *track, AVStream *st)
> avio_wb32(pb, sample_aspect_ratio * track->enc->width*0x10000);
> avio_wb32(pb, track->height*0x10000);
> }
> + } else if (track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE) {
> + avio_wb32(pb, track->video_width * 0x10000);
> + avio_wb32(pb, (track->video_height * 0x10000) / 10);
> }
> else {
> avio_wb32(pb, 0);
> @@ -1786,7 +1802,6 @@ static int mov_write_udta_sdp(AVIOContext *pb, MOVTrack *track)
> NULL, NULL, 0, 0, ctx);
> av_strlcatf(buf, sizeof(buf), "a=control:streamid=%d\r\n", track->track_id);
> len = strlen(buf);
> -
Spurious.
> avio_wb32(pb, len + 24);
> ffio_wfourcc(pb, "udta");
> avio_wb32(pb, len + 16);
> @@ -1803,6 +1818,7 @@ static int mov_write_trak_tag(AVIOContext *pb, MOVMuxContext *mov,
> int64_t pos = avio_tell(pb);
> avio_wb32(pb, 0); /* size */
> ffio_wfourcc(pb, "trak");
> +
Ditto.
> mov_write_tkhd_tag(pb, track, st);
> if (supports_edts(mov))
> mov_write_edts_tag(pb, track); // PSP Movies and several other cases require edts box
> @@ -3672,6 +3688,20 @@ static int mov_write_header(AVFormatContext *s)
> }
> }
>
> + for (i = 0; i < mov->nb_streams; i++) {
> + MOVTrack *track = &mov->tracks[i];
> + if (track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE) {
> + int j;
> + for (j = 0; j < mov->nb_streams; j++) {
> + if (mov->tracks[j].enc->codec_type == AVMEDIA_TYPE_VIDEO) {
> + track->video_width = mov->tracks[j].enc->width;
> + track->video_height = mov->tracks[j].enc->height;
> + break;
> + }
> + }
> + }
> + }
> +
> if (mov->mode == MODE_ISM) {
> /* If no fragmentation options have been set, set a default. */
> if (!(mov->flags & (FF_MOV_FLAG_FRAG_KEYFRAME |
> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> index a5db895..42f120d 100644
> --- a/libavformat/movenc.h
> +++ b/libavformat/movenc.h
> @@ -138,6 +138,10 @@ typedef struct MOVIndex {
> int packet_entry;
> int slices;
> } vc1_info;
> +
> + // For subtitle tracks.
> + uint16_t video_width;
> + uint16_t video_height;
> } MOVTrack;
>
> typedef struct MOVMuxContext {
I must say, I would be much more at ease with a solution requiring human
input: just ask the users to add "-s 576x72" to their encoding options.
Regards,
--
Nicolas George
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130311/50398b55/attachment.asc>
More information about the ffmpeg-devel
mailing list