[FFmpeg-devel] [PATCH 16/23] avcodec/hevc/refs: export Stereo 3D side data

Sun Sep 15 06:18:36 EEST 2024

On 9/14/2024 7:12 PM, James Almer wrote:
> On 9/14/2024 7:45 AM, Anton Khirnov wrote:
>> From: James Almer <jamrial at gmail.com>
>>
>> Use the 3D Reference Displays Info SEI message to link a view_id with
>> an eye.
>>
>> Signed-off-by: James Almer <jamrial at gmail.com>
>> ---
>>   libavcodec/hevc/hevcdec.c |  1 +
>>   libavcodec/hevc/refs.c    | 19 +++++++++++++++++++
>>   2 files changed, 20 insertions(+)
>>
>> diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
>> index 692f19e97e..b784b10bcf 100644
>> --- a/libavcodec/hevc/hevcdec.c
>> +++ b/libavcodec/hevc/hevcdec.c
>> @@ -3968,6 +3968,7 @@ static int 
>> hevc_update_thread_context(AVCodecContext *dst,
>>       s->sei.common.mastering_display    = s0- 
>> >sei.common.mastering_display;
>>       s->sei.common.content_light        = s0->sei.common.content_light;
>>       s->sei.common.aom_film_grain       = s0->sei.common.aom_film_grain;
>> +    s->sei.tdrdi                       = s0->sei.tdrdi;
>>       return 0;
>>   }
>> diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
>> index b9b08ca416..ac1b07a308 100644
>> --- a/libavcodec/hevc/refs.c
>> +++ b/libavcodec/hevc/refs.c
>> @@ -22,6 +22,7 @@
>>    */
>>   #include "libavutil/mem.h"
>> +#include "libavutil/stereo3d.h"
>>   #include "container_fifo.h"
>>   #include "decode.h"
>> @@ -94,6 +95,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s, 
>> HEVCLayerContext *l)
>>           // add view ID side data if it's nontrivial
>>           if (vps->nb_layers > 1 || view_id) {
>> +            HEVCSEITDRDI *tdrdi = &s->sei.tdrdi;
>>               AVFrameSideData *sd = av_frame_side_data_new(&frame->f- 
>> >side_data,
>>                                                            &frame->f- 
>> >nb_side_data,
>>                                                            
>> AV_FRAME_DATA_VIEW_ID,
>> @@ -101,6 +103,23 @@ static HEVCFrame *alloc_frame(HEVCContext *s, 
>> HEVCLayerContext *l)
>>               if (!sd)
>>                   goto fail;
>>               *(int*)sd->data = view_id;
>> +
>> +            if (tdrdi->num_ref_displays) {
>> +                AVStereo3D *stereo_3d;
>> +
>> +                av_frame_remove_side_data(frame->f, 
>> AV_FRAME_DATA_STEREO3D);
> 
> As this is now being called before ff_progress_frame_get_buffer() (is 
> there a reason you wanted the view_id side data and this one applied to 
> the frame before get_buffer()?), it became a no-op and any stereo 3d 
> side data in the input packet will be appended to the frame, resulting 
> in something like:
> 
>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - View ID: view id: 0
>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - 
>> frame alternate, view - right, primary_eye - none
>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Spherical 
>> Mapping: rectilinear
>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - 
>> unspecified, view - packed, primary_eye - none, baseline: 19240, 
>> horizontal_disparity_adjustment: 0.0200, horizontal_field_of_view: 63.400
> 
> We don't really want to lose the information that's coded in the 
> container but not in the bitstream (which happened in the previous 
> version of the patch too), so we should instead amend the 
> container-level side data with the bitstream information.
> 
> Something like:
> 
>> diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
>> index ac1b07a308..f4c2b18e83 100644
>> --- a/libavcodec/hevc/refs.c
>> +++ b/libavcodec/hevc/refs.c
>> @@ -93,21 +93,32 @@ static HEVCFrame *alloc_frame(HEVCContext *s, 
>> HEVCLayerContext *l)
>>          if (ret < 0)
>>              return NULL;
>>
>> +        if (!(s->layers_active_output & (1 << s->cur_layer)))
>> +            frame->f->flags |= AV_FRAME_FLAG_DISCARD;
>> +
>> +        ret = ff_progress_frame_get_buffer(s->avctx, &frame->tf,
>> +                                           AV_GET_BUFFER_FLAG_REF);
>> +        if (ret < 0)
>> +            return NULL;
>> +
>>          // add view ID side data if it's nontrivial
>>          if (vps->nb_layers > 1 || view_id) {
>>              HEVCSEITDRDI *tdrdi = &s->sei.tdrdi;
>> -            AVFrameSideData *sd = av_frame_side_data_new(&frame->f- 
>> >side_data,
>> -                                                         &frame->f- 
>> >nb_side_data,
>> -                                                         
>> AV_FRAME_DATA_VIEW_ID,
>> -                                                         sizeof(int), 
>> 0);
>> +            AVFrameSideData *sd;
>> +
>> +            av_frame_remove_side_data(frame->f, AV_FRAME_DATA_VIEW_ID);
>> +            sd = av_frame_new_side_data(frame->f, 
>> AV_FRAME_DATA_VIEW_ID, sizeof(int));
>>              if (!sd)
>>                  goto fail;
>>              *(int*)sd->data = view_id;
>>
>>              if (tdrdi->num_ref_displays) {
>> -                AVStereo3D *stereo_3d;
>> +                AVStereo3D *stereo_3d = NULL;
>>
>> -                av_frame_remove_side_data(frame->f, 
>> AV_FRAME_DATA_STEREO3D);
>> +                sd = av_frame_get_side_data(frame->f, 
>> AV_FRAME_DATA_STEREO3D);
>> +                if (sd)
>> +                    stereo_3d = (AVStereo3D *)sd->data;
>> +                else
>>                  stereo_3d = av_stereo3d_create_side_data(frame->f);
>>                  if (!stereo_3d)
>>                      goto fail;
>> @@ -122,14 +133,6 @@ static HEVCFrame *alloc_frame(HEVCContext *s, 
>> HEVCLayerContext *l)
>>              }
>>          }
>>
>> -        if (!(s->layers_active_output & (1 << s->cur_layer)))
>> -            frame->f->flags |= AV_FRAME_FLAG_DISCARD;
>> -
>> -        ret = ff_progress_frame_get_buffer(s->avctx, &frame->tf,
>> -                                           AV_GET_BUFFER_FLAG_REF);
>> -        if (ret < 0)
>> -            return NULL;
>> -
>>          frame->rpl = ff_refstruct_allocz(s->pkt.nb_nals * 
>> sizeof(*frame->rpl));
>>          if (!frame->rpl)
>>              goto fail;
> 
> Which results in
> 
>> [Parsed_showinfo_0 @ 000001f776dc6020]   side data - Spherical 
>> Mapping: rectilinear
>> [Parsed_showinfo_0 @ 000001f776dc6020]   side data - Stereo 3D: type - 
>> frame alternate, view - right, primary_eye - none, baseline: 19240, 
>> horizontal_disparity_adjustment: 0.0200, horizontal_field_of_view: 63.400
>> [Parsed_showinfo_0 @ 000001f776dc6020]   side data - View ID: view id: 0
> 

It turns out that doing this results in the output muxer/encoder context 
having this as global side data, because enc_open() in ffmpeg_enc.c 
initializes avctx.decoded_side_data from the first frame. That might be 
nice, if not for the fact that you get "view - right" even if you output 
both views.

It does not happen with your patch as is, because the loop in enc_open() 
uses the last side data entry of a given type, and in this scenario the 
second Stereo 3D entry happens to be the one from the source container.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature.asc
Type: application/pgp-signature
Size: 495 bytes
Desc: OpenPGP digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20240915/c044629b/attachment.sig>