[FFmpeg-devel] [PATCH] avcodec/jpeg2000dec: support of 2 fields in 1 AVPacket

Jerome Martinez jerome at mediaarea.net
Fri Feb 2 17:55:12 EET 2024


Before this patch, the FFmpeg MXF parser correctly detects content with
2 fields in 1 AVPacket as e.g. interlaced 720x486, but the FFmpeg JPEG
2000 decoder reads the JPEG 2000 SIZ header without understanding that
the indicated height is the height of 1 field only, so it overwrites the
frame size info with e.g. 720x243 and also completely discards the
second field, which leads to the decoding of only half of the stored
content as "progressive" 720x243 flagged interlaced.

Example file:
https://www.digitizationguidelines.gov/guidelines/MXF_sampleFiles/RDD48-sample12-gf-jpeg2000-ntsc-4.2.zip

Before this patch:
Stream #0:0: Video: jpeg2000, yuv422p10le(bottom coded first (swapped)), 
720x243, lossless, SAR 9:20 DAR 4:3, 29.97 tbr, 29.97 tbn, 29.97 tbc

After this patch:
Stream #0:0: Video: jpeg2000, yuv422p10le(bottom coded first (swapped)), 
720x486, lossless, SAR 9:10 DAR 4:3, 29.97 fps, 29.97 tbr, 29.97 tbn
-------------- next part --------------
From 5242971da7d2cf8d8713144e4a7bcc4aa06437c4 Mon Sep 17 00:00:00 2001
From: Jerome Martinez <jerome at mediaarea.net>
Date: Thu, 1 Feb 2024 17:58:02 +0100
Subject: [PATCH] avcodec/jpeg2000dec: support of 2 fields in 1 AVPacket

---
 libavcodec/jpeg2000dec.c | 87 +++++++++++++++++++++++++++++++++++++++++++-----
 libavcodec/jpeg2000dec.h |  5 +++
 2 files changed, 84 insertions(+), 8 deletions(-)

diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 691cfbd891..d8bfca390e 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -194,6 +194,8 @@ static int get_siz(Jpeg2000DecoderContext *s)
     int ret;
     int o_dimx, o_dimy; //original image dimensions.
     int dimx, dimy;
+    int previous_width = s->width;
+    int previous_height = s->height;
 
     if (bytestream2_get_bytes_left(&s->g) < 36) {
         av_log(s->avctx, AV_LOG_ERROR, "Insufficient space for SIZ\n");
@@ -211,7 +213,7 @@ static int get_siz(Jpeg2000DecoderContext *s)
     s->tile_offset_y  = bytestream2_get_be32u(&s->g); // YT0Siz
     ncomponents       = bytestream2_get_be16u(&s->g); // CSiz
 
-    if (av_image_check_size2(s->width, s->height, s->avctx->max_pixels, AV_PIX_FMT_NONE, 0, s->avctx)) {
+    if (av_image_check_size2(s->width, s->height << (s->height >= 0 && s->has_2_fields), s->avctx->max_pixels, AV_PIX_FMT_NONE, 0, s->avctx)) {
         avpriv_request_sample(s->avctx, "Large Dimensions");
         return AVERROR_PATCHWELCOME;
     }
@@ -301,6 +303,19 @@ static int get_siz(Jpeg2000DecoderContext *s)
             return AVERROR(ENOMEM);
     }
 
+    if (s->has_2_fields) {
+        s->height <<= 1;
+        s->image_offset_y <<= 1;
+        s->tile_offset_y <<= 1;
+        if (s->is_second_field && (s->width != previous_width || s->height != previous_height)) {
+            avpriv_request_sample(s->avctx, "Pixel size of the 2 fields of the frame are not same");
+            return AVERROR_PATCHWELCOME;
+        }
+        if (s->image_offset_y || s->tile_offset_y || (s->tile_height << 1) != s->height) {
+            av_log(s->avctx, AV_LOG_WARNING, "Decoding of 2 fields having titles in 1 AVPacket was not tested\n");
+        }
+    }
+
     /* compute image size with reduction factor */
     o_dimx = ff_jpeg2000_ceildivpow2(s->width  - s->image_offset_x,
                                                s->reduction_factor);
@@ -2001,7 +2016,7 @@ static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
                                                                                                   \
             y    = tile->comp[compno].coord[1][0] -                                               \
                    ff_jpeg2000_ceildiv(s->image_offset_y, s->cdy[compno]);                        \
-            line = (PIXEL *)picture->data[plane] + y * (picture->linesize[plane] / sizeof(PIXEL));\
+            line = (PIXEL *)picture->data[plane] + (y + (s->is_second_field ^ s->is_bottom_coded_first)) * (picture->linesize[plane] / sizeof(PIXEL));\
             for (; y < h; y++) {                                                                  \
                 PIXEL *dst;                                                                       \
                                                                                                   \
@@ -2028,7 +2043,7 @@ static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
                         dst += pixelsize;                                                         \
                     }                                                                             \
                 }                                                                                 \
-                line += picture->linesize[plane] / sizeof(PIXEL);                                 \
+                line += (picture->linesize[plane] << s->has_2_fields) / sizeof(PIXEL);            \
             }                                                                                     \
         }                                                                                         \
                                                                                                   \
@@ -2445,8 +2460,8 @@ static av_cold int jpeg2000_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
-                                 int *got_frame, AVPacket *avpkt)
+static int jpeg2000_decode_frame_picture(AVCodecContext *avctx, AVFrame *picture,
+                                 AVPacket *avpkt)
 {
     Jpeg2000DecoderContext *s = avctx->priv_data;
     int ret;
@@ -2497,7 +2512,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
     }
 
     /* get picture buffer */
-    if ((ret = ff_thread_get_buffer(avctx, picture, 0)) < 0)
+    if ((!s->has_2_fields || !s->is_second_field) && (ret = ff_thread_get_buffer(avctx, picture, 0)) < 0)
         goto end;
     picture->pict_type = AV_PICTURE_TYPE_I;
     picture->flags |= AV_FRAME_FLAG_KEY;
@@ -2520,8 +2535,6 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
 
     jpeg2000_dec_cleanup(s);
 
-    *got_frame = 1;
-
     if (s->avctx->pix_fmt == AV_PIX_FMT_PAL8)
         memcpy(picture->data[1], s->palette, 256 * sizeof(uint32_t));
 
@@ -2532,6 +2545,64 @@ end:
     return ret;
 }
 
+/* Decode 1 AVPacket holding either 1 picture or the 2 fields of 1 interlaced
+ * frame stored as 2 consecutive codestreams, both written to the same frame. */
+static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
+                                 int *got_frame, AVPacket *avpkt)
+{
+    Jpeg2000DecoderContext *s = avctx->priv_data;
+    int picture_1_size = avpkt->size, picture_2_size = 0;
+    int ret1 = 0, ret2 = 0;
+    int may_have_2_fields_in_1_packet = 0; // 1: top coded first, 2: bottom coded first
+
+    // find if there are 2 JPEG2000 pictures in a single packet
+    s->has_2_fields = 0;
+    s->is_bottom_coded_first = 0;
+    s->is_second_field = 0;
+    if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB)
+        may_have_2_fields_in_1_packet = 1;
+    else if (avctx->field_order == AV_FIELD_BB || avctx->field_order == AV_FIELD_BT)
+        may_have_2_fields_in_1_packet = 2;
+    if (may_have_2_fields_in_1_packet) {
+        for (int i = 0; i < avpkt->size - 4; i++) {
+            static const unsigned char EOI_SOI[4] = { 0xFF, 0xD9, 0xFF, 0x4F };
+            if (!memcmp(avpkt->data + i, EOI_SOI, 4)) {
+                if (picture_2_size) {
+                    av_log(s->avctx, AV_LOG_WARNING, "EOI SOI sequence found twice, risk of wrong detection\n");
+                } else {
+                    picture_1_size = i + 2; // split right after the 2-byte end marker
+                    picture_2_size = avpkt->size - picture_1_size;
+                    s->has_2_fields = 1;
+                    s->is_bottom_coded_first = may_have_2_fields_in_1_packet - 1;
+                }
+            }
+        }
+    }
+
+    // parsing full frame or first picture
+    avpkt->size -= picture_2_size;
+    ret1 = jpeg2000_decode_frame_picture(avctx, picture, avpkt);
+
+    // parsing second picture if present
+    if (picture_2_size) {
+        avpkt->data += picture_1_size;
+        avpkt->size = picture_2_size;
+        s->is_second_field = 1;
+        // the frame buffer is requested by the first field only, never decode into a missing one
+        if (ret1 >= 0 || picture->data[0])
+            ret2 = jpeg2000_decode_frame_picture(avctx, picture, avpkt);
+        else
+            ret2 = ret1;
+
+        // reset
+        avpkt->data -= picture_1_size;
+        avpkt->size += picture_1_size;
+    }
+
+    *got_frame = avctx->skip_frame < AVDISCARD_ALL && (ret1 >= 0 || (picture_2_size && ret2 >= 0)); // got_frame is 1 if any of the decoded pictures is fine
+    return ret1 < 0 ? ret1 : (ret2 < 0 ? ret2 : (ret1 + ret2)); // priority on first field error code
+}
+
 #define OFFSET(x) offsetof(Jpeg2000DecoderContext, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 
diff --git a/libavcodec/jpeg2000dec.h b/libavcodec/jpeg2000dec.h
index d0ca6e7a79..ce42812c48 100644
--- a/libavcodec/jpeg2000dec.h
+++ b/libavcodec/jpeg2000dec.h
@@ -114,6 +114,11 @@ typedef struct Jpeg2000DecoderContext {
 
     /*options parameters*/
     int             reduction_factor;
+    
+    /* field info */
+    int8_t          has_2_fields;
+    int8_t          is_bottom_coded_first;
+    int8_t          is_second_field;
 } Jpeg2000DecoderContext;
 
 #endif //AVCODEC_JPEG2000DEC_H
-- 
2.13.3.windows.1



More information about the ffmpeg-devel mailing list