[FFmpeg-devel] [PATCH] ffprobe: fix XML rendering, review XML layout

Stefano Sabatini stefasab at gmail.com
Sat Oct 14 20:24:28 EEST 2023


Fix rendering of int values within a side data element, which has been
broken since commit d2d3a83ad93, where the side data element was
correctly marked as a variable-fields element. The logic to render a
string value within a variable-fields element was already implemented,
but the equivalent logic was missing from the int fields path, which
was enabled by that commit.

Also, the code and schema are changed in order to account for multiple
variable-fields elements - such as side data - contained within the
same parent. Previously it was assumed that a single variable-fields
element was contained within the parent, which was the case for tags,
but is not the case for side data.

Previously data was rendered as:
<side_data_list>
    <side_data side_data_type="CPB properties" max_bitrate="0" min_bitrate="0" avg_bitrate="0" buffer_size="327680" vbv_delay="-1"/>
</side_data_list>

Now as:
<side_data_list>
   <side_data type="CPB properties">
       <side_datum key="side_data_type" value="CPB properties"/>
       <side_datum key="max_bitrate" value="0"/>
       <side_datum key="min_bitrate" value="0"/>
       <side_datum key="avg_bitrate" value="0"/>
       <side_datum key="buffer_size" value="327680"/>
       <side_datum key="vbv_delay" value="-1"/>
   </side_data>
</side_data_list>

Variable-fields elements are now rendered as a wrapping element that
contains generic key/value child elements, which enables the use of a
strict XML schema.

Fix trac issue:
https://trac.ffmpeg.org/ticket/10613
---
 Changelog                  |  2 ++
 doc/ffprobe.xsd            | 29 ++++++++++++++++------
 fftools/ffprobe.c          | 51 +++++++++++++++++++++++++++-----------
 tests/ref/fate/ffprobe_xml | 26 ++++++++++++-------
 4 files changed, 77 insertions(+), 31 deletions(-)

diff --git a/Changelog b/Changelog
index 0c73f66546..259180e190 100644
--- a/Changelog
+++ b/Changelog
@@ -35,6 +35,8 @@ version <next>:
 - CRI USM demuxer
 - ffmpeg CLI '-top' option deprecated in favor of the setfield filter
 - VAAPI AV1 encoder
+- ffprobe XML output schema changed to account for multiple
+  variable-fields elements within the same parent element
 
 
 version 6.0:
diff --git a/doc/ffprobe.xsd b/doc/ffprobe.xsd
index 87ca265d63..6b815a89df 100644
--- a/doc/ffprobe.xsd
+++ b/doc/ffprobe.xsd
@@ -43,9 +43,13 @@
         </xsd:choice>
     </xsd:complexType>
 
+    <xsd:complexType name="tagsType">
+      <xsd:element name="tag" type="ffprobe:tagType" minOccurs="0" maxOccurs="unbounded"/>
+    </xsd:complexType>
+
     <xsd:complexType name="packetType">
       <xsd:sequence>
-        <xsd:element name="tag" type="ffprobe:tagType" minOccurs="0" maxOccurs="unbounded"/>
+        <xsd:element name="tags" type="ffprobe:tagsType" minOccurs="0" maxOccurs="1"/>
         <xsd:element name="side_data_list" type="ffprobe:packetSideDataListType"   minOccurs="0" maxOccurs="1" />
       </xsd:sequence>
 
@@ -69,14 +73,23 @@
             <xsd:element name="side_data" type="ffprobe:packetSideDataType" minOccurs="1" maxOccurs="unbounded"/>
         </xsd:sequence>
     </xsd:complexType>
+
     <xsd:complexType name="packetSideDataType">
-        <xsd:attribute name="side_data_type"              type="xsd:string"/>
-        <xsd:attribute name="side_data_size"              type="xsd:int"   />
+        <xsd:attribute name="type" type="xsd:string"/>
+
+        <xsd:sequence>
+            <xsd:element name="side_datum" type="ffprobe:packetSideDatumType" minOccurs="1" maxOccurs="unbounded"/>
+        </xsd:sequence>
+    </xsd:complexType>
+
+    <xsd:complexType name="packetSideDatumType">
+        <xsd:attribute name="key"   type="xsd:string"/>
+        <xsd:attribute name="value" type="xsd:string"/>
     </xsd:complexType>
 
     <xsd:complexType name="frameType">
       <xsd:sequence>
-            <xsd:element name="tag" type="ffprobe:tagType" minOccurs="0" maxOccurs="unbounded"/>
+            <xsd:element name="tags" type="ffprobe:tagsType" minOccurs="0" maxOccurs="1"/>
             <xsd:element name="logs" type="ffprobe:logsType" minOccurs="0" maxOccurs="1"/>
             <xsd:element name="side_data_list" type="ffprobe:frameSideDataListType"   minOccurs="0" maxOccurs="1" />
       </xsd:sequence>
@@ -209,7 +222,7 @@
     <xsd:complexType name="streamType">
       <xsd:sequence>
         <xsd:element name="disposition" type="ffprobe:streamDispositionType" minOccurs="0" maxOccurs="1"/>
-        <xsd:element name="tag" type="ffprobe:tagType" minOccurs="0" maxOccurs="unbounded"/>
+        <xsd:element name="tags" type="ffprobe:tagsType" minOccurs="0" maxOccurs="1"/>
         <xsd:element name="side_data_list" type="ffprobe:packetSideDataListType"   minOccurs="0" maxOccurs="1" />
       </xsd:sequence>
 
@@ -270,7 +283,7 @@
 
     <xsd:complexType name="programType">
       <xsd:sequence>
-        <xsd:element name="tag" type="ffprobe:tagType" minOccurs="0" maxOccurs="unbounded"/>
+        <xsd:element name="tags" type="ffprobe:tagsType" minOccurs="0" maxOccurs="1"/>
         <xsd:element name="streams" type="ffprobe:streamsType" minOccurs="0" maxOccurs="1"/>
       </xsd:sequence>
 
@@ -283,7 +296,7 @@
 
     <xsd:complexType name="formatType">
       <xsd:sequence>
-        <xsd:element name="tag" type="ffprobe:tagType" minOccurs="0" maxOccurs="unbounded"/>
+        <xsd:element name="tags" type="ffprobe:tagsType" minOccurs="0" maxOccurs="1"/>
       </xsd:sequence>
 
       <xsd:attribute name="filename"         type="xsd:string" use="required"/>
@@ -325,7 +338,7 @@
 
     <xsd:complexType name="chapterType">
       <xsd:sequence>
-        <xsd:element name="tag" type="ffprobe:tagType" minOccurs="0" maxOccurs="unbounded"/>
+        <xsd:element name="tags" type="ffprobe:tagsType" minOccurs="0" maxOccurs="unbounded"/>
       </xsd:sequence>
 
       <xsd:attribute name="id"         type="xsd:int" use="required"/>
diff --git a/fftools/ffprobe.c b/fftools/ffprobe.c
index 40bb3f46e1..9db266d3fb 100644
--- a/fftools/ffprobe.c
+++ b/fftools/ffprobe.c
@@ -268,8 +268,8 @@ static struct section sections[] = {
     [SECTION_ID_PACKETS_AND_FRAMES] = { SECTION_ID_PACKETS_AND_FRAMES, "packets_and_frames", SECTION_FLAG_IS_ARRAY, { SECTION_ID_PACKET, -1} },
     [SECTION_ID_PACKET] =             { SECTION_ID_PACKET, "packet", 0, { SECTION_ID_PACKET_TAGS, SECTION_ID_PACKET_SIDE_DATA_LIST, -1 } },
     [SECTION_ID_PACKET_TAGS] =        { SECTION_ID_PACKET_TAGS, "tags", SECTION_FLAG_HAS_VARIABLE_FIELDS, { -1 }, .element_name = "tag", .unique_name = "packet_tags" },
-    [SECTION_ID_PACKET_SIDE_DATA_LIST] ={ SECTION_ID_PACKET_SIDE_DATA_LIST, "side_data_list", SECTION_FLAG_IS_ARRAY, { SECTION_ID_PACKET_SIDE_DATA, -1 }, .element_name = "side_data", .unique_name = "packet_side_data_list" },
-    [SECTION_ID_PACKET_SIDE_DATA] =     { SECTION_ID_PACKET_SIDE_DATA, "side_data", SECTION_FLAG_HAS_VARIABLE_FIELDS|SECTION_FLAG_HAS_TYPE, { -1 }, .unique_name = "packet_side_data", .get_type = get_packet_side_data_type },
+    [SECTION_ID_PACKET_SIDE_DATA_LIST] ={ SECTION_ID_PACKET_SIDE_DATA_LIST, "side_data_list", SECTION_FLAG_IS_ARRAY, { SECTION_ID_PACKET_SIDE_DATA, -1 }, .element_name = "side_data_list", .unique_name = "packet_side_data_list" },
+    [SECTION_ID_PACKET_SIDE_DATA] =     { SECTION_ID_PACKET_SIDE_DATA, "side_data", SECTION_FLAG_HAS_VARIABLE_FIELDS|SECTION_FLAG_HAS_TYPE, { -1 }, .unique_name = "packet_side_data", .element_name = "side_datum", .get_type = get_packet_side_data_type },
     [SECTION_ID_PIXEL_FORMATS] =      { SECTION_ID_PIXEL_FORMATS, "pixel_formats", SECTION_FLAG_IS_ARRAY, { SECTION_ID_PIXEL_FORMAT, -1 } },
     [SECTION_ID_PIXEL_FORMAT] =       { SECTION_ID_PIXEL_FORMAT, "pixel_format", 0, { SECTION_ID_PIXEL_FORMAT_FLAGS, SECTION_ID_PIXEL_FORMAT_COMPONENTS, -1 } },
     [SECTION_ID_PIXEL_FORMAT_FLAGS] = { SECTION_ID_PIXEL_FORMAT_FLAGS, "flags", 0, { -1 }, .unique_name = "pixel_format_flags" },
@@ -292,7 +292,7 @@ static struct section sections[] = {
     [SECTION_ID_STREAM_DISPOSITION] = { SECTION_ID_STREAM_DISPOSITION, "disposition", 0, { -1 }, .unique_name = "stream_disposition" },
     [SECTION_ID_STREAM_TAGS] =        { SECTION_ID_STREAM_TAGS, "tags", SECTION_FLAG_HAS_VARIABLE_FIELDS, { -1 }, .element_name = "tag", .unique_name = "stream_tags" },
     [SECTION_ID_STREAM_SIDE_DATA_LIST] ={ SECTION_ID_STREAM_SIDE_DATA_LIST, "side_data_list", SECTION_FLAG_IS_ARRAY, { SECTION_ID_STREAM_SIDE_DATA, -1 }, .element_name = "side_data", .unique_name = "stream_side_data_list" },
-    [SECTION_ID_STREAM_SIDE_DATA] =     { SECTION_ID_STREAM_SIDE_DATA, "side_data", SECTION_FLAG_HAS_TYPE|SECTION_FLAG_HAS_VARIABLE_FIELDS, { -1 }, .unique_name = "stream_side_data", .get_type = get_packet_side_data_type },
+    [SECTION_ID_STREAM_SIDE_DATA] =     { SECTION_ID_STREAM_SIDE_DATA, "side_data", SECTION_FLAG_HAS_TYPE|SECTION_FLAG_HAS_VARIABLE_FIELDS, { -1 }, .unique_name = "stream_side_data", .element_name = "side_datum", .get_type = get_packet_side_data_type },
     [SECTION_ID_SUBTITLE] =           { SECTION_ID_SUBTITLE, "subtitle", 0, { -1 } },
 };
 
@@ -1818,21 +1818,27 @@ static void xml_print_section_header(WriterContext *wctx, void *data)
         xml->within_tag = 0;
         writer_put_str(wctx, ">\n");
     }
-    if (section->flags & SECTION_FLAG_HAS_VARIABLE_FIELDS) {
-        xml->indent_level++;
-    } else {
+
         if (parent_section && (parent_section->flags & SECTION_FLAG_IS_WRAPPER) &&
             wctx->level && wctx->nb_item[wctx->level-1])
             writer_w8(wctx, '\n');
         xml->indent_level++;
 
-        if (section->flags & SECTION_FLAG_IS_ARRAY) {
-            XML_INDENT(); writer_printf(wctx, "<%s>\n", section->name);
+        if (section->flags & (SECTION_FLAG_IS_ARRAY|SECTION_FLAG_HAS_VARIABLE_FIELDS)) {
+            XML_INDENT(); writer_printf(wctx, "<%s", section->name);
+
+            if (section->flags & SECTION_FLAG_HAS_TYPE) {
+                AVBPrint buf;
+                av_bprint_init(&buf, 1, AV_BPRINT_SIZE_UNLIMITED);
+                av_bprint_escape(&buf, section->get_type(data), NULL,
+                                 AV_ESCAPE_MODE_XML, AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
+                writer_printf(wctx, " type=\"%s\"", buf.str);
+            }
+            writer_printf(wctx, ">\n", section->name);
         } else {
             XML_INDENT(); writer_printf(wctx, "<%s ", section->name);
             xml->within_tag = 1;
         }
-    }
 }
 
 static void xml_print_section_footer(WriterContext *wctx)
@@ -1846,8 +1852,6 @@ static void xml_print_section_footer(WriterContext *wctx)
         xml->within_tag = 0;
         writer_put_str(wctx, "/>\n");
         xml->indent_level--;
-    } else if (section->flags & SECTION_FLAG_HAS_VARIABLE_FIELDS) {
-        xml->indent_level--;
     } else {
         XML_INDENT(); writer_printf(wctx, "</%s>\n", section->name);
         xml->indent_level--;
@@ -1863,6 +1867,7 @@ static void xml_print_str(WriterContext *wctx, const char *key, const char *valu
     av_bprint_init(&buf, 1, AV_BPRINT_SIZE_UNLIMITED);
 
     if (section->flags & SECTION_FLAG_HAS_VARIABLE_FIELDS) {
+        xml->indent_level++;
         XML_INDENT();
         av_bprint_escape(&buf, key, NULL,
                          AV_ESCAPE_MODE_XML, AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
@@ -1873,6 +1878,7 @@ static void xml_print_str(WriterContext *wctx, const char *key, const char *valu
         av_bprint_escape(&buf, value, NULL,
                          AV_ESCAPE_MODE_XML, AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
         writer_printf(wctx, " value=\"%s\"/>\n", buf.str);
+        xml->indent_level--;
     } else {
         if (wctx->nb_item[wctx->level])
             writer_w8(wctx, ' ');
@@ -1887,9 +1893,26 @@ static void xml_print_str(WriterContext *wctx, const char *key, const char *valu
 
 static void xml_print_int(WriterContext *wctx, const char *key, long long int value)
 {
-    if (wctx->nb_item[wctx->level])
-        writer_w8(wctx, ' ');
-    writer_printf(wctx, "%s=\"%lld\"", key, value);
+    XMLContext *xml = wctx->priv;
+    const struct section *section = wctx->section[wctx->level];
+
+    if (section->flags & SECTION_FLAG_HAS_VARIABLE_FIELDS) {
+        AVBPrint buf;
+        av_bprint_init(&buf, 1, AV_BPRINT_SIZE_UNLIMITED);
+
+        xml->indent_level++;
+        XML_INDENT();
+        av_bprint_escape(&buf, key, NULL,
+                         AV_ESCAPE_MODE_XML, AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
+        writer_printf(wctx, "<%s key=\"%s\"",
+                      section->element_name, buf.str);
+        writer_printf(wctx, " value=\"%lld\"/>\n", value);
+        xml->indent_level--;
+    } else {
+        if (wctx->nb_item[wctx->level])
+            writer_w8(wctx, ' ');
+        writer_printf(wctx, "%s=\"%lld\"", key, value);
+    }
 }
 
 static Writer xml_writer = {
diff --git a/tests/ref/fate/ffprobe_xml b/tests/ref/fate/ffprobe_xml
index 4e893edaa9..a0839db46d 100644
--- a/tests/ref/fate/ffprobe_xml
+++ b/tests/ref/fate/ffprobe_xml
@@ -34,24 +34,32 @@
     <streams>
         <stream index="0" codec_name="pcm_s16le" codec_type="audio" codec_tag_string="PSD[16]" codec_tag="0x10445350" sample_fmt="s16" sample_rate="44100" channels="1" bits_per_sample="16" initial_padding="0" r_frame_rate="0/0" avg_frame_rate="0/0" time_base="1/44100" start_pts="0" start_time="0.000000" bit_rate="705600" nb_read_frames="6" nb_read_packets="6">
             <disposition default="0" dub="0" original="0" comment="0" lyrics="0" karaoke="0" forced="0" hearing_impaired="0" visual_impaired="0" clean_effects="0" attached_pic="0" timed_thumbnails="0" captions="0" descriptions="0" metadata="0" dependent="0" still_image="0"/>
-            <tag key="E" value="mc²"/>
-            <tag key="encoder" value="Lavc pcm_s16le"/>
+            <tags>
+                <tag key="E" value="mc²"/>
+                <tag key="encoder" value="Lavc pcm_s16le"/>
+            </tags>
         </stream>
         <stream index="1" codec_name="rawvideo" codec_type="video" codec_tag_string="RGB[24]" codec_tag="0x18424752" width="320" height="240" coded_width="320" coded_height="240" closed_captions="0" film_grain="0" has_b_frames="0" sample_aspect_ratio="1:1" display_aspect_ratio="4:3" pix_fmt="rgb24" level="-99" refs="1" r_frame_rate="25/1" avg_frame_rate="25/1" time_base="1/51200" start_pts="0" start_time="0.000000" nb_read_frames="4" nb_read_packets="4">
             <disposition default="1" dub="0" original="0" comment="0" lyrics="0" karaoke="0" forced="0" hearing_impaired="0" visual_impaired="0" clean_effects="0" attached_pic="0" timed_thumbnails="0" captions="0" descriptions="0" metadata="0" dependent="0" still_image="0"/>
-            <tag key="title" value="foobar"/>
-            <tag key="duration_ts" value="field-and-tags-conflict-attempt"/>
-            <tag key="encoder" value="Lavc rawvideo"/>
+            <tags>
+                <tag key="title" value="foobar"/>
+                <tag key="duration_ts" value="field-and-tags-conflict-attempt"/>
+                <tag key="encoder" value="Lavc rawvideo"/>
+            </tags>
         </stream>
         <stream index="2" codec_name="rawvideo" codec_type="video" codec_tag_string="RGB[24]" codec_tag="0x18424752" width="100" height="100" coded_width="100" coded_height="100" closed_captions="0" film_grain="0" has_b_frames="0" sample_aspect_ratio="1:1" display_aspect_ratio="1:1" pix_fmt="rgb24" level="-99" refs="1" r_frame_rate="25/1" avg_frame_rate="25/1" time_base="1/51200" start_pts="0" start_time="0.000000" nb_read_frames="4" nb_read_packets="4">
             <disposition default="0" dub="0" original="0" comment="0" lyrics="0" karaoke="0" forced="0" hearing_impaired="0" visual_impaired="0" clean_effects="0" attached_pic="0" timed_thumbnails="0" captions="0" descriptions="0" metadata="0" dependent="0" still_image="0"/>
-            <tag key="encoder" value="Lavc rawvideo"/>
+            <tags>
+                <tag key="encoder" value="Lavc rawvideo"/>
+            </tags>
         </stream>
     </streams>
 
     <format filename="tests/data/ffprobe-test.nut" nb_streams="3" nb_programs="0" format_name="nut" start_time="0.000000" duration="0.120000" size="1053646" bit_rate="70243066" probe_score="100">
-        <tag key="title" value="ffprobe test file"/>
-        <tag key="comment" value="'A comment with CSV, XML & JSON special chars': <tag value="x">"/>
-        <tag key="comment2" value="I ♥ Üñîçød€"/>
+      <tags>
+          <tag key="title" value="ffprobe test file"/>
+          <tag key="comment" value="'A comment with CSV, XML & JSON special chars': <tag value="x">"/>
+          <tag key="comment2" value="I ♥ Üñîçød€"/>
+      </tags>
     </format>
 </ffprobe>
-- 
2.34.1



More information about the ffmpeg-devel mailing list