[FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions
m.kaindl0208 at gmail.com
m.kaindl0208 at gmail.com
Sat Mar 8 16:58:04 EET 2025
These functions will be used by classify in the upcoming patches.
Try the new filters using my GitHub repo: https://github.com/MaximilianKaindl/DeepFFMPEGVideoClassification
Any feedback is appreciated!
Signed-off-by: MaximilianKaindl <m.kaindl0208 at gmail.com>
---
libavutil/detection_bbox.c | 54 ++++++++++++++++++++++++++++++++++++++
libavutil/detection_bbox.h | 31 ++++++++++++++++++++++
2 files changed, 85 insertions(+)
diff --git a/libavutil/detection_bbox.c b/libavutil/detection_bbox.c index cb157b355b..378233121d 100644
--- a/libavutil/detection_bbox.c
+++ b/libavutil/detection_bbox.c
@@ -18,6 +18,7 @@
#include "detection_bbox.h"
#include "mem.h"
+#include "libavutil/avstring.h"
AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size) { @@ -71,3 +72,56 @@ AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32
return header;
}
+
+/*
+ * Store the highest-probability labels in bbox.
+ *
+ * Every label whose probability is >= confidence_threshold is a candidate.
+ * Candidates are appended while the box holds fewer than max_classes_per_box
+ * entries; once it is full, a candidate replaces the currently weakest stored
+ * entry only if its probability is strictly higher. Entries already present
+ * in bbox on entry take part in that comparison.
+ *
+ * max_classes_per_box is capped at AV_NUM_DETECTION_BBOX_CLASSIFY, the
+ * capacity of the fixed-size per-box arrays.
+ *
+ * Returns 0 on success, AVERROR(EINVAL) on a NULL argument, a NULL candidate
+ * label, a negative max_classes_per_box or an out-of-range classify_count,
+ * or the error returned by av_detection_bbox_set_content().
+ */
+int av_detection_bbox_fill_with_best_labels(char **labels, float *probabilities,
+                                            int num_labels, AVDetectionBBox *bbox,
+                                            int max_classes_per_box,
+                                            float confidence_threshold)
+{
+    int i, j, count, limit, minpos, ret;
+    float min;
+
+    if (!labels || !probabilities || !bbox || max_classes_per_box < 0)
+        return AVERROR(EINVAL);
+
+    /* A count beyond the array capacity would make the scan below read
+     * out of bounds, so refuse to work on such a box. */
+    if (bbox->classify_count > AV_NUM_DETECTION_BBOX_CLASSIFY)
+        return AVERROR(EINVAL);
+
+    /* Never hand out more slots than the fixed-size arrays provide. */
+    limit = max_classes_per_box;
+    if (limit > AV_NUM_DETECTION_BBOX_CLASSIFY)
+        limit = AV_NUM_DETECTION_BBOX_CLASSIFY;
+
+    for (i = 0; i < num_labels; i++) {
+        /* Written as a negated ">=" so that NaN probabilities are skipped. */
+        if (!(probabilities[i] >= confidence_threshold))
+            continue;
+
+        if (!labels[i])
+            return AVERROR(EINVAL);
+
+        /* Safe narrowing: the count was validated against the capacity. */
+        count = (int)bbox->classify_count;
+
+        if (count < limit) {
+            /* Free slot available: append. */
+            ret = av_detection_bbox_set_content(bbox, labels[i], count, probabilities[i]);
+            if (ret < 0)
+                return ret;
+            bbox->classify_count++;
+            continue;
+        }
+
+        /* No slot may be used and nothing is stored (limit is 0 on an empty
+         * box): there is no entry to compare against or to replace. */
+        if (count == 0)
+            continue;
+
+        /* Box is full: find the lowest probability classification. */
+        min    = av_q2d(bbox->classify_confidences[0]);
+        minpos = 0;
+        for (j = 1; j < count; j++) {
+            float prob = av_q2d(bbox->classify_confidences[j]);
+            if (prob < min) {
+                min    = prob;
+                minpos = j;
+            }
+        }
+
+        /* Replace it only when the candidate is strictly better. */
+        if (probabilities[i] > min) {
+            ret = av_detection_bbox_set_content(bbox, labels[i], minpos, probabilities[i]);
+            if (ret < 0)
+                return ret;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Write one classification (label + confidence) into slot `index` of bbox.
+ *
+ * The confidence is stored as the rational (int)(probability * 10000) / 10000,
+ * i.e. truncated to four decimal places. The label is copied into the
+ * fixed-size slot and truncated (with a warning) if it does not fit.
+ * bbox->classify_count is not modified; the caller maintains it.
+ *
+ * Returns 0 on success, AVERROR(EINVAL) if bbox or label is NULL or index is
+ * outside [0, AV_NUM_DETECTION_BBOX_CLASSIFY).
+ */
+int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label, int index, float probability)
+{
+    /* The slot arrays are fixed-size; an unchecked index would write out of bounds. */
+    if (!bbox || !label || index < 0 || index >= AV_NUM_DETECTION_BBOX_CLASSIFY)
+        return AVERROR(EINVAL);
+
+    /* Set probability.
+     * NOTE(review): probability is not range-checked here; NaN or values far
+     * outside [0, 1] do not convert to int meaningfully - confirm callers only
+     * pass normalized scores. */
+    bbox->classify_confidences[index] = av_make_q((int)(probability * 10000), 10000);
+
+    /* Copy label with size checking; av_strlcpy() returns the source length,
+     * so a result >= the buffer size means the copy was truncated. */
+    if (av_strlcpy(bbox->classify_labels[index], label, AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) >=
+        AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) {
+        av_log(NULL, AV_LOG_WARNING, "Label truncated in av_detection_bbox_set_content\n");
+    }
+
+    return 0;
+}
diff --git a/libavutil/detection_bbox.h b/libavutil/detection_bbox.h index 011988052c..27d749ad59 100644
--- a/libavutil/detection_bbox.h
+++ b/libavutil/detection_bbox.h
@@ -105,4 +105,35 @@ AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_s
* AV_FRAME_DATA_DETECTION_BBOXES and initializes the variables.
*/
AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes);
+
+/**
+ * Fills an AVDetectionBBox structure with the best labels based on probabilities.
+ *
+ * Every label whose probability is greater than or equal to
+ * confidence_threshold is a candidate. Candidates are appended to the
+ * bounding box until it holds max_classes_per_box classifications; after
+ * that, a candidate replaces the stored classification with the lowest
+ * confidence, but only if the candidate's probability is strictly higher.
+ * Classifications already stored in bbox when the function is called are
+ * kept and take part in this comparison.
+ *
+ * @param labels               Array of label strings
+ * @param probabilities        Array of probability values corresponding to
+ *                             each label
+ * @param num_labels           Number of elements in the labels and
+ *                             probabilities arrays
+ * @param bbox                 Pointer to the AVDetectionBBox structure to be
+ *                             filled
+ * @param max_classes_per_box  Maximum number of classes to assign to the
+ *                             bounding box; should be between 1 and
+ *                             AV_NUM_DETECTION_BBOX_CLASSIFY, the capacity of
+ *                             the per-box classification arrays
+ * @param confidence_threshold Minimum probability value (inclusive) required
+ *                             for a label to be considered
+ * @return 0 on success, a negative AVERROR code on failure
+ *         (AVERROR(EINVAL) if labels, probabilities or bbox is NULL)
+ */
+int av_detection_bbox_fill_with_best_labels(char **labels, float *probabilities,
+                                            int num_labels, AVDetectionBBox *bbox,
+                                            int max_classes_per_box,
+                                            float confidence_threshold);
+
+/**
+ * Sets the content of an AVDetectionBBox at the specified index.
+ *
+ * This function assigns a label and its associated probability to the
+ * specified index in the bounding box's classification arrays. The
+ * probability is stored as a rational with denominator 10000, truncated to
+ * four decimal places. The label is copied; if it does not fit into
+ * AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE bytes (including the terminating
+ * NUL) it is truncated and a warning is logged. The classify_count field of
+ * bbox is not modified.
+ *
+ * @param bbox        Pointer to the AVDetectionBBox structure to modify
+ * @param label       The class label to assign (will be copied internally)
+ * @param index       The index at which to store the label and probability;
+ *                    must be in [0, AV_NUM_DETECTION_BBOX_CLASSIFY)
+ * @param probability The confidence score/probability for this label
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label,
+                                  int index, float probability);
+
#endif
--
2.34.1
More information about the ffmpeg-devel
mailing list