[FFmpeg-devel] [PATCH v7 09/12] swscale: introduce new, dynamic scaling API
Niklas Haas
ffmpeg at haasn.xyz
Sat Nov 16 13:25:04 EET 2024
From: Niklas Haas <git at haasn.dev>
As part of a larger, ongoing effort to modernize and partially rewrite
libswscale, it was decided and generally agreed upon to introduce a new
public API for libswscale. This API is designed to be less stateful, more
explicitly defined, and considerably easier to use than the existing one.
Most of the API work has already been accomplished in the previous commits;
this commit merely introduces the ability to use sws_scale_frame()
dynamically, without prior sws_init_context() calls. Instead, the new API
takes frame properties from the frames themselves, and the implementation is
based on the new SwsGraph API, which we simply reinitialize as needed.
This high-level wrapper also recreates the logic that used to live inside
vf_scale for scaling interlaced frames, enabling it to be reused more easily
by end users.
Finally, this function is designed to simply copy refs directly when nothing
needs to be done, substantially improving throughput of the noop fast path.
Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git at haasn.dev>
---
libswscale/graph.c | 23 +++-
libswscale/swscale.c | 196 ++++++++++++++++++++++++++++++++--
libswscale/swscale.h | 89 +++++++++++----
libswscale/swscale_internal.h | 7 +-
libswscale/utils.c | 4 +
libswscale/x86/output.asm | 2 +-
6 files changed, 291 insertions(+), 30 deletions(-)
diff --git a/libswscale/graph.c b/libswscale/graph.c
index ec1015653e..a680b45280 100644
--- a/libswscale/graph.c
+++ b/libswscale/graph.c
@@ -265,6 +265,21 @@ static void get_chroma_pos(SwsGraph *graph, int *h_chr_pos, int *v_chr_pos,
*v_chr_pos = sub_y ? y_pos : -513;
}
+/**
+ * Apply a chroma position requested through the legacy SwsContext fields.
+ *
+ * Nothing happens when `override` is -513 or already equals `*chr_pos`.
+ * Otherwise `*chr_pos` is replaced by `override`, and a deprecation warning
+ * is logged unless `*warned` is already set; `*warned` is set afterwards so
+ * that callers sharing the flag only trigger the warning once.
+ */
+static void legacy_chr_pos(SwsGraph *graph, int *chr_pos, int override, int *warned)
+{
+    const int wants_override = override != -513 && override != *chr_pos;
+
+    if (wants_override) {
+        if (*warned == 0) {
+            av_log(NULL, AV_LOG_WARNING,
+                   "Setting chroma position directly is deprecated, make sure "
+                   "the frame is tagged with the correct chroma location.\n");
+            *warned = 1;
+        }
+
+        *chr_pos = override;
+    }
+}
+
static int init_pass(SwsGraph *graph, SwsContext *sws,
SwsImg input, SwsImg output)
{
@@ -409,7 +424,7 @@ static int init_passes(SwsGraph *graph)
const SwsFormat *const src = &graph->src;
const SwsFormat *const dst = &graph->dst;
SwsContext *sws;
- int ret;
+ int ret, warned = 0;
const SwsImg input = { .data = { &sws_input_sentinel }, .fmt = src->format };
const SwsImg output = { .data = { &sws_output_sentinel }, .fmt = dst->format };
@@ -447,6 +462,12 @@ static int init_passes(SwsGraph *graph)
get_chroma_pos(graph, &sws->src_h_chr_pos, &sws->src_v_chr_pos, src);
get_chroma_pos(graph, &sws->dst_h_chr_pos, &sws->dst_v_chr_pos, dst);
+ /* Allow overriding chroma position with the legacy API */
+ legacy_chr_pos(graph, &sws->src_h_chr_pos, ctx->src_h_chr_pos, &warned);
+ legacy_chr_pos(graph, &sws->src_v_chr_pos, ctx->src_v_chr_pos, &warned);
+ legacy_chr_pos(graph, &sws->dst_h_chr_pos, ctx->dst_h_chr_pos, &warned);
+ legacy_chr_pos(graph, &sws->dst_v_chr_pos, ctx->dst_v_chr_pos, &warned);
+
ret = sws_init_context(sws, NULL, NULL);
if (ret < 0)
return ret;
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 45172dcea4..d3dac44d04 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1219,21 +1219,205 @@ int sws_receive_slice(SwsContext *sws, unsigned int slice_start,
dst, c->frame_dst->linesize, slice_start, slice_height);
}
+/**
+ * Fill `data` / `linesize` with the plane pointers and strides needed to
+ * address a single field of `frame`.
+ *
+ * Frames without AV_FRAME_FLAG_INTERLACED are passed through unchanged. For
+ * interlaced frames the strides are doubled so that only every second line
+ * is visited, and for FIELD_BOTTOM the pointers are first advanced by one
+ * line.
+ *
+ * @param frame    frame to take the pointers and strides from
+ * @param data     receives the four plane pointers
+ * @param linesize receives the four strides
+ * @param field    field to address; asserted to be 0 for progressive frames
+ */
+static void get_frame_pointers(const AVFrame *frame, uint8_t *data[4],
+                               int linesize[4], int field)
+{
+    for (int i = 0; i < 4; i++) {
+        data[i]     = frame->data[i];
+        linesize[i] = frame->linesize[i];
+    }
+
+    if (!(frame->flags & AV_FRAME_FLAG_INTERLACED)) {
+        av_assert1(!field);
+        return;
+    }
+
+    if (field == FIELD_BOTTOM) {
+        /* Odd rows, offset by one line */
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+        for (int i = 0; i < 4; i++) {
+            data[i] += linesize[i];
+            /* With AV_PIX_FMT_FLAG_PAL only the first plane is offset */
+            if (desc->flags & AV_PIX_FMT_FLAG_PAL)
+                break;
+        }
+    }
+
+    /* Take only every second line. Multiply instead of shifting: strides
+     * may be negative, and left-shifting a negative int is undefined. */
+    for (int i = 0; i < 4; i++)
+        linesize[i] *= 2;
+}
+
+/**
+ * Subset of av_frame_ref() that only references (video) data buffers.
+ *
+ * Takes a new reference on every buffer set in `src->buf`, then copies the
+ * data pointers and linesizes. No other frame property is copied.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if a buffer reference could not be
+ *         created. In that case the references taken so far are released
+ *         again, so `dst` is not left holding buffers without matching data
+ *         pointers.
+ */
+static int frame_ref(AVFrame *dst, const AVFrame *src)
+{
+    /* ref the buffers */
+    for (int i = 0; i < FF_ARRAY_ELEMS(src->buf); i++) {
+        if (!src->buf[i])
+            continue;
+        dst->buf[i] = av_buffer_ref(src->buf[i]);
+        if (!dst->buf[i]) {
+            /* Undo the references acquired by earlier iterations */
+            for (int j = 0; j < i; j++) {
+                if (src->buf[j])
+                    av_buffer_unref(&dst->buf[j]);
+            }
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    memcpy(dst->data,     src->data,     sizeof(src->data));
+    memcpy(dst->linesize, src->linesize, sizeof(src->linesize));
+    return 0;
+}
+
int sws_scale_frame(SwsContext *sws, AVFrame *dst, const AVFrame *src)
{
int ret;
+ SwsInternal *c = sws_internal(sws);
+ if (!src || !dst)
+ return AVERROR(EINVAL);
+
+ if (c->frame_src) {
+ /* c->frame_src is set: the context has been initialized with explicit
+ * values, so fall back to the legacy API sequence
+ * (frame_start / send_slice / receive_slice / frame_end) */
+ ret = sws_frame_start(sws, dst, src);
+ if (ret < 0)
+ return ret;
+
+ ret = sws_send_slice(sws, 0, src->height);
+ if (ret >= 0)
+ ret = sws_receive_slice(sws, 0, dst->height);
- ret = sws_frame_start(sws, dst, src);
+ sws_frame_end(sws);
+
+ return ret;
+ }
+
+ /* Dynamic mode: validate the frame pair and (re)initialize the scaling
+ * graphs from the properties of the frames themselves */
+ ret = sws_frame_setup(sws, dst, src);
if (ret < 0)
return ret;
- ret = sws_send_slice(sws, 0, src->height);
- if (ret >= 0)
- ret = sws_receive_slice(sws, 0, dst->height);
+ /* Source carries no data: only the setup above was requested */
+ if (!src->data[0])
+ return 0;
- sws_frame_end(sws);
+ if (c->graph[FIELD_TOP]->noop &&
+ (!c->graph[FIELD_BOTTOM] || c->graph[FIELD_BOTTOM]->noop) &&
+ src->buf[0] && !dst->buf[0] && !dst->data[0])
+ {
+ /* Lightweight refcopy: every active graph is a noop, src has a
+ * buffer in buf[0] and dst has neither buffers nor data yet, so dst
+ * takes references to src's buffers instead of being scaled into */
+ ret = frame_ref(dst, src);
+ if (ret < 0)
+ return ret;
+ } else {
+ /* Allocate the destination buffers unless the caller provided them */
+ if (!dst->data[0]) {
+ ret = av_frame_get_buffer(dst, 0);
+ if (ret < 0)
+ return ret;
+ }
- return ret;
+ /* One graph run per field; stop after the first one unless the
+ * graph's destination format is interlaced */
+ for (int field = 0; field < 2; field++) {
+ SwsGraph *graph = c->graph[field];
+ uint8_t *dst_data[4], *src_data[4];
+ int dst_linesize[4], src_linesize[4];
+ get_frame_pointers(dst, dst_data, dst_linesize, field);
+ get_frame_pointers(src, src_data, src_linesize, field);
+ sws_graph_run(graph, dst_data, dst_linesize,
+ (const uint8_t **) src_data, src_linesize);
+ if (!graph->dst.interlaced)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Range-check the user-settable `threads`, `dither` and `alpha_blend` fields
+ * of `ctx`.
+ *
+ * @return 0 if every field is within range, otherwise AVERROR(EINVAL) after
+ *         logging the name, value and allowed range of the first offending
+ *         field.
+ */
+static int validate_params(SwsContext *ctx)
+{
+/* Wrapped in do { } while (0) so that each use is a single statement that
+ * takes a trailing semicolon; returns from the enclosing function on error. */
+#define VALIDATE(field, min, max) do {                                        \
+    if (ctx->field < (min) || ctx->field > (max)) {                           \
+        av_log(ctx, AV_LOG_ERROR, "'%s' (%d) out of range [%d, %d]\n",        \
+               #field, (int) ctx->field, (min), (max));                       \
+        return AVERROR(EINVAL);                                               \
+    }                                                                         \
+} while (0)
+
+    VALIDATE(threads,     0, 8192);
+    VALIDATE(dither,      0, SWS_DITHER_NB - 1);
+    VALIDATE(alpha_blend, 0, SWS_ALPHA_BLEND_NB - 1);
+
+#undef VALIDATE
+    return 0;
+}
+
+/**
+ * Validate the context options and (re)initialize the scaling graph of each
+ * field for converting `src` into `dst`, without processing any pixel data.
+ *
+ * The bottom-field graph is freed again when the source is not interlaced.
+ * When a check inside the per-field loop fails, the error is logged together
+ * with both formats and every graph held by the context is freed.
+ *
+ * @param ctx The scaling context.
+ * @param dst The destination frame to consider.
+ * @param src The source frame to consider.
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src)
+{
+    SwsInternal *s = sws_internal(ctx);
+    const char *err_msg;
+    int ret;
+
+    if (!src || !dst)
+        return AVERROR(EINVAL);
+    if ((ret = validate_params(ctx)) < 0)
+        return ret;
+
+    for (int field = 0; field < 2; field++) {
+        SwsFormat src_fmt = ff_fmt_from_frame(src, field);
+        SwsFormat dst_fmt = ff_fmt_from_frame(dst, field);
+
+        if ((src->flags ^ dst->flags) & AV_FRAME_FLAG_INTERLACED) {
+            /* No trailing period or newline: the message is embedded in the
+             * middle of the log line emitted under `fail` */
+            err_msg = "Cannot convert interlaced to progressive frames or vice versa";
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        /* TODO: remove once implemented */
+        if ((dst_fmt.prim != src_fmt.prim || dst_fmt.trc != src_fmt.trc) &&
+            !s->color_conversion_warned)
+        {
+            av_log(ctx, AV_LOG_WARNING, "Conversions between different primaries / "
+                   "transfer functions are not currently implemented, expect "
+                   "wrong results.\n");
+            s->color_conversion_warned = 1;
+        }
+
+        if (!ff_test_fmt(&src_fmt, 0)) {
+            err_msg = "Unsupported input";
+            ret = AVERROR(ENOTSUP);
+            goto fail;
+        }
+
+        if (!ff_test_fmt(&dst_fmt, 1)) {
+            err_msg = "Unsupported output";
+            ret = AVERROR(ENOTSUP);
+            goto fail;
+        }
+
+        ret = sws_graph_reinit(ctx, &dst_fmt, &src_fmt, field, &s->graph[field]);
+        if (ret < 0) {
+            err_msg = "Failed initializing scaling graph";
+            goto fail;
+        }
+
+        if (s->graph[field]->incomplete && (ctx->flags & SWS_STRICT)) {
+            err_msg = "Incomplete scaling graph";
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        if (!src_fmt.interlaced) {
+            /* Progressive source: no second field, drop any stale graph */
+            sws_graph_free(&s->graph[FIELD_BOTTOM]);
+            break;
+        }
+
+        continue;
+
+    fail:
+        /* Reached only via goto; src_fmt / dst_fmt of the failing field are
+         * still in scope for the diagnostic */
+        av_log(ctx, AV_LOG_ERROR, "%s (%s): fmt:%s csp:%s prim:%s trc:%s ->"
+                                  " fmt:%s csp:%s prim:%s trc:%s\n",
+               err_msg, av_err2str(ret),
+               av_get_pix_fmt_name(src_fmt.format), av_color_space_name(src_fmt.csp),
+               av_color_primaries_name(src_fmt.prim), av_color_transfer_name(src_fmt.trc),
+               av_get_pix_fmt_name(dst_fmt.format), av_color_space_name(dst_fmt.csp),
+               av_color_primaries_name(dst_fmt.prim), av_color_transfer_name(dst_fmt.trc));
+
+        for (int i = 0; i < FF_ARRAY_ELEMS(s->graph); i++)
+            sws_graph_free(&s->graph[i]);
+
+        return ret;
+    }
+
+    return 0;
}
/**
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 3996411dc8..fa3a0f01ab 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -107,6 +107,12 @@ typedef enum SwsFlags {
SWS_LANCZOS = 1 << 9, ///< 3-tap sinc/sinc
SWS_SPLINE = 1 << 10, ///< cubic Keys spline
+ /**
+ * Return an error on underspecified conversions. Without this flag,
+ * unspecified fields are defaulted to sensible values.
+ */
+ SWS_STRICT = 1 << 11,
+
/**
* Emit verbose log of scaling parameters.
*/
@@ -204,7 +210,10 @@ typedef struct SwsContext {
int gamma_flag;
/**
- * Frame property overrides.
+ * Deprecated frame property overrides, for the legacy API only.
+ *
+ * Ignored by sws_scale_frame() when used in dynamic mode, in which
+ * case all properties are instead taken from the frame directly.
*/
int src_w, src_h; ///< Width and height of the source frame
int dst_w, dst_h; ///< Width and height of the destination frame
@@ -216,6 +225,8 @@ typedef struct SwsContext {
int src_h_chr_pos; ///< Source horizontal chroma position
int dst_v_chr_pos; ///< Destination vertical chroma position
int dst_h_chr_pos; ///< Destination horizontal chroma position
+
+ /* Remember to add new fields to graph.c:opts_equal() */
} SwsContext;
/**
@@ -284,12 +295,57 @@ int sws_test_transfer(enum AVColorTransferCharacteristic trc, int output);
*/
int sws_test_frame(const AVFrame *frame, int output);
+/**
+ * Like `sws_scale_frame`, but without actually scaling. It will instead
+ * merely initialize internal state that *would* be required to perform the
+ * operation, as well as returning the correct error code for unsupported
+ * frame combinations.
+ *
+ * @param ctx The scaling context.
+ * @param dst The destination frame to consider.
+ * @param src The source frame to consider.
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src);
+
+/********************
+ * Main scaling API *
+ ********************/
+
/**
* Check if a given conversion is a noop. Returns a positive integer if
* no operation needs to be performed, 0 otherwise.
*/
int sws_is_noop(const AVFrame *dst, const AVFrame *src);
+/**
+ * Scale source data from `src` and write the output to `dst`.
+ *
+ * This function can be used directly on an allocated context, without setting
+ * up any frame properties or calling `sws_init_context()`. Such usage is fully
+ * dynamic and does not require reallocation if the frame properties change.
+ *
+ * Alternatively, this function can be called on a context that has been
+ * explicitly initialized. However, this is provided only for backwards
+ * compatibility. In this usage mode, all frame properties must be correctly
+ * set at init time, and may no longer change after initialization.
+ *
+ * @param c The scaling context.
+ * @param dst The destination frame. The data buffers may either be already
+ * allocated by the caller or left clear, in which case they will
+ * be allocated by the scaler. The latter may have performance
+ * advantages - e.g. in certain cases some (or all) output planes
+ * may be references to input planes, rather than copies.
+ * @param src The source frame. If the data buffers are set to NULL, then
+ * this function behaves identically to `sws_frame_setup`.
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int sws_scale_frame(SwsContext *c, AVFrame *dst, const AVFrame *src);
+
+/*************************
+ * Legacy (stateful) API *
+ *************************/
+
#define SWS_SRC_V_CHR_DROP_MASK 0x30000
#define SWS_SRC_V_CHR_DROP_SHIFT 16
@@ -352,6 +408,11 @@ int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt);
/**
* Initialize the swscaler context sws_context.
*
+ * This function is considered deprecated, and provided only for backwards
+ * compatibility with sws_scale() and sws_frame_start(). The preferred way to
+ * use libswscale is to set all frame properties correctly and call
+ * sws_scale_frame() directly, without explicitly initializing the context.
+ *
* @return zero or positive value on success, a negative value on
* error
*/
@@ -393,7 +454,8 @@ SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
/**
* Scale the image slice in srcSlice and put the resulting scaled
* slice in the image in dst. A slice is a sequence of consecutive
- * rows in an image.
+ * rows in an image. Requires a context that has previously
+ * been initialized with sws_init_context().
*
* Slices have to be provided in sequential order, either in
* top-bottom or bottom-top order. If slices are provided in
@@ -420,27 +482,11 @@ int sws_scale(SwsContext *c, const uint8_t *const srcSlice[],
const int srcStride[], int srcSliceY, int srcSliceH,
uint8_t *const dst[], const int dstStride[]);
-/**
- * Scale source data from src and write the output to dst.
- *
- * This is merely a convenience wrapper around
- * - sws_frame_start()
- * - sws_send_slice(0, src->height)
- * - sws_receive_slice(0, dst->height)
- * - sws_frame_end()
- *
- * @param c The scaling context
- * @param dst The destination frame. See documentation for sws_frame_start() for
- * more details.
- * @param src The source frame.
- *
- * @return 0 on success, a negative AVERROR code on failure
- */
-int sws_scale_frame(SwsContext *c, AVFrame *dst, const AVFrame *src);
-
/**
* Initialize the scaling process for a given pair of source/destination frames.
* Must be called before any calls to sws_send_slice() and sws_receive_slice().
+ * Requires a context that has previously been initialized with
+ * sws_init_context().
*
* This function will retain references to src and dst, so they must both use
* refcounted buffers (if allocated by the caller, in case of dst).
@@ -511,7 +557,8 @@ int sws_receive_slice(SwsContext *c, unsigned int slice_start,
unsigned int slice_height);
/**
- * Get the alignment required for slices
+ * Get the alignment required for slices. Requires a context that has
+ * previously been initialized with sws_init_context().
*
* @param c The scaling context
* @return alignment required for output slices requested with sws_receive_slice().
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 7c9517975b..80487e5275 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -26,6 +26,7 @@
#include "config.h"
#include "swscale.h"
+#include "graph.h"
#include "libavutil/avassert.h"
#include "libavutil/common.h"
@@ -323,6 +324,9 @@ struct SwsInternal {
int *slice_err;
int nb_slice_ctx;
+ /* Scaling graph, reinitialized dynamically as needed. */
+ SwsGraph *graph[2]; /* top, bottom fields */
+
// values passed to current sws_receive_slice() call
int dst_slice_start;
int dst_slice_height;
@@ -663,6 +667,7 @@ struct SwsInternal {
unsigned int dst_slice_align;
atomic_int stride_unaligned_warned;
atomic_int data_unaligned_warned;
+ int color_conversion_warned;
Half2FloatTables *h2f_tables;
};
@@ -674,7 +679,7 @@ static_assert(offsetof(SwsInternal, redDither) + DITHER32_INT == offsetof(SwsInt
#if ARCH_X86_64
/* x86 yuv2gbrp uses the SwsInternal for yuv coefficients
if struct offsets change the asm needs to be updated too */
-static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40316,
+static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40332,
"yuv2rgb_y_offset must be updated in x86 asm");
#endif
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 1b6f54fc30..628a3f1091 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -61,6 +61,7 @@
#include "swscale.h"
#include "swscale_internal.h"
#include "utils.h"
+#include "graph.h"
typedef struct FormatEntry {
uint8_t is_supported_in :1;
@@ -2450,6 +2451,9 @@ void sws_freeContext(SwsContext *sws)
if (!c)
return;
+ for (i = 0; i < FF_ARRAY_ELEMS(c->graph); i++)
+ sws_graph_free(&c->graph[i]);
+
for (i = 0; i < c->nb_slice_ctx; i++)
sws_freeContext(c->slice_ctx[i]);
av_freep(&c->slice_ctx);
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index dec1d27f9a..7a1e5d9bc1 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -582,7 +582,7 @@ yuv2nv12cX_fn yuv2nv21
%if ARCH_X86_64
struc SwsInternal
- .padding: resb 40316 ; offsetof(SwsInternal, yuv2rgb_y_offset)
+ .padding: resb 40332 ; offsetof(SwsInternal, yuv2rgb_y_offset)
.yuv2rgb_y_offset: resd 1
.yuv2rgb_y_coeff: resd 1
.yuv2rgb_v2r_coeff: resd 1
--
2.47.0
More information about the ffmpeg-devel
mailing list