[FFmpeg-devel] [PATCH] sws: add a new scaling API
Anton Khirnov
anton at khirnov.net
Sun Aug 29 19:38:58 EEST 2021
---
Now with a new public function to query required slice alignment, which
fixes the yuv410p->yuv420p conversion issue reported by Michael.
---
libswscale/swscale.c | 294 ++++++++++++++++++++++++++--------
libswscale/swscale.h | 90 +++++++++++
libswscale/swscale_internal.h | 21 +++
libswscale/swscale_unscaled.c | 2 +
libswscale/utils.c | 72 +++++++++
5 files changed, 416 insertions(+), 63 deletions(-)
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 61dfcb4dff..ca5c612b18 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -236,13 +236,16 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
static int swscale(SwsContext *c, const uint8_t *src[],
- int srcStride[], int srcSliceY,
- int srcSliceH, uint8_t *dst[], int dstStride[])
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[],
+ int dstSliceY, int dstSliceH)
{
+ const int scale_dst = dstSliceY > 0 || dstSliceH < c->dstH;
+
/* load a few things into local vars to make the code more readable?
* and faster */
const int dstW = c->dstW;
- const int dstH = c->dstH;
+ int dstH = c->dstH;
const enum AVPixelFormat dstFormat = c->dstFormat;
const int flags = c->flags;
@@ -331,10 +334,15 @@ static int swscale(SwsContext *c, const uint8_t *src[],
}
}
- /* Note the user might start scaling the picture in the middle so this
- * will not get executed. This is not really intended but works
- * currently, so people might do it. */
- if (srcSliceY == 0) {
+ if (scale_dst) {
+ dstY = dstSliceY;
+ dstH = dstY + dstSliceH;
+ lastInLumBuf = -1;
+ lastInChrBuf = -1;
+ } else if (srcSliceY == 0) {
+ /* Note the user might start scaling the picture in the middle so this
+ * will not get executed. This is not really intended but works
+ * currently, so people might do it. */
dstY = 0;
lastInLumBuf = -1;
lastInChrBuf = -1;
@@ -352,8 +360,8 @@ static int swscale(SwsContext *c, const uint8_t *src[],
srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);
ff_init_slice_from_src(vout_slice, (uint8_t**)dst, dstStride, c->dstW,
- dstY, dstH, dstY >> c->chrDstVSubSample,
- AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample), 0);
+ dstY, dstSliceH, dstY >> c->chrDstVSubSample,
+ AV_CEIL_RSHIFT(dstSliceH, c->chrDstVSubSample), scale_dst);
if (srcSliceY == 0) {
hout_slice->plane[0].sliceY = lastInLumBuf + 1;
hout_slice->plane[1].sliceY = lastInChrBuf + 1;
@@ -373,7 +381,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
// First line needed as input
const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
- const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
+ const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), c->dstH - 1)]);
// First line needed as input
const int firstChrSrcY = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
@@ -477,7 +485,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
c->chrDither8 = ff_dither_8x8_128[chrDstY & 7];
c->lumDither8 = ff_dither_8x8_128[dstY & 7];
}
- if (dstY >= dstH - 2) {
+ if (dstY >= c->dstH - 2) {
/* hmm looks like we can't use MMX here without overwriting
* this array's tail */
ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
@@ -491,21 +499,22 @@ static int swscale(SwsContext *c, const uint8_t *src[],
desc[i].process(c, &desc[i], dstY, 1);
}
if (isPlanar(dstFormat) && isALPHA(dstFormat) && !needAlpha) {
+ int offset = lastDstY - dstSliceY;
int length = dstW;
int height = dstY - lastDstY;
if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
- fillPlane16(dst[3], dstStride[3], length, height, lastDstY,
+ fillPlane16(dst[3], dstStride[3], length, height, offset,
1, desc->comp[3].depth,
isBE(dstFormat));
} else if (is32BPS(dstFormat)) {
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
- fillPlane32(dst[3], dstStride[3], length, height, lastDstY,
+ fillPlane32(dst[3], dstStride[3], length, height, offset,
1, desc->comp[3].depth,
isBE(dstFormat), desc->flags & AV_PIX_FMT_FLAG_FLOAT);
} else
- fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
+ fillPlane(dst[3], dstStride[3], length, height, offset, 255);
}
#if HAVE_MMXEXT_INLINE
@@ -809,33 +818,42 @@ static void update_palette(SwsContext *c, const uint32_t *pal)
}
}
+static int scale_internal(SwsContext *c,
+ const uint8_t * const srcSlice[], const int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *const dstSlice[], const int dstStride[],
+ int dstSliceY, int dstSliceH);
+
static int scale_gamma(SwsContext *c,
const uint8_t * const srcSlice[], const int srcStride[],
int srcSliceY, int srcSliceH,
- uint8_t * const dst[], const int dstStride[])
+ uint8_t * const dstSlice[], const int dstStride[],
+ int dstSliceY, int dstSliceH)
{
- int ret = sws_scale(c->cascaded_context[0],
- srcSlice, srcStride, srcSliceY, srcSliceH,
- c->cascaded_tmp, c->cascaded_tmpStride);
+ int ret = scale_internal(c->cascaded_context[0],
+ srcSlice, srcStride, srcSliceY, srcSliceH,
+ c->cascaded_tmp, c->cascaded_tmpStride, 0, c->srcH);
if (ret < 0)
return ret;
if (c->cascaded_context[2])
- ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
- c->cascaded_tmpStride, srcSliceY, srcSliceH, c->cascaded1_tmp,
- c->cascaded1_tmpStride);
+ ret = scale_internal(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
+ c->cascaded_tmpStride, srcSliceY, srcSliceH,
+ c->cascaded1_tmp, c->cascaded1_tmpStride, 0, c->dstH);
else
- ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
- c->cascaded_tmpStride, srcSliceY, srcSliceH, dst, dstStride);
+ ret = scale_internal(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
+ c->cascaded_tmpStride, srcSliceY, srcSliceH,
+ dstSlice, dstStride, dstSliceY, dstSliceH);
if (ret < 0)
return ret;
if (c->cascaded_context[2]) {
- ret = sws_scale(c->cascaded_context[2], (const uint8_t * const *)c->cascaded1_tmp,
- c->cascaded1_tmpStride, c->cascaded_context[1]->dstY - ret,
- c->cascaded_context[1]->dstY, dst, dstStride);
+ ret = scale_internal(c->cascaded_context[2], (const uint8_t * const *)c->cascaded1_tmp,
+ c->cascaded1_tmpStride, c->cascaded_context[1]->dstY - ret,
+ c->cascaded_context[1]->dstY,
+ dstSlice, dstStride, dstSliceY, dstSliceH);
}
return ret;
}
@@ -843,56 +861,64 @@ static int scale_gamma(SwsContext *c,
static int scale_cascaded(SwsContext *c,
const uint8_t * const srcSlice[], const int srcStride[],
int srcSliceY, int srcSliceH,
- uint8_t * const dst[], const int dstStride[])
+ uint8_t * const dstSlice[], const int dstStride[],
+ int dstSliceY, int dstSliceH)
{
- int ret = sws_scale(c->cascaded_context[0],
- srcSlice, srcStride, srcSliceY, srcSliceH,
- c->cascaded_tmp, c->cascaded_tmpStride);
+ int ret = scale_internal(c->cascaded_context[0],
+ srcSlice, srcStride, srcSliceY, srcSliceH,
+ c->cascaded_tmp, c->cascaded_tmpStride,
+ 0, c->cascaded_context[0]->dstH);
if (ret < 0)
return ret;
- ret = sws_scale(c->cascaded_context[1],
- (const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride,
- 0, c->cascaded_context[0]->dstH, dst, dstStride);
+ ret = scale_internal(c->cascaded_context[1],
+ (const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride,
+ 0, c->cascaded_context[0]->dstH,
+ dstSlice, dstStride, dstSliceY, dstSliceH);
return ret;
}
-/**
- * swscale wrapper, so we don't need to export the SwsContext.
- * Assumes planar YUV to be in YUV order instead of YVU.
- */
-int attribute_align_arg sws_scale(struct SwsContext *c,
- const uint8_t * const srcSlice[],
- const int srcStride[], int srcSliceY,
- int srcSliceH, uint8_t *const dst[],
- const int dstStride[])
+static int scale_internal(SwsContext *c,
+ const uint8_t * const srcSlice[], const int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *const dstSlice[], const int dstStride[],
+ int dstSliceY, int dstSliceH)
{
- const int frame_start = !c->sliceDir;
+ const int scale_dst = dstSliceY > 0 || dstSliceH < c->dstH;
+ const int frame_start = scale_dst || !c->sliceDir;
int i, ret;
const uint8_t *src2[4];
uint8_t *dst2[4];
- int macro_height = isBayer(c->srcFormat) ? 2 : (1 << c->chrSrcVSubSample);
+ int macro_height_src = isBayer(c->srcFormat) ? 2 : (1 << c->chrSrcVSubSample);
+ int macro_height_dst = isBayer(c->dstFormat) ? 2 : (1 << c->chrDstVSubSample);
// copy strides, so they can safely be modified
int srcStride2[4];
int dstStride2[4];
int srcSliceY_internal = srcSliceY;
- if (!srcStride || !dstStride || !dst || !srcSlice) {
+ if (!srcStride || !dstStride || !dstSlice || !srcSlice) {
av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n");
return AVERROR(EINVAL);
}
- if ((srcSliceY & (macro_height-1)) ||
- ((srcSliceH& (macro_height-1)) && srcSliceY + srcSliceH != c->srcH) ||
+ if ((srcSliceY & (macro_height_src - 1)) ||
+ ((srcSliceH & (macro_height_src - 1)) && srcSliceY + srcSliceH != c->srcH) ||
srcSliceY + srcSliceH > c->srcH) {
av_log(c, AV_LOG_ERROR, "Slice parameters %d, %d are invalid\n", srcSliceY, srcSliceH);
return AVERROR(EINVAL);
}
+ if ((dstSliceY & (macro_height_dst - 1)) ||
+ ((dstSliceH & (macro_height_dst - 1)) && dstSliceY + dstSliceH != c->dstH) ||
+ dstSliceY + dstSliceH > c->dstH) {
+ av_log(c, AV_LOG_ERROR, "Slice parameters %d, %d are invalid\n", dstSliceY, dstSliceH);
+ return AVERROR(EINVAL);
+ }
+
if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) {
av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
return AVERROR(EINVAL);
}
- if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) {
+ if (!check_image_pointers((const uint8_t* const*)dstSlice, c->dstFormat, dstStride)) {
av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
return AVERROR(EINVAL);
}
@@ -902,10 +928,12 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
return 0;
if (c->gamma_flag && c->cascaded_context[0])
- return scale_gamma(c, srcSlice, srcStride, srcSliceY, srcSliceH, dst, dstStride);
+ return scale_gamma(c, srcSlice, srcStride, srcSliceY, srcSliceH,
+ dstSlice, dstStride, dstSliceY, dstSliceH);
if (c->cascaded_context[0] && srcSliceY == 0 && srcSliceH == c->cascaded_context[0]->srcH)
- return scale_cascaded(c, srcSlice, srcStride, srcSliceY, srcSliceH, dst, dstStride);
+ return scale_cascaded(c, srcSlice, srcStride, srcSliceY, srcSliceH,
+ dstSlice, dstStride, dstSliceY, dstSliceH);
if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0])
for (i = 0; i < 4; i++)
@@ -915,18 +943,19 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
update_palette(c, (const uint32_t *)srcSlice[1]);
memcpy(src2, srcSlice, sizeof(src2));
- memcpy(dst2, dst, sizeof(dst2));
+ memcpy(dst2, dstSlice, sizeof(dst2));
memcpy(srcStride2, srcStride, sizeof(srcStride2));
memcpy(dstStride2, dstStride, sizeof(dstStride2));
- if (frame_start) {
+ if (frame_start && !scale_dst) {
if (srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
return AVERROR(EINVAL);
}
c->sliceDir = (srcSliceY == 0) ? 1 : -1;
- }
+ } else if (scale_dst)
+ c->sliceDir = 1;
if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) {
uint8_t *base;
@@ -985,26 +1014,165 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
reset_ptr(src2, c->srcFormat);
reset_ptr((void*)dst2, c->dstFormat);
- if (c->convert_unscaled)
- ret = c->convert_unscaled(c, src2, srcStride2, srcSliceY_internal, srcSliceH,
+ if (c->convert_unscaled) {
+ int offset = srcSliceY_internal;
+ int slice_h = srcSliceH;
+
+ // for dst slice scaling, offset the src pointers to match the dst slice
+ if (scale_dst) {
+ av_assert0(offset == 0);
+ for (i = 0; i < 4 && src2[i]; i++) {
+ if (!src2[i] || (i > 0 && usePal(c->srcFormat)))
+ break;
+ src2[i] += (dstSliceY >> ((i == 1 || i == 2) ? c->chrSrcVSubSample : 0)) * srcStride2[i];
+ }
+ offset = 0;
+ slice_h = dstSliceH;
+ }
+
+ ret = c->convert_unscaled(c, src2, srcStride2, offset, slice_h,
dst2, dstStride2);
- else
- ret = swscale(c, src2, srcStride2, srcSliceY_internal, srcSliceH, dst2, dstStride2);
+ } else {
+ ret = swscale(c, src2, srcStride2, srcSliceY_internal, srcSliceH,
+ dst2, dstStride2, dstSliceY, dstSliceH);
+ }
if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
- int dstY = c->dstY ? c->dstY : srcSliceY + srcSliceH;
- uint16_t *dst16 = (uint16_t*)(dst2[0] + (dstY - ret) * dstStride2[0]);
- av_assert0(dstY >= ret);
- av_assert0(ret >= 0);
- av_assert0(c->dstH >= dstY);
+ uint16_t *dst16;
+
+ if (scale_dst) {
+ dst16 = (uint16_t *)dst2[0];
+ } else {
+ int dstY = c->dstY ? c->dstY : srcSliceY + srcSliceH;
+
+ av_assert0(dstY >= ret);
+ av_assert0(ret >= 0);
+ av_assert0(c->dstH >= dstY);
+ dst16 = (uint16_t*)(dst2[0] + (dstY - ret) * dstStride2[0]);
+ }
/* replace on the same data */
rgb48Toxyz12(c, dst16, dst16, dstStride2[0]/2, ret);
}
/* reset slice direction at end of frame */
- if (srcSliceY_internal + srcSliceH == c->srcH)
+ if ((srcSliceY_internal + srcSliceH == c->srcH) || scale_dst)
c->sliceDir = 0;
return ret;
}
+
+/*
+ * Close the current frame pair: forget which source rows were announced and
+ * drop the references held in c->frame_src and c->frame_dst.
+ */
+void sws_frame_end(struct SwsContext *c)
+{
+    c->src_ranges.nb_ranges = 0;
+
+    av_frame_unref(c->frame_src);
+    av_frame_unref(c->frame_dst);
+}
+
+/*
+ * Take references on src and dst for the scaling calls that follow.
+ *
+ * When dst carries no buffers (dst->buf[0] is NULL), its width, height and
+ * format are filled in from the context and its buffers are allocated with
+ * av_frame_get_buffer().
+ *
+ * On failure the source reference taken here is dropped again, so the
+ * context's internal source frame is clean for the next sws_frame_start().
+ *
+ * Returns 0 on success, a negative AVERROR code on failure.
+ */
+int sws_frame_start(struct SwsContext *c, AVFrame *dst, const AVFrame *src)
+{
+    int ret, allocated = 0;
+
+    ret = av_frame_ref(c->frame_src, src);
+    if (ret < 0)
+        return ret;
+
+    if (!dst->buf[0]) {
+        dst->width  = c->dstW;
+        dst->height = c->dstH;
+        dst->format = c->dstFormat;
+
+        ret = av_frame_get_buffer(dst, 0);
+        if (ret < 0)
+            goto fail;
+        allocated = 1;
+    }
+
+    ret = av_frame_ref(c->frame_dst, dst);
+    if (ret < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    /* do not leave a stale source reference behind in the context */
+    av_frame_unref(c->frame_src);
+    /* buffers allocated above are released again; caller-provided ones are kept */
+    if (allocated)
+        av_frame_unref(dst);
+
+    return ret;
+}
+
+/*
+ * Record rows [slice_start, slice_start + slice_height) of the source frame
+ * as available by adding them to c->src_ranges.
+ */
+int sws_send_slice(struct SwsContext *c, unsigned int slice_start,
+                   unsigned int slice_height)
+{
+    const int err = ff_range_add(&c->src_ranges, slice_start, slice_height);
+
+    /* negative results are passed through, everything else is reported as 0 */
+    return err < 0 ? err : 0;
+}
+
+/* Report the output slice alignment stored in the context. */
+unsigned int sws_receive_slice_alignment(const struct SwsContext *c)
+{
+    const unsigned int alignment = c->dst_slice_align;
+
+    return alignment;
+}
+
+/*
+ * Produce rows [slice_start, slice_start + slice_height) of the destination
+ * frame stored in the context.
+ *
+ * Returns AVERROR(EAGAIN) until the whole source frame has been announced as
+ * a single range covering rows 0 .. c->srcH, AVERROR(EINVAL) for a partial
+ * slice that is not aligned to sws_receive_slice_alignment(), otherwise the
+ * result of scale_internal().
+ */
+int sws_receive_slice(struct SwsContext *c, unsigned int slice_start,
+                      unsigned int slice_height)
+{
+    unsigned int align = sws_receive_slice_alignment(c);
+    /* unused planes must stay NULL: scale_internal() copies all four entries */
+    uint8_t *dst[4] = { NULL };
+
+    /* wait until complete input has been received */
+    if (!(c->src_ranges.nb_ranges == 1        &&
+          c->src_ranges.ranges[0].start == 0 &&
+          c->src_ranges.ranges[0].len == c->srcH))
+        return AVERROR(EAGAIN);
+
+    if ((slice_start > 0 || slice_height < c->dstH) &&
+        (slice_start % align || slice_height % align)) {
+        av_log(c, AV_LOG_ERROR,
+               "Incorrectly aligned output: %u/%u not multiples of %u\n",
+               slice_start, slice_height, align);
+        return AVERROR(EINVAL);
+    }
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(dst) && c->frame_dst->data[i]; i++) {
+        /* only planes 1 and 2 are vertically subsampled; planes 0 and 3
+         * have one row per output row */
+        const int vshift = (i == 1 || i == 2) ? c->chrDstVSubSample : 0;
+
+        dst[i] = c->frame_dst->data[i] +
+                 c->frame_dst->linesize[i] * (slice_start >> vshift);
+    }
+
+    return scale_internal(c, (const uint8_t * const *)c->frame_src->data,
+                          c->frame_src->linesize, 0, c->srcH,
+                          dst, c->frame_dst->linesize, slice_start, slice_height);
+}
+
+/*
+ * One-shot conversion of src into dst: start the frame, announce the whole
+ * source, request the whole destination, then end the frame.
+ */
+int sws_scale_frame(struct SwsContext *c, AVFrame *dst, const AVFrame *src)
+{
+    int status = sws_frame_start(c, dst, src);
+
+    if (status < 0)
+        return status;
+
+    status = sws_send_slice(c, 0, src->height);
+    if (status < 0)
+        goto end;
+
+    status = sws_receive_slice(c, 0, dst->height);
+
+end:
+    /* the frame is ended whether or not send/receive succeeded */
+    sws_frame_end(c);
+
+    return status;
+}
+
+/**
+ * Public wrapper around the internal scaler, so the SwsContext does not have
+ * to be exported. Always requests the complete destination picture
+ * (rows 0 .. c->dstH) from scale_internal().
+ * Assumes planar YUV to be in YUV order instead of YVU.
+ */
+int attribute_align_arg sws_scale(struct SwsContext *c,
+                                  const uint8_t * const srcSlice[],
+                                  const int srcStride[], int srcSliceY,
+                                  int srcSliceH, uint8_t *const dst[],
+                                  const int dstStride[])
+{
+    const int dst_first_row = 0;
+    const int dst_rows      = c->dstH;
+
+    return scale_internal(c, srcSlice, srcStride, srcSliceY, srcSliceH,
+                          dst, dstStride, dst_first_row, dst_rows);
+}
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 50d6d46553..77067e79dc 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -30,6 +30,7 @@
#include <stdint.h>
#include "libavutil/avutil.h"
+#include "libavutil/frame.h"
#include "libavutil/log.h"
#include "libavutil/pixfmt.h"
#include "version.h"
@@ -218,6 +219,95 @@ int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
const int srcStride[], int srcSliceY, int srcSliceH,
uint8_t *const dst[], const int dstStride[]);
+/**
+ * Scale source data from src and write the output to dst.
+ *
+ * This is merely a convenience wrapper around
+ * - sws_frame_start()
+ * - sws_send_slice(0, src->height)
+ * - sws_receive_slice(0, dst->height)
+ * - sws_frame_end()
+ *
+ * @param dst The destination frame. See documentation for sws_frame_start() for
+ * more details.
+ * @param src The source frame.
+ *
+ * @return a non-negative number on success, a negative AVERROR code on failure
+ */
+int sws_scale_frame(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
+
+/**
+ * Initialize the scaling process for a given pair of source/destination frames.
+ * Must be called before any calls to sws_send_slice() and sws_receive_slice().
+ *
+ * This function will retain references to src and dst, so they must both use
+ * refcounted buffers (if allocated by the caller, in case of dst).
+ *
+ * @param dst The destination frame.
+ *
+ * The data buffers may either be already allocated by the caller or
+ * left clear, in which case they will be allocated by the scaler.
+ * The latter may have performance advantages - e.g. in certain cases
+ * some output planes may be references to input planes, rather than
+ * copies.
+ *
+ * Output data will be written into this frame in successful
+ * sws_receive_slice() calls.
+ * @param src The source frame. The data buffers must be allocated, but the
+ * frame data does not have to be ready at this point. Data
+ * availability is then signalled by sws_send_slice().
+ * @return 0 on success, a negative AVERROR code on failure
+ *
+ * @see sws_frame_end()
+ */
+int sws_frame_start(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
+
+/**
+ * Finish the scaling process for a pair of source/destination frames previously
+ * submitted with sws_frame_start(). Must be called after all sws_send_slice()
+ * and sws_receive_slice() calls are done, before any new sws_frame_start()
+ * calls.
+ */
+void sws_frame_end(struct SwsContext *c);
+
+/**
+ * Indicate that a horizontal slice of input data is available in the source
+ * frame previously provided to sws_frame_start(). The slices may be provided in
+ * any order, but may not overlap. For vertically subsampled pixel formats, the
+ * slices must be aligned according to subsampling.
+ *
+ * @param slice_start first row of the slice
+ * @param slice_height number of rows in the slice
+ *
+ * @return a non-negative number on success, a negative AVERROR code on failure.
+ */
+int sws_send_slice(struct SwsContext *c, unsigned int slice_start,
+ unsigned int slice_height);
+
+/**
+ * Request a horizontal slice of the output data to be written into the frame
+ * previously provided to sws_frame_start().
+ *
+ * When a slice smaller than the whole output frame is requested, both
+ * slice_start and slice_height must be multiples of
+ * sws_receive_slice_alignment().
+ *
+ * @param slice_start first row of the slice
+ * @param slice_height number of rows in the slice
+ *
+ * @return a non-negative number if the data was successfully written into the output
+ * AVERROR(EAGAIN) if more input data needs to be provided before the
+ * output can be produced
+ * another negative AVERROR code on other kinds of scaling failure
+ */
+int sws_receive_slice(struct SwsContext *c, unsigned int slice_start,
+ unsigned int slice_height);
+
+/**
+ * Query required alignment on output data requested with sws_receive_slice().
+ */
+unsigned int sws_receive_slice_alignment(const struct SwsContext *c);
+
/**
* @param dstRange flag indicating the while-black range of the output (1=jpeg / 0=mpeg)
* @param srcRange flag indicating the while-black range of the input (1=jpeg / 0=mpeg)
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 673407636a..55fa6cec07 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -27,6 +27,7 @@
#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/common.h"
+#include "libavutil/frame.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavutil/mem_internal.h"
@@ -80,6 +81,19 @@ typedef enum SwsAlphaBlend {
SWS_ALPHA_BLEND_NB,
} SwsAlphaBlend;
+typedef struct Range {
+ unsigned int start; ///< first element covered by the range
+ unsigned int len; ///< number of elements covered, i.e. the range is [start, start + len)
+} Range;
+
+typedef struct RangeList {
+ Range *ranges; ///< array of ranges; ff_range_add() inserts new entries ordered by start
+ unsigned int nb_ranges; ///< number of valid entries in ranges
+ int ranges_allocated; ///< size value maintained by av_fast_realloc() for ranges in ff_range_add()
+} RangeList;
+
+int ff_range_add(RangeList *r, unsigned int start, unsigned int len);
+
typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[],
int srcStride[], int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[]);
@@ -313,6 +327,11 @@ typedef struct SwsContext {
int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
double param[2]; ///< Input parameters for scaling algorithms that need them.
+ AVFrame *frame_src;
+ AVFrame *frame_dst;
+
+ RangeList src_ranges;
+
/* The cascaded_* fields allow spliting a scaler task into multiple
* sequential steps, this is for example used to limit the maximum
* downscaling factor that needs to be supported in one scaler.
@@ -638,6 +657,8 @@ typedef struct SwsContext {
// then passed as input to further conversion
uint8_t *xyz_scratch;
unsigned int xyz_scratch_allocated;
+
+ unsigned int dst_slice_align;
} SwsContext;
//FIXME check init (where 0)
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index c83af8bb07..7cb2a62f07 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -2009,6 +2009,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) &&
!(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) {
c->convert_unscaled = ff_yuv2rgb_get_func_ptr(c);
+ c->dst_slice_align = 2;
}
/* yuv420p1x_to_p01x */
if ((srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == AV_PIX_FMT_YUVA420P10 ||
@@ -2028,6 +2029,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
(dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
!(flags & SWS_BITEXACT)) {
c->convert_unscaled = yvu9ToYv12Wrapper;
+ c->dst_slice_align = 4;
}
/* bgr24toYV12 */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 176fc6fd63..235a846809 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1300,6 +1300,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
av_pix_fmt_get_chroma_sub_sample(srcFormat, &c->chrSrcHSubSample, &c->chrSrcVSubSample);
av_pix_fmt_get_chroma_sub_sample(dstFormat, &c->chrDstHSubSample, &c->chrDstVSubSample);
+ c->dst_slice_align = 1 << c->chrDstVSubSample;
+
if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) {
if (dstW&1) {
av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n");
@@ -1424,6 +1426,11 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
if (!FF_ALLOCZ_TYPED_ARRAY(c->formatConvBuffer, FFALIGN(srcW * 2 + 78, 16) * 2))
goto nomem;
+ c->frame_src = av_frame_alloc();
+ c->frame_dst = av_frame_alloc();
+ if (!c->frame_src || !c->frame_dst)
+ goto nomem;
+
c->srcBpc = desc_src->comp[0].depth;
if (c->srcBpc < 8)
c->srcBpc = 8;
@@ -2250,6 +2257,11 @@ void sws_freeContext(SwsContext *c)
for (i = 0; i < 4; i++)
av_freep(&c->dither_error[i]);
+ av_frame_free(&c->frame_src);
+ av_frame_free(&c->frame_dst);
+
+ av_freep(&c->src_ranges.ranges);
+
av_freep(&c->vLumFilter);
av_freep(&c->vChrFilter);
av_freep(&c->hLumFilter);
@@ -2364,3 +2376,63 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW,
}
return context;
}
+
+/*
+ * Insert the range [start, start + len) into a list ordered by start and
+ * merge it with directly adjacent neighbours.
+ *
+ * Returns 0 on success, AVERROR(EINVAL) if the new range wraps around or
+ * overlaps an existing one, AVERROR(ENOMEM) if the array cannot be grown.
+ */
+int ff_range_add(RangeList *rl, unsigned int start, unsigned int len)
+{
+    Range *tmp;
+    unsigned int idx;
+
+    /* a range whose end wraps around would fool the overlap checks below */
+    if (start + len < start)
+        return AVERROR(EINVAL);
+
+    /* find the first existing range after the new one */
+    for (idx = 0; idx < rl->nb_ranges; idx++)
+        if (rl->ranges[idx].start > start)
+            break;
+
+    /* check for overlap */
+    if (idx > 0) {
+        Range *prev = &rl->ranges[idx - 1];
+        if (prev->start + prev->len > start)
+            return AVERROR(EINVAL);
+    }
+    if (idx < rl->nb_ranges) {
+        Range *next = &rl->ranges[idx];
+        if (start + len > next->start)
+            return AVERROR(EINVAL);
+    }
+
+    /* NOTE(review): av_fast_realloc() expects an unsigned int * size argument
+     * while ranges_allocated is declared int - confirm the field type */
+    tmp = av_fast_realloc(rl->ranges, &rl->ranges_allocated,
+                          (rl->nb_ranges + 1) * sizeof(*rl->ranges));
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    rl->ranges = tmp;
+
+    /* open a gap at idx and store the new entry there */
+    memmove(rl->ranges + idx + 1, rl->ranges + idx,
+            sizeof(*rl->ranges) * (rl->nb_ranges - idx));
+    rl->ranges[idx].start = start;
+    rl->ranges[idx].len   = len;
+    rl->nb_ranges++;
+
+    /* merge with the preceding range: prev absorbs the new entry, which is
+     * then removed by shifting the entries after it down by one */
+    if (idx > 0) {
+        Range *prev = &rl->ranges[idx - 1];
+        Range *cur  = &rl->ranges[idx];
+        if (prev->start + prev->len == cur->start) {
+            prev->len += cur->len;
+            memmove(rl->ranges + idx, rl->ranges + idx + 1,
+                    sizeof(*rl->ranges) * (rl->nb_ranges - idx - 1));
+            rl->nb_ranges--;
+            idx--;
+        }
+    }
+    /* merge with the following range: the entry at idx absorbs its
+     * successor, and the successor is the one that gets removed */
+    if (idx < rl->nb_ranges - 1) {
+        Range *cur  = &rl->ranges[idx];
+        Range *next = &rl->ranges[idx + 1];
+        if (cur->start + cur->len == next->start) {
+            cur->len += next->len;
+            memmove(rl->ranges + idx + 1, rl->ranges + idx + 2,
+                    sizeof(*rl->ranges) * (rl->nb_ranges - idx - 2));
+            rl->nb_ranges--;
+        }
+    }
+
+    return 0;
+}
--
2.30.2
More information about the ffmpeg-devel
mailing list