[FFmpeg-devel] [PATCH 09/10] diracdec: run the final decoding stage/idwt for every plane in parallel
Rostislav Pehlivanov
rpehlivanov at ob-encoder.com
Thu Jun 23 19:07:03 CEST 2016
Decoding a 12-bit 4K file is 27% faster with this change.
Signed-off-by: Rostislav Pehlivanov <rpehlivanov at obe.tv>
---
libavcodec/diracdec.c | 152 ++++++++++++++++++++++++++------------------------
1 file changed, 80 insertions(+), 72 deletions(-)
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
index 63eb4d1..ec45132 100644
--- a/libavcodec/diracdec.c
+++ b/libavcodec/diracdec.c
@@ -1804,99 +1804,107 @@ static int interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int
return 0;
}
-/**
- * Dirac Specification ->
- * 13.0 Transform data syntax. transform_data()
- */
-static int dirac_decode_frame_internal(DiracContext *s)
+static int decode_plane(AVCodecContext *avctx, void *arg, int jobnr, int thread)
{
DWTContext d;
- int y, i, comp, dsty;
- int ret;
+ int i, y, ret, dsty;
+ DiracContext *s = avctx->priv_data;
+ Plane *p = &s->plane[jobnr];
+ uint8_t *frame = s->current_picture->avframe->data[jobnr];
- if (s->low_delay) {
- /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
- for (comp = 0; comp < 3; comp++) {
- Plane *p = &s->plane[comp];
- memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
- }
- if (!s->zero_res) {
- if ((ret = decode_lowdelay(s)) < 0)
- return ret;
- }
+ /* FIXME: small resolutions */
+ for (i = 0; i < 4; i++)
+ s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
+
+ if (!s->zero_res && !s->low_delay)
+ {
+ memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
+ decode_component(s, jobnr); /* [DIRAC_STD] 13.4.1 core_transform_data() */
}
+ ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2,
+ s->wavelet_depth, s->bit_depth);
+ if (ret < 0)
+ return ret;
- for (comp = 0; comp < 3; comp++) {
- Plane *p = &s->plane[comp];
- uint8_t *frame = s->current_picture->avframe->data[comp];
+ if (!s->num_refs) { /* intra */
+ for (y = 0; y < p->height; y += 16) {
+ int idx = (s->bit_depth - 8) >> 1;
+ ff_spatial_idwt_slice2(&d, y+16); /* decode */
+ s->diracdsp.put_signed_rect_clamped[idx](frame + y*p->stride,
+ p->stride,
+ p->idwt.buf + y*p->idwt.stride,
+ p->idwt.stride, p->width, 16);
+ }
+ } else { /* inter */
+ int rowheight = p->ybsep*p->stride;
- /* FIXME: small resolutions */
- for (i = 0; i < 4; i++)
- s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
+ select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
- if (!s->zero_res && !s->low_delay)
- {
- memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
- decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
+ for (i = 0; i < s->num_refs; i++) {
+ int ret = interpolate_refplane(s, s->ref_pics[i], jobnr, p->width, p->height);
+ if (ret < 0)
+ return ret;
}
- ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2,
- s->wavelet_depth, s->bit_depth);
- if (ret < 0)
- return ret;
- if (!s->num_refs) { /* intra */
- for (y = 0; y < p->height; y += 16) {
- int idx = (s->bit_depth - 8) >> 1;
- ff_spatial_idwt_slice2(&d, y+16); /* decode */
- s->diracdsp.put_signed_rect_clamped[idx](frame + y*p->stride,
- p->stride,
- p->idwt.buf + y*p->idwt.stride,
- p->idwt.stride, p->width, 16);
- }
- } else { /* inter */
- int rowheight = p->ybsep*p->stride;
+ memset(s->mctmp, 0, 4*p->yoffset*p->stride);
- select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
+ dsty = -p->yoffset;
+ for (y = 0; y < s->blheight; y++) {
+ int h = 0,
+ start = FFMAX(dsty, 0);
+ uint16_t *mctmp = s->mctmp + y*rowheight;
+ DiracBlock *blocks = s->blmotion + y*s->blwidth;
- for (i = 0; i < s->num_refs; i++) {
- int ret = interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
- if (ret < 0)
- return ret;
- }
+ init_obmc_weights(s, p, y);
- memset(s->mctmp, 0, 4*p->yoffset*p->stride);
+ if (y == s->blheight-1 || start+p->ybsep > p->height)
+ h = p->height - start;
+ else
+ h = p->ybsep - (start - dsty);
+ if (h < 0)
+ break;
- dsty = -p->yoffset;
- for (y = 0; y < s->blheight; y++) {
- int h = 0,
- start = FFMAX(dsty, 0);
- uint16_t *mctmp = s->mctmp + y*rowheight;
- DiracBlock *blocks = s->blmotion + y*s->blwidth;
+ memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
+ mc_row(s, blocks, mctmp, jobnr, dsty);
- init_obmc_weights(s, p, y);
+ mctmp += (start - dsty)*p->stride + p->xoffset;
+ ff_spatial_idwt_slice2(&d, start + h); /* decode */
+ /* NOTE: add_rect_clamped hasn't been templated hence the shifts.
+ * idwt.stride is passed as pixels, not in bytes as in the rest of the decoder */
+ s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
+ (int16_t*)(p->idwt.buf) + start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h);
- if (y == s->blheight-1 || start+p->ybsep > p->height)
- h = p->height - start;
- else
- h = p->ybsep - (start - dsty);
- if (h < 0)
- break;
+ dsty += p->ybsep;
+ }
+ }
- memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
- mc_row(s, blocks, mctmp, comp, dsty);
+ return 0;
+}
- mctmp += (start - dsty)*p->stride + p->xoffset;
- ff_spatial_idwt_slice2(&d, start + h); /* decode */
- /* NOTE: add_rect_clamped hasn't been templated hence the shifts.
- * idwt.stride is passed as pixels, not in bytes as in the rest of the decoder */
- s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
- (int16_t*)(p->idwt.buf) + start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h);
+/**
+ * Dirac Specification ->
+ * 13.0 Transform data syntax. transform_data()
+ */
+static int dirac_decode_frame_internal(DiracContext *s)
+{
+ int ret, comp, res[3];
- dsty += p->ybsep;
- }
+ if (s->low_delay) {
+ /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
+ for (comp = 0; comp < 3; comp++) {
+ Plane *p = &s->plane[comp];
+ memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
+ }
+ if (!s->zero_res) {
+ if ((ret = decode_lowdelay(s)) < 0)
+ return ret;
}
}
+ s->avctx->execute2(s->avctx, decode_plane, NULL, res, 3);
+ for (comp = 0; comp < 3; comp++)
+ if (res[comp])
+ return res[comp];
return 0;
}
--
2.8.1.369.geae769a
More information about the ffmpeg-devel mailing list