[FFmpeg-devel] [PATCH v1] avcodec/v410dec: add the frame and slice threading support
Limin Wang
lance.lmwang at gmail.com
Fri Nov 22 18:09:09 EET 2019
Ping. Also pinging the v210dec threading support patch, which has been reviewed by Michael:
https://patchwork.ffmpeg.org/patch/15836/
If no developer is interested in this module, I would be glad to maintain it;
I think that is better than it having no maintainer. Please give feedback.
On Fri, Oct 25, 2019 at 11:36:44PM +0800, lance.lmwang at gmail.com wrote:
> From: Limin Wang <lance.lmwang at gmail.com>
>
> 1, Test server configuration:
> [root at localhost ~]# cat /proc/cpuinfo |grep "model name"
> model name : Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz
> model name : Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz
> ...
>
> [root at localhost ~]# free -h
> total used free shared buff/cache available
> Mem: 102G 997M 93G 16M 7.6G 100G
>
> 2, performance profiling
> master:
> ./ffmpeg -y -stream_loop 100 -i ./test.avi -benchmark -f null -
> video:529kB audio:40602kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
> bench: utime=16.932s stime=9.417s rtime=26.341s
> bench: maxrss=271056kB
> frame= 1010 fps= 38 q=-0.0 Lsize=N/A time=00:03:36.54 bitrate=N/A speed=8.22x
>
> patch applied:
> ./ffmpeg -y -threads 2 -thread_type slice+frame -stream_loop 100 -i ./test.avi -benchmark -f null -
> frame= 1010 fps= 52 q=-0.0 Lsize=N/A time=00:03:36.54 bitrate=N/A speed=11.1x
> video:529kB audio:40602kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
> bench: utime=23.164s stime=10.983s rtime=19.503s
> bench: maxrss=338252kB
>
> ./ffmpeg -y -threads 4 -thread_type slice+frame -stream_loop 100 -i ./test.avi -benchmark -f null -
> frame= 1010 fps= 71 q=-0.0 Lsize=N/A time=00:03:36.54 bitrate=N/A speed=15.3x
> video:529kB audio:40602kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
> bench: utime=21.610s stime=11.603s rtime=14.160s
> bench: maxrss=517060kB
>
>
> Signed-off-by: Limin Wang <lance.lmwang at gmail.com>
> ---
> libavcodec/v410dec.c | 72 +++++++++++++++++++++++++++++++-------------
> 1 file changed, 51 insertions(+), 21 deletions(-)
>
> diff --git a/libavcodec/v410dec.c b/libavcodec/v410dec.c
> index 48fab68273..7ad5eb8fb5 100644
> --- a/libavcodec/v410dec.c
> +++ b/libavcodec/v410dec.c
> @@ -24,6 +24,13 @@
> #include "libavutil/intreadwrite.h"
> #include "avcodec.h"
> #include "internal.h"
> +#include "thread.h"
> +
> +typedef struct ThreadData {
> + AVFrame *frame;
> + uint8_t *buf;
> + int stride;
> +} ThreadData;
>
> static av_cold int v410_decode_init(AVCodecContext *avctx)
> {
> @@ -42,31 +49,24 @@ static av_cold int v410_decode_init(AVCodecContext *avctx)
> return 0;
> }
>
> -static int v410_decode_frame(AVCodecContext *avctx, void *data,
> - int *got_frame, AVPacket *avpkt)
> +static int v410_decode_slice(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
> {
> - AVFrame *pic = data;
> - uint8_t *src = avpkt->data;
> + ThreadData *td = arg;
> + AVFrame *pic = td->frame;
> + int stride = td->stride;
> + int thread_count = av_clip(avctx->thread_count, 1, avctx->height/4);
> + int slice_start = (avctx->height * jobnr) / thread_count;
> + int slice_end = (avctx->height * (jobnr+1)) / thread_count;
> + const uint8_t *src = td->buf + stride * slice_start;
> uint16_t *y, *u, *v;
> uint32_t val;
> - int i, j, ret;
> -
> - if (avpkt->size < 4 * avctx->height * avctx->width) {
> - av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
> - return AVERROR(EINVAL);
> - }
> -
> - if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
> - return ret;
> + int i, j;
>
> - pic->key_frame = 1;
> - pic->pict_type = AV_PICTURE_TYPE_I;
> + y = (uint16_t*)pic->data[0] + slice_start * (pic->linesize[0] >> 1);
> + u = (uint16_t*)pic->data[1] + slice_start * (pic->linesize[1] >> 1);
> + v = (uint16_t*)pic->data[2] + slice_start * (pic->linesize[2] >> 1);
>
> - y = (uint16_t *)pic->data[0];
> - u = (uint16_t *)pic->data[1];
> - v = (uint16_t *)pic->data[2];
> -
> - for (i = 0; i < avctx->height; i++) {
> + for (i = slice_start; i < slice_end; i++) {
> for (j = 0; j < avctx->width; j++) {
> val = AV_RL32(src);
>
> @@ -82,6 +82,35 @@ static int v410_decode_frame(AVCodecContext *avctx, void *data,
> v += pic->linesize[2] >> 1;
> }
>
> + return 0;
> +}
> +
> +static int v410_decode_frame(AVCodecContext *avctx, void *data,
> + int *got_frame, AVPacket *avpkt)
> +{
> + ThreadData td;
> + ThreadFrame frame = { .f = data };
> + AVFrame *pic = data;
> + uint8_t *src = avpkt->data;
> + int ret;
> + int thread_count = av_clip(avctx->thread_count, 1, avctx->height/4);
> +
> + td.stride = avctx->width * 4;
> + if (avpkt->size < 4 * avctx->height * avctx->width) {
> + av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
> + return AVERROR(EINVAL);
> + }
> +
> + if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
> + return ret;
> +
> + pic->key_frame = 1;
> + pic->pict_type = AV_PICTURE_TYPE_I;
> +
> + td.buf = src;
> + td.frame = pic;
> + avctx->execute2(avctx, v410_decode_slice, &td, NULL, thread_count);
> +
> *got_frame = 1;
>
> return avpkt->size;
> @@ -94,5 +123,6 @@ AVCodec ff_v410_decoder = {
> .id = AV_CODEC_ID_V410,
> .init = v410_decode_init,
> .decode = v410_decode_frame,
> - .capabilities = AV_CODEC_CAP_DR1,
> + .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS |
> + AV_CODEC_CAP_FRAME_THREADS
> };
> --
> 2.21.0
>
More information about the ffmpeg-devel
mailing list