[FFmpeg-devel] [PATCH] avcodec/utvideodec: add SIMD for restore_rgb_planes
Paul B Mahol
onemda at gmail.com
Tue Jun 27 01:15:17 EEST 2017
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
libavcodec/Makefile | 2 +-
libavcodec/utvideo.h | 2 +
libavcodec/utvideodec.c | 53 +++-----------------
libavcodec/utvideodsp.c | 82 +++++++++++++++++++++++++++++++
libavcodec/utvideodsp.h | 39 +++++++++++++++
libavcodec/x86/Makefile | 2 +
libavcodec/x86/utvideodsp.asm | 101 +++++++++++++++++++++++++++++++++++++++
libavcodec/x86/utvideodsp_init.c | 43 +++++++++++++++++
8 files changed, 277 insertions(+), 47 deletions(-)
create mode 100644 libavcodec/utvideodsp.c
create mode 100644 libavcodec/utvideodsp.h
create mode 100644 libavcodec/x86/utvideodsp.asm
create mode 100644 libavcodec/x86/utvideodsp_init.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f0cba88..b440a00 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -583,7 +583,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttaencdsp.o ttadata.o
OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o
OBJS-$(CONFIG_TXD_DECODER) += txd.o
OBJS-$(CONFIG_ULTI_DECODER) += ulti.o
-OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o
+OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o utvideodsp.o
OBJS-$(CONFIG_UTVIDEO_ENCODER) += utvideoenc.o utvideo.o
OBJS-$(CONFIG_V210_DECODER) += v210dec.o
OBJS-$(CONFIG_V210_ENCODER) += v210enc.o
diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
index 9559c83..a811785 100644
--- a/libavcodec/utvideo.h
+++ b/libavcodec/utvideo.h
@@ -30,6 +30,7 @@
#include "libavutil/common.h"
#include "avcodec.h"
#include "bswapdsp.h"
+#include "utvideodsp.h"
#include "lossless_videodsp.h"
#include "lossless_videoencdsp.h"
@@ -66,6 +67,7 @@ extern const int ff_ut_pred_order[5];
typedef struct UtvideoContext {
const AVClass *class;
AVCodecContext *avctx;
+ UTVideoDSPContext utdsp;
BswapDSPContext bdsp;
LLVidDSPContext llviddsp;
LLVidEncDSPContext llvidencdsp;
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index 0c6f89e..44841aa 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -333,50 +333,6 @@ fail:
return AVERROR_INVALIDDATA;
}
-static void restore_rgb_planes(AVFrame *frame, int width, int height)
-{
- uint8_t *src_r = (uint8_t *)frame->data[2];
- uint8_t *src_g = (uint8_t *)frame->data[0];
- uint8_t *src_b = (uint8_t *)frame->data[1];
- uint8_t r, g, b;
- int i, j;
-
- for (j = 0; j < height; j++) {
- for (i = 0; i < width; i++) {
- r = src_r[i];
- g = src_g[i];
- b = src_b[i];
- src_r[i] = r + g - 0x80;
- src_b[i] = b + g - 0x80;
- }
- src_r += frame->linesize[2];
- src_g += frame->linesize[0];
- src_b += frame->linesize[1];
- }
-}
-
-static void restore_rgb_planes10(AVFrame *frame, int width, int height)
-{
- uint16_t *src_r = (uint16_t *)frame->data[2];
- uint16_t *src_g = (uint16_t *)frame->data[0];
- uint16_t *src_b = (uint16_t *)frame->data[1];
- int r, g, b;
- int i, j;
-
- for (j = 0; j < height; j++) {
- for (i = 0; i < width; i++) {
- r = src_r[i];
- g = src_g[i];
- b = src_b[i];
- src_r[i] = (r + g - 0x200) & 0x3FF;
- src_b[i] = (b + g - 0x200) & 0x3FF;
- }
- src_r += frame->linesize[2] / 2;
- src_g += frame->linesize[0] / 2;
- src_b += frame->linesize[1] / 2;
- }
-}
-
#undef A
#undef B
#undef C
@@ -696,7 +652,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
}
}
}
- restore_rgb_planes(frame.f, avctx->width, avctx->height);
+ c->utdsp.restore_rgb_planes(frame.f->data[2], frame.f->data[0], frame.f->data[1],
+ frame.f->linesize[2], frame.f->linesize[0], frame.f->linesize[1],
+ avctx->width, avctx->height);
break;
case AV_PIX_FMT_GBRAP10:
case AV_PIX_FMT_GBRP10:
@@ -709,7 +667,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
if (ret)
return ret;
}
- restore_rgb_planes10(frame.f, avctx->width, avctx->height);
+ c->utdsp.restore_rgb_planes10((uint16_t *)frame.f->data[2], (uint16_t *)frame.f->data[0], (uint16_t *)frame.f->data[1],
+ frame.f->linesize[2] / 2, frame.f->linesize[0] / 2, frame.f->linesize[1] / 2,
+ avctx->width, avctx->height);
break;
case AV_PIX_FMT_YUV420P:
for (i = 0; i < 3; i++) {
@@ -830,6 +790,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
c->avctx = avctx;
+ ff_utvideodsp_init(&c->utdsp);
ff_bswapdsp_init(&c->bdsp);
ff_llviddsp_init(&c->llviddsp);
diff --git a/libavcodec/utvideodsp.c b/libavcodec/utvideodsp.c
new file mode 100644
index 0000000..0831a6b
--- /dev/null
+++ b/libavcodec/utvideodsp.c
@@ -0,0 +1,82 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "utvideodsp.h"
+
+/**
+ * Restore the red and blue planes of an 8-bit planar RGB image in place.
+ *
+ * Every red and blue sample gets the co-located green sample added and 0x80
+ * subtracted; storing into uint8_t keeps only the low 8 bits of the result.
+ * The green plane is only read.
+ *
+ * @param src_r      first row of the red plane (updated in place)
+ * @param src_g      first row of the green plane (read only)
+ * @param src_b      first row of the blue plane (updated in place)
+ * @param linesize_r step between consecutive red rows, in bytes
+ * @param linesize_g step between consecutive green rows, in bytes
+ * @param linesize_b step between consecutive blue rows, in bytes
+ * @param width      samples processed per row
+ * @param height     number of rows processed
+ */
+static void restore_rgb_planes_c(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
+                                 ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                 ptrdiff_t linesize_b, int width, int height)
+{
+    int x, y;
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) {
+            /* Fetch all three samples before writing any of them back. */
+            const uint8_t red   = src_r[x];
+            const uint8_t green = src_g[x];
+            const uint8_t blue  = src_b[x];
+
+            src_r[x] = red  + green - 0x80;
+            src_b[x] = blue + green - 0x80;
+        }
+
+        /* Step every plane pointer to its next row. */
+        src_r += linesize_r;
+        src_g += linesize_g;
+        src_b += linesize_b;
+    }
+}
+
+/**
+ * Restore the red and blue planes of a planar RGB image stored as 16-bit
+ * words, in place.
+ *
+ * Every red and blue sample becomes (sample + green - 0x200) & 0x3FF, i.e.
+ * only the low 10 bits of the sum are kept. The green plane is only read.
+ *
+ * @param src_r      first row of the red plane (updated in place)
+ * @param src_g      first row of the green plane (read only)
+ * @param src_b      first row of the blue plane (updated in place)
+ * @param linesize_r step between consecutive red rows, in uint16_t units
+ * @param linesize_g step between consecutive green rows, in uint16_t units
+ * @param linesize_b step between consecutive blue rows, in uint16_t units
+ * @param width      samples processed per row
+ * @param height     number of rows processed
+ */
+static void restore_rgb_planes10_c(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
+                                   ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                   ptrdiff_t linesize_b, int width, int height)
+{
+    int x, y;
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) {
+            /* Widen to int and read everything before the first store. */
+            const int red   = src_r[x];
+            const int green = src_g[x];
+            const int blue  = src_b[x];
+
+            src_r[x] = (red  + green - 0x200) & 0x3FF;
+            src_b[x] = (blue + green - 0x200) & 0x3FF;
+        }
+
+        /* The pointers are uint16_t *, so these steps count samples. */
+        src_r += linesize_r;
+        src_g += linesize_g;
+        src_b += linesize_b;
+    }
+}
+
+/**
+ * Fill in a UTVideoDSPContext.
+ *
+ * Both entries are first pointed at the portable C routines of this file;
+ * when ARCH_X86 is non-zero the context is then handed to
+ * ff_utvideodsp_init_x86(), which may replace them.
+ *
+ * @param c context to initialise
+ */
+av_cold void ff_utvideodsp_init(UTVideoDSPContext *c)
+{
+    /* C implementations first, so both entries are always set. */
+    c->restore_rgb_planes10 = restore_rgb_planes10_c;
+    c->restore_rgb_planes   = restore_rgb_planes_c;
+
+    if (ARCH_X86)
+        ff_utvideodsp_init_x86(c);
+}
diff --git a/libavcodec/utvideodsp.h b/libavcodec/utvideodsp.h
new file mode 100644
index 0000000..a3d2550
--- /dev/null
+++ b/libavcodec/utvideodsp.h
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_UTVIDEODSP_H
+#define AVCODEC_UTVIDEODSP_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include "libavutil/pixfmt.h"
+#include "config.h"
+
+/**
+ * Function table for the UT Video RGB plane-restoring routines.
+ * It is filled in by ff_utvideodsp_init().
+ */
+typedef struct UTVideoDSPContext {
+    /**
+     * In-place restore of the red and blue planes for uint8_t samples.
+     * src_g is the green plane; the linesize_* values are the per-row
+     * steps applied to the matching uint8_t plane pointers.
+     */
+    void (*restore_rgb_planes)(uint8_t *src_r,
+                               uint8_t *src_g,
+                               uint8_t *src_b,
+                               ptrdiff_t linesize_r,
+                               ptrdiff_t linesize_g,
+                               ptrdiff_t linesize_b,
+                               int width, int height);
+
+    /**
+     * In-place restore of the red and blue planes for samples stored as
+     * uint16_t. The linesize_* values are the per-row steps applied to the
+     * matching uint16_t plane pointers, i.e. they count samples, not bytes.
+     */
+    void (*restore_rgb_planes10)(uint16_t *src_r,
+                                 uint16_t *src_g,
+                                 uint16_t *src_b,
+                                 ptrdiff_t linesize_r,
+                                 ptrdiff_t linesize_g,
+                                 ptrdiff_t linesize_b,
+                                 int width, int height);
+} UTVideoDSPContext;
+
+void ff_utvideodsp_init(UTVideoDSPContext *c);
+void ff_utvideodsp_init_x86(UTVideoDSPContext *c);
+
+#endif /* AVCODEC_UTVIDEODSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index b86700b..0dbc465 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -65,6 +65,7 @@ OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o
+OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp_init.o
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
@@ -171,6 +172,7 @@ X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
X86ASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
X86ASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o
+X86ASM-OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp.o
X86ASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
X86ASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
X86ASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm
new file mode 100644
index 0000000..2e96f8b
--- /dev/null
+++ b/libavcodec/x86/utvideodsp.asm
@@ -0,0 +1,101 @@
+;******************************************************************************
+;* SIMD-optimized UTVideo functions
+;* Copyright (c) 2017 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pb_128: times 16 db 128
+pw_512: times 8 dw 512
+pw_1023: times 8 dw 1023
+
+SECTION .text
+
+INIT_XMM sse2
+
+; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
+;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
+;                         int width, int height)
+;
+; In place, for every byte of every row: r += g - 128 and b += g - 128
+; (byte arithmetic, so the result wraps modulo 256). The green plane is
+; only read.
+;
+; Each inner iteration loads and stores one whole mmsize-byte vector with
+; aligned moves, so a row is always handled in full vectors, also when width
+; is not a multiple of mmsize.
+; NOTE(review): this presumes mmsize-aligned rows with enough padding after
+; the last pixel - confirm against the buffers the decoder passes in.
+;
+; The function asks for 9 general purpose registers, which only exist on
+; x86-64, so it is assembled for x86-64 only; the C init code references it
+; under ARCH_X86_64 only as well.
+%if ARCH_X86_64
+cglobal restore_rgb_planes, 8,9,4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
+    movsxdifnidn wq, wd
+    add      src_rq, wq         ; point past the end of the first row ...
+    add      src_gq, wq
+    add      src_bq, wq
+    neg          wq             ; ... and walk it with a negative offset
+    mova         m3, [pb_128]
+.nextrow:
+    mov          xq, wq
+
+    .loop:
+        mova         m0, [src_rq + xq]
+        mova         m1, [src_gq + xq]
+        mova         m2, [src_bq + xq]
+        psubb        m1, m3     ; g - 128
+        paddb        m0, m1     ; r + g - 128
+        paddb        m2, m1     ; b + g - 128
+        mova  [src_rq+xq], m0
+        mova  [src_bq+xq], m2
+        add          xq, mmsize
+        jl .loop
+
+    add      src_rq, linesize_rq
+    add      src_gq, linesize_gq
+    add      src_bq, linesize_bq
+    sub          hd, 1          ; height is an int: bits 32-63 of hq are undefined
+    jg .nextrow
+    REP_RET
+%endif
+
+; void restore_rgb_planes10(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
+;                           ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
+;                           int width, int height)
+;
+; In place, for every 16-bit sample of every row:
+;     r = (r + g - 512) & 1023,  b = (b + g - 512) & 1023
+; The green plane is only read.
+;
+; width and the three linesizes are doubled below before being used as byte
+; offsets, i.e. they are taken as counts of 16-bit samples (the caller
+; passes linesize / 2).
+;
+; Each inner iteration loads and stores one whole mmsize-byte vector with
+; aligned moves.
+; NOTE(review): this presumes mmsize-aligned rows with enough padding after
+; the last pixel - confirm against the buffers the decoder passes in.
+;
+; The function asks for 9 general purpose registers, which only exist on
+; x86-64, so it is assembled for x86-64 only; the C init code references it
+; under ARCH_X86_64 only as well.
+%if ARCH_X86_64
+cglobal restore_rgb_planes10, 8,9,5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
+    movsxdifnidn wq, wd
+    shl          wq, 1          ; samples -> bytes, keeping the sign-extended value
+    shl linesize_rq, 1
+    shl linesize_gq, 1
+    shl linesize_bq, 1
+    add      src_rq, wq         ; point past the end of the first row ...
+    add      src_gq, wq
+    add      src_bq, wq
+    mova         m3, [pw_512]
+    mova         m4, [pw_1023]
+    neg          wq             ; ... and walk it with a negative offset
+.nextrow:
+    mov          xq, wq
+
+    .loop:
+        mova         m0, [src_rq + xq]
+        mova         m1, [src_gq + xq]
+        mova         m2, [src_bq + xq]
+        paddw        m0, m1     ; r + g
+        paddw        m2, m1     ; b + g
+        psubw        m0, m3     ; - 512
+        psubw        m2, m3
+        pand         m0, m4     ; keep the low 10 bits
+        pand         m2, m4
+        mova  [src_rq+xq], m0
+        mova  [src_bq+xq], m2
+        add          xq, mmsize
+        jl .loop
+
+    add      src_rq, linesize_rq
+    add      src_gq, linesize_gq
+    add      src_bq, linesize_bq
+    sub          hd, 1          ; height is an int: bits 32-63 of hq are undefined
+    jg .nextrow
+    REP_RET
+%endif
diff --git a/libavcodec/x86/utvideodsp_init.c b/libavcodec/x86/utvideodsp_init.c
new file mode 100644
index 0000000..d415692
--- /dev/null
+++ b/libavcodec/x86/utvideodsp_init.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/utvideodsp.h"
+
+void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
+ ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+ ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
+ ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+ ptrdiff_t linesize_b, int width, int height);
+
+/**
+ * Install the x86 SIMD versions of the UT Video DSP routines.
+ *
+ * Both SSE2 functions are selected only when ARCH_X86_64 is non-zero and
+ * EXTERNAL_SSE2() accepts the flags returned by av_get_cpu_flags();
+ * otherwise the context is left untouched.
+ *
+ * @param c context whose function pointers may be overridden
+ */
+av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
+{
+    const int cpu_flags = av_get_cpu_flags();
+
+    if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
+        c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
+        c->restore_rgb_planes   = ff_restore_rgb_planes_sse2;
+    }
+}
--
2.9.3
More information about the ffmpeg-devel
mailing list