[FFmpeg-devel] [PATCH] avfilter/vf_noise: move inline assembly to own file
Paul B Mahol
onemda at gmail.com
Thu Sep 12 18:16:03 CEST 2013
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
libavfilter/vf_noise.c | 166 +++------------------------------------------
libavfilter/vf_noise.h | 64 +++++++++++++++++
libavfilter/x86/Makefile | 1 +
libavfilter/x86/vf_noise.c | 145 +++++++++++++++++++++++++++++++++++++++
4 files changed, 219 insertions(+), 157 deletions(-)
create mode 100644 libavfilter/vf_noise.h
create mode 100644 libavfilter/x86/vf_noise.c
diff --git a/libavfilter/vf_noise.c b/libavfilter/vf_noise.c
index 57af5b6..0ee0736 100644
--- a/libavfilter/vf_noise.c
+++ b/libavfilter/vf_noise.c
@@ -26,46 +26,13 @@
#include "libavutil/opt.h"
#include "libavutil/imgutils.h"
-#include "libavutil/lfg.h"
#include "libavutil/parseutils.h"
#include "libavutil/pixdesc.h"
-#include "libavutil/x86/asm.h"
#include "avfilter.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
-
-#define MAX_NOISE 5120
-#define MAX_SHIFT 1024
-#define MAX_RES (MAX_NOISE-MAX_SHIFT)
-
-#define NOISE_UNIFORM 1
-#define NOISE_TEMPORAL 2
-#define NOISE_AVERAGED 8
-#define NOISE_PATTERN 16
-
-typedef struct {
- int strength;
- unsigned flags;
- AVLFG lfg;
- int seed;
- int8_t *noise;
- int8_t *prev_shift[MAX_RES][3];
-} FilterParams;
-
-typedef struct {
- const AVClass *class;
- int nb_planes;
- int linesize[4];
- int bytewidth[4];
- int height[4];
- FilterParams all;
- FilterParams param[4];
- int rand_shift[MAX_RES];
- int rand_shift_init;
- void (*line_noise)(uint8_t *dst, const uint8_t *src, int8_t *noise, int len, int shift);
- void (*line_noise_avg)(uint8_t *dst, const uint8_t *src, int len, int8_t **shift);
-} NoiseContext;
+#include "vf_noise.h"
typedef struct ThreadData {
AVFrame *in, *out;
@@ -203,8 +170,8 @@ static int config_input(AVFilterLink *inlink)
return 0;
}
-static inline void line_noise_c(uint8_t *dst, const uint8_t *src, int8_t *noise,
- int len, int shift)
+void ff_noise_filter_line_c(uint8_t *dst, const uint8_t *src, int8_t *noise,
+ int len, int shift)
{
int i;
@@ -216,70 +183,8 @@ static inline void line_noise_c(uint8_t *dst, const uint8_t *src, int8_t *noise,
}
}
-#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t"
-
-static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
- int8_t *noise, int len, int shift)
-{
-#if HAVE_MMX_INLINE
- x86_reg mmx_len= len&(~7);
- noise+=shift;
-
- __asm__ volatile(
- "mov %3, %%"REG_a" \n\t"
- "pcmpeqb %%mm7, %%mm7 \n\t"
- "psllw $15, %%mm7 \n\t"
- "packsswb %%mm7, %%mm7 \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%0, %%"REG_a"), %%mm0 \n\t"
- "movq (%1, %%"REG_a"), %%mm1 \n\t"
- "pxor %%mm7, %%mm0 \n\t"
- "paddsb %%mm1, %%mm0 \n\t"
- "pxor %%mm7, %%mm0 \n\t"
- "movq %%mm0, (%2, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- " js 1b \n\t"
- :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
- : "%"REG_a
- );
- if (mmx_len!=len)
- line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
-#endif
-}
-
-static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
- int8_t *noise, int len, int shift)
-{
-#if HAVE_MMXEXT_INLINE
- x86_reg mmx_len= len&(~7);
- noise+=shift;
-
- __asm__ volatile(
- "mov %3, %%"REG_a" \n\t"
- "pcmpeqb %%mm7, %%mm7 \n\t"
- "psllw $15, %%mm7 \n\t"
- "packsswb %%mm7, %%mm7 \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%0, %%"REG_a"), %%mm0 \n\t"
- "movq (%1, %%"REG_a"), %%mm1 \n\t"
- "pxor %%mm7, %%mm0 \n\t"
- "paddsb %%mm1, %%mm0 \n\t"
- "pxor %%mm7, %%mm0 \n\t"
- "movntq %%mm0, (%2, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- " js 1b \n\t"
- :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
- : "%"REG_a
- );
- if (mmx_len != len)
- line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
-#endif
-}
-
-static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src,
- int len, int8_t **shift)
+void ff_noise_filter_line_avg_c(uint8_t *dst, const uint8_t *src,
+ int len, int8_t **shift)
{
int i;
int8_t *src2 = (int8_t*)src;
@@ -290,50 +195,6 @@ static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src,
}
}
-static inline void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
- int len, int8_t **shift)
-{
-#if HAVE_MMX_INLINE
- x86_reg mmx_len= len&(~7);
-
- __asm__ volatile(
- "mov %5, %%"REG_a" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%1, %%"REG_a"), %%mm1 \n\t"
- "movq (%0, %%"REG_a"), %%mm0 \n\t"
- "paddb (%2, %%"REG_a"), %%mm1 \n\t"
- "paddb (%3, %%"REG_a"), %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm1, %%mm3 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
- "punpckhbw %%mm2, %%mm2 \n\t"
- "punpcklbw %%mm1, %%mm1 \n\t"
- "punpckhbw %%mm3, %%mm3 \n\t"
- "pmulhw %%mm0, %%mm1 \n\t"
- "pmulhw %%mm2, %%mm3 \n\t"
- "paddw %%mm1, %%mm1 \n\t"
- "paddw %%mm3, %%mm3 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "paddw %%mm2, %%mm3 \n\t"
- "psrlw $8, %%mm1 \n\t"
- "psrlw $8, %%mm3 \n\t"
- "packuswb %%mm3, %%mm1 \n\t"
- "movq %%mm1, (%4, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- " js 1b \n\t"
- :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
- "r" (dst+mmx_len), "g" (-mmx_len)
- : "%"REG_a
- );
-
- if (mmx_len != len){
- int8_t *shift2[3]={shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len};
- line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2);
- }
-#endif
-}
-
static void noise(uint8_t *dst, const uint8_t *src,
int dst_linesize, int src_linesize,
int width, int start, int end, NoiseContext *n, int comp)
@@ -418,7 +279,6 @@ static av_cold int init(AVFilterContext *ctx)
{
NoiseContext *n = ctx->priv;
int ret, i;
- int cpu_flags = av_get_cpu_flags();
for (i = 0; i < 4; i++) {
if (n->all.seed >= 0)
@@ -436,18 +296,10 @@ static av_cold int init(AVFilterContext *ctx)
return ret;
}
- n->line_noise = line_noise_c;
- n->line_noise_avg = line_noise_avg_c;
-
- if (HAVE_MMX_INLINE &&
- cpu_flags & AV_CPU_FLAG_MMX) {
- n->line_noise = line_noise_mmx;
- n->line_noise_avg = line_noise_avg_mmx;
- }
- if (HAVE_MMXEXT_INLINE &&
- cpu_flags & AV_CPU_FLAG_MMXEXT)
- n->line_noise = line_noise_mmxext;
-
+ n->line_noise = ff_noise_filter_line_c;
+ n->line_noise_avg = ff_noise_filter_line_avg_c;
+ if (ARCH_X86)
+ ff_noise_init_x86(n);
return 0;
}
diff --git a/libavfilter/vf_noise.h b/libavfilter/vf_noise.h
new file mode 100644
index 0000000..fb796b3
--- /dev/null
+++ b/libavfilter/vf_noise.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VF_NOISE_H
+#define AVFILTER_VF_NOISE_H
+
+#include "libavutil/lfg.h"
+#include "avfilter.h"
+
+#define MAX_NOISE 5120
+#define MAX_SHIFT 1024
+#define MAX_RES (MAX_NOISE-MAX_SHIFT)
+
+#define NOISE_UNIFORM 1
+#define NOISE_TEMPORAL 2
+#define NOISE_AVERAGED 8
+#define NOISE_PATTERN 16
+
+typedef struct FilterParams { /* one set of noise parameters; NoiseContext embeds one as `all` and four as `param[]` */
+ int strength; /* presumably the noise strength for this parameter set -- TODO confirm against the option table */
+ unsigned flags; /* presumably a bitmask of the NOISE_* values defined above -- TODO confirm */
+ AVLFG lfg; /* AVLFG comes from libavutil/lfg.h, included above */
+ int seed; /* init() in vf_noise.c tests all.seed >= 0; exact meaning of negative values not visible here */
+ int8_t *noise; /* signed 8-bit buffer; presumably the noise table fed to line_noise() -- TODO confirm */
+ int8_t *prev_shift[MAX_RES][3]; /* three pointers per entry; line_noise_avg() reads shift[0..2], presumably one of these rows -- TODO confirm */
+} FilterParams;
+
+typedef struct NoiseContext { /* private context of the noise filter; init() obtains it from ctx->priv */
+ const AVClass *class;
+ int nb_planes;
+ int linesize[4];
+ int bytewidth[4];
+ int height[4];
+ FilterParams all; /* init() checks all.seed inside its loop over the four param[] entries */
+ FilterParams param[4];
+ int rand_shift[MAX_RES];
+ int rand_shift_init; /* presumably a flag telling whether rand_shift[] has been filled -- TODO confirm */
+ void (*line_noise)(uint8_t *dst, const uint8_t *src, int8_t *noise, int len, int shift); /* set to ff_noise_filter_line_c in init(); ff_noise_init_x86() may override it */
+ void (*line_noise_avg)(uint8_t *dst, const uint8_t *src, int len, int8_t **shift); /* set to ff_noise_filter_line_avg_c in init(); ff_noise_init_x86() may override it */
+} NoiseContext;
+
+void ff_noise_init_x86(NoiseContext *s); /* defined in libavfilter/x86/vf_noise.c; called from init() when ARCH_X86 is set */
+
+void ff_noise_filter_line_c(uint8_t *dst, const uint8_t *src, int8_t *noise, int len, int shift); /* C version, defined in libavfilter/vf_noise.c; also handles the tail bytes of the MMX versions */
+void ff_noise_filter_line_avg_c(uint8_t *dst, const uint8_t *src, int len, int8_t **shift); /* C version, defined in libavfilter/vf_noise.c; also handles the tail bytes of line_noise_avg_mmx() */
+
+#endif /* AVFILTER_VF_NOISE_H */
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 0b19c4b..dc6cdb6 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,4 +1,5 @@
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
+OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
diff --git a/libavfilter/x86/vf_noise.c b/libavfilter/x86/vf_noise.c
new file mode 100644
index 0000000..0e660d7
--- /dev/null
+++ b/libavfilter/x86/vf_noise.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h"
+#include "libavfilter/vf_noise.h"
+
+#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t"
+
+#if HAVE_MMX_INLINE
+/**
+ * Add signed noise to one line of 8-bit samples using MMX.
+ *
+ * Full 8-byte groups are handled by the inline-assembly loop, which computes
+ * dst[i] = src[i] + noise[shift + i] with the sum clamped to 0..255: mm7
+ * holds 0x80 in every byte, and XORing with it before and after paddsb turns
+ * the signed saturating add into an unsigned clamp.
+ * The remaining len % 8 bytes are passed to ff_noise_filter_line_c().
+ *
+ * @param dst   destination line, written for len bytes
+ * @param src   source line, read for len bytes
+ * @param noise noise samples, read starting at noise + shift
+ * @param len   number of bytes to process
+ * @param shift offset added to noise before reading
+ *
+ * NOTE(review): no emms is issued here; presumably the caller clears the
+ * MMX state -- confirm.
+ */
+static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
+                           int8_t *noise, int len, int shift)
+{
+    x86_reg mmx_len = len & (~7);
+    noise += shift;
+
+    /* The loop checks its counter only after the first pass ("add" / "js"),
+     * so with mmx_len == 0 it would still process 8 bytes and run past the
+     * end of a line shorter than 8 bytes. Skip it in that case. */
+    if (mmx_len) {
+        __asm__ volatile(
+            "mov %3, %%"REG_a" \n\t"
+            "pcmpeqb %%mm7, %%mm7 \n\t"
+            "psllw $15, %%mm7 \n\t"
+            "packsswb %%mm7, %%mm7 \n\t"
+            ASMALIGN(4)
+            "1: \n\t"
+            "movq (%0, %%"REG_a"), %%mm0 \n\t"
+            "movq (%1, %%"REG_a"), %%mm1 \n\t"
+            "pxor %%mm7, %%mm0 \n\t"
+            "paddsb %%mm1, %%mm0 \n\t"
+            "pxor %%mm7, %%mm0 \n\t"
+            "movq %%mm0, (%2, %%"REG_a") \n\t"
+            "add $8, %%"REG_a" \n\t"
+            " js 1b \n\t"
+            :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
+            /* "memory": the asm stores through dst, which is not an output operand */
+            : "%"REG_a, "memory"
+        );
+    }
+    if (mmx_len != len)
+        ff_noise_filter_line_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
+}
+
+/**
+ * Averaged-noise line filter using MMX.
+ *
+ * Full 8-byte groups are handled by the inline-assembly loop: the three
+ * noise rows shift[0], shift[1] and shift[2] are summed bytewise (paddb),
+ * the sum is multiplied with the source samples (pmulhw on the unpacked
+ * bytes) and the result is added back to the source before being packed
+ * to bytes and stored in dst.
+ * The remaining len % 8 bytes are passed to ff_noise_filter_line_avg_c()
+ * with the three row pointers advanced past the bytes already processed.
+ *
+ * @param dst   destination line, written for len bytes
+ * @param src   source line, read for len bytes
+ * @param len   number of bytes to process
+ * @param shift array of three noise row pointers, each read for len bytes
+ *
+ * NOTE(review): no emms is issued here; presumably the caller clears the
+ * MMX state -- confirm.
+ */
+static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
+                               int len, int8_t **shift)
+{
+    x86_reg mmx_len = len & (~7);
+
+    /* The loop checks its counter only after the first pass ("add" / "js"),
+     * so with mmx_len == 0 it would still process 8 bytes and run past the
+     * end of a line shorter than 8 bytes. Skip it in that case. */
+    if (mmx_len) {
+        __asm__ volatile(
+            "mov %5, %%"REG_a" \n\t"
+            ASMALIGN(4)
+            "1: \n\t"
+            "movq (%1, %%"REG_a"), %%mm1 \n\t"
+            "movq (%0, %%"REG_a"), %%mm0 \n\t"
+            "paddb (%2, %%"REG_a"), %%mm1 \n\t"
+            "paddb (%3, %%"REG_a"), %%mm1 \n\t"
+            "movq %%mm0, %%mm2 \n\t"
+            "movq %%mm1, %%mm3 \n\t"
+            "punpcklbw %%mm0, %%mm0 \n\t"
+            "punpckhbw %%mm2, %%mm2 \n\t"
+            "punpcklbw %%mm1, %%mm1 \n\t"
+            "punpckhbw %%mm3, %%mm3 \n\t"
+            "pmulhw %%mm0, %%mm1 \n\t"
+            "pmulhw %%mm2, %%mm3 \n\t"
+            "paddw %%mm1, %%mm1 \n\t"
+            "paddw %%mm3, %%mm3 \n\t"
+            "paddw %%mm0, %%mm1 \n\t"
+            "paddw %%mm2, %%mm3 \n\t"
+            "psrlw $8, %%mm1 \n\t"
+            "psrlw $8, %%mm3 \n\t"
+            "packuswb %%mm3, %%mm1 \n\t"
+            "movq %%mm1, (%4, %%"REG_a") \n\t"
+            "add $8, %%"REG_a" \n\t"
+            " js 1b \n\t"
+            :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
+               "r" (dst+mmx_len), "g" (-mmx_len)
+            /* "memory": the asm stores through dst, which is not an output operand */
+            : "%"REG_a, "memory"
+        );
+    }
+
+    if (mmx_len != len){
+        int8_t *shift2[3]={shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len};
+        ff_noise_filter_line_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2);
+    }
+}
+#endif
+
+#if HAVE_MMXEXT_INLINE
+/**
+ * Add signed noise to one line of 8-bit samples using MMXEXT.
+ *
+ * Same arithmetic as the MMX loop (dst[i] = src[i] + noise[shift + i],
+ * clamped to 0..255 through the 0x80 bias held in mm7), but the result is
+ * written with the non-temporal store movntq instead of movq.
+ * The remaining len % 8 bytes are passed to ff_noise_filter_line_c().
+ *
+ * @param dst   destination line, written for len bytes
+ * @param src   source line, read for len bytes
+ * @param noise noise samples, read starting at noise + shift
+ * @param len   number of bytes to process
+ * @param shift offset added to noise before reading
+ *
+ * NOTE(review): neither sfence (after the movntq stores) nor emms is issued
+ * here; presumably the caller takes care of both -- confirm.
+ */
+static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
+                              int8_t *noise, int len, int shift)
+{
+    x86_reg mmx_len = len & (~7);
+    noise += shift;
+
+    /* The loop checks its counter only after the first pass ("add" / "js"),
+     * so with mmx_len == 0 it would still process 8 bytes and run past the
+     * end of a line shorter than 8 bytes. Skip it in that case. */
+    if (mmx_len) {
+        __asm__ volatile(
+            "mov %3, %%"REG_a" \n\t"
+            "pcmpeqb %%mm7, %%mm7 \n\t"
+            "psllw $15, %%mm7 \n\t"
+            "packsswb %%mm7, %%mm7 \n\t"
+            ASMALIGN(4)
+            "1: \n\t"
+            "movq (%0, %%"REG_a"), %%mm0 \n\t"
+            "movq (%1, %%"REG_a"), %%mm1 \n\t"
+            "pxor %%mm7, %%mm0 \n\t"
+            "paddsb %%mm1, %%mm0 \n\t"
+            "pxor %%mm7, %%mm0 \n\t"
+            "movntq %%mm0, (%2, %%"REG_a") \n\t"
+            "add $8, %%"REG_a" \n\t"
+            " js 1b \n\t"
+            :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
+            /* "memory": the asm stores through dst, which is not an output operand */
+            : "%"REG_a, "memory"
+        );
+    }
+    if (mmx_len != len)
+        ff_noise_filter_line_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
+}
+#endif
+
+/**
+ * Replace the line functions of a noise context with the x86 inline-assembly
+ * versions that were compiled in and that the running CPU reports support for.
+ *
+ * With MMX, both line_noise and line_noise_avg are replaced; with MMXEXT,
+ * line_noise is replaced once more by the MMXEXT version. The context is
+ * left untouched when HAVE_MMX_INLINE is 0 or the CPU flags do not match.
+ *
+ * @param s noise context whose function pointers are updated
+ */
+av_cold void ff_noise_init_x86(NoiseContext *s)
+{
+#if HAVE_MMX_INLINE
+    const int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_MMX) {
+        s->line_noise_avg = line_noise_avg_mmx;
+        s->line_noise     = line_noise_mmx;
+    }
+#if HAVE_MMXEXT_INLINE
+    /* Checked after MMX so the MMXEXT version takes precedence for line_noise. */
+    if (flags & AV_CPU_FLAG_MMXEXT) {
+        s->line_noise = line_noise_mmxext;
+    }
+#endif /* HAVE_MMXEXT_INLINE */
+#endif /* HAVE_MMX_INLINE */
+}
--
1.7.11.2
More information about the ffmpeg-devel
mailing list