[FFmpeg-devel] [PATCH] Move vorbis_inverse_coupling from dsputil to vorbisdspcontext.
Ronald S. Bultje
rsbultje at gmail.com
Thu Jan 17 17:24:49 CET 2013
From: "Ronald S. Bultje" <rsbultje at gmail.com>
Conveniently (together with Justin's earlier patches), this makes
our vorbis decoder entirely independent of dsputil.
(Untested on ppc/arm.)
---
libavcodec/Makefile | 2 +-
libavcodec/arm/Makefile | 3 ++
libavcodec/arm/dsputil_init_neon.c | 5 --
libavcodec/arm/dsputil_neon.S | 64 -----------------------
libavcodec/arm/vorbisdsp_init_arm.c | 36 +++++++++++++
libavcodec/arm/vorbisdsp_neon.S | 83 +++++++++++++++++++++++++++++
libavcodec/dsputil.c | 4 --
libavcodec/dsputil.h | 2 -
libavcodec/ppc/Makefile | 1 +
libavcodec/ppc/dsputil_altivec.c | 25 ---------
libavcodec/ppc/vorbisdsp_altivec.c | 61 ++++++++++++++++++++++
libavcodec/vorbisdec.c | 6 +--
libavcodec/vorbisdsp.c | 33 ++++++++++++
libavcodec/vorbisdsp.h | 34 ++++++++++++
libavcodec/x86/Makefile | 1 +
libavcodec/x86/dsputil_mmx.c | 63 ----------------------
libavcodec/x86/vorbisdsp_init.c | 101 ++++++++++++++++++++++++++++++++++++
17 files changed, 357 insertions(+), 167 deletions(-)
create mode 100644 libavcodec/arm/vorbisdsp_init_arm.c
create mode 100644 libavcodec/arm/vorbisdsp_neon.S
create mode 100644 libavcodec/ppc/vorbisdsp_altivec.c
create mode 100644 libavcodec/vorbisdsp.c
create mode 100644 libavcodec/vorbisdsp.h
create mode 100644 libavcodec/x86/vorbisdsp_init.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f1a07d0..024e5cf 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -388,7 +388,7 @@ OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o
OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o
OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o
OBJS-$(CONFIG_VMNC_DECODER) += vmnc.o
-OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbis.o \
+OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbisdsp.o vorbis.o \
vorbis_data.o xiph.o
OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \
vorbis_data.o
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index ac486f4..27d9cb1 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -86,6 +86,9 @@ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \
arm/rv40dsp_neon.o \
arm/h264cmc_neon.o \
+OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o
+NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o
+
NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o
NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 34bb619..ee0e9af 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -154,8 +154,6 @@ void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len);
-void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
-
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
const int16_t *v3, int len, int mul);
@@ -307,9 +305,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->vector_clipf = ff_vector_clipf_neon;
c->vector_clip_int32 = ff_vector_clip_int32_neon;
- if (CONFIG_VORBIS_DECODER)
- c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
-
c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index a0d201c..ebc70ac 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -19,7 +19,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "config.h"
#include "libavutil/arm/asm.S"
function ff_clear_block_neon, export=1
@@ -532,69 +531,6 @@ function ff_add_pixels_clamped_neon, export=1
bx lr
endfunc
-#if CONFIG_VORBIS_DECODER
-function ff_vorbis_inverse_coupling_neon, export=1
- vmov.i32 q10, #1<<31
- subs r2, r2, #4
- mov r3, r0
- mov r12, r1
- beq 3f
-
- vld1.32 {d24-d25},[r1,:128]!
- vld1.32 {d22-d23},[r0,:128]!
- vcle.s32 q8, q12, #0
- vand q9, q11, q10
- veor q12, q12, q9
- vand q2, q12, q8
- vbic q3, q12, q8
- vadd.f32 q12, q11, q2
- vsub.f32 q11, q11, q3
-1: vld1.32 {d2-d3}, [r1,:128]!
- vld1.32 {d0-d1}, [r0,:128]!
- vcle.s32 q8, q1, #0
- vand q9, q0, q10
- veor q1, q1, q9
- vst1.32 {d24-d25},[r3, :128]!
- vst1.32 {d22-d23},[r12,:128]!
- vand q2, q1, q8
- vbic q3, q1, q8
- vadd.f32 q1, q0, q2
- vsub.f32 q0, q0, q3
- subs r2, r2, #8
- ble 2f
- vld1.32 {d24-d25},[r1,:128]!
- vld1.32 {d22-d23},[r0,:128]!
- vcle.s32 q8, q12, #0
- vand q9, q11, q10
- veor q12, q12, q9
- vst1.32 {d2-d3}, [r3, :128]!
- vst1.32 {d0-d1}, [r12,:128]!
- vand q2, q12, q8
- vbic q3, q12, q8
- vadd.f32 q12, q11, q2
- vsub.f32 q11, q11, q3
- b 1b
-
-2: vst1.32 {d2-d3}, [r3, :128]!
- vst1.32 {d0-d1}, [r12,:128]!
- it lt
- bxlt lr
-
-3: vld1.32 {d2-d3}, [r1,:128]
- vld1.32 {d0-d1}, [r0,:128]
- vcle.s32 q8, q1, #0
- vand q9, q0, q10
- veor q1, q1, q9
- vand q2, q1, q8
- vbic q3, q1, q8
- vadd.f32 q1, q0, q2
- vsub.f32 q0, q0, q3
- vst1.32 {d2-d3}, [r0,:128]!
- vst1.32 {d0-d1}, [r1,:128]!
- bx lr
-endfunc
-#endif
-
function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128]
vld1.32 {q1},[r1,:128]
diff --git a/libavcodec/arm/vorbisdsp_init_arm.c b/libavcodec/arm/vorbisdsp_init_arm.c
new file mode 100644
index 0000000..056291b
--- /dev/null
+++ b/libavcodec/arm/vorbisdsp_init_arm.c
@@ -0,0 +1,36 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/vorbisdsp.h"
+
+void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
+
+av_cold void ff_vorbisdsp_init_arm(VorbisDSPContext *c)
+{
+    int cpu_flags = av_get_cpu_flags(); /* CPU features detected at run time */
+
+    if (have_neon(cpu_flags)) {         /* install the NEON routine only when NEON is usable */
+        c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
+    }
+}
diff --git a/libavcodec/arm/vorbisdsp_neon.S b/libavcodec/arm/vorbisdsp_neon.S
new file mode 100644
index 0000000..7df876c
--- /dev/null
+++ b/libavcodec/arm/vorbisdsp_neon.S
@@ -0,0 +1,83 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_vorbis_inverse_coupling_neon, export=1
+ vmov.i32 q10, #1<<31 @ q10 = 0x80000000 in every lane: float sign-bit mask
+ subs r2, r2, #4 @ r2 -= 4; presumably r0/r1/r2 = mag/ang/blocksize (C prototype + AAPCS)
+ mov r3, r0 @ r3 = write pointer for the r0 array (r0 itself advances on loads)
+ mov r12, r1 @ r12 = write pointer for the r1 array
+ beq 3f @ count was exactly 4: only the single-vector tail is needed
+
+ vld1.32 {d24-d25},[r1,:128]! @ q12 = 4 values from the r1 array (ang)
+ vld1.32 {d22-d23},[r0,:128]! @ q11 = 4 values from the r0 array (mag)
+ vcle.s32 q8, q12, #0 @ q8 = mask of lanes where ang, compared as signed int, is <= 0
+ vand q9, q11, q10 @ q9 = sign bit of mag
+ veor q12, q12, q9 @ q12 = ang ^ sign(mag)
+ vand q2, q12, q8 @ q2 = q12 in masked lanes, 0 elsewhere
+ vbic q3, q12, q8 @ q3 = q12 in unmasked lanes, 0 elsewhere
+ vadd.f32 q12, q11, q2 @ q12 = mag + q2 -> new mag
+ vsub.f32 q11, q11, q3 @ q11 = mag - q3 -> new ang
+1: vld1.32 {d2-d3}, [r1,:128]! @ loop: load the next vector (q1 = ang, q0 = mag) ...
+ vld1.32 {d0-d1}, [r0,:128]!
+ vcle.s32 q8, q1, #0
+ vand q9, q0, q10
+ veor q1, q1, q9
+ vst1.32 {d24-d25},[r3, :128]! @ ... while storing the previous vector's new mag (q12)
+ vst1.32 {d22-d23},[r12,:128]! @ ... and its new ang (q11)
+ vand q2, q1, q8
+ vbic q3, q1, q8
+ vadd.f32 q1, q0, q2 @ q1 = new mag
+ vsub.f32 q0, q0, q3 @ q0 = new ang
+ subs r2, r2, #8 @ one full pass through the loop handles two vectors (8 floats)
+ ble 2f @ r2 <= 0: stop pre-loading and drain at 2
+ vld1.32 {d24-d25},[r1,:128]! @ second half: same steps with the q12/q11 register set
+ vld1.32 {d22-d23},[r0,:128]!
+ vcle.s32 q8, q12, #0
+ vand q9, q11, q10
+ veor q12, q12, q9
+ vst1.32 {d2-d3}, [r3, :128]! @ store the results computed in the first half
+ vst1.32 {d0-d1}, [r12,:128]!
+ vand q2, q12, q8
+ vbic q3, q12, q8
+ vadd.f32 q12, q11, q2
+ vsub.f32 q11, q11, q3
+ b 1b
+
+2: vst1.32 {d2-d3}, [r3, :128]! @ drain: store the pending results held in q1/q0
+ vst1.32 {d0-d1}, [r12,:128]!
+ it lt
+ bxlt lr @ r2 < 0: all done; r2 == 0 falls through to one last vector
+
+3: vld1.32 {d2-d3}, [r1,:128] @ tail: final vector, loaded without post-increment
+ vld1.32 {d0-d1}, [r0,:128]
+ vcle.s32 q8, q1, #0
+ vand q9, q0, q10
+ veor q1, q1, q9
+ vand q2, q1, q8
+ vbic q3, q1, q8
+ vadd.f32 q1, q0, q2
+ vsub.f32 q0, q0, q3
+ vst1.32 {d2-d3}, [r0,:128]! @ new mag written back in place
+ vst1.32 {d0-d1}, [r1,:128]! @ new ang written back in place
+ bx lr
+endfunc
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 91a4da5..b747e0a 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -36,7 +36,6 @@
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
-#include "vorbis.h"
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };
@@ -2817,9 +2816,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->try_8x8basis= try_8x8basis_c;
c->add_8x8basis= add_8x8basis_c;
-#if CONFIG_VORBIS_DECODER
- c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
-#endif
c->vector_fmul_reverse = vector_fmul_reverse_c;
c->vector_fmul_add = vector_fmul_add_c;
c->vector_clipf = vector_clipf_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 3a5c94a..2de14f1 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -346,8 +346,6 @@ typedef struct DSPContext {
void (*h261_loop_filter)(uint8_t *src, int stride);
- /* assume len is a multiple of 4, and arrays are 16-byte aligned */
- void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
/* assume len is a multiple of 16, and arrays are 32-byte aligned */
void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index c8c3025..e1ebf26 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -1,6 +1,7 @@
OBJS += ppc/dsputil_ppc.o \
ppc/videodsp_ppc.o \
+OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 9ad73ef..1007fbd 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -1283,29 +1283,6 @@ static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ui
return score;
}
-static void vorbis_inverse_coupling_altivec(float *mag, float *ang,
- int blocksize)
-{
- int i;
- vector float m, a;
- vector bool int t0, t1;
- const vector unsigned int v_31 = //XXX
- vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1));
- for (i = 0; i < blocksize; i += 4) {
- m = vec_ld(0, mag+i);
- a = vec_ld(0, ang+i);
- t0 = vec_cmple(m, (vector float)vec_splat_u32(0));
- t1 = vec_cmple(a, (vector float)vec_splat_u32(0));
- a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31));
- t0 = (vector bool int)vec_and(a, t1);
- t1 = (vector bool int)vec_andc(a, t1);
- a = vec_sub(m, (vector float)t1);
- m = vec_add(m, (vector float)t0);
- vec_stl(a, 0, ang+i);
- vec_stl(m, 0, mag+i);
- }
-}
-
/* next one assumes that ((line_size % 8) == 0) */
static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
@@ -1403,6 +1380,4 @@ void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
- if (CONFIG_VORBIS_DECODER)
- c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
}
diff --git a/libavcodec/ppc/vorbisdsp_altivec.c b/libavcodec/ppc/vorbisdsp_altivec.c
new file mode 100644
index 0000000..cebf0f7
--- /dev/null
+++ b/libavcodec/ppc/vorbisdsp_altivec.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006 Luca Barbato <lu_zero at gentoo.org>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+#include "libavcodec/vorbisdsp.h"
+
+#if HAVE_ALTIVEC
+static void vorbis_inverse_coupling_altivec(float *mag, float *ang,
+ int blocksize)
+{
+ int i;
+ vector float m, a;
+ vector bool int t0, t1;
+ const vector unsigned int v_31 = //XXX
+ vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1)); // 15 + 15 + 1 = 31 per lane, used as shift count
+ for (i = 0; i < blocksize; i += 4) { // four floats per iteration
+ m = vec_ld(0, mag+i);
+ a = vec_ld(0, ang+i);
+ t0 = vec_cmple(m, (vector float)vec_splat_u32(0)); // t0 = mask of lanes with m <= 0
+ t1 = vec_cmple(a, (vector float)vec_splat_u32(0)); // t1 = mask of lanes with a <= 0 (taken before a is modified)
+ a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31)); // mask << 31 leaves only the sign bit: flip sign of a where m <= 0
+ t0 = (vector bool int)vec_and(a, t1); // t0 = a in lanes where the a <= 0 mask is set, else 0
+ t1 = (vector bool int)vec_andc(a, t1); // t1 = a in lanes where that mask is clear, else 0
+ a = vec_sub(m, (vector float)t1); // new ang = m - t1
+ m = vec_add(m, (vector float)t0); // new mag = m + t0
+ vec_stl(a, 0, ang+i);
+ vec_stl(m, 0, mag+i);
+ }
+}
+#endif /* HAVE_ALTIVEC */
+
+void ff_vorbisdsp_init_ppc(VorbisDSPContext* c)
+{
+#if HAVE_ALTIVEC
+ if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { /* run-time check on top of the build-time HAVE_ALTIVEC guard */
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
+ }
+#endif /* HAVE_ALTIVEC */
+}
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 9bea908..6b75ae3 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -29,12 +29,12 @@
#include "libavutil/float_dsp.h"
#include "avcodec.h"
#include "get_bits.h"
-#include "dsputil.h"
#include "fft.h"
#include "fmtconvert.h"
#include "internal.h"
#include "vorbis.h"
+#include "vorbisdsp.h"
#include "xiph.h"
#define V_NB_BITS 8
@@ -125,7 +125,7 @@ typedef struct vorbis_context_s {
AVCodecContext *avccontext;
AVFrame frame;
GetBitContext gb;
- DSPContext dsp;
+ VorbisDSPContext dsp;
AVFloatDSPContext fdsp;
FmtConvertContext fmt_conv;
@@ -981,7 +981,7 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
int hdr_type, ret;
vc->avccontext = avccontext;
- ff_dsputil_init(&vc->dsp, avccontext);
+ ff_vorbisdsp_init(&vc->dsp);
avpriv_float_dsp_init(&vc->fdsp, avccontext->flags & CODEC_FLAG_BITEXACT);
ff_fmt_convert_init(&vc->fmt_conv, avccontext);
diff --git a/libavcodec/vorbisdsp.c b/libavcodec/vorbisdsp.c
new file mode 100644
index 0000000..1764438
--- /dev/null
+++ b/libavcodec/vorbisdsp.c
@@ -0,0 +1,33 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "vorbisdsp.h"
+#include "vorbis.h"
+
+void ff_vorbisdsp_init(VorbisDSPContext *dsp)
+{
+ dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling; /* default; the per-architecture inits below may replace it */
+
+ if (ARCH_X86) /* ARCH_* are presumably 0/1 build-time macros from config.h */
+ ff_vorbisdsp_init_x86(dsp);
+ if (ARCH_PPC)
+ ff_vorbisdsp_init_ppc(dsp);
+ if (ARCH_ARM)
+ ff_vorbisdsp_init_arm(dsp);
+}
diff --git a/libavcodec/vorbisdsp.h b/libavcodec/vorbisdsp.h
new file mode 100644
index 0000000..98153a0
--- /dev/null
+++ b/libavcodec/vorbisdsp.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VORBISDSP_H
+#define AVCODEC_VORBISDSP_H
+
+typedef struct VorbisDSPContext {
+ /* assume blocksize is a multiple of 4, and the mag/ang arrays are 16-byte aligned */
+ void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
+} VorbisDSPContext;
+
+void ff_vorbisdsp_init(VorbisDSPContext *dsp);
+
+/* for internal use only */
+void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp);
+void ff_vorbisdsp_init_arm(VorbisDSPContext *dsp);
+void ff_vorbisdsp_init_ppc(VorbisDSPContext *dsp);
+
+#endif /* AVCODEC_VORBISDSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index b5a7694..6069968 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
+OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
OBJS-$(CONFIG_VP5_DECODER) += x86/vp56dsp_init.o
OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 7ac6ecb..1a18bb2 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -1833,65 +1833,6 @@ void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
avg_pixels8_mmxext(dst, src, stride, 8);
}
-static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
-{
- int i;
- __asm__ volatile ("pxor %%mm7, %%mm7":);
- for (i = 0; i < blocksize; i += 2) {
- __asm__ volatile (
- "movq %0, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm1, %%mm3 \n\t"
- "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
- "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
- "pslld $31, %%mm2 \n\t" // keep only the sign bit
- "pxor %%mm2, %%mm1 \n\t"
- "movq %%mm3, %%mm4 \n\t"
- "pand %%mm1, %%mm3 \n\t"
- "pandn %%mm1, %%mm4 \n\t"
- "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
- "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
- "movq %%mm3, %1 \n\t"
- "movq %%mm0, %0 \n\t"
- : "+m"(mag[i]), "+m"(ang[i])
- :: "memory"
- );
- }
- __asm__ volatile ("femms");
-}
-
-static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
-{
- int i;
-
- __asm__ volatile (
- "movaps %0, %%xmm5 \n\t"
- :: "m"(ff_pdw_80000000[0])
- );
- for (i = 0; i < blocksize; i += 4) {
- __asm__ volatile (
- "movaps %0, %%xmm0 \n\t"
- "movaps %1, %%xmm1 \n\t"
- "xorps %%xmm2, %%xmm2 \n\t"
- "xorps %%xmm3, %%xmm3 \n\t"
- "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
- "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
- "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
- "xorps %%xmm2, %%xmm1 \n\t"
- "movaps %%xmm3, %%xmm4 \n\t"
- "andps %%xmm1, %%xmm3 \n\t"
- "andnps %%xmm1, %%xmm4 \n\t"
- "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
- "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
- "movaps %%xmm3, %1 \n\t"
- "movaps %%xmm0, %0 \n\t"
- : "+m"(mag[i]), "+m"(ang[i])
- :: "memory"
- );
- }
-}
-
static void vector_clipf_sse(float *dst, const float *src,
float min, float max, int len)
{
@@ -2242,8 +2183,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
}
-
- c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
@@ -2267,8 +2206,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
}
}
- c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
-
c->vector_clipf = vector_clipf_sse;
#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/vorbisdsp_init.c b/libavcodec/x86/vorbisdsp_init.c
new file mode 100644
index 0000000..5243095
--- /dev/null
+++ b/libavcodec/x86/vorbisdsp_init.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2006 Loren Merritt <lorenm at u.washington.edu>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/vorbisdsp.h"
+#include "dsputil_mmx.h" // for ff_pdw_80000000
+
+#if HAVE_INLINE_ASM
+#if ARCH_X86_32
+static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
+{
+ int i;
+ __asm__ volatile ("pxor %%mm7, %%mm7":); // mm7 = 0.0 in both lanes, kept across the loop
+ for (i = 0; i < blocksize; i += 2) { // two floats per iteration
+ __asm__ volatile (
+ "movq %0, %%mm0 \n\t"
+ "movq %1, %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t"
+ "movq %%mm1, %%mm3 \n\t"
+ "pfcmpge %%mm7, %%mm2 \n\t" // mm2 = mask of lanes with m >= 0.0
+ "pfcmpge %%mm7, %%mm3 \n\t" // mm3 = mask of lanes with a >= 0.0
+ "pslld $31, %%mm2 \n\t" // keep only the sign bit
+ "pxor %%mm2, %%mm1 \n\t" // a' = a with its sign flipped where m >= 0.0
+ "movq %%mm3, %%mm4 \n\t"
+ "pand %%mm1, %%mm3 \n\t" // mm3 = a' where a >= 0.0, else 0
+ "pandn %%mm1, %%mm4 \n\t" // mm4 = a' where the a >= 0.0 mask is clear, else 0
+ "pfadd %%mm0, %%mm3 \n\t" // new ang = m + mm3
+ "pfsub %%mm4, %%mm0 \n\t" // new mag = m - mm4
+ "movq %%mm3, %1 \n\t"
+ "movq %%mm0, %0 \n\t"
+ : "+m"(mag[i]), "+m"(ang[i])
+ :: "memory"
+ );
+ }
+ __asm__ volatile ("femms"); // leave MMX/3DNow! state
+}
+#endif
+
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+{
+ int i;
+
+ __asm__ volatile (
+ "movaps %0, %%xmm5 \n\t" // xmm5 = ff_pdw_80000000, used below as the sign-bit mask
+ :: "m"(ff_pdw_80000000[0])
+ );
+ for (i = 0; i < blocksize; i += 4) { // four floats per iteration
+ __asm__ volatile (
+ "movaps %0, %%xmm0 \n\t"
+ "movaps %1, %%xmm1 \n\t"
+ "xorps %%xmm2, %%xmm2 \n\t"
+ "xorps %%xmm3, %%xmm3 \n\t"
+ "cmpleps %%xmm0, %%xmm2 \n\t" // xmm2 = mask of lanes with 0.0 <= m
+ "cmpleps %%xmm1, %%xmm3 \n\t" // xmm3 = mask of lanes with 0.0 <= a
+ "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
+ "xorps %%xmm2, %%xmm1 \n\t" // a' = a with its sign flipped where 0.0 <= m
+ "movaps %%xmm3, %%xmm4 \n\t"
+ "andps %%xmm1, %%xmm3 \n\t" // xmm3 = a' where 0.0 <= a, else 0
+ "andnps %%xmm1, %%xmm4 \n\t" // xmm4 = a' where the 0.0 <= a mask is clear, else 0
+ "addps %%xmm0, %%xmm3 \n\t" // new ang = m + xmm3
+ "subps %%xmm4, %%xmm0 \n\t" // new mag = m - xmm4
+ "movaps %%xmm3, %1 \n\t"
+ "movaps %%xmm0, %0 \n\t"
+ : "+m"(mag[i]), "+m"(ang[i])
+ :: "memory"
+ );
+ }
+}
+#endif
+
+void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
+{
+#if HAVE_INLINE_ASM
+ int mm_flags = av_get_cpu_flags(); // CPU features detected at run time
+
+#if ARCH_X86_32
+ if (mm_flags & AV_CPU_FLAG_3DNOW)
+ dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
+#endif /* ARCH_X86_32 */
+ if (mm_flags & AV_CPU_FLAG_SSE) // tested last, so SSE replaces 3DNow! when both flags are set
+ dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
+#endif /* HAVE_INLINE_ASM */
+}
--
1.7.11.3
More information about the ffmpeg-devel
mailing list