[MPlayer-cvslog] r36489 - trunk/libaf/af_format.c
reimar
subversion at mplayerhq.hu
Sat Oct 26 11:17:28 CEST 2013
Author: reimar
Date: Sat Oct 26 11:17:28 2013
New Revision: 36489
Log:
ARM NEON optimization for float->int conversion.
Not optimal, but since lrintf is incredibly slow on
ARM it is > 10x faster than the old code.
A fallback solution that (incorrectly) defines lrintf(x)
as (int)(x) might make sense to avoid these kinds of issues
for the pure C code; however, we would still need to know whether
lrintf is slow or not.
Though maybe the better solution would be for all architectures to provide
a non-braindead implementation of lrintf.
Modified:
trunk/libaf/af_format.c
Modified: trunk/libaf/af_format.c
==============================================================================
--- trunk/libaf/af_format.c Sat Oct 26 10:30:29 2013 (r36488)
+++ trunk/libaf/af_format.c Sat Oct 26 11:17:28 2013 (r36489)
@@ -494,8 +494,34 @@ static void float2int(const float* in, v
((int8_t *)out)[i] = av_clip_int8(lrintf(128.0f * in[i]));
break;
case(2):
+#if HAVE_NEON
+ {
+ const float *in_end = in + len;
+ while (in < in_end - 7) {
+ __asm__(
+ "vld1.32 {q0,q1}, [%0]!\n\t"
+ "vcvt.s32.f32 q0, q0, #31\n\t"
+ "vqrshrn.s32 d0, q0, #15\n\t"
+ "vcvt.s32.f32 q1, q1, #31\n\t"
+ "vqrshrn.s32 d1, q1, #15\n\t"
+ "vst1.16 {q0}, [%1]!\n\t"
+ : "+r"(in), "+r"(out)
+ :: "q0", "q1", "memory");
+ }
+ while (in < in_end) {
+ __asm__(
+ "vld1.32 {d0[0]}, [%0]!\n\t"
+ "vcvt.s32.f32 d0, d0, #31\n\t"
+ "vqrshrn.s32 d0, q0, #15\n\t"
+ "vst1.16 {d0[0]}, [%1]!\n\t"
+ : "+r"(in), "+r"(out)
+ :: "d0", "memory");
+ }
+ }
+#else
for(i=0;i<len;i++)
((int16_t*)out)[i] = av_clip_int16(lrintf(32768.0f * in[i]));
+#endif
break;
case(3):
for(i=0;i<len;i++){
More information about the MPlayer-cvslog
mailing list