[FFmpeg-devel] [PATCH] SSE-optimized vector_clipf()
Loren Merritt
lorenm
Sun Aug 9 20:06:17 CEST 2009
This version is faster than x87 for me, though of course slower than
non-SIMD (scalar) SSE:
/**
 * Clamp a single float, passed as its raw 32-bit pattern, using only
 * unsigned integer comparisons.
 *
 * The trick assumes IEEE-754 binary32 floats and is only valid when the
 * lower bound is negative and the upper bound is positive.
 *
 * @param a        bit pattern of the value to clamp
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with its sign bit flipped
 * @return bit pattern of the clamped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    /* Negative floats have the sign bit set, so as unsigned integers they
     * compare above every non-negative float, and a larger magnitude gives
     * a larger integer: anything above mini is below the lower bound. */
    if (a > mini)
        a = mini;

    /* Flipping the sign bit lifts the non-negative floats into the upper
     * half of the unsigned range, so the same comparison now detects values
     * above the upper bound. The constant is unsigned on purpose: 1 << 31
     * on a signed int is undefined behaviour. */
    if ((a ^ (UINT32_C(1) << 31)) > maxisign)
        a = maxi;

    return a;
}
/**
 * Clamp every element of dst, in place, to the range [min, max].
 *
 * When min is negative and max is positive the clamp is done on the raw
 * bit patterns with integer comparisons via clipf_c_one(); this assumes
 * IEEE-754 binary32 floats. Any other combination of bounds falls back to
 * ordinary floating-point comparisons.
 *
 * NaN handling differs between the two paths: the integer path replaces a
 * NaN with the bound matching its sign bit, while the float path leaves a
 * NaN untouched because every comparison against it is false.
 *
 * @param dst array of len floats, modified in place
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements in dst; any value is accepted, and a
 *            non-positive value leaves dst untouched
 */
static void vector_clipf_c(float *dst, float min, float max, int len)
{
    int i;

    if (min < 0 && max > 0) {
        /* Type-pun through a union: reading a float object through a
         * uint32_t pointer would violate the strict aliasing rules. */
        union { float f; uint32_t u; } lo, hi, v;
        uint32_t maxisign;

        lo.f = min;
        hi.f = max;
        /* Unsigned constant: 1 << 31 on a signed int is undefined. */
        maxisign = hi.u ^ (UINT32_C(1) << 31);

        /* One element per iteration so that a len which is not a multiple
         * of the former unroll factor cannot run past the buffer. */
        for (i = 0; i < len; i++) {
            v.f = dst[i];
            v.u = clipf_c_one(v.u, lo.u, hi.u, maxisign);
            dst[i] = v.f;
        }
    } else {
        /* Generic path for bounds that do not straddle zero, where the
         * integer ordering trick above does not apply. */
        for (i = 0; i < len; i++) {
            if (dst[i] < min)
                dst[i] = min;
            else if (dst[i] > max)
                dst[i] = max;
        }
    }
}
--Loren Merritt
More information about the ffmpeg-devel
mailing list