[MPlayer-dev-eng] not sure of the best way to integrate altivec_yuv2packedX
Marc Hoffman
mmh at pleasantst.com
Wed Mar 17 05:55:36 CET 2004
The current template file doesn't seem to be friendly enough to let me
place this inside it easily. Can I just place it within the
yuv2rgb_altivec.c module I am developing? This will require
`#ifdef HAVE_ALTIVEC` checks in a bunch more places inside the swscaler.
/* Per-tap luma filter coefficients, one vector per tap with the tap value
 * replicated into all 8 lanes.  Filled by altivec_yuv2packedX, allocated by
 * initALTIVECScaler. */
static vector signed short *YCoeffs;
/* Per-tap chroma filter coefficients, same layout as YCoeffs. */
static vector signed short *CCoeffs;
/**
 * Vertically filter one output line from 16-bit luma/chroma source rows with
 * AltiVec and hand the result to the packed-pixel writer.
 *
 * @param c              scaler context (not read by this function)
 * @param lumFilter      lumFilterSize luma filter taps
 * @param lumSrc         lumFilterSize luma source rows
 * @param lumFilterSize  number of luma taps / rows
 * @param chrFilter      chrFilterSize chroma filter taps
 * @param chrSrc         chrFilterSize chroma source rows; U samples start at
 *                       index 0 and V samples at index 2048 of each row
 * @param chrFilterSize  number of chroma taps / rows
 * @param dest           output line, addressed as 32-bit pixels
 * @param dstW           output width in pixels
 * @param y              output line number (not read by this function)
 *
 * Pixels are produced in groups of 16.  A trailing group with fewer than 16
 * valid pixels is rendered into an aligned scratch buffer and only the valid
 * pixels are copied to dest, so nothing is stored past dstW pixels.
 *
 * Relies on the file-scope tables YCoeffs/CCoeffs having room for
 * lumFilterSize/chrFilterSize vectors, and on the cvtyuvtoRGB, out_rgba and
 * vec_clip helpers defined elsewhere.
 * NOTE(review): the full-group path assumes out_rgba advances the pointer it
 * is given by one 16-pixel group per call (the original code depends on the
 * same thing) -- confirm against its definition.
 * NOTE(review): the trailing group still loads 16 luma / 8 chroma samples
 * starting at pixel i, so the source rows are presumably padded beyond dstW
 * -- confirm.
 */
void
altivec_yuv2packedX (SwsContext *c,
                     int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                     uint8_t *dest, int dstW, int y)
{
    int i,j;
    short tmp __attribute__((aligned (16)));
    short *p;
    vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
    vector signed short R0,G0,B0,R1,G1,B1;
    vector unsigned char R,G,B,pels[3];
    vector unsigned char *out,*nout;
    /* accumulators start at 1<<3 and are shifted right by 4 afterwards */
    vector signed short RND = (vector signed short)(1<<3);
    vector unsigned short SCL = (vector unsigned short)(4);
    /* staging area for a partial trailing group (16 pixels * 4 bytes) */
    unsigned long scratch[16] __attribute__ ((aligned (16)));

    /* need to move this to global filters... done outside of loop */
    /* replicate every filter tap into all 8 lanes of its coefficient vector */
    for (i=0;i<lumFilterSize;i++) {
        tmp = lumFilter[i];
        p = (short *)&YCoeffs[i];
        for (j=0;j<8;j++)
            p[j] = tmp;
    }
    for (i=0;i<chrFilterSize;i++) {
        tmp = chrFilter[i];
        p = (short *)&CCoeffs[i];
        for (j=0;j<8;j++)
            p[j] = tmp;
    }

    out = (vector unsigned char *)dest;

    for (i=0; i<dstW; i+=16) {
        /* number of valid output pixels left, this group included */
        int remaining = dstW - i;

        Y0 = RND;
        Y1 = RND;
        /* extract 16 coeffs from lumSrc */
        for (j=0; j<lumFilterSize; j++) {
            X0 = vec_ld (0, &lumSrc[j][i]);
            X1 = vec_ld (16, &lumSrc[j][i]);
            Y0 = vec_mradds (X0, YCoeffs[j], Y0);
            Y1 = vec_mradds (X1, YCoeffs[j], Y1);
        }

        U = RND;
        V = RND;
        /* extract 8 coeffs from U,V */
        for (j=0; j<chrFilterSize; j++) {
            X = vec_ld (0, &chrSrc[j][i/2]);
            U = vec_mradds (X, CCoeffs[j], U);
            X = vec_ld (0, &chrSrc[j][i/2+2048]);
            V = vec_mradds (X, CCoeffs[j], V);
        }

        /* scale and clip signals */
        Y0 = vec_sra (Y0, SCL);
        Y1 = vec_sra (Y1, SCL);
        U = vec_sra (U, SCL);
        V = vec_sra (V, SCL);

        Y0 = vec_clip (Y0);
        Y1 = vec_clip (Y1);
        U = vec_clip (U);
        V = vec_clip (V);

        /* now we have
           Y0= y0 y1 y2 y3 y4 y5 y6 y7     Y1= y8 y9 y10 y11 y12 y13 y14 y15
           U=  u0 u1 u2 u3 u4 u5 u6 u7     V=  v0 v1 v2 v3 v4 v5 v6 v7

           and duplicate each chroma sample to line up with two luma samples:
           U0= u0 u0 u1 u1 u2 u2 u3 u3     U1= u4 u4 u5 u5 u6 u6 u7 u7
           V0= v0 v0 v1 v1 v2 v2 v3 v3     V1= v4 v4 v5 v5 v6 v6 v7 v7
        */
        U0 = vec_mergeh (U,U);
        V0 = vec_mergeh (V,V);
        U1 = vec_mergel (U,U);
        V1 = vec_mergel (V,V);

        cvtyuvtoRGB (Y0,U0,V0,&R0,&G0,&B0);
        cvtyuvtoRGB (Y1,U1,V1,&R1,&G1,&B1);

        R = vec_packs (R0,R1);
        G = vec_packs (G0,G1);
        B = vec_packs (B0,B1);

        if (remaining >= 16) {
            /* full group: write straight into the destination line */
            out_rgba (R,G,B,out);
        } else {
            /* partial group: render into scratch, then copy only the valid
               pixels so no bytes are written beyond dstW pixels */
            nout = (vector unsigned char *)scratch;
            out_rgba (R,G,B,nout);
            memcpy (&((uint32_t*)dest)[i], scratch,
                    (size_t)remaining * sizeof (uint32_t));
        }
    }
}
/**
 * Allocate the file-scope coefficient tables YCoeffs and CCoeffs.
 *
 * One vector is reserved per filter tap, using c->hLumFilterSize and
 * c->hChrFilterSize as the tap counts.
 *
 * @param c     scaler context supplying the filter sizes
 * @param dstW  unused
 * @return 0 on success, -1 if either allocation failed (in which case both
 *         tables are left NULL)
 *
 * Tables from an earlier call are not freed here; call releaseALTIVECScaler
 * first when re-initialising.
 * NOTE(review): altivec_yuv2packedX indexes these tables with the filter
 * sizes it is passed; confirm those never exceed the horizontal sizes used
 * here.
 * NOTE(review): the tables are accessed as vectors, which presumably needs
 * 16-byte aligned storage; confirm malloc guarantees that on the target.
 */
int
initALTIVECScaler (SwsContext *c, int dstW)
{
    int lumFilterSize = c->hLumFilterSize;
    int chrFilterSize = c->hChrFilterSize;

    (void)dstW;

    YCoeffs = malloc (sizeof *YCoeffs * lumFilterSize);
    CCoeffs = malloc (sizeof *CCoeffs * chrFilterSize);

    if (!YCoeffs || !CCoeffs) {
        /* do not leave a half-initialised pair behind */
        free (YCoeffs);
        free (CCoeffs);
        YCoeffs = NULL;
        CCoeffs = NULL;
        return -1;
    }
    return 0;
}
/**
 * Free the file-scope coefficient tables YCoeffs and CCoeffs.
 *
 * The pointers are reset to NULL afterwards, so calling this again (or
 * before any allocation) is harmless.
 *
 * @param c  unused
 * @return always 0
 */
int
releaseALTIVECScaler (SwsContext *c)
{
    (void)c;

    /* free(NULL) is a no-op, so no guards are needed */
    free (YCoeffs);
    YCoeffs = NULL;
    free (CCoeffs);
    CCoeffs = NULL;
    return 0;
}
More information about the MPlayer-dev-eng
mailing list