[FFmpeg-devel] [PATCH 3/7] Made QP, nonBQP, and pQPb arrays
Tucker DiNapoli
t.dinapoli42 at gmail.com
Fri Mar 27 21:51:44 CET 2015
From: Tucker DiNapoli <T.DiNapoli42 at gmail.com>
Also pulled the QP initialization out of the inner loop.
Added some temporary placeholder fields to PPContext so that the existing
code keeps working while the QP handling is being changed.
---
libpostproc/postprocess_internal.h | 6 ++
libpostproc/postprocess_template.c | 138 ++++++++++++++++++-------------------
2 files changed, 74 insertions(+), 70 deletions(-)
diff --git a/libpostproc/postprocess_internal.h b/libpostproc/postprocess_internal.h
index 1ebd974..ccf862a 100644
--- a/libpostproc/postprocess_internal.h
+++ b/libpostproc/postprocess_internal.h
@@ -143,6 +143,9 @@ typedef struct PPContext{
DECLARE_ALIGNED(8, uint64_t, pQPb);
DECLARE_ALIGNED(8, uint64_t, pQPb2);
+ DECLARE_ALIGNED(8, uint64_t, pQPb_block)[4];
+ DECLARE_ALIGNED(8, uint64_t, pQPb2_block)[4];
+
DECLARE_ALIGNED(8, uint64_t, mmxDcOffset)[64];
DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold)[64];
@@ -153,6 +156,9 @@ typedef struct PPContext{
int QP;
int nonBQP;
+ QP_STORE_T QP_block[4];
+ QP_STORE_T nonBQP_block[4];
+
int frameNum;
int cpuCaps;
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 6377ea7..344152e 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3416,7 +3416,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
#endif
const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
- int QP=0;
+ int QP=0, nonBQP=0;
/* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
if not than use a temporary buffer */
if(y+15 >= height){
@@ -3449,58 +3449,69 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
int endx = FFMIN(width, x+32);
uint8_t *dstBlockStart = dstBlock;
const uint8_t *srcBlockStart = srcBlock;
- for(; x < endx; x+=BLOCK_SIZE){
- prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
- prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
- prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
- prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
-
- RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
- srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
+ int qp_index = 0;
+ for(qp_index=0; qp_index < 4; qp_index+=1){
+ QP = QPptr[(x+qp_index*8)>>qpHShift];
+ nonBQP = nonBQPptr[(x+qp_index*8)>>qpHShift];
+ if(!isColor){
+ QP= (QP* QPCorrecture + 256*128)>>16;
+ nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
+ yHistogram[srcBlock[srcStride*12 + 4]]++;
+ }
+ c.QP_block[qp_index]= QP;
+ c.nonBQP_block[qp_index]= nonBQP;
+#if TEMPLATE_PP_MMX
+ __asm__ volatile(
+ "movd %1, %%mm7 \n\t"
+ "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
+ "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
+ "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
+ "movq %%mm7, %0 \n\t"
+ : "=m" (c.pQPb_block[qp_index])
+ : "r" (QP)
+ );
+#endif
+ }
+ for(; x < endx; x+=BLOCK_SIZE){
+ prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
+ prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
+ prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
+ prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
+
+ RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
+ srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
+
+ if(mode & LINEAR_IPOL_DEINT_FILTER)
+ RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
+ else if(mode & LINEAR_BLEND_DEINT_FILTER)
+ RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
+ else if(mode & MEDIAN_DEINT_FILTER)
+ RENAME(deInterlaceMedian)(dstBlock, dstStride);
+ else if(mode & CUBIC_IPOL_DEINT_FILTER)
+ RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
+ else if(mode & FFMPEG_DEINT_FILTER)
+ RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+ else if(mode & LOWPASS5_DEINT_FILTER)
+ RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
+ /* else if(mode & CUBIC_BLEND_DEINT_FILTER)
+ RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
+ */
+ dstBlock+=8;
+ srcBlock+=8;
+ }
- if(mode & LINEAR_IPOL_DEINT_FILTER)
- RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
- else if(mode & LINEAR_BLEND_DEINT_FILTER)
- RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
- else if(mode & MEDIAN_DEINT_FILTER)
- RENAME(deInterlaceMedian)(dstBlock, dstStride);
- else if(mode & CUBIC_IPOL_DEINT_FILTER)
- RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
- else if(mode & FFMPEG_DEINT_FILTER)
- RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
- else if(mode & LOWPASS5_DEINT_FILTER)
- RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
-/* else if(mode & CUBIC_BLEND_DEINT_FILTER)
- RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
-*/
- dstBlock+=8;
- srcBlock+=8;
- }
+ qp_index = 0;
+ dstBlock = dstBlockStart;
+ srcBlock = srcBlockStart;
- dstBlock = dstBlockStart;
- srcBlock = srcBlockStart;
+ for(x = startx; x < endx; x+=BLOCK_SIZE){
+ const int stride= dstStride;
+ //temporary: copy the per-block values into the old scalar fields so existing code keeps working while the QP handling is changed
+ c.QP = c.QP_block[qp_index];
+ c.nonBQP = c.nonBQP_block[qp_index];
+ c.pQPb = c.pQPb_block[qp_index];
+ c.pQPb2 = c.pQPb2_block[qp_index++];
- for(x = startx; x < endx; x+=BLOCK_SIZE){
- const int stride= dstStride;
- QP = QPptr[x>>qpHShift];
- c.nonBQP = nonBQPptr[x>>qpHShift];
- if(!isColor){
- QP= (QP* QPCorrecture + 256*128)>>16;
- c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
- yHistogram[srcBlock[srcStride*12 + 4]]++;
- }
- c.QP= QP;
-#if TEMPLATE_PP_MMX
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
- "movq %%mm7, %0 \n\t"
- : "=m" (c.pQPb)
- : "r" (QP)
- );
-#endif
/* only deblock if we have 2 blocks */
if(y + 8 < height){
if(mode & V_X1_FILTER)
@@ -3521,6 +3532,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
srcBlock+=8;
}
+ qp_index = 0;
dstBlock = dstBlockStart;
srcBlock = srcBlockStart;
@@ -3528,26 +3540,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
const int stride= dstStride;
av_unused uint8_t *tmpXchg;
- if(isColor){
- QP= QPptr[x>>qpHShift];
- c.nonBQP= nonBQPptr[x>>qpHShift];
- }else{
- QP= QPptr[x>>4];
- QP= (QP* QPCorrecture + 256*128)>>16;
- c.nonBQP= nonBQPptr[x>>4];
- c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
- }
- c.QP= QP;
+
+ c.QP = c.QP_block[qp_index];
+ c.nonBQP = c.nonBQP_block[qp_index];
+ c.pQPb = c.pQPb_block[qp_index];
+ c.pQPb2 = c.pQPb2_block[qp_index++];
#if TEMPLATE_PP_MMX
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
- "movq %%mm7, %0 \n\t"
- : "=m" (c.pQPb)
- : "r" (QP)
- );
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
#endif
/* check if we have a previous block to deblock it with dstBlock */
@@ -3569,7 +3567,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
#else
if(mode & H_X1_FILTER)
- horizX1Filter(dstBlock-4, stride, QP);
+ horizX1Filter(dstBlock-4, stride, c.QP);
else if(mode & H_DEBLOCK){
#if TEMPLATE_PP_ALTIVEC
DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
--
2.3.3
More information about the ffmpeg-devel
mailing list