[FFmpeg-cvslog] commit: H.264: Partially inline CABAC residual decoding (Jason Garrett-Glaser)
    git at videolan.org 
    git
       
    Sat Jan 15 19:37:54 CET 2011
    
    
  
ffmpeg | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Sat Jan 15 17:52:48 2011 +0000| [f14bdd8e756e7a2bc17f7202d880a6dc283df924] | committer: Jason Garrett-Glaser 
H.264: Partially inline CABAC residual decoding
Improves CABAC performance by about 1.2%.
Trick originates from x264 and has also been used in ffvp8.  It's useful because
coded block flags are usually zero, so it helps to have the early termination
inlined into the main function.
Originally committed as revision 26375 to svn://svn.ffmpeg.org/ffmpeg/trunk
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f14bdd8e756e7a2bc17f7202d880a6dc283df924
---
 libavcodec/h264_cabac.c |   60 ++++++++++++++++++++++++++--------------------
 1 files changed, 34 insertions(+), 26 deletions(-)
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index d732932..84dde0b 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1049,29 +1049,6 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
 #define CC &h->cabac
 #endif
 
-
-    /* cat: 0-> DC 16x16  n = 0
-     *      1-> AC 16x16  n = luma4x4idx
-     *      2-> Luma4x4   n = luma4x4idx
-     *      3-> DC Chroma n = iCbCr
-     *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
-     *      5-> Luma8x8   n = 4 * luma8x8idx
-     */
-
-    /* read coded block flag */
-    if( is_dc || cat != 5 ) {
-        if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
-            h->non_zero_count_cache[scan8[n]] = 0;
-
-#ifdef CABAC_ON_STACK
-            h->cabac.range     = cc.range     ;
-            h->cabac.low       = cc.low       ;
-            h->cabac.bytestream= cc.bytestream;
-#endif
-            return;
-        }
-    }
-
     significant_coeff_ctx_base = h->cabac_state
         + significant_coeff_flag_offset[MB_FIELD][cat];
     last_coeff_ctx_base = h->cabac_state
@@ -1172,19 +1149,50 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
 
 }
 
-static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
+static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
     decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1);
 }
 
-static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
+static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
 }
 
+/* cat: 0-> DC 16x16  n = 0
+ *      1-> AC 16x16  n = luma4x4idx
+ *      2-> Luma4x4   n = luma4x4idx
+ *      3-> DC Chroma n = iCbCr
+ *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
+ *      5-> Luma8x8   n = 4 * luma8x8idx */
+
+/* Partially inline the CABAC residual decode: inline the coded block flag.
+ * This has very little impact on binary size and improves performance
+ * because it allows improved constant propagation into get_cabac_cbf_ctx,
+ * as well as because most blocks have zero CBFs. */
+
+static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
+    /* read coded block flag */
+    if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 1 ) ] ) == 0 ) {
+        h->non_zero_count_cache[scan8[n]] = 0;
+        return;
+    }
+    decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff );
+}
+
+static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
+    /* read coded block flag */
+    if( cat != 5 && get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 0 ) ] ) == 0 ) {
+        h->non_zero_count_cache[scan8[n]] = 0;
+        return;
+    }
+    decode_cabac_residual_nondc_internal( h, block, cat, n, scantable, qmul, max_coeff );
+}
+
 /**
  * decodes a macroblock
  * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
  */
 int ff_h264_decode_mb_cabac(H264Context *h) {
+    START_TIMER;
     MpegEncContext * const s = &h->s;
     int mb_xy;
     int mb_type, partition_count, cbp = 0;
@@ -1717,6 +1725,6 @@ decode_intra_mb:
         h->ref_count[0] >>= 1;
         h->ref_count[1] >>= 1;
     }
-
+STOP_TIMER("test");
     return 0;
 }
    
    
More information about the ffmpeg-cvslog mailing list