[FFmpeg-devel] [PATCH 4/5] startcode: Don't return false positives
Andreas Rheinhardt
andreas.rheinhardt at gmail.com
Sun Jun 2 01:47:18 EEST 2019
Until now the function ff_startcode_find_candidate_c did not really
search for startcodes (the startcode 0x00 0x00 0x01 (used in
MPEG-1/2/4, VC-1 and H.264/5) is the only startcode meant here). Instead
it searched for zero bytes and returned the earliest position of a zero
byte. This of course led to lots of false positives - millions per GB
of video.
This has been changed: The first position of the buffer that
may be part of a four-byte startcode is now returned. This excludes zero
bytes that are known not to belong to a startcode, but zero bytes at the
end of a buffer that might be part of a startcode whose second part is in
the next buffer are also returned. This is in accordance with the
expectations of the current callers of ff_startcode_find_candidate_c,
namely the H.264 parser and the VC-1 parser. This is also the reason why
"find_candidate" in the name of the function has been kept.
Getting rid of lots of function calls with their accompanying overhead
of course brings a speedup with it: Here are some benchmarks for
a 30.2 Mb/s transport stream A (10 iterations of 8192 runs each) and
another 7.4 Mb/s transport stream B (10 iterations of 131072 runs each)
on an x64 Haswell:
| vanilla | aligned | current: aligned,
| (unaligned) | reads | no false positives
--|-------------|---------|-------------------
A | 411578 | 424503 | 323355
B | 55476 | 58326 | 44147
vanilla refers to the state before the switch to aligned reads;
"aligned reads" refers to the state after the switch to aligned reads.
(Calls to h264_find_frame_end have been timed; given that the amount
of calls to ff_startcode_find_candidate_c has been reduced considerably,
timing them directly would be worthless.)
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
---
libavcodec/h264dsp.h | 7 +--
libavcodec/startcode.c | 98 +++++++++++++++++++++++++++++++++++++-----
libavcodec/vc1dsp.h | 6 +--
3 files changed, 91 insertions(+), 20 deletions(-)
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index cbea3173c6..51de7a4e21 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -108,11 +108,8 @@ typedef struct H264DSPContext {
void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
/**
- * Search buf from the start for up to size bytes. Return the index
- * of a zero byte, or >= size if not found. Ideally, use lookahead
- * to filter out any zero bytes that are known to not be followed by
- * one or more further zero bytes and a one byte. Better still, filter
- * out any bytes that form the trailing_zero_8bits syntax element too.
+ * Search buf from the start for up to size bytes. Return the first 0x00
+ * that might be part of a (three or four) byte startcode.
*/
int (*startcode_find_candidate)(const uint8_t *buf, int size);
} H264DSPContext;
diff --git a/libavcodec/startcode.c b/libavcodec/startcode.c
index b027c191c0..f6105289f1 100644
--- a/libavcodec/startcode.c
+++ b/libavcodec/startcode.c
@@ -22,7 +22,8 @@
* @file
* Accelerated start code search function for start codes common to
* MPEG-1/2/4 video, VC-1, H.264/5
- * @author Michael Niedermayer <michaelni at gmx.at>
+ * @author Michael Niedermayer <michaelni at gmx.at>,
+ * @author Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
*/
#include "startcode.h"
@@ -31,20 +32,60 @@
int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
{
- const uint8_t *start = buf, *end = buf + size;
+ const uint8_t *start = buf, *end = buf + size, *temp;
#define INITIALIZATION(mod) do { \
- for (; buf < end && (uintptr_t)buf % mod; buf++) \
- if (!*buf) \
- return buf - start; \
+ if (end - start <= mod + 1) \
+ goto near_end; \
+ /* From this point on until the end of the MAIN_LOOP, \
+ * buf is the earliest possible position of a 0x00 \
+ * immediately preceding a startcode's 0x01, i.e. \
+ * everything from start to buf (inclusive) is known \
+ * to not contain a startcode's 0x01. */ \
+ buf += 1; \
+ temp = (const uint8_t *)((uintptr_t)(buf + mod - 1) / mod * mod); \
+ goto startcode_check; \
} while (0)
#define READ(bitness) AV_RN ## bitness ## A
+#define STARTCODE_CHECK(offset) do { \
+ if (buf[2 - offset] > 1) { \
+ buf += 3; \
+ } else if (buf[1 - offset]) { \
+ buf += 2; \
+ } else if (buf[-offset] || buf[2 - offset] == 0) { \
+ buf += 1; \
+ } else { \
+ buf += 1 - offset; \
+ goto found_startcode; \
+ } \
+ } while (0)
+
+ /* The MAIN_LOOP tries to read several bytes at once.
+ * A startcode's 0x00 0x01 or 0x00 0x00 will be detected
+ * by it if these bytes are contained within the bytes
+ * read at once. */
#define MAIN_LOOP(bitness, mask1, mask2) do { \
- for (; buf <= end - bitness / 8; buf += bitness / 8) \
- if ((~READ(bitness)(buf) & (READ(bitness)(buf) - mask1)) \
- & mask2) \
- break; \
+ for (; buf < end - bitness / 8 - 1; ) { \
+ if (!((~READ(bitness)(buf) & (READ(bitness)(buf) - mask1)) \
+ & mask2)) { \
+ buf += bitness / 8; \
+ continue; \
+ } \
+ temp = buf + bitness / 8; \
+ /* If the 0x01 of a startcode is at position buf + 1, \
+ * the following check detects it. \
+ * Because buf gets incremented before entering this \
+ * loop, buf - 1 may be evaluated. \
+ * Because temp + 1 < end, buf is always <= end during \
+ * the closing check so that the pointer comparison is \
+ * defined. */ \
+ startcode_check: \
+ do { \
+ STARTCODE_CHECK(1); \
+ } while (buf < temp); \
+ buf = temp; \
+ } \
} while (0)
#if HAVE_FAST_64BIT
@@ -54,8 +95,43 @@ int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
INITIALIZATION(4);
MAIN_LOOP(32, 0x01010101U, 0x80808080U);
#endif
- for (; buf < end; buf++)
- if (!*buf)
+
+ /* For the end, buf is the earliest possible position
+ * of a three-byte startcode's leading 0x00. This is
+ * done in order not to run into the undefined behaviour
+ * explained below. */
+ buf--;
+near_end:
+ while (2 < end - buf) {
+ /* The STARTCODE_CHECK in the MAIN_LOOP can't be used
+ * to check whether the file ends with a startcode,
+ * because for this buf would need to be end - 2.
+ * But depending on the value of end[-1], buf might get
+ * incremented by 3 and therefore be beyond end. But
+ * pointer arithmetic beyond end is undefined behaviour.
+ * So a STARTCODE_CHECK with a different offset is used
+ * here: It detects whether buf points to the first 0x00
+ * of a three-byte startcode. */
+ STARTCODE_CHECK(0);
+ }
+
+ /* No startcode found, but if the last bytes vanish,
+ * they may be part of a startcode whose end is not
+ * in the current buffer. Return the offset of the
+ * earliest element that may belong to a startcode. */
+ for (buf = end; buf > (end - start > 3 ? end - 3 : start); buf--)
+ if (buf[-1])
break;
+
+ return buf - start;
+
+found_startcode:
+ /* buf points to the byte preceding a startcode's 0x01.
+ * Check whether it is a four-byte startcode. */
+
+ buf -= 1;
+ if (buf > start && buf[-1] == 0)
+ buf--;
+
return buf - start;
}
diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
index 75db62b1b4..be9468add3 100644
--- a/libavcodec/vc1dsp.h
+++ b/libavcodec/vc1dsp.h
@@ -74,10 +74,8 @@ typedef struct VC1DSPContext {
int alpha, int width);
/**
- * Search buf from the start for up to size bytes. Return the index
- * of a zero byte, or >= size if not found. Ideally, use lookahead
- * to filter out any zero bytes that are known to not be followed by
- * one or more further zero bytes and a one byte.
+ * Search buf from the start for up to size bytes. Return the first 0x00
+ * that might be part of a (three or four) byte startcode.
*/
int (*startcode_find_candidate)(const uint8_t *buf, int size);
} VC1DSPContext;
--
2.21.0
More information about the ffmpeg-devel
mailing list