[FFmpeg-devel] [PATCH 3/3] cbs_h265: Improve performance of writing slices
Andreas Rheinhardt
andreas.rheinhardt at googlemail.com
Sun Nov 4 06:48:42 EET 2018
Instead of using a combination of bitreader and bitwriter for copying data,
one can byte-align the read position (which makes the bitreader obsolete, so it is removed) to improve performance.
Given that the H265 slice segment header always has a byte length,
one can normally use memcpy.
With this patch the number of decicycles used to copy the slice data
went down from 181395 to 8672 for an 830 kb/s sample with 16384 runs.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at googlemail.com>
---
libavcodec/cbs_h2645.c | 70 +++++++++++++++++++++++++++++-------------
1 file changed, 48 insertions(+), 22 deletions(-)
diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
index d3a41fbdf0..d9ea498faa 100644
--- a/libavcodec/cbs_h2645.c
+++ b/libavcodec/cbs_h2645.c
@@ -1279,39 +1279,65 @@ static int cbs_h265_write_nal_unit(CodedBitstreamContext *ctx,
case HEVC_NAL_CRA_NUT:
{
H265RawSlice *slice = unit->content;
- GetBitContext gbc;
- int bits_left, end, zeroes;
err = cbs_h265_write_slice_segment_header(ctx, pbc, &slice->header);
if (err < 0)
return err;
if (slice->data) {
+ size_t rest = slice->data_size - (slice->data_bit_start + 7) / 8;
+ uint8_t *pos = slice->data + slice->data_bit_start / 8;
+
+ av_assert0(slice->data_bit_start >= 0 &&
+ 8 * slice->data_size > slice->data_bit_start);
+
if (slice->data_size * 8 + 8 > put_bits_left(pbc))
return AVERROR(ENOSPC);
- init_get_bits(&gbc, slice->data, slice->data_size * 8);
- skip_bits_long(&gbc, slice->data_bit_start);
-
- // Copy in two-byte blocks, but stop before copying the
- // rbsp_stop_one_bit in the final byte.
- while (get_bits_left(&gbc) > 23)
- put_bits(pbc, 16, get_bits(&gbc, 16));
-
- bits_left = get_bits_left(&gbc);
- end = get_bits(&gbc, bits_left);
-
- // rbsp_stop_one_bit must be present here.
- av_assert0(end);
- zeroes = ff_ctz(end);
- if (bits_left > zeroes + 1)
- put_bits(pbc, bits_left - zeroes - 1,
- end >> (zeroes + 1));
- put_bits(pbc, 1, 1);
- while (put_bits_count(pbc) % 8 != 0)
- put_bits(pbc, 1, 0);
+ if (!rest)
+ goto rbsp_stop_one_bit;
+
+ // First copy the remaining bits of the first byte
+ // The above check ensures that we do not accidentally
+ // copy beyond the rbsp_stop_one_bit.
+ if (slice->data_bit_start % 8)
+ put_bits(pbc, 8 - slice->data_bit_start % 8,
+ *pos++ & MAX_UINT_BITS(8 - slice->data_bit_start % 8));
+
+ if (put_bits_count(pbc) % 8 == 0) {
+ // If the writer is aligned at this point,
+ // memcpy can be used to improve performance.
+ // This is the normal case.
+ flush_put_bits(pbc);
+ memcpy(put_bits_ptr(pbc), pos, rest);
+ skip_put_bytes(pbc, rest);
+ break;
+ } else {
+ // If not, we have to copy manually.
+ // rbsp_stop_one_bit forces us to special-case
+ // the last byte.
+ for (; rest > 4; rest -= 4, pos += 4)
+ put_bits32(pbc, AV_RB32(pos));
+
+ for (; rest > 1; rest--, pos++)
+ put_bits(pbc, 8, *pos);
+ }
+
+ rbsp_stop_one_bit: {
+ int i;
+ uint8_t temp = rest ? *pos : *pos & MAX_UINT_BITS(8 -
+ slice->data_bit_start % 8);
+ av_assert0(temp);
+ i = ff_ctz(*pos);
+ temp = temp >> i;
+ i = rest ? (8 - i) : (8 - i - slice->data_bit_start % 8);
+ put_bits(pbc, i, temp);
+ if (put_bits_count(pbc) % 8)
+ put_bits(pbc, 8 - put_bits_count(pbc) % 8, 0U);
+ }
} else {
// No slice data - that was just the header.
+ // (Bitstream may be unaligned!)
}
}
break;
--
2.19.0
More information about the ffmpeg-devel
mailing list