[FFmpeg-devel] [PATCH 2/2] libavcodec/ffv1: Support storing LSB raw
Lynne
dev at lynne.ee
Wed Oct 16 03:13:35 EEST 2024
On 16/10/2024 01:17, Michael Niedermayer wrote:
> This makes a 16bit RGB raw sample 25% faster at a 2% loss of compression with rawlsb=4
>
> Please test and comment
>
> This stores the LSBs through non-binary range coding; this is simpler than using a
> separate coder.
> For cases where range coding is not wanted, it's probably best to use Golomb-Rice
> for everything.
>
> We also pass the LSBs through the decorrelation and context stages (which is basically free);
> this leads to slightly better compression than separating them earlier.
>
> Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
> ---
> libavcodec/ffv1.h | 2 ++
> libavcodec/ffv1_template.c | 19 ++++++++++---------
> libavcodec/ffv1dec.c | 2 ++
> libavcodec/ffv1dec_template.c | 16 +++++++++++++---
> libavcodec/ffv1enc.c | 15 ++++++++++++++-
> libavcodec/ffv1enc_template.c | 17 +++++++++++++++--
> libavcodec/rangecoder.h | 20 ++++++++++++++++++++
> libavcodec/tests/rangecoder.c | 9 +++++++++
> 8 files changed, 85 insertions(+), 15 deletions(-)
>
> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
> index 4f5a8ab2be7..02bfc33f680 100644
> --- a/libavcodec/ffv1.h
> +++ b/libavcodec/ffv1.h
> @@ -83,6 +83,7 @@ typedef struct FFV1SliceContext {
> int slice_coding_mode;
> int slice_rct_by_coef;
> int slice_rct_ry_coef;
> + int rawlsb;
>
> // RefStruct reference, array of MAX_PLANES elements
> PlaneContext *plane;
> @@ -139,6 +140,7 @@ typedef struct FFV1Context {
> int key_frame_ok;
> int context_model;
> int qtable;
> + int rawlsb;
>
> int bits_per_raw_sample;
> int packed_at_lsb;
> diff --git a/libavcodec/ffv1_template.c b/libavcodec/ffv1_template.c
> index abb90a12e49..10206702ee8 100644
> --- a/libavcodec/ffv1_template.c
> +++ b/libavcodec/ffv1_template.c
> @@ -30,24 +30,25 @@ static inline int RENAME(predict)(TYPE *src, TYPE *last)
> }
>
> static inline int RENAME(get_context)(const int16_t quant_table[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE],
> - TYPE *src, TYPE *last, TYPE *last2)
> + TYPE *src, TYPE *last, TYPE *last2, int rawlsb)
> {
> const int LT = last[-1];
> const int T = last[0];
> const int RT = last[1];
> const int L = src[-1];
> + const int rawoff = (1<<rawlsb) >> 1;
>
> if (quant_table[3][127] || quant_table[4][127]) {
> const int TT = last2[0];
> const int LL = src[-2];
> - return quant_table[0][(L - LT) & MAX_QUANT_TABLE_MASK] +
> - quant_table[1][(LT - T) & MAX_QUANT_TABLE_MASK] +
> - quant_table[2][(T - RT) & MAX_QUANT_TABLE_MASK] +
> - quant_table[3][(LL - L) & MAX_QUANT_TABLE_MASK] +
> - quant_table[4][(TT - T) & MAX_QUANT_TABLE_MASK];
> + return quant_table[0][(L - LT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
> + quant_table[1][(LT - T + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
> + quant_table[2][(T - RT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
> + quant_table[3][(LL - L + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
> + quant_table[4][(TT - T + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK];
> } else
> - return quant_table[0][(L - LT) & MAX_QUANT_TABLE_MASK] +
> - quant_table[1][(LT - T) & MAX_QUANT_TABLE_MASK] +
> - quant_table[2][(T - RT) & MAX_QUANT_TABLE_MASK];
> + return quant_table[0][(L - LT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
> + quant_table[1][(LT - T + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
> + quant_table[2][(T - RT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK];
> }
>
> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
> index 5c099e49ad4..fc96bfb4cea 100644
> --- a/libavcodec/ffv1dec.c
> +++ b/libavcodec/ffv1dec.c
> @@ -249,6 +249,8 @@ static int decode_slice_header(const FFV1Context *f,
> return AVERROR_INVALIDDATA;
> }
> }
> + if (f->micro_version > 2)
> + sc->rawlsb = get_symbol(c, state, 0);
> }
>
> return 0;
> diff --git a/libavcodec/ffv1dec_template.c b/libavcodec/ffv1dec_template.c
> index 2da6bd935dc..dbdcad7768e 100644
> --- a/libavcodec/ffv1dec_template.c
> +++ b/libavcodec/ffv1dec_template.c
> @@ -60,8 +60,13 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
> return AVERROR_INVALIDDATA;
> }
>
> - context = RENAME(get_context)(quant_table,
> - sample[1] + x, sample[0] + x, sample[1] + x);
> + if (sc->rawlsb) {
> + context = RENAME(get_context)(quant_table,
> + sample[1] + x, sample[0] + x, sample[1] + x, sc->rawlsb);
> + } else {
> + context = RENAME(get_context)(quant_table,
> + sample[1] + x, sample[0] + x, sample[1] + x, 0);
> + }
> if (context < 0) {
> context = -context;
> sign = 1;
> @@ -71,7 +76,12 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
> av_assert2(context < p->context_count);
>
> if (ac != AC_GOLOMB_RICE) {
> - diff = get_symbol_inline(c, p->state[context], 1);
> + if (sc->rawlsb) {
> + const int rawoff = (1<<sc->rawlsb) >> 1;
> + diff = get_rac_raw(c, sc->rawlsb);
> + diff += (get_symbol_inline(c, p->state[context], 1) << sc->rawlsb) - rawoff;
> + } else
> + diff = get_symbol_inline(c, p->state[context], 1);
> } else {
> if (context == 0 && run_mode == 0)
> run_mode = 1;
> diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
> index 0dbfebc1a1a..c574c739380 100644
> --- a/libavcodec/ffv1enc.c
> +++ b/libavcodec/ffv1enc.c
> @@ -416,7 +416,7 @@ static int write_extradata(FFV1Context *f)
> if (f->version == 3) {
> f->micro_version = 4;
> } else if (f->version == 4)
> - f->micro_version = 2;
> + f->micro_version = 3;
> put_symbol(&c, state, f->micro_version, 0);
> }
>
> @@ -564,6 +564,9 @@ static av_cold int encode_init(AVCodecContext *avctx)
> if (s->ec == 2)
> s->version = FFMAX(s->version, 4);
>
> + if (s->rawlsb)
> + s->version = FFMAX(s->version, 4);
> +
> if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
> av_log(avctx, AV_LOG_ERROR, "Version 2 or 4 needed for requested features but version 2 or 4 is experimental and not enabled\n");
> return AVERROR_INVALIDDATA;
> @@ -716,6 +719,11 @@ static av_cold int encode_init(AVCodecContext *avctx)
> }
> }
>
> + if (s->rawlsb > s->bits_per_raw_sample) {
> + av_log(avctx, AV_LOG_ERROR, "too many raw lsb\n");
> + return AVERROR(EINVAL);
> + }
> +
> if (s->ac == AC_RANGE_CUSTOM_TAB) {
> for (i = 1; i < 256; i++)
> s->state_transition[i] = ver2_state[i];
> @@ -958,6 +966,7 @@ static void encode_slice_header(FFV1Context *f, FFV1SliceContext *sc)
> put_symbol(c, state, sc->slice_rct_by_coef, 0);
> put_symbol(c, state, sc->slice_rct_ry_coef, 0);
> }
> + put_symbol(c, state, sc->rawlsb, 0);
> }
> }
>
> @@ -1077,6 +1086,8 @@ static int encode_slice(AVCodecContext *c, void *arg)
> sc->slice_rct_ry_coef = 1;
> }
>
> + sc->rawlsb = f->rawlsb; // we do not optimize this per slice, but other encoders could
> +
> retry:
> if (f->key_frame)
> ff_ffv1_clear_slice_state(f, sc);
> @@ -1291,6 +1302,8 @@ static const AVOption options[] = {
> { .i64 = 0 }, 0, 1, VE },
> { "qtable", "Quantization table", OFFSET(qtable), AV_OPT_TYPE_INT,
> { .i64 = -1 }, -1, 2, VE },
> + { "rawlsb", "number of LSBs stored RAW", OFFSET(rawlsb), AV_OPT_TYPE_INT,
> + { .i64 = 0 }, 0, 16, VE },
>
> { NULL }
> };
> diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c
> index bc14926ab95..848328c70af 100644
> --- a/libavcodec/ffv1enc_template.c
> +++ b/libavcodec/ffv1enc_template.c
> @@ -62,8 +62,14 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
> for (x = 0; x < w; x++) {
> int diff, context;
>
> - context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
> - sample[0] + x, sample[1] + x, sample[2] + x);
> + if (f->rawlsb) {
> + context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
> + sample[0] + x, sample[1] + x, sample[2] + x, f->rawlsb);
> + } else {
> + //try to force a version with rawlsb optimized out
> + context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
> + sample[0] + x, sample[1] + x, sample[2] + x, 0);
> + }
> diff = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x);
>
> if (context < 0) {
> @@ -74,6 +80,13 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
> diff = fold(diff, bits);
>
> if (ac != AC_GOLOMB_RICE) {
> + if (f->rawlsb) {
> + const int rawoff = (1<<f->rawlsb) >> 1;
> + const unsigned mask = (1<<f->rawlsb) - 1;
> + diff += rawoff;
> + put_rac_raw(c, (diff & mask), f->rawlsb);
> + diff = diff >> f->rawlsb; // Note, this will be biased on small rawlsb
> + }
> if (pass1) {
> put_symbol_inline(c, p->state[context], diff, 1, sc->rc_stat,
> sc->rc_stat2[p->quant_table_index][context]);
> diff --git a/libavcodec/rangecoder.h b/libavcodec/rangecoder.h
> index 89d178ac314..d02a65fa7da 100644
> --- a/libavcodec/rangecoder.h
> +++ b/libavcodec/rangecoder.h
> @@ -111,6 +111,16 @@ static inline void put_rac(RangeCoder *c, uint8_t *const state, int bit)
> renorm_encoder(c);
> }
>
> +static inline void put_rac_raw(RangeCoder *c, int bits, int len)
> +{
> + int r = c->range >> len;
> +
> + c->low += r * bits;
> + c->range = r;
> +
> + renorm_encoder(c);
> +}
> +
> static inline void refill(RangeCoder *c)
> {
> if (c->range < 0x100) {
> @@ -142,4 +152,14 @@ static inline int get_rac(RangeCoder *c, uint8_t *const state)
> }
> }
>
> +static inline int get_rac_raw(RangeCoder *c, int len)
> +{
> + int r = c->range >> len;
> + int bits = c->low / r;
> + c->low -= r * bits;
> + c->range = r;
> + refill(c);
> + return bits;
> +}
> +
> #endif /* AVCODEC_RANGECODER_H */
> diff --git a/libavcodec/tests/rangecoder.c b/libavcodec/tests/rangecoder.c
> index fd858535a5b..9205be2bf3f 100644
> --- a/libavcodec/tests/rangecoder.c
> +++ b/libavcodec/tests/rangecoder.c
> @@ -76,6 +76,10 @@ int main(void)
> for (i = 0; i < SIZE; i++)
> put_rac(&c, state, r[i] & 1);
>
> + for (i = 0; i < 30; i++) {
> + put_rac_raw(&c, r[i]&7, 3);
> + }
> +
> actual_length = ff_rac_terminate(&c, version);
>
> ff_init_range_decoder(&c, b, version ? SIZE : actual_length);
> @@ -87,6 +91,11 @@ int main(void)
> av_log(NULL, AV_LOG_ERROR, "rac failure at %d pass %d version %d\n", i, p, version);
> return 1;
> }
> + for (i = 0; i < 30; i++)
> + if ((r[i] & 7) != get_rac_raw(&c, 3)) {
> + av_log(NULL, AV_LOG_ERROR, "rac raw failure at %d pass %d version %d\n", i, p, version);
> + return 1;
> + }
>
> if (rac_check_termination(&c, version) < 0) {
> av_log(NULL, AV_LOG_ERROR, "rac failure at termination pass %d version %d\n", p, version);
You're interfering with the range coder by asking it to write very random
data in between each symbol.
You should do what Opus does and write the raw bits in a separate buffer
which gets merged at the very end.
I think rather than doing this, you should instead simply permit Golomb
coding to be used on high bit depths.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_0xA2FEA5F03F034464.asc
Type: application/pgp-keys
Size: 624 bytes
Desc: OpenPGP public key
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20241016/4cb9ac0d/attachment.key>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature.asc
Type: application/pgp-signature
Size: 236 bytes
Desc: OpenPGP digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20241016/4cb9ac0d/attachment.sig>
More information about the ffmpeg-devel
mailing list