[FFmpeg-devel] [PATCH 1/2] libavcodec/ffv1: Support storing decorrelated LSB raw without rangecoder
Michael Niedermayer
michael at niedermayer.cc
Fri Mar 7 02:36:02 EET 2025
With 16-bit float, rawlsb 2 gives 0.66% better compression. This may be due
to the quantization tables being tuned for a smaller number of bits.
rawlsb 4 is about 30% faster than rawlsb 0, with about 1% worse compression.
The above was tested using ACES_OT_VWG_SampleFrames.
Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
---
libavcodec/ffv1.h | 4 ++++
libavcodec/ffv1_template.c | 19 ++++++++++---------
libavcodec/ffv1dec.c | 16 ++++++++++++++--
libavcodec/ffv1dec_template.c | 20 ++++++++++++++++----
libavcodec/ffv1enc.c | 30 ++++++++++++++++++++++++++++--
libavcodec/ffv1enc_template.c | 22 +++++++++++++++++++---
6 files changed, 91 insertions(+), 20 deletions(-)
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index c23d64d54a4..189004f7981 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -85,10 +85,13 @@ typedef struct FFV1SliceContext {
int slice_rct_by_coef;
int slice_rct_ry_coef;
int remap;
+ int rawlsb;
// RefStruct reference, array of MAX_PLANES elements
PlaneContext *plane;
PutBitContext pb;
+ PutBitContext rawlsb_pb;
+ GetBitContext rawlsb_gb;
RangeCoder c;
int ac_byte_count; ///< number of bytes used for AC coding
@@ -146,6 +149,7 @@ typedef struct FFV1Context {
int key_frame_ok;
int context_model;
int qtable;
+ int rawlsb;
int bits_per_raw_sample;
int packed_at_lsb;
diff --git a/libavcodec/ffv1_template.c b/libavcodec/ffv1_template.c
index abb90a12e49..10206702ee8 100644
--- a/libavcodec/ffv1_template.c
+++ b/libavcodec/ffv1_template.c
@@ -30,24 +30,25 @@ static inline int RENAME(predict)(TYPE *src, TYPE *last)
}
static inline int RENAME(get_context)(const int16_t quant_table[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE],
- TYPE *src, TYPE *last, TYPE *last2)
+ TYPE *src, TYPE *last, TYPE *last2, int rawlsb)
{
const int LT = last[-1];
const int T = last[0];
const int RT = last[1];
const int L = src[-1];
+ const int rawoff = (1<<rawlsb) >> 1;
if (quant_table[3][127] || quant_table[4][127]) {
const int TT = last2[0];
const int LL = src[-2];
- return quant_table[0][(L - LT) & MAX_QUANT_TABLE_MASK] +
- quant_table[1][(LT - T) & MAX_QUANT_TABLE_MASK] +
- quant_table[2][(T - RT) & MAX_QUANT_TABLE_MASK] +
- quant_table[3][(LL - L) & MAX_QUANT_TABLE_MASK] +
- quant_table[4][(TT - T) & MAX_QUANT_TABLE_MASK];
+ return quant_table[0][(L - LT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+ quant_table[1][(LT - T + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+ quant_table[2][(T - RT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+ quant_table[3][(LL - L + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+ quant_table[4][(TT - T + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK];
} else
- return quant_table[0][(L - LT) & MAX_QUANT_TABLE_MASK] +
- quant_table[1][(LT - T) & MAX_QUANT_TABLE_MASK] +
- quant_table[2][(T - RT) & MAX_QUANT_TABLE_MASK];
+ return quant_table[0][(L - LT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+ quant_table[1][(LT - T + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+ quant_table[2][(T - RT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK];
}
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 3926659ebc9..0f2956eabf7 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -227,6 +227,7 @@ static int decode_slice_header(const FFV1Context *f,
av_log(f->avctx, AV_LOG_ERROR, "unsupported remap %d\n", sc->remap);
return AVERROR_INVALIDDATA;
}
+ sc->rawlsb = ff_ffv1_get_symbol(c, state, 0);
}
}
@@ -248,6 +249,7 @@ static int decode_slice(AVCodecContext *c, void *arg)
FFV1Context *f = c->priv_data;
FFV1SliceContext *sc = arg;
int width, height, x, y, ret;
+ int chroma_width, chroma_height;
const int ps = av_pix_fmt_desc_get(f->pix_fmt)->comp[0].step;
AVFrame * const p = f->picture.f;
const int si = sc - f->slices;
@@ -284,6 +286,8 @@ static int decode_slice(AVCodecContext *c, void *arg)
height = sc->slice_height;
x = sc->slice_x;
y = sc->slice_y;
+ chroma_width = AV_CEIL_RSHIFT(width, f->chroma_h_shift);
+ chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
if (ac == AC_GOLOMB_RICE) {
if (f->combined_version >= 0x30002)
@@ -293,11 +297,17 @@ static int decode_slice(AVCodecContext *c, void *arg)
sc->c.bytestream_start + sc->ac_byte_count,
(sc->c.bytestream_end - sc->c.bytestream_start - sc->ac_byte_count) * 8);
}
+ if (sc->rawlsb) {
+ int lsb_size = sc->rawlsb * (width * height * (1 + f->transparency) + chroma_width * chroma_height * 2 * f->chroma_planes);
+ int lsb_size_bytes = (lsb_size + 7) >> 3;
+
+ init_get_bits(&sc->rawlsb_gb,
+ sc->c.bytestream_end - lsb_size_bytes - 3 - 5*!!f->ec,
+ lsb_size);
+ }
av_assert1(width && height);
if (f->colorspace == 0 && (f->chroma_planes || !f->transparency)) {
- const int chroma_width = AV_CEIL_RSHIFT(width, f->chroma_h_shift);
- const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
const int cx = x >> f->chroma_h_shift;
const int cy = y >> f->chroma_v_shift;
decode_plane(f, sc, &gb, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 1, ac);
@@ -328,6 +338,8 @@ static int decode_slice(AVCodecContext *c, void *arg)
int v;
get_rac(&sc->c, (uint8_t[]) { 129 });
v = sc->c.bytestream_end - sc->c.bytestream - 2 - 5*!!f->ec;
+ if (sc->rawlsb)
+ v -= get_bits_count(&sc->rawlsb_gb) + 7 >> 3;
if (v) {
av_log(f->avctx, AV_LOG_ERROR, "bytestream end mismatching by %d\n", v);
slice_set_damaged(f, sc);
diff --git a/libavcodec/ffv1dec_template.c b/libavcodec/ffv1dec_template.c
index f2c88734fe1..37caacb758d 100644
--- a/libavcodec/ffv1dec_template.c
+++ b/libavcodec/ffv1dec_template.c
@@ -43,10 +43,12 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
int i;
for (x = 0; x < w; x++) {
int v = 0;
- for (i=0; i<bits; i++) {
+ for (i=0; i<bits - sc->rawlsb; i++) {
uint8_t state = 128;
v += v + get_rac(c, &state);
}
+ if (sc->rawlsb)
+ v = (v << sc->rawlsb) + get_bits(&sc->rawlsb_gb, sc->rawlsb);
sample[1][x] = v;
}
return 0;
@@ -60,8 +62,13 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
return AVERROR_INVALIDDATA;
}
- context = RENAME(get_context)(quant_table,
- sample[1] + x, sample[0] + x, sample[1] + x);
+ if (sc->rawlsb) {
+ context = RENAME(get_context)(quant_table,
+ sample[1] + x, sample[0] + x, sample[1] + x, sc->rawlsb);
+ } else {
+ context = RENAME(get_context)(quant_table,
+ sample[1] + x, sample[0] + x, sample[1] + x, 0);
+ }
if (context < 0) {
context = -context;
sign = 1;
@@ -71,7 +78,12 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
av_assert2(context < p->context_count);
if (ac != AC_GOLOMB_RICE) {
- diff = get_symbol_inline(c, p->state[context], 1);
+ if (sc->rawlsb) {
+ const int rawoff = (1<<sc->rawlsb) >> 1;
+ diff = get_bits(&sc->rawlsb_gb, sc->rawlsb);
+ diff += (get_symbol_inline(c, p->state[context], 1) << sc->rawlsb) - rawoff;
+ } else
+ diff = get_symbol_inline(c, p->state[context], 1);
} else {
if (context == 0 && run_mode == 0)
run_mode = 1;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 1292b4227d3..b4080f29002 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -594,6 +594,9 @@ av_cold int ff_ffv1_encode_init(AVCodecContext *avctx)
if (s->ec == 2)
s->version = FFMAX(s->version, 4);
+ if (s->rawlsb)
+ s->version = FFMAX(s->version, 4);
+
if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
av_log(avctx, AV_LOG_ERROR, "Version 2 or 4 needed for requested features but version 2 or 4 is experimental and not enabled\n");
return AVERROR_INVALIDDATA;
@@ -920,6 +923,11 @@ static int encode_init_internal(AVCodecContext *avctx)
}
}
+ if (s->rawlsb > s->bits_per_raw_sample) {
+ av_log(avctx, AV_LOG_ERROR, "too many raw lsb\n");
+ return AVERROR(EINVAL);
+ }
+
ret = ff_ffv1_encode_init(avctx);
if (ret < 0)
@@ -1003,6 +1011,7 @@ static void encode_slice_header(FFV1Context *f, FFV1SliceContext *sc)
put_symbol(c, state, sc->slice_rct_ry_coef, 0);
}
put_symbol(c, state, sc->remap, 0);
+ put_symbol(c, state, sc->rawlsb, 0);
}
}
@@ -1113,6 +1122,8 @@ static int encode_slice(AVCodecContext *c, void *arg)
const int ps = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step;
int ret;
RangeCoder c_bak = sc->c;
+ const int chroma_width = AV_CEIL_RSHIFT(width, f->chroma_h_shift);
+ const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
const uint8_t *planes[4] = {p->data[0] + ps*x + y*p->linesize[0],
p->data[1] ? p->data[1] + ps*x + y*p->linesize[1] : NULL,
p->data[2] ? p->data[2] + ps*x + y*p->linesize[2] : NULL,
@@ -1127,6 +1138,8 @@ static int encode_slice(AVCodecContext *c, void *arg)
sc->slice_rct_ry_coef = 1;
}
+ sc->rawlsb = f->rawlsb; // we do not optimize this per slice, but other encoders could
+
retry:
if (f->key_frame)
ff_ffv1_clear_slice_state(f, sc);
@@ -1139,10 +1152,15 @@ retry:
sc->c.bytestream_start + sc->ac_byte_count,
sc->c.bytestream_end - sc->c.bytestream_start - sc->ac_byte_count);
}
+ if (sc->rawlsb) {
+ int lsb_size = sc->rawlsb * (width * height * (1 + !!f->transparency) + chroma_width * chroma_height * 2 * f->chroma_planes);
+ int lsb_size_bytes = (lsb_size + 7) >> 3;
+ init_put_bits(&sc->rawlsb_pb,
+ sc->c.bytestream_end - lsb_size_bytes,
+ lsb_size_bytes);
+ }
if (f->colorspace == 0 && c->pix_fmt != AV_PIX_FMT_YA8) {
- const int chroma_width = AV_CEIL_RSHIFT(width, f->chroma_h_shift);
- const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
const int cx = x >> f->chroma_h_shift;
const int cy = y >> f->chroma_v_shift;
@@ -1318,6 +1336,12 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
if (i > 0 || f->version > 2) {
av_assert0(bytes < pkt->size / f->slice_count);
memmove(buf_p, sc->c.bytestream_start, bytes);
+ if (sc->rawlsb) {
+ flush_put_bits(&sc->rawlsb_pb);
+ av_assert0(bytes + put_bytes_count(&sc->rawlsb_pb, 1) < pkt->size / f->slice_count);
+ memmove(buf_p + bytes, sc->rawlsb_pb.buf, put_bytes_count(&sc->rawlsb_pb, 1));
+ bytes += put_bytes_count(&sc->rawlsb_pb, 1);
+ }
av_assert0(bytes < (1 << 24));
AV_WB24(buf_p + bytes, bytes);
bytes += 3;
@@ -1377,6 +1401,8 @@ static const AVOption options[] = {
{ .i64 = QTABLE_8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" },
{ "greater8bit", NULL, 0, AV_OPT_TYPE_CONST,
{ .i64 = QTABLE_GT8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" },
+ { "rawlsb", "number of LSBs stored RAW", OFFSET(rawlsb), AV_OPT_TYPE_INT,
+ { .i64 = 0 }, 0, 8, VE },
{ NULL }
};
diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c
index af3354497ae..9e1252010dd 100644
--- a/libavcodec/ffv1enc_template.c
+++ b/libavcodec/ffv1enc_template.c
@@ -34,6 +34,8 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
int run_index = sc->run_index;
int run_count = 0;
int run_mode = 0;
+ const int rawoff = (1<<sc->rawlsb) >> 1;
+ const unsigned mask = (1<<sc->rawlsb) - 1;
if (ac != AC_GOLOMB_RICE) {
if (c->bytestream_end - c->bytestream < w * 35) {
@@ -51,10 +53,13 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
for (x = 0; x < w; x++) {
int i;
int v = sample[0][x];
- for (i = bits-1; i>=0; i--) {
+
+ for (i = bits-1; i>=sc->rawlsb; i--) {
uint8_t state = 128;
put_rac(c, &state, (v>>i) & 1);
}
+ if (sc->rawlsb)
+ put_bits(&sc->rawlsb_pb, sc->rawlsb, v & mask);
}
return 0;
}
@@ -62,8 +67,14 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
for (x = 0; x < w; x++) {
int diff, context;
- context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
- sample[0] + x, sample[1] + x, sample[2] + x);
+ if (sc->rawlsb) {
+ context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
+ sample[0] + x, sample[1] + x, sample[2] + x, sc->rawlsb);
+ } else {
+ //try to force a version with rawlsb optimized out
+ context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
+ sample[0] + x, sample[1] + x, sample[2] + x, 0);
+ }
diff = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x);
if (context < 0) {
@@ -74,6 +85,11 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
diff = fold(diff, bits);
if (ac != AC_GOLOMB_RICE) {
+ if (sc->rawlsb) {
+ diff += rawoff;
+ put_bits(&sc->rawlsb_pb, sc->rawlsb, diff & mask);
+ diff = diff >> sc->rawlsb; // Note, this will be biased on small rawlsb
+ }
if (pass1) {
put_symbol_inline(c, p->state[context], diff, 1, sc->rc_stat,
sc->rc_stat2[p->quant_table_index][context]);
--
2.48.1
More information about the ffmpeg-devel
mailing list