[FFmpeg-devel] [PATCH v3 3/3] lavc/dxvenc: improve compatibility with Resolume products
Emma Worley
emma at emma.gg
Mon Apr 21 19:59:33 EEST 2025
On Mon, Apr 21, 2025 at 4:19 AM Andreas Rheinhardt <
andreas.rheinhardt at outlook.com> wrote:
> Emma Worley:
> > Improves compatibility with Resolume products by adding an additional
> hashtable
> > for DXT color+LUT combinations, and padding the DXT texture to the next
> largest
> > 16x16 multiple. Produces identical packets to Resolume Alley in manual
> tests.
> >
>
> Are the files that this encoder currently produces invalid?
>
Yes, if they are unaligned.
>
> > Signed-off-by: Emma Worley <emma at emma.gg>
> > ---
> > libavcodec/dxvenc.c | 105 ++++++++++++++++++++----------------
> > tests/ref/fate/dxv3enc-dxt1 | 2 +-
> > 2 files changed, 60 insertions(+), 47 deletions(-)
> >
> > diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
> > index c64b4d5f15..737ccfff3e 100644
> > --- a/libavcodec/dxvenc.c
> > +++ b/libavcodec/dxvenc.c
> > @@ -34,6 +34,11 @@
> >
> > #define DXV_HEADER_LENGTH 12
> >
> > +/*
> > + * Resolume will refuse to display frames that are not padded to 16x16
> pixels.
> > + */
> > +#define DXV_ALIGN(x) FFALIGN(x, 16)
> > +
> > /*
> > * DXV uses LZ-like back-references to avoid copying words that have
> already
> > * appeared in the decompressed stream. Using a simple hash table (HT)
> > @@ -60,6 +65,7 @@ typedef struct DXVEncContext {
> >
> > struct AVHashtableContext *color_ht;
> > struct AVHashtableContext *lut_ht;
> > + struct AVHashtableContext *combo_ht;
> > } DXVEncContext;
> >
> > /* Converts an index offset value to a 2-bit opcode and pushes it to a
> stream.
> > @@ -94,10 +100,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
> > DXVEncContext *ctx = avctx->priv_data;
> > PutByteContext *pbc = &ctx->pbc;
> > void *value;
> > - uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16,
> pos = 0, op = 0;
> > + uint64_t combo;
> > + uint32_t color, lut, idx, combo_idx, prev_pos, old_pos, state = 16,
> pos = 0, op = 0;
> >
> > av_hashtable_clear(ctx->color_ht);
> > av_hashtable_clear(ctx->lut_ht);
> > + av_hashtable_clear(ctx->combo_ht);
> > +
> > + av_hashtable_set(ctx->combo_ht, ctx->tex_data, &pos);
> >
> > bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
> > av_hashtable_set(ctx->color_ht, ctx->tex_data, &pos);
> > @@ -107,51 +117,46 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
> > pos++;
> >
> > while (pos + 2 <= ctx->tex_size / 4) {
> > - idx = 0;
> > - color_idx = 0;
> > - lut_idx = 0;
> > + combo = AV_RL64(ctx->tex_data + pos * 4);
> > + combo_idx = av_hashtable_get(ctx->combo_ht, &combo, &prev_pos)
> ? pos - prev_pos : 0;
> > + idx = combo_idx;
> > + PUSH_OP(2);
> > + if (pos >= LOOKBACK_WORDS) {
> > + old_pos = pos - LOOKBACK_WORDS;
> > + if (av_hashtable_get(ctx->combo_ht, ctx->tex_data + old_pos
> * 4, &prev_pos) && prev_pos <= old_pos)
> > + av_hashtable_delete(ctx->combo_ht, ctx->tex_data +
> old_pos * 4);
> > + }
> > + av_hashtable_set(ctx->combo_ht, &combo, &pos);
> >
> > color = AV_RL32(ctx->tex_data + pos * 4);
> > - if (av_hashtable_get(ctx->color_ht, &color, &prev_pos))
> > - color_idx = pos - prev_pos;
> > - av_hashtable_set(ctx->color_ht, &color, &pos);
> > -
> > + if (!combo_idx) {
> > + idx = av_hashtable_get(ctx->color_ht, &color, &prev_pos) ?
> pos - prev_pos : 0;
> > + PUSH_OP(2);
> > + if (!idx)
> > + bytestream2_put_le32(pbc, color);
> > + }
> > if (pos >= LOOKBACK_WORDS) {
> > - uint32_t old_pos = pos - LOOKBACK_WORDS;
> > + old_pos = pos - LOOKBACK_WORDS;
> > if (av_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos
> * 4, &prev_pos) && prev_pos <= old_pos)
> > av_hashtable_delete(ctx->color_ht, ctx->tex_data +
> old_pos * 4);
> > }
> > + av_hashtable_set(ctx->color_ht, &color, &pos);
> > pos++;
> >
> > lut = AV_RL32(ctx->tex_data + pos * 4);
> > - if (color_idx && lut == AV_RL32(ctx->tex_data + (pos -
> color_idx) * 4)) {
> > - idx = color_idx;
> > - } else {
> > - idx = 0;
> > - if (av_hashtable_get(ctx->lut_ht, &lut, &prev_pos))
> > - lut_idx = pos - prev_pos;
> > - av_hashtable_set(ctx->lut_ht, &lut, &pos);
> > + if (!combo_idx) {
> > + idx = av_hashtable_get(ctx->lut_ht, &lut, &prev_pos) ? pos
> - prev_pos : 0;
> > + PUSH_OP(2);
> > + if (!idx)
> > + bytestream2_put_le32(pbc, lut);
> > }
> > if (pos >= LOOKBACK_WORDS) {
> > - uint32_t old_pos = pos - LOOKBACK_WORDS;
> > + old_pos = pos - LOOKBACK_WORDS;
> > if (av_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos *
> 4, &prev_pos) && prev_pos <= old_pos)
> > av_hashtable_delete(ctx->lut_ht, ctx->tex_data +
> old_pos * 4);
> > }
> > + av_hashtable_set(ctx->lut_ht, &lut, &pos);
> > pos++;
> > -
> > - PUSH_OP(2);
> > -
> > - if (!idx) {
> > - idx = color_idx;
> > - PUSH_OP(2);
> > - if (!idx)
> > - bytestream2_put_le32(pbc, color);
> > -
> > - idx = lut_idx;
> > - PUSH_OP(2);
> > - if (!idx)
> > - bytestream2_put_le32(pbc, lut);
> > - }
> > }
> >
> > return 0;
> > @@ -172,11 +177,23 @@ static int dxv_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> > return ret;
> >
> > if (ctx->enc.tex_funct) {
> > + uint8_t *dst_data[4] = {av_mallocz(DXV_ALIGN(avctx->width) *
> DXV_ALIGN(avctx->height) * 4), 0, 0, 0};
> > + int dst_linesize[4] = {DXV_ALIGN(avctx->width) * 4, 0, 0, 0};
> > +
> > + av_image_copy2(
> > + dst_data,
> > + dst_linesize,
> > + frame->data,
> > + frame->linesize,
> > + frame->format,
> > + avctx->width,
> > + avctx->height);
>
> 1. Unchecked allocation.
> 2. The buffer leaks unconditionally.
> 3. The copy is definitely unnecessary if the input is already properly
> aligned.
> 4. Can't we avoid the copy in case the input is not properly aligned?
5. Most of dst_data will be initially zeroed only to be overwritten with
> the final value again. This is wasteful.
>
Is there an idiomatic way to do a partial fill here?
> > +
> > ctx->enc.tex_data.out = ctx->tex_data;
> > - ctx->enc.frame_data.in = frame->data[0];
> > - ctx->enc.stride = frame->linesize[0];
> > - ctx->enc.width = avctx->width;
> > - ctx->enc.height = avctx->height;
> > + ctx->enc.frame_data.in = dst_data[0];
> > + ctx->enc.stride = dst_linesize[0];
> > + ctx->enc.width = DXV_ALIGN(avctx->width);
> > + ctx->enc.height = DXV_ALIGN(avctx->height);
> > ff_texturedsp_exec_compress_threads(avctx, &ctx->enc);
> > } else {
> > /* unimplemented: YCoCg formats */
> > @@ -216,14 +233,6 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> > return ret;
> > }
> >
> > - if (avctx->width % TEXTURE_BLOCK_W || avctx->height %
> TEXTURE_BLOCK_H) {
> > - av_log(avctx,
> > - AV_LOG_ERROR,
> > - "Video size %dx%d is not multiple of
> "AV_STRINGIFY(TEXTURE_BLOCK_W)"x"AV_STRINGIFY(TEXTURE_BLOCK_H)".\n",
> > - avctx->width, avctx->height);
> > - return AVERROR_INVALIDDATA;
> > - }
> > -
> > ff_texturedspenc_init(&texdsp);
> >
> > switch (ctx->tex_fmt) {
> > @@ -237,10 +246,10 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> > return AVERROR_INVALIDDATA;
> > }
> > ctx->enc.raw_ratio = 16;
> > - ctx->tex_size = avctx->width / TEXTURE_BLOCK_W *
> > - avctx->height / TEXTURE_BLOCK_H *
> > + ctx->tex_size = DXV_ALIGN(avctx->width) / TEXTURE_BLOCK_W *
> > + DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H *
> > ctx->enc.tex_ratio;
> > - ctx->enc.slice_count = av_clip(avctx->thread_count, 1,
> avctx->height / TEXTURE_BLOCK_H);
> > + ctx->enc.slice_count = av_clip(avctx->thread_count, 1,
> DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H);
> >
> > ctx->tex_data = av_malloc(ctx->tex_size);
> > if (!ctx->tex_data) {
> > @@ -251,6 +260,9 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> > if (ret < 0)
> > return ret;
> > ret = av_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t),
> sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> > + if (ret < 0)
> > + return ret;
> > + ret = av_hashtable_alloc(&ctx->combo_ht, sizeof(uint64_t),
> sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> > if (ret < 0)
> > return ret;
> >
> > @@ -265,6 +277,7 @@ static av_cold int dxv_close(AVCodecContext *avctx)
> >
> > av_hashtable_freep(&ctx->color_ht);
> > av_hashtable_freep(&ctx->lut_ht);
> > + av_hashtable_freep(&ctx->combo_ht);
> >
> > return 0;
> > }
> > diff --git a/tests/ref/fate/dxv3enc-dxt1 b/tests/ref/fate/dxv3enc-dxt1
> > index 74849a8031..e09000e181 100644
> > --- a/tests/ref/fate/dxv3enc-dxt1
> > +++ b/tests/ref/fate/dxv3enc-dxt1
> > @@ -3,4 +3,4 @@
> > #codec_id 0: dxv
> > #dimensions 0: 1920x1080
> > #sar 0: 1/1
> > -0, 0, 0, 1, 76521, 0xed387a5e
> > +0, 0, 0, 1, 76190, 0x0e6f0326
>
> Is padding the reason for this change or is it the new hashtable? Or both?
>
Both. For more context, the padding is solely for player compatibility
while the new hashtable optimize output file size.
With both I see identical packets to the de facto closed source encoder
(Resolume Alley) given the same DXT1 texture going into dxv_compress_dxt1.
>
> - Andreas
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
More information about the ffmpeg-devel
mailing list