[FFmpeg-devel] [PATCH] avcodec/dxv: add support for "high" quality mode
Rostislav Pehlivanov
atomnuker at gmail.com
Thu Apr 12 15:57:34 EEST 2018
On 12 April 2018 at 11:31, Paul B Mahol <onemda at gmail.com> wrote:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
> libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++
> ++++++++++++++++++++++--
> 1 file changed, 978 insertions(+), 28 deletions(-)
>
>
> +
> +static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int
> a)
> +{
> + int r, g, b;
> +
> + co = co - 127;
> + cg = cg - 127;
> +
> + r = av_clip_uint8(yo + co - cg);
> + g = av_clip_uint8(yo + cg);
> + b = av_clip_uint8(yo - co - cg);
> +
> + return (a << 24) | (b << 16) | (g << 8) | (r);
> +}
Cinepak all over again? We do not do, and are never going to do, colorspace
conversion inside decoders. Output it as YCoCg by writing directly to the
data planes; we support that. It doesn't matter that it is perfectly mappable
to RGB if you allow for 2 more bits of precision.
On 12 April 2018 at 11:31, Paul B Mahol <onemda at gmail.com> wrote:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
> libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++
> ++++++++++++++++++++++--
> 1 file changed, 978 insertions(+), 28 deletions(-)
>
> diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
> index 529e211258..6308163735 100644
> --- a/libavcodec/dxv.c
> +++ b/libavcodec/dxv.c
> @@ -1,6 +1,7 @@
> /*
> * Resolume DXV decoder
> * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara at gmail.com>
> + * Copyright (C) 2018 Paul B Mahol
> *
> * This file is part of FFmpeg.
> *
> @@ -23,6 +24,7 @@
>
> #include "libavutil/imgutils.h"
>
> +#include "mathops.h"
> #include "avcodec.h"
> #include "bytestream.h"
> #include "internal.h"
> @@ -34,53 +36,250 @@ typedef struct DXVContext {
> TextureDSPContext texdsp;
> GetByteContext gbc;
>
> - uint8_t *tex_data; // Compressed texture
> - int tex_rat; // Compression ratio
> - int tex_step; // Distance between blocks
> - int64_t tex_size; // Texture size
> + uint8_t *tex_data; // Compressed texture
> + uint8_t *ctex_data; // Compressed texture
> + int tex_rat; // Compression ratio
> + int tex_step; // Distance between blocks
> + int ctex_step; // Distance between blocks
> + int64_t tex_size; // Texture size
> + int64_t ctex_size; // Texture size
>
> /* Optimal number of slices for parallel decoding */
> int slice_count;
>
> + uint8_t *op_data[4]; // Opcodes
> + int64_t op_size[4]; // Opcodes size
> +
> + int texture_block_w;
> + int texture_block_h;
> +
> + int ctexture_block_w;
> + int ctexture_block_h;
> +
> /* Pointer to the selected decompression function */
> int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t
> *block);
> + int (*ctex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t
> *block);
> } DXVContext;
>
> +static void decompress_indices(uint8_t *dst, const uint8_t *src)
> +{
> + int block, i;
> +
> + for (block = 0; block < 2; block++) {
> + int tmp = AV_RL24(src);
> +
> + /* Unpack 8x3 bit from last 3 byte block */
> + for (i = 0; i < 8; i++)
> + dst[i] = (tmp >> (i * 3)) & 0x7;
> +
> + src += 3;
> + dst += 8;
> + }
> +}
> +
> +static int extract_component(int yo0, int yo1, int code)
> +{
> + int yo;
> +
> + if (yo0 == yo1) {
> + yo = yo0;
> + } else if (code == 0) {
> + yo = yo0;
> + } else if (code == 1) {
> + yo = yo1;
> + } else {
> + if (yo0 > yo1) {
> + yo = (uint8_t) (((8 - code) * yo0 +
> + (code - 1) * yo1) / 7);
> + } else {
> + if (code == 6) {
> + yo = 0;
> + } else if (code == 7) {
> + yo = 255;
> + } else {
> + yo = (uint8_t) (((6 - code) * yo0 +
> + (code - 1) * yo1) / 5);
> + }
> + }
> + }
> +
> + return yo;
> +}
> +
> +static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int
> a)
> +{
> + int r, g, b;
> +
> + co = co - 127;
> + cg = cg - 127;
> +
> + r = av_clip_uint8(yo + co - cg);
> + g = av_clip_uint8(yo + cg);
> + b = av_clip_uint8(yo - co - cg);
> +
> + return (a << 24) | (b << 16) | (g << 8) | (r);
> +}
> +
> +static int cocg_block(uint8_t *dst, ptrdiff_t stride,
> + const uint8_t *block)
> +{
> + uint8_t co_indices[16];
> + uint8_t cg_indices[16];
> + uint8_t co0 = *(block);
> + uint8_t co1 = *(block + 1);
> + uint8_t cg0 = *(block + 8);
> + uint8_t cg1 = *(block + 9);
> + int x, y;
> +
> + decompress_indices(co_indices, block + 2);
> + decompress_indices(cg_indices, block + 10);
> +
> + for (y = 0; y < 4; y++) {
> + for (x = 0; x < 4; x++) {
> + int co_code = co_indices[x + y * 4];
> + int cg_code = cg_indices[x + y * 4];
> + uint8_t co, cg;
> +
> + co = extract_component(co0, co1, co_code);
> + cg = extract_component(cg0, cg1, cg_code);
> +
> + dst[x * 8 + 1] = co;
> + dst[x * 8 + 2] = cg;
> + dst[x * 8 + 5] = co;
> + dst[x * 8 + 6] = cg;
> + dst[x * 8 + stride + 1] = co;
> + dst[x * 8 + stride + 2] = cg;
> + dst[x * 8 + stride + 5] = co;
> + dst[x * 8 + stride + 6] = cg;
> + }
> + dst += 2 * stride;
> + }
> +
> + return 16;
> +}
> +
> +static void yo_subblock(uint8_t *dst, uint8_t *yo_indices,
> + ptrdiff_t stride, const uint8_t *block)
> +{
> + uint8_t yo0 = *(block);
> + uint8_t yo1 = *(block + 1);
> + int x, y;
> +
> + decompress_indices(yo_indices, block + 2);
> +
> + for (y = 0; y < 4; y++) {
> + for (x = 0; x < 4; x++) {
> + int yo_code = yo_indices[x + y * 4];
> + uint8_t yo;
> +
> + yo = extract_component(yo0, yo1, yo_code);
> +
> + AV_WL32(dst + x * 4, 255u << 24 | yo);
> + }
> + dst += stride;
> + }
> +}
> +
> +static int yo_block(uint8_t *dst, ptrdiff_t stride,
> + const uint8_t *block)
> +{
> + uint8_t yo_indices[16];
> +
> + yo_subblock(dst, yo_indices, stride, block);
> + yo_subblock(dst + 16, yo_indices, stride, block + 8);
> + yo_subblock(dst + 32, yo_indices, stride, block + 16);
> + yo_subblock(dst + 48, yo_indices, stride, block + 24);
> +
> + return 32;
> +}
> +
> +static void a_subblock(uint8_t *dst, uint8_t *a_indices,
> + ptrdiff_t stride, const uint8_t *block)
> +{
> + uint8_t a0 = *(block);
> + uint8_t a1 = *(block + 1);
> + int x, y;
> +
> + decompress_indices(a_indices, block + 2);
> +
> + for (y = 0; y < 4; y++) {
> + for (x = 0; x < 4; x++) {
> + int a_code = a_indices[x + y * 4];
> + uint8_t a;
> +
> + a = extract_component(a0, a1, a_code);
> + dst[x * 4 + 3] = a;
> + }
> + dst += stride;
> + }
> +}
> +
> +static int yao_block(uint8_t *dst, ptrdiff_t stride,
> + const uint8_t *block)
> +{
> + uint8_t yo_indices[16];
> + uint8_t a_indices[16];
> +
> + yo_subblock(dst, yo_indices, stride, block);
> + a_subblock(dst, a_indices, stride, block + 8);
> + yo_subblock(dst + 16, yo_indices, stride, block + 16);
> + a_subblock(dst + 16, a_indices, stride, block + 24);
> + yo_subblock(dst + 32, yo_indices, stride, block + 32);
> + a_subblock(dst + 32, a_indices, stride, block + 40);
> + yo_subblock(dst + 48, yo_indices, stride, block + 48);
> + a_subblock(dst + 48, a_indices, stride, block + 56);
> +
> + return 64;
> +}
> +
> static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
> int slice, int thread_nb)
> {
> DXVContext *ctx = avctx->priv_data;
> AVFrame *frame = arg;
> const uint8_t *d = ctx->tex_data;
> - int w_block = avctx->coded_width / TEXTURE_BLOCK_W;
> - int h_block = avctx->coded_height / TEXTURE_BLOCK_H;
> + const uint8_t *c = ctx->ctex_data;
> + int w_block = avctx->coded_width / ctx->texture_block_w;
> + int h_block = avctx->coded_height / ctx->texture_block_h;
> int x, y;
> int start_slice, end_slice;
> - int base_blocks_per_slice = h_block / ctx->slice_count;
> - int remainder_blocks = h_block % ctx->slice_count;
> -
> - /* When the frame height (in blocks) doesn't divide evenly between the
> - * number of slices, spread the remaining blocks evenly between the
> first
> - * operations */
> - start_slice = slice * base_blocks_per_slice;
> - /* Add any extra blocks (one per slice) that have been added
> - * before this slice */
> - start_slice += FFMIN(slice, remainder_blocks);
> -
> - end_slice = start_slice + base_blocks_per_slice;
> - /* Add an extra block if there are remainder blocks to be accounted
> for */
> - if (slice < remainder_blocks)
> - end_slice++;
> +
> + start_slice = h_block * slice / ctx->slice_count;
> + end_slice = h_block * (slice + 1) / ctx->slice_count;
>
> for (y = start_slice; y < end_slice; y++) {
> - uint8_t *p = frame->data[0] + y * frame->linesize[0] *
> TEXTURE_BLOCK_H;
> + uint8_t *p = frame->data[0] + y * frame->linesize[0] *
> ctx->texture_block_h;
> int off = y * w_block;
> for (x = 0; x < w_block; x++) {
> - ctx->tex_funct(p + x * 16, frame->linesize[0],
> + ctx->tex_funct(p + x * 4 * ctx->texture_block_w,
> frame->linesize[0],
> d + (off + x) * ctx->tex_step);
> }
> }
>
> + if (ctx->ctex_funct) {
> + w_block = avctx->coded_width / ctx->ctexture_block_w;
> + h_block = avctx->coded_height / ctx->ctexture_block_h;
> +
> + start_slice = h_block * slice / ctx->slice_count;
> + end_slice = h_block * (slice + 1) / ctx->slice_count;
> +
> + for (y = start_slice; y < end_slice; y++) {
> + uint8_t *p = frame->data[0] + y * frame->linesize[0] *
> ctx->ctexture_block_h;
> + int off = y * w_block;
> + for (x = 0; x < w_block; x++) {
> + ctx->ctex_funct(p + x * 4 * ctx->ctexture_block_w,
> frame->linesize[0],
> + c + (off + x) * ctx->ctex_step);
> + }
> + }
> + for (y = start_slice * ctx->ctexture_block_h; y < FFMIN(end_slice
> * ctx->ctexture_block_h, avctx->height); y++) {
> + uint8_t *p8 = frame->data[0] + y * frame->linesize[0];
> + uint32_t *p = (uint32_t *)p8;
> + for (x = 0; x < avctx->width; x++) {
> + p[x] = yacocg2rgba(p8[4 * x], p8[4 * x + 1], p8[4 * x +
> 2], p8[4 * x + 3]);
> + }
> + }
> + }
> +
> return 0;
> }
>
> @@ -169,6 +368,705 @@ static int dxv_decompress_dxt1(AVCodecContext
> *avctx)
> return 0;
> }
>
> +typedef struct OpcodeTable {
> + int16_t next;
> + uint8_t val1;
> + uint8_t val2;
> +} OpcodeTable;
> +
> +static int fill_ltable(GetByteContext *gb, uint32_t *table, int
> *nb_elements)
> +{
> + unsigned half = 512, bits = 1023, left = 1024, input, mask;
> + int value, counter = 0, rshift = 10, lshift = 30;
> +
> + mask = bytestream2_get_le32(gb) >> 2;
> + while (left) {
> + if (bytestream2_get_bytes_left(gb) < 0 || counter >= 256)
> + return AVERROR_INVALIDDATA;
> +
> + value = bits & mask;
> + left -= bits & mask;
> + mask >>= rshift;
> + lshift -= rshift;
> + table[counter++] = value;
> + if (lshift < 16) {
> + input = bytestream2_get_le16(gb);
> + mask += input << lshift;
> + lshift += 16;
> + }
> + if (left < half) {
> + half >>= 1;
> + bits >>= 1;
> + rshift--;
> + }
> + }
> +
> + for (; !table[counter - 1]; counter--)
> + if (counter <= 0)
> + return AVERROR_INVALIDDATA;
> +
> + *nb_elements = counter;
> +
> + if (counter < 256)
> + memset(&table[counter], 0, 4 * (256 - counter));
> +
> + if (lshift >= 16)
> + bytestream2_seek(gb, -2, SEEK_CUR);
> +
> + return 0;
> +}
> +
> +static int fill_optable(unsigned *table0, OpcodeTable *table1, int
> nb_elements)
> +{
> + unsigned table2[256];
> + unsigned x = 0;
> + int val0, val1, i, j = 2, k = 0;
> +
> + table2[0] = table0[0];
> + for (i = 0; i < nb_elements - 1; i++, table2[i] = val0) {
> + val0 = table0[i + 1] + table2[i];
> + }
> +
> + if (!table2[0]) {
> + do {
> + k++;
> + } while (!table2[k]);
> + }
> +
> + j = 2;
> + for (i = 1024; i > 0; i--) {
> + for (table1[x].val1 = k; k < 256 && j > table2[k]; k++)
> + ;
> + x = (x - 383) & 0x3FF;
> + j++;
> + }
> +
> + if (nb_elements > 0)
> + memcpy(&table2[0], table0, 4 * nb_elements);
> +
> + for (i = 0; i < 1024; i++) {
> + val0 = table1[i].val1;
> + val1 = table2[val0];
> + table2[val0]++;
> + x = 31 - ff_clz(val1);
> + if (x > 10)
> + return AVERROR_INVALIDDATA;
> + table1[i].val2 = 10 - x;
> + table1[i].next = (val1 << table1[i].val2) - 1024;
> + }
> +
> + return 0;
> +}
> +
> +static int get_opcodes(GetByteContext *gb, uint32_t *table, uint8_t *dst,
> int op_size, int nb_elements)
> +{
> + OpcodeTable optable[1024];
> + int sum, x, val, lshift, rshift, ret, size_in_bits, i, idx;
> + int endoffset, newoffset, offset;
> + unsigned next;
> + uint8_t *src = (uint8_t *)gb->buffer;
> +
> + ret = fill_optable(table, optable, nb_elements);
> + if (ret < 0)
> + return ret;
> +
> + size_in_bits = bytestream2_get_le32(gb);
> + endoffset = ((size_in_bits + 7) >> 3) - 4;
> + if (endoffset <= 0 || bytestream2_get_bytes_left(gb) < endoffset)
> + return AVERROR_INVALIDDATA;
> +
> + offset = endoffset;
> + next = AV_RL32(src + endoffset);
> + rshift = (((size_in_bits & 0xFF) - 1) & 7) + 15;
> + lshift = 32 - rshift;
> + idx = (next >> rshift) & 0x3FF;
> + for (i = 0; i < op_size; i++) {
> + dst[i] = optable[idx].val1;
> + val = optable[idx].val2;
> + sum = val + lshift;
> + x = (next << lshift) >> 1 >> (31 - val);
> + newoffset = offset - (sum >> 3);
> + lshift = sum & 7;
> + idx = x + optable[idx].next;
> + offset = newoffset;
> + next = AV_RL32(src + offset);
> + }
> +
> + bytestream2_skip(gb, (size_in_bits + 7 >> 3) - 4);
> +
> + return 0;
> +}
> +
> +static int dxv_decompress_opcodes(GetByteContext *gb, void *dstp, size_t
> op_size)
> +{
> + int pos = bytestream2_tell(gb);
> + int flag = bytestream2_peek_byte(gb);
> +
> + if ((flag & 3) == 0) {
> + bytestream2_skip(gb, 1);
> + bytestream2_get_buffer(gb, dstp, op_size);
> + } else if ((flag & 3) == 1) {
> + bytestream2_skip(gb, 1);
> + memset(dstp, bytestream2_get_byte(gb), op_size);
> + } else {
> + uint32_t table[256];
> + int ret, elements = 0;
> +
> + ret = fill_ltable(gb, table, &elements);
> + if (ret < 0)
> + return ret;
> + ret = get_opcodes(gb, table, dstp, op_size, elements);
> + if (ret < 0)
> + return ret;
> + }
> + return bytestream2_tell(gb) - pos;
> +}
> +
> +static int dxv_decompress_cgo(DXVContext *ctx, GetByteContext *gb,
> + uint8_t *tex_data, int tex_size,
> + uint8_t *op_data, int *oindex,
> + uint8_t **dstp, int *statep,
> + uint8_t **tab0, uint8_t **tab1)
> +{
> + uint8_t *dst = *dstp;
> + uint8_t *tptr0, *tptr1, *tptr3;
> + int oi = *oindex;
> + int state = *statep;
> + int opcode, v, vv;
> +
> + if (state <= 0) {
> + opcode = op_data[oi++];
> + if (opcode) {
> + switch (opcode) {
> + case 1:
> + AV_WL32(dst, AV_RL32(dst - 16));
> + AV_WL32(dst + 4, AV_RL32(dst - 12));
> + break;
> + case 2:
> + vv = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> + if (vv < 0 || vv > dst - tex_data)
> + return AVERROR_INVALIDDATA;
> + tptr0 = dst - vv;
> + v = AV_RL32(tptr0);
> + AV_WL32(dst, AV_RL32(tptr0));
> + AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> + tab0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 3:
> + AV_WL32(dst, bytestream2_get_le32(gb));
> + AV_WL32(dst + 4, bytestream2_get_le32(gb));
> + tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 4:
> + tptr3 = tab1[bytestream2_get_byte(gb)];
> + AV_WL16(dst, bytestream2_get_le16(gb));
> + AV_WL16(dst + 2, AV_RL16(tptr3));
> + dst[4] = tptr3[2];
> + AV_WL16(dst + 5, bytestream2_get_le16(gb));
> + dst[7] = bytestream2_get_byte(gb);
> + tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> + break;
> + case 5:
> + tptr3 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr3)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, bytestream2_get_le16(gb));
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + dst[4] = bytestream2_get_byte(gb);
> + AV_WL16(dst + 5, AV_RL16(tptr3));
> + dst[7] = tptr3[2];
> + tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 6:
> + tptr0 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + tptr1 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, bytestream2_get_le16(gb));
> + AV_WL16(dst + 2, AV_RL16(tptr0));
> + dst[4] = tptr0[2];
> + AV_WL16(dst + 5, AV_RL16(tptr1));
> + dst[7] = tptr1[2];
> + tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> + break;
> + case 7:
> + v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> + if (v < 0 || v > dst - tex_data)
> + return AVERROR_INVALIDDATA;
> + tptr0 = dst - v;
> + AV_WL16(dst, bytestream2_get_le16(gb));
> + AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> + AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> + tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 8:
> + tptr1 = tab0[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(tptr1));
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + AV_WL32(dst + 4, bytestream2_get_le32(gb));
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 9:
> + tptr1 = tab0[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + tptr3 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr3)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(tptr1));
> + AV_WL16(dst + 2, AV_RL16(tptr3));
> + dst[4] = tptr3[2];
> + AV_WL16(dst + 5, bytestream2_get_le16(gb));
> + dst[7] = bytestream2_get_byte(gb);
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 10:
> + tptr1 = tab0[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + tptr3 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr3)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(tptr1));
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + dst[4] = bytestream2_get_byte(gb);
> + AV_WL16(dst + 5, AV_RL16(tptr3));
> + dst[7] = tptr3[2];
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 11:
> + tptr0 = tab0[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + tptr3 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr3)
> + return AVERROR_INVALIDDATA;
> + tptr1 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(tptr0));
> + AV_WL16(dst + 2, AV_RL16(tptr3));
> + dst[4] = tptr3[2];
> + AV_WL16(dst + 5, AV_RL16(tptr1));
> + dst[7] = tptr1[2];
> + break;
> + case 12:
> + tptr1 = tab0[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> + if (v < 0 || v > dst - tex_data)
> + return AVERROR_INVALIDDATA;
> + tptr0 = dst - v;
> + AV_WL16(dst, AV_RL16(tptr1));
> + AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> + AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 13:
> + AV_WL16(dst, AV_RL16(dst - 16));
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + AV_WL32(dst + 4, bytestream2_get_le32(gb));
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 14:
> + tptr3 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr3)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(dst - 16));
> + AV_WL16(dst + 2, AV_RL16(tptr3));
> + dst[4] = tptr3[2];
> + AV_WL16(dst + 5, bytestream2_get_le16(gb));
> + dst[7] = bytestream2_get_byte(gb);
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 15:
> + tptr3 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr3)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(dst - 16));
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + dst[4] = bytestream2_get_byte(gb);
> + AV_WL16(dst + 5, AV_RL16(tptr3));
> + dst[7] = tptr3[2];
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + case 16:
> + tptr3 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr3)
> + return AVERROR_INVALIDDATA;
> + tptr1 = tab1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(dst - 16));
> + AV_WL16(dst + 2, AV_RL16(tptr3));
> + dst[4] = tptr3[2];
> + AV_WL16(dst + 5, AV_RL16(tptr1));
> + dst[7] = tptr1[2];
> + break;
> + case 17:
> + v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> + if (v < 0 || v > dst - tex_data)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(dst - 16));
> + AV_WL16(dst + 2, AV_RL16(&dst[-v + 2]));
> + AV_WL32(dst + 4, AV_RL32(&dst[-v + 4]));
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> + break;
> + default:
> + break;
> + }
> + } else {
> + v = bytestream2_get_byte(gb);
> + if (v == 255) {
> + do {
> + if (bytestream2_get_bytes_left(gb) <= 0)
> + return AVERROR_INVALIDDATA;
> + opcode = bytestream2_get_le16(gb);
> + v += opcode;
> + } while (opcode == 0xFFFF);
> + }
> + AV_WL32(dst, AV_RL32(dst - 16));
> + AV_WL32(dst + 4, AV_RL32(dst - 12));
> + state = v + 3;
> + }
> + } else {
> + AV_WL32(dst, AV_RL32(dst - 16));
> + AV_WL32(dst + 4, AV_RL32(dst - 12));
> + state--;
> + }
> + dst += 8;
> +
> + *oindex = oi;
> + *dstp = dst;
> + *statep = state;
> +
> + return 0;
> +}
> +
> +static int dxv_decompress_cocg(DXVContext *ctx, GetByteContext *gb,
> + uint8_t *tex_data, int tex_size,
> + uint8_t *op_data0, uint8_t *op_data1)
> +{
> + uint8_t *dst, *tab2[256], *tab0[256], *tab3[256], *tab1[256];
> + int op_offset = bytestream2_get_le32(gb);
> + int op_size0 = bytestream2_get_le32(gb);
> + int op_size1 = bytestream2_get_le32(gb);
> + int data_start = bytestream2_tell(gb);
> + int skip0, skip1, oi0 = 0, oi1 = 0;
> + int ret, state0 = 0, state1 = 0;
> +
> + dst = tex_data;
> + bytestream2_skip(gb, op_offset - 12);
> + skip0 = dxv_decompress_opcodes(gb, op_data0, op_size0);
> + if (skip0 < 0)
> + return skip0;
> + bytestream2_seek(gb, data_start + op_offset + skip0 - 12, SEEK_SET);
> + skip1 = dxv_decompress_opcodes(gb, op_data1, op_size1);
> + if (skip1 < 0)
> + return skip1;
> + bytestream2_seek(gb, data_start, SEEK_SET);
> +
> + AV_WL32(dst, bytestream2_get_le32(gb));
> + AV_WL32(dst + 4, bytestream2_get_le32(gb));
> + AV_WL32(dst + 8, bytestream2_get_le32(gb));
> + AV_WL32(dst + 12, bytestream2_get_le32(gb));
> +
> + tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> + tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
> + tab2[0x9E3779B1 * AV_RL16(dst + 8) >> 24] = dst + 8;
> + tab3[0x9E3779B1 * (AV_RL32(dst + 10) & 0xFFFFFF) >> 24] = dst + 10;
> + dst += 16;
> + while (dst + 10 < tex_data + tex_size) {
> + ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data0,
> &oi0, &dst, &state0, tab0, tab1);
> + if (ret < 0)
> + return ret;
> + ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data1,
> &oi1, &dst, &state1, tab2, tab3);
> + if (ret < 0)
> + return ret;
> + }
> +
> + bytestream2_seek(gb, data_start + op_offset + skip0 + skip1 - 12,
> SEEK_SET);
> +
> + return 0;
> +}
> +
> +static int dxv_decompress_yo(DXVContext *ctx, GetByteContext *gb,
> + uint8_t *tex_data, int tex_size, uint8_t
> *op_data)
> +{
> + int op_offset = bytestream2_get_le32(gb);
> + int op_size = bytestream2_get_le32(gb);
> + int data_start = bytestream2_tell(gb);
> + uint8_t *dst, *tptr0, *tptr1, *table0[256], *table1[256];
> + int opcode, skip, oi = 0, v, vv;
> +
> + dst = tex_data;
> + bytestream2_skip(gb, op_offset - 8);
> + skip = dxv_decompress_opcodes(gb, op_data, op_size);
> + if (skip < 0)
> + return skip;
> + bytestream2_seek(gb, data_start, SEEK_SET);
> +
> + v = bytestream2_get_le32(gb);
> + AV_WL32(dst, v);
> + vv = bytestream2_get_le32(gb);
> + table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> + AV_WL32(dst + 4, vv);
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
> + dst += 8;
> +
> + while (dst < tex_data + tex_size) {
> + opcode = op_data[oi++];
> + if (opcode) {
> + switch (opcode) {
> + case 1:
> + AV_WL32(dst, AV_RL32(dst - 8));
> + AV_WL32(dst + 4, AV_RL32(dst - 4));
> + break;
> + case 2:
> + vv = 8 * (uint16_t)bytestream2_get_le16(gb) + 8;
> + if (vv > dst - tex_data)
> + return AVERROR_INVALIDDATA;
> + tptr0 = dst - vv;
> + v = AV_RL32(tptr0);
> + AV_WL32(dst, AV_RL32(tptr0));
> + AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> + table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFU) >> 24]
> = dst + 2;
> + break;
> + case 3:
> + v = bytestream2_get_le32(gb);
> + AV_WL32(dst, v);
> + AV_WL32(dst + 4, bytestream2_get_le32(gb));
> + vv = AV_RL32(dst + 2);
> + table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> + table1[0x9E3779B1 * (vv & 0xFFFFFF) >> 24] = dst + 2;
> + break;
> + case 4:
> + tptr1 = table1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, bytestream2_get_le16(gb));
> + AV_WL16(dst + 2, AV_RL16(tptr1));
> + dst[4] = tptr1[2];
> + AV_WL16(dst + 5, bytestream2_get_le16(gb));
> + dst[7] = bytestream2_get_byte(gb);
> + v = 0x9E3779B1 * AV_RL16(dst) >> 24;
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> + table0[v] = dst;
> + break;
> + case 5:
> + tptr1 = table1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, bytestream2_get_le16(gb));
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + dst[4] = bytestream2_get_byte(gb);
> + AV_WL16(dst + 5, AV_RL16(tptr1));
> + dst[7] = tptr1[2];
> + table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> + break;
> + case 6:
> + tptr0 = table1[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + tptr1 = table1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, bytestream2_get_le16(gb));
> + AV_WL16(dst + 2, AV_RL16(tptr0));
> + dst[4] = tptr0[2];
> + AV_WL16(dst + 5, AV_RL16(tptr1));
> + dst[7] = tptr1[2];
> + table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> + break;
> + case 7:
> + v = 8 * (uint16_t)bytestream2_get_le16(gb) + 8;
> + if (v > dst - tex_data)
> + return AVERROR_INVALIDDATA;
> + tptr0 = dst - v;
> + AV_WL16(dst, bytestream2_get_le16(gb));
> + AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> + AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> + table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> + break;
> + case 8:
> + tptr0 = table0[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(tptr0));
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + AV_WL32(dst + 4, bytestream2_get_le32(gb));
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> + break;
> + case 9:
> + tptr0 = table0[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(tptr0));
> + tptr1 = table1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst + 2, AV_RL16(tptr1));
> + dst[4] = tptr1[2];
> + AV_WL16(dst + 5, bytestream2_get_le16(gb));
> + dst[7] = bytestream2_get_byte(gb);
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> + break;
> + case 10:
> + tptr0 = table0[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(tptr0));
> + tptr1 = table1[bytestream2_get_byte(gb)];
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + dst[4] = bytestream2_get_byte(gb);
> + AV_WL16(dst + 5, AV_RL16(tptr1));
> + dst[7] = tptr1[2];
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> + break;
> + case 11:
> + tptr0 = table0[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(tptr0));
> + tptr0 = table1[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + tptr1 = table1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst + 2, AV_RL16(tptr0));
> + dst[4] = tptr0[2];
> + AV_WL16(dst + 5, AV_RL16(tptr1));
> + dst[7] = tptr1[2];
> + break;
> + case 12:
> + tptr0 = table0[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(tptr0));
> + tptr0 = dst - (8 * bytestream2_get_le16(gb) + 8);
> + AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> + AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24]
> = dst + 2;
> + break;
> + case 13:
> + AV_WL16(dst, AV_RL16(dst - 8));
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + AV_WL32(dst + 4, bytestream2_get_le32(gb));
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24]
> = dst + 2;
> + break;
> + case 14:
> + tptr1 = table1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(dst - 8));
> + AV_WL16(dst + 2, AV_RL16(tptr1));
> + dst[4] = tptr1[2];
> + AV_WL16(dst + 5, bytestream2_get_le16(gb));
> + dst[7] = bytestream2_get_byte(gb);
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24]
> = dst + 2;
> + break;
> + case 15:
> + tptr1 = table1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(dst - 8));
> + AV_WL16(dst + 2, bytestream2_get_le16(gb));
> + dst[4] = bytestream2_get_byte(gb);
> + AV_WL16(dst + 5, AV_RL16(tptr1));
> + dst[7] = tptr1[2];
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> + break;
> + case 16:
> + tptr0 = table1[bytestream2_get_byte(gb)];
> + if (!tptr0)
> + return AVERROR_INVALIDDATA;
> + tptr1 = table1[bytestream2_get_byte(gb)];
> + if (!tptr1)
> + return AVERROR_INVALIDDATA;
> + AV_WL16(dst, AV_RL16(dst - 8));
> + AV_WL16(dst + 2, AV_RL16(tptr0));
> + dst[4] = tptr0[2];
> + AV_WL16(dst + 5, AV_RL16(tptr1));
> + dst[7] = tptr1[2];
> + break;
> + case 17:
> + v = 8 * bytestream2_get_le16(gb) + 8;
> + if (v > dst - tex_data)
> + return AVERROR_INVALIDDATA;
> + tptr0 = dst - v;
> + AV_WL16(dst, AV_RL16(dst - 8));
> + AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> + AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> + table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> + break;
> + default:
> + break;
> + }
> + dst += 8;
> + } else {
> + v = bytestream2_get_byte(gb);
> + if (v == 255) {
> + do {
> + if (bytestream2_get_bytes_left(gb) <= 0)
> + return AVERROR_INVALIDDATA;
> + opcode = bytestream2_get_le16(gb);
> + v += opcode;
> + } while (opcode == 0xFFFF);
> + }
> + vv = v + 4;
> + do {
> + AV_WL32(dst, AV_RL32(dst - 8));
> + AV_WL32(dst + 4, AV_RL32(dst - 4));
> + dst += 8;
> + --vv;
> + } while (vv);
> + }
> + }
> +
> + bytestream2_seek(gb, data_start + op_offset + skip - 8, SEEK_SET);
> +
> + return 0;
> +}
> +
> +static int dxv_decompress_ycg6(AVCodecContext *avctx)
> +{
> + DXVContext *ctx = avctx->priv_data;
> + GetByteContext *gb = &ctx->gbc;
> + int ret;
> +
> + ret = dxv_decompress_yo(ctx, gb, ctx->tex_data, ctx->tex_size,
> ctx->op_data[0]);
> + if (ret < 0)
> + return ret;
> +
> + return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
> ctx->op_data[1], ctx->op_data[2]);
> +}
> +
> +static int dxv_decompress_yg10(AVCodecContext *avctx)
> +{
> + DXVContext *ctx = avctx->priv_data;
> + GetByteContext *gb = &ctx->gbc;
> + int ret;
> +
> + ret = dxv_decompress_cocg(ctx, gb, ctx->tex_data, ctx->tex_size,
> ctx->op_data[0], ctx->op_data[3]);
> + if (ret < 0)
> + return ret;
> +
> + return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
> ctx->op_data[1], ctx->op_data[2]);
> +}
> +
> static int dxv_decompress_dxt5(AVCodecContext *avctx)
> {
> DXVContext *ctx = avctx->priv_data;
> @@ -359,6 +1257,9 @@ static int dxv_decode(AVCodecContext *avctx, void
> *data,
>
> bytestream2_init(gbc, avpkt->data, avpkt->size);
>
> + ctx->texture_block_h = 4;
> + ctx->texture_block_w = 4;
> +
> tag = bytestream2_get_le32(gbc);
> switch (tag) {
> case MKBETAG('D', 'X', 'T', '1'):
> @@ -378,9 +1279,35 @@ static int dxv_decode(AVCodecContext *avctx, void
> *data,
> msgtext = "DXT5";
> break;
> case MKBETAG('Y', 'C', 'G', '6'):
> + decompress_tex = dxv_decompress_ycg6;
> + ctx->tex_funct = yo_block;
> + ctx->ctex_funct = cocg_block;
> + ctx->tex_rat = 8;
> + ctx->tex_step = 32;
> + ctx->ctex_step = 16;
> + msgcomp = "YOCOCG6";
> + msgtext = "YCG6";
> + ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
> + ctx->texture_block_h = 4;
> + ctx->texture_block_w = 16;
> + ctx->ctexture_block_h = 8;
> + ctx->ctexture_block_w = 8;
> + break;
> case MKBETAG('Y', 'G', '1', '0'):
> - avpriv_report_missing_feature(avctx, "Tag 0x%08"PRIX32, tag);
> - return AVERROR_PATCHWELCOME;
> + decompress_tex = dxv_decompress_yg10;
> + ctx->tex_funct = yao_block;
> + ctx->ctex_funct = cocg_block;
> + ctx->tex_rat = 4;
> + ctx->tex_step = 64;
> + ctx->ctex_step = 16;
> + msgcomp = "YAOCOCG10";
> + msgtext = "YG10";
> + ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
> + ctx->texture_block_h = 4;
> + ctx->texture_block_w = 16;
> + ctx->ctexture_block_h = 8;
> + ctx->ctexture_block_w = 8;
> + break;
> default:
> /* Old version does not have a real header, just size and type. */
> size = tag & 0x00FFFFFF;
> @@ -413,6 +1340,10 @@ static int dxv_decode(AVCodecContext *avctx, void
> *data,
> break;
> }
>
> + ctx->slice_count = av_clip(avctx->thread_count, 1,
> + avctx->coded_height /
> FFMAX(ctx->texture_block_h,
> +
> ctx->ctexture_block_h));
> +
> /* New header is 12 bytes long. */
> if (!old_type) {
> version_major = bytestream2_get_byte(gbc) - 1;
> @@ -444,6 +1375,24 @@ static int dxv_decode(AVCodecContext *avctx, void
> *data,
> if (ret < 0)
> return ret;
>
> + if (ctx->ctex_size) {
> + int i;
> +
> + ctx->op_size[0] = avctx->coded_width * avctx->coded_height / 16;
> + ctx->op_size[1] = avctx->coded_width * avctx->coded_height / 32;
> + ctx->op_size[2] = avctx->coded_width * avctx->coded_height / 32;
> + ctx->op_size[3] = avctx->coded_width * avctx->coded_height / 16;
> +
> + ret = av_reallocp(&ctx->ctex_data, ctx->ctex_size);
> + if (ret < 0)
> + return ret;
> + for (i = 0; i < 4; i++) {
> + ret = av_reallocp(&ctx->op_data[i], ctx->op_size[i]);
> + if (ret < 0)
> + return ret;
> + }
> + }
> +
> /* Decompress texture out of the intermediate compression. */
> ret = decompress_tex(avctx);
> if (ret < 0)
> @@ -484,9 +1433,6 @@ static int dxv_init(AVCodecContext *avctx)
> ff_texturedsp_init(&ctx->texdsp);
> avctx->pix_fmt = AV_PIX_FMT_RGBA;
>
> - ctx->slice_count = av_clip(avctx->thread_count, 1,
> - avctx->coded_height / TEXTURE_BLOCK_H);
> -
> return 0;
> }
>
> @@ -495,6 +1441,10 @@ static int dxv_close(AVCodecContext *avctx)
> DXVContext *ctx = avctx->priv_data;
>
> av_freep(&ctx->tex_data);
> + av_freep(&ctx->ctex_data);
> + av_freep(&ctx->op_data[0]);
> + av_freep(&ctx->op_data[1]);
> + av_freep(&ctx->op_data[2]);
>
> return 0;
> }
> --
> 2.11.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
More information about the ffmpeg-devel mailing list