[FFmpeg-devel] [PATCH v3 2/2] Newtek SpeedHQ decoder.
Paul B Mahol
onemda at gmail.com
Mon Jan 9 19:30:52 EET 2017
On 1/8/17, Steinar H. Gunderson <steinar+ffmpeg at gunderson.no> wrote:
> + * for the longest (10-bit) codes.
> + */
> +#define ALPHA_VLC_BITS 5
> +
> +/* Per-instance state of the SpeedHQ decoder. */
> +typedef struct SHQContext {
> +    AVCodecContext *avctx;              /* owning codec context */
> +    BlockDSPContext bdsp;               /* supplies clear_block() */
> +    IDCTDSPContext  idsp;               /* supplies idct_put() */
> +    ScanTable       intra_scantable;    /* coefficient scan order */
> +    int             quant_matrix[64];   /* rebuilt for every frame from the quality byte */
> +
> +    /* Chroma subsampling of the stream. */
> +    enum {
> +        SHQ_SUBSAMPLING_420,
> +        SHQ_SUBSAMPLING_422,
> +        SHQ_SUBSAMPLING_444
> +    } subsampling;
> +
> +    /* How the alpha plane is coded, if there is one. */
> +    enum {
> +        SHQ_NO_ALPHA,
> +        SHQ_RLE_ALPHA,
> +        SHQ_DCT_ALPHA
> +    } alpha_type;
> +} SHQContext;
> +
> +
> +/* AC codes: Very similar but not identical to MPEG-2. */
> +static uint16_t speedhq_vlc[123][2] = {
Can this be uint8_t too?
> + {0x02, 2}, {0x06, 3}, {0x07, 4}, {0x1c, 5},
> + {0x1d, 5}, {0x05, 6}, {0x04, 6}, {0x7b, 7},
> + {0x7c, 7}, {0x23, 8}, {0x22, 8}, {0xfa, 8},
> + {0xfb, 8}, {0xfe, 8}, {0xff, 8}, {0x1f,14},
> + {0x1e,14}, {0x1d,14}, {0x1c,14}, {0x1b,14},
> + {0x1a,14}, {0x19,14}, {0x18,14}, {0x17,14},
> + {0x16,14}, {0x15,14}, {0x14,14}, {0x13,14},
> + {0x12,14}, {0x11,14}, {0x10,14}, {0x18,15},
> + {0x17,15}, {0x16,15}, {0x15,15}, {0x14,15},
> + {0x13,15}, {0x12,15}, {0x11,15}, {0x10,15},
[...]
> + speedhq_run,
> + speedhq_level,
> +};
> +
> +/* NOTE: The first element is always 16, unscaled. */
> +static const uint16_t unscaled_quant_matrix[64] = {
This can be uint8_t: the largest entry is 83.
> + 16, 16, 19, 22, 26, 27, 29, 34,
> + 16, 16, 22, 24, 27, 29, 34, 37,
> + 19, 22, 26, 27, 29, 34, 34, 38,
> + 22, 22, 26, 27, 29, 34, 37, 40,
> + 22, 26, 27, 29, 32, 35, 40, 48,
> + 26, 27, 29, 32, 35, 40, 48, 58,
> + 26, 27, 29, 34, 38, 46, 56, 69,
> + 27, 29, 35, 38, 46, 56, 69, 83
> +};
> +
> +/*
> + * Static storage for the run/level table.
> + * NOTE(review): presumably handed to the RL table initialisation for
> + * ff_rl_speedhq; that call is not part of this excerpt.
> + */
> +static uint8_t ff_speedhq_static_rl_table_store[2][2 * MAX_RUN + MAX_LEVEL + 3];
> +
> +/* Little-endian VLCs for the DC size codes (luma and chroma). */
> +static VLC ff_dc_lum_vlc_le;
> +static VLC ff_dc_chroma_vlc_le;
> +
> +/* Little-endian VLCs for the run and level symbols of RLE-coded alpha. */
> +static VLC ff_dc_alpha_run_vlc_le;
> +static VLC ff_dc_alpha_level_vlc_le;
> +
> +static inline int decode_dc_le(GetBitContext *gb, int component)
> +{
> + int code, diff;
> +
> + if (component == 0 || component == 3) {
> + code = get_vlc2(gb, ff_dc_lum_vlc_le.table, DC_VLC_BITS, 2);
> + } else {
> + code = get_vlc2(gb, ff_dc_chroma_vlc_le.table, DC_VLC_BITS, 2);
> + }
> + if (code < 0) {
> + av_log(NULL, AV_LOG_ERROR, "invalid dc code at\n");
> + return 0xffff;
Why this specific return value? The caller, decode_dct_block(), never checks for it, so I suppose decoding of the other blocks still continues?
> + }
> + if (!code) {
> + diff = 0;
> + } else {
> + diff = get_xbits_le(gb, code);
> + }
> + return diff;
> +}
> +
> +/*
> + * Decode one run-length coded alpha block of 16x8 samples.
> + *
> + * The bitstream holds (run, level) pairs: "run" skips that many zero
> + * entries, "level" is stored at the resulting position. Run symbol 128
> + * terminates the block. Every decoded entry is subtracted from the
> + * matching column of last_alpha, and the updated 16-byte row is copied to
> + * the output, so last_alpha carries the running values from row to row and
> + * on to the next call.
> + *
> + * s           decoder context (not used here)
> + * gb          bit reader positioned at the start of the block
> + * last_alpha  16 running column values, updated in place
> + * dest        top-left output sample of the block
> + * linesize    distance in bytes between two output rows
> + *
> + * Returns 0 on success, or AVERROR_INVALIDDATA when a run steps past the
> + * 128 entries of the block.
> + */
> +static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb,
> +                                     uint8_t last_alpha[16], uint8_t *dest,
> +                                     int linesize)
> +{
> +    uint8_t delta[128] = { 0 };
> +    uint8_t *out = dest;
> +    int pos = 0, row, col;
> +
> +    {
> +        OPEN_READER(re, gb);
> +
> +        while (1) {
> +            int run, level;
> +
> +            UPDATE_CACHE_LE(re, gb);
> +            GET_VLC(run, re, gb, ff_dc_alpha_run_vlc_le.table, ALPHA_VLC_BITS, 2);
> +
> +            /* Symbol 128 marks the end of the block. */
> +            if (run == 128)
> +                break;
> +
> +            pos += run;
> +            if (pos >= 128)
> +                return AVERROR_INVALIDDATA;
> +
> +            UPDATE_CACHE_LE(re, gb);
> +            GET_VLC(level, re, gb, ff_dc_alpha_level_vlc_le.table, ALPHA_VLC_BITS, 2);
> +            delta[pos] = level;
> +            pos++;
> +        }
> +
> +        CLOSE_READER(re, gb);
> +    }
> +
> +    for (row = 0; row < 8; row++) {
> +        const uint8_t *src = delta + 16 * row;
> +
> +        for (col = 0; col < 16; col++)
> +            last_alpha[col] -= src[col];
> +
> +        memcpy(out, last_alpha, 16);
> +        out += linesize;
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> + * Decode one 8x8 DCT-coded block and write the reconstructed samples.
> + *
> + * s          decoder context (quantisation matrix, scan table, DSP functions)
> + * gb         bit reader positioned at the DC code of the block
> + * last_dc    DC predictors; last_dc[component] is updated in place
> + * component  predictor and DC table selector; the callers pass 0 for luma,
> + *            1 for Cb, 2 for Cr and 3 for alpha
> + * dest       top-left output sample of the block
> + * linesize   distance in bytes between two output rows
> + *
> + * Returns 0 on success, or AVERROR_INVALIDDATA if the DC code is invalid
> + * or the coefficient index runs past MAX_INDEX.
> + */
> +static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb,
> +                                   int last_dc[4], int component,
> +                                   uint8_t *dest, int linesize)
> +{
> +    const int *quant_matrix = s->quant_matrix;
> +    const uint8_t *scantable = s->intra_scantable.permutated;
> +    /*
> +     * NOTE(review): plain automatic array; confirm that it satisfies the
> +     * alignment the clear_block()/idct_put() implementations expect.
> +     */
> +    int16_t block[64];
> +    int dc_offset;
> +
> +    s->bdsp.clear_block(block);
> +
> +    dc_offset = decode_dc_le(gb, component);
> +    /*
> +     * decode_dc_le() signals an invalid DC code by returning 0xffff.
> +     * Propagate that as an error instead of subtracting the sentinel from
> +     * the predictor and silently corrupting every following block.
> +     * NOTE(review): assumes 0xffff can never be a legitimate DC
> +     * difference -- confirm against the largest DC code size.
> +     */
> +    if (dc_offset == 0xffff)
> +        return AVERROR_INVALIDDATA;
> +
> +    last_dc[component] -= dc_offset; /* Note: Opposite of most codecs. */
> +    /* The DC value is stored as is; it does not go through quant_matrix. */
> +    block[scantable[0]] = last_dc[component];
> +
> +    /* Read AC coefficients. */
> +    {
> +        int i = 0;
> +        OPEN_READER(re, gb);
> +        for ( ;; ) {
> +            int level, run;
> +            UPDATE_CACHE_LE(re, gb);
> +            GET_RL_VLC(level, run, re, gb, ff_rl_speedhq.rl_vlc[0],
> +                       TEX_VLC_BITS, 2, 0);
> +            if (level == 127) {
> +                /* End of block. */
> +                break;
> +            } else if (level) {
> +                i += run;
> +                if (i > MAX_INDEX)
> +                    return AVERROR_INVALIDDATA;
> +                /* If next bit is 1, level = -level */
> +                level = (level ^ SHOW_SBITS(re, gb, 1)) -
> +                        SHOW_SBITS(re, gb, 1);
> +                LAST_SKIP_BITS(re, gb, 1);
> +            } else {
> +                /* Escape: 6-bit run (stored minus 1), 12-bit level biased by 2048. */
> +#if MIN_CACHE_BITS < 6 + 6 + 12
> +#error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE
> +#endif
> +                run = SHOW_UBITS(re, gb, 6) + 1;
> +                SKIP_BITS(re, gb, 6);
> +                level = SHOW_UBITS(re, gb, 12) - 2048;
> +                LAST_SKIP_BITS(re, gb, 12);
> +
> +                i += run;
> +                if (i > MAX_INDEX)
> +                    return AVERROR_INVALIDDATA;
> +            }
> +
> +            /*
> +             * NOTE(review): quant_matrix is indexed by the scan position i
> +             * while the coefficient lands at scantable[i]; confirm that the
> +             * matrix is meant to be in scan order.
> +             */
> +            block[scantable[i]] = (level * quant_matrix[i]) >> 4;
> +        }
> +        CLOSE_READER(re, gb);
> +    }
> +
> +    s->idsp.idct_put(dest, linesize, block);
> +
> +    return 0;
> +}
> +
> +/*
> + * Decode a 2x2 group of 8x8 DCT blocks (a 16x16 area) of one component,
> + * in the order top-left, top-right, bottom-left, bottom-right.
> + */
> +static inline int decode_dct_quad(const SHQContext *s, GetBitContext *gb,
> +                                  int last_dc[4], int component,
> +                                  uint8_t *dest, int linesize)
> +{
> +    int ret;
> +
> +    if ((ret = decode_dct_block(s, gb, last_dc, component, dest, linesize)) < 0)
> +        return ret;
> +    if ((ret = decode_dct_block(s, gb, last_dc, component, dest + 8, linesize)) < 0)
> +        return ret;
> +    if ((ret = decode_dct_block(s, gb, last_dc, component, dest + 8 * linesize, linesize)) < 0)
> +        return ret;
> +    return decode_dct_block(s, gb, last_dc, component, dest + 8 * linesize + 8, linesize);
> +}
> +
> +/* Decode one Cb block (component 1) followed by one Cr block (component 2). */
> +static inline int decode_chroma_pair(const SHQContext *s, GetBitContext *gb,
> +                                     int last_dc[4],
> +                                     uint8_t *dest_cb, int linesize_cb,
> +                                     uint8_t *dest_cr, int linesize_cr)
> +{
> +    int ret = decode_dct_block(s, gb, last_dc, 1, dest_cb, linesize_cb);
> +
> +    if (ret < 0)
> +        return ret;
> +    return decode_dct_block(s, gb, last_dc, 2, dest_cr, linesize_cr);
> +}
> +
> +/*
> + * Decode one field (or one progressive picture) into frame.
> + *
> + * The field occupies buf[start..end) and consists of four slices. Each
> + * slice starts with a 24-bit little-endian length that includes those
> + * three bytes. Slice n carries the macroblock rows n, n + 4, n + 8, ...
> + *
> + * s             decoder context
> + * buf           packet data
> + * buf_size      size of buf in bytes
> + * frame         destination frame
> + * field_number  line offset added to every row position (0 or 1 at the
> + *               call sites)
> + * start, end    byte range of the field inside buf
> + * line_stride   distance in frame lines between consecutive field lines
> + *               (1 or 2 at the call sites)
> + *
> + * Returns 0 on success or a negative AVERROR code.
> + */
> +static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf,
> +                                int buf_size, AVFrame *frame, int field_number,
> +                                int start, int end, int line_stride)
> +{
> +    int ret, slice_number, slice_offsets[5];
> +    const int has_alpha = s->alpha_type != SHQ_NO_ALPHA;
> +    int linesize_y  = frame->linesize[0] * line_stride;
> +    int linesize_cb = frame->linesize[1] * line_stride;
> +    int linesize_cr = frame->linesize[2] * line_stride;
> +    /* Given a defined value even without alpha so it is never indeterminate. */
> +    int linesize_a  = has_alpha ? frame->linesize[3] * line_stride : 0;
> +
> +    if (end < start || end - start < 3 || end > buf_size)
> +        return AVERROR_INVALIDDATA;
> +
> +    /* Walk the length prefixes to find where each of the four slices begins. */
> +    slice_offsets[0] = start;
> +    slice_offsets[4] = end;
> +    for (slice_number = 1; slice_number < 4; slice_number++) {
> +        uint32_t last_offset, slice_len;
> +
> +        last_offset = slice_offsets[slice_number - 1];
> +        slice_len = AV_RL24(buf + last_offset);
> +        slice_offsets[slice_number] = last_offset + slice_len;
> +
> +        /* Every slice, including the one that follows, needs room for its own prefix. */
> +        if (slice_len < 3 || slice_offsets[slice_number] > end - 3)
> +            return AVERROR_INVALIDDATA;
> +    }
> +
> +    for (slice_number = 0; slice_number < 4; slice_number++) {
> +        GetBitContext gb;
> +        uint32_t slice_begin, slice_end;
> +        int x, y;
> +
> +        slice_begin = slice_offsets[slice_number];
> +        slice_end   = slice_offsets[slice_number + 1];
> +
> +        /* The payload starts after the 3-byte length prefix. */
> +        if ((ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3)) < 0)
> +            return ret;
> +
> +        for (y = slice_number * 16 * line_stride; y < frame->height; y += line_stride * 64) {
> +            uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a = NULL;
> +            int last_dc[4] = { 1024, 1024, 1024, 1024 };
> +            uint8_t last_alpha[16];
> +            /* 4:2:0 chroma planes have half as many rows as luma. */
> +            const int chroma_y = s->subsampling == SHQ_SUBSAMPLING_420 ? y / 2 : y;
> +
> +            /* The predictors are reset at the start of every macroblock row. */
> +            memset(last_alpha, 255, sizeof(last_alpha));
> +
> +            dest_y  = frame->data[0] + frame->linesize[0] * (y + field_number);
> +            dest_cb = frame->data[1] + frame->linesize[1] * (chroma_y + field_number);
> +            dest_cr = frame->data[2] + frame->linesize[2] * (chroma_y + field_number);
> +            if (has_alpha)
> +                dest_a = frame->data[3] + frame->linesize[3] * (y + field_number);
> +
> +            for (x = 0; x < frame->width; x += 16) {
> +                /* Decode the four luma blocks. */
> +                if ((ret = decode_dct_quad(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0)
> +                    return ret;
> +
> +                /*
> +                 * Decode the first chroma block. For 4:2:0, this is the only one;
> +                 * for 4:2:2, it's the top block; for 4:4:4, it's the top-left block.
> +                 */
> +                if ((ret = decode_chroma_pair(s, &gb, last_dc,
> +                                              dest_cb, linesize_cb,
> +                                              dest_cr, linesize_cr)) < 0)
> +                    return ret;
> +
> +                if (s->subsampling != SHQ_SUBSAMPLING_420) {
> +                    /* For 4:2:2, this is the bottom block; for 4:4:4, it's the bottom-left block. */
> +                    if ((ret = decode_chroma_pair(s, &gb, last_dc,
> +                                                  dest_cb + 8 * linesize_cb, linesize_cb,
> +                                                  dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
> +                        return ret;
> +
> +                    if (s->subsampling == SHQ_SUBSAMPLING_444) {
> +                        /* Top-right and bottom-right blocks. */
> +                        if ((ret = decode_chroma_pair(s, &gb, last_dc,
> +                                                      dest_cb + 8, linesize_cb,
> +                                                      dest_cr + 8, linesize_cr)) < 0)
> +                            return ret;
> +                        if ((ret = decode_chroma_pair(s, &gb, last_dc,
> +                                                      dest_cb + 8 * linesize_cb + 8, linesize_cb,
> +                                                      dest_cr + 8 * linesize_cr + 8, linesize_cr)) < 0)
> +                            return ret;
> +
> +                        /* 4:4:4 chroma is 16 samples wide: 8 here plus 8 below. */
> +                        dest_cb += 8;
> +                        dest_cr += 8;
> +                    }
> +                }
> +                dest_y  += 16;
> +                dest_cb += 8;
> +                dest_cr += 8;
> +
> +                if (s->alpha_type == SHQ_RLE_ALPHA) {
> +                    /* Alpha coded using 16x8 RLE blocks. */
> +                    if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a, linesize_a)) < 0)
> +                        return ret;
> +                    if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0)
> +                        return ret;
> +                    dest_a += 16;
> +                } else if (s->alpha_type == SHQ_DCT_ALPHA) {
> +                    /* Alpha encoded exactly like luma. */
> +                    if ((ret = decode_dct_quad(s, &gb, last_dc, 3, dest_a, linesize_a)) < 0)
> +                        return ret;
> +                    dest_a += 16;
> +                }
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> + * Fill output[0..63] with the entries of unscaled_quant_matrix, each
> + * multiplied by qscale.
> + */
> +static void compute_quant_matrix(int *output, int qscale)
> +{
> +    int idx = 0;
> +
> +    while (idx < 64) {
> +        output[idx] = qscale * unscaled_quant_matrix[idx];
> +        idx++;
> +    }
> +}
> +
> +/*
> + * Decode one packet into a frame.
> + *
> + * Packet header: byte 0 is the quality (must be below 100; the quantiser
> + * scale is 100 - quality), bytes 1..3 are the little-endian offset of the
> + * second field. An offset of 4 means the packet holds a single field.
> + *
> + * Returns the number of bytes consumed (the whole packet) on success and
> + * sets *got_frame to 1, or returns a negative AVERROR code.
> + */
> +static int speedhq_decode_frame(AVCodecContext *avctx,
> +                                void *data, int *got_frame,
> +                                AVPacket *avpkt)
> +{
> +    SHQContext * const s = avctx->priv_data;
> +    AVFrame *frame       = data;
> +    const uint8_t *buf   = avpkt->data;
> +    int buf_size         = avpkt->size;
> +    uint32_t second_field_offset;
> +    uint8_t quality;
> +    int ret;
> +
> +    if (buf_size < 4)
> +        return AVERROR_INVALIDDATA;
> +
> +    quality = buf[0];
> +    if (quality >= 100)
> +        return AVERROR_INVALIDDATA;
> +
> +    compute_quant_matrix(s->quant_matrix, 100 - quality);
> +
> +    second_field_offset = AV_RL24(buf + 1);
> +    if (second_field_offset >= buf_size - 3)
> +        return AVERROR_INVALIDDATA;
> +
> +    avctx->coded_width  = FFALIGN(avctx->width, 16);
> +    avctx->coded_height = FFALIGN(avctx->height, 16);
> +
> +    ret = ff_get_buffer(avctx, frame, 0);
> +    if (ret < 0)
> +        return ret;
> +    frame->key_frame = 1;
> +
> +    if (second_field_offset == 4) {
> +        /*
> +         * Overlapping first and second fields is used to signal
> +         * encoding only a single field (the second field then comes
> +         * as a separate, later frame).
> +         */
> +        frame->height >>= 1;
> +        ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, buf_size, 1);
> +    } else {
> +        /* Two interleaved fields: even lines first, then odd lines. */
> +        ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, second_field_offset, 2);
> +        if (ret >= 0)
> +            ret = decode_speedhq_field(s, buf, buf_size, frame, 1, second_field_offset, buf_size, 2);
> +    }
> +    if (ret < 0)
> +        return ret;
> +
> +    *got_frame = 1;
> +    return buf_size;
> +}
> +
> +/*
> + * Build the two little-endian VLCs used by RLE-coded alpha.
> + *
> + * Run and level are coded independently and would not fit the default
> + * MAX_RUN/MAX_LEVEL limits, so they get separate tables instead of one
> + * combined run/level table. Bit patterns in the comments below are given
> + * in the order the bits are read.
> + */
> +static av_cold void compute_alpha_vlcs(void)
> +{
> +    uint16_t run_code[129], level_code[256];
> +    uint8_t run_bits[129], level_bits[256];
> +    int run, level;
> +
> +    /* 0 -> run 0. */
> +    run_code[0] = 0;
> +    run_bits[0] = 1;
> +
> +    /* 10xx -> xx plus 1 (runs 1..4). */
> +    for (run = 1; run <= 4; run++) {
> +        run_code[run] = ((run - 1) << 2) | 1;
> +        run_bits[run] = 4;
> +    }
> +
> +    /* 111xxxxxxx -> xxxxxxx (runs 5..127, seven bits). */
> +    for (run = 5; run < 128; run++) {
> +        run_code[run] = (run << 3) | 7;
> +        run_bits[run] = 10;
> +    }
> +
> +    /* 110 -> EOB, stored as symbol 128. */
> +    run_code[128] = 3;
> +    run_bits[128] = 3;
> +
> +    INIT_LE_VLC_STATIC(&ff_dc_alpha_run_vlc_le, ALPHA_VLC_BITS, 129,
> +                       run_bits, 1, 1,
> +                       run_code, 2, 2, 160);
> +
> +    for (level = 0; level < 256; level++) {
> +        /* The symbol is the level as an 8-bit two's complement value. */
> +        const int8_t value     = (int8_t)level;
> +        const int    magnitude = abs(value);
> +        const int    negative  = (value < 0) ? 1 : 0;
> +
> +        switch (magnitude) {
> +        case 1:
> +            /* 1s -> -1 or +1 (depending on sign bit). */
> +            level_code[level] = (negative << 1) | 1;
> +            level_bits[level] = 2;
> +            break;
> +        case 2:
> +        case 3:
> +        case 4:
> +        case 5:
> +            /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */
> +            level_code[level] = ((magnitude - 2) << 3) | (negative << 2) | 2;
> +            level_bits[level] = 5;
> +            break;
> +        default:
> +            /*
> +             * 00xxxxxxxx -> xxxxxxxx, in two's complement. 0 is technically
> +             * an illegal code (it would be encoded by increasing the run),
> +             * but it doesn't hurt and simplifies indexing.
> +             */
> +            level_code[level] = level << 2;
> +            level_bits[level] = 10;
> +            break;
> +        }
> +    }
> +
> +    INIT_LE_VLC_STATIC(&ff_dc_alpha_level_vlc_le, ALPHA_VLC_BITS, 256,
> +                       level_bits, 1, 1,
> +                       level_code, 2, 2, 288);
> +}
> +
> +/*
> + * Return the lowest `bits` bits of num in reversed bit order, i.e. the
> + * code as it has to be stored for a little-endian bit reader.
> + *
> + * num   code value, right-aligned
> + * bits  length of the code in bits; values above 32 are not supported
> + *
> + * An empty code (bits <= 0) yields 0.
> + */
> +static uint32_t reverse(uint32_t num, int bits)
> +{
> +    /*
> +     * For bits == 0 the shift count below would be 32, which is undefined
> +     * for a 32-bit operand, so handle the empty code explicitly.
> +     */
> +    if (bits <= 0)
> +        return 0;
> +
> +    /* Reverse the whole word, then drop the bits that were not part of the code. */
> +    return bitswap_32(num) >> (32 - bits);
> +}
> +
> +/*
> + * Bit-reverse a table of variable-length codes.
> + *
> + * code           input codes
> + * bits           length in bits of each input code
> + * reversed_code  receives reverse(code[n], bits[n]) for every entry
> + * num_entries    number of entries in all three arrays
> + */
> +static void reverse_code(const uint16_t *code, const uint8_t *bits,
> +                         uint16_t *reversed_code, int num_entries)
> +{
> +    int idx = 0;
> +
> +    while (idx < num_entries) {
> +        reversed_code[idx] = reverse(code[idx], bits[idx]);
> +        idx++;
> +    }
> +}
> +
> +static av_cold int speedhq_decode_init(AVCodecContext *avctx)
> +{
> + static int done = 0;
> + uint16_t ff_mpeg12_vlc_dc_lum_code_reversed[12];
> + uint16_t ff_mpeg12_vlc_dc_chroma_code_reversed[12];
> + SHQContext * const s = avctx->priv_data;
> +
> + s->avctx = avctx;
> +
> + if (!done) {
> + int i;
> +
> + /* Exactly the same as MPEG-2, except little-endian. */
> + reverse_code(ff_mpeg12_vlc_dc_lum_code,
> + ff_mpeg12_vlc_dc_lum_bits,
> + ff_mpeg12_vlc_dc_lum_code_reversed,
> + 12);
> + INIT_LE_VLC_STATIC(&ff_dc_lum_vlc_le, DC_VLC_BITS, 12,
> + ff_mpeg12_vlc_dc_lum_bits, 1, 1,
> + ff_mpeg12_vlc_dc_lum_code_reversed, 2, 2, 512);
> + reverse_code(ff_mpeg12_vlc_dc_chroma_code,
> + ff_mpeg12_vlc_dc_chroma_bits,
> + ff_mpeg12_vlc_dc_chroma_code_reversed,
> + 12);
What about storing the already-reversed codes directly in the source, so that this run-time reversal step is not needed?
More information about the ffmpeg-devel
mailing list