[FFmpeg-devel] [RFC v2 3/3] daaladec: Implement a native Daala decoder
Andreas Cadhalpun
andreas.cadhalpun at googlemail.com
Tue Dec 29 16:55:13 CET 2015
On 29.12.2015 03:12, Rostislav Pehlivanov wrote:
> Hmm, kinda odd.
> I've attached a v2 of the RFC Patch which does this:
> 1. Fixed some typos.
> 2. Fixed some missing newlines at the end of daalatab*
> 3. Moves the pix_fmt finding function inside the main decoder file.
>
> Try building now.
First, this fails to build with --enable-shared, because daala_find_p_format
is not static.
Second, this decoder crashes, hangs and triggers lots of undefined behavior. :-/
Third, the decoder seems to return only the first frame, repeatedly,
instead of the following frames.
More details are interleaved in the diff below.
> diff --git a/libavcodec/daala.h b/libavcodec/daala.h
> new file mode 100644
> index 0000000..535e78f
> --- /dev/null
> +++ b/libavcodec/daala.h
> @@ -0,0 +1,78 @@
[...]
> +typedef struct DaalaBitstreamHeader {
> + uint8_t key_frame;
> + uint8_t bipred;
> + uint8_t ref_num;
> + uint8_t act_mask;
> + uint8_t qm;
> + uint8_t haar;
> + uint8_t golden;
> + uint8_t pvq_qm[DAALA_MAX_PLANES][DAALA_QM_SIZE];
> +} DaalaBitstreamHeader;
> +
> +typedef struct DaalaSharedContext {
> + DaalaBitstreamHeader h;
> +} DaalaSharedContext;
What is the point of having DaalaSharedContext in addition to DaalaBitstreamHeader?
> diff --git a/libavcodec/daala_entropy.h b/libavcodec/daala_entropy.h
> new file mode 100644
> index 0000000..3fdcaef
> --- /dev/null
> +++ b/libavcodec/daala_entropy.h
> @@ -0,0 +1,554 @@
[...]
> +/* Updates the generic exponential probability model */
> +static av_always_inline void daalaent_exp_model_update(DaalaCDF *c, int *ex, int x,
> + int xs, int id, int integrate)
> +{
> + int i, xenc;
> + ent_rng *cdf = &c->cdf[id*c->y];
> + if (cdf[15] + c->inc > 32767) {
> + for (i = 0; i < 16; i++)
> + cdf[i] = (cdf[i] >> 1) + i + 1;
> + }
> + xenc = FFMIN(15, xs);
> + for (i = xenc; i < 16; i++)
> + cdf[i] += c->inc;
> + x = FFMIN(x, 32767);
> + *ex += ((x << 16) - *ex) >> integrate;
This subtraction can overflow.
[...]
> +/* "+derf | It was a hack for the screen coding wavelet tools." */
A rather bad hack...
> +static inline int daalaent_decode_unary(DaalaEntropy *e)
> +{
> + int rval = 0;
> + while (!daalaent_decode_bits(e, 1))
This loop only terminates once a 1 bit is read, so it can spin forever and hang the decoder.
> + rval++;
> + return rval;
> +}
[...]
> +/* Decodes quantized coefficients from the bitstream */
> +static inline void daalaent_decode_laplace_vector(DaalaEntropy *e, dctcoef *y,
> + int n, int k, dctcoef *curr,
> + const dctcoef *means)
> +{
> + int i, exp_q8, mean_k_q8, mean_sum_ex_q8, sum_ex = 0, kn = k, ran_delta = 0;
> + if (k <= 1) {
> + daalaent_decode_laplace_delta(e, y, n, k, curr, means);
> + return;
> + }
> + if (!k) {
> + curr[DAALAENT_PVQ_COUNT] = DAALAENT_PVQ_NOVAL;
> + curr[DAALAENT_PVQ_COUNT_EX] = DAALAENT_PVQ_NOVAL;
> + curr[DAALAENT_PVQ_K] = 0;
> + curr[DAALAENT_PVQ_SUM_EX] = 0;
> + memset(y, 0, n*sizeof(dctcoef));
> + return;
> + }
> + mean_k_q8 = means[DAALAENT_PVQ_K];
> + mean_sum_ex_q8 = means[DAALAENT_PVQ_SUM_EX];
> + if (mean_k_q8 < 1 << 23)
> + exp_q8 = 256*mean_k_q8/(1 + mean_sum_ex_q8);
This multiplication can overflow.
> + else
> + exp_q8 = mean_k_q8/(1 + (mean_sum_ex_q8 >> 8));
> + for (i = 0; i < n; i++) {
> + int x, ex;
> + if (!kn)
> + break;
> + if (kn <= 1 && i != n - 1) {
> + daalaent_decode_laplace_delta(e, y + i, n - i, kn, curr, means);
> + ran_delta = 1;
> + i = n;
> + break;
> + }
> + ex = (2*exp_q8*kn + (n - i))/(2*(n - i));
> + if (ex > kn*256)
> + ex = kn*256;
> + sum_ex += (2*256*kn + (n - i))/(2*(n - i));
All these multiplications involving kn can overflow.
> + if (i != n - 1)
> + x = daalaent_decode_laplace_pvq(e, ex, kn);
> + else
> + x = kn;
> + y[i] = x*daalaent_cphase(e, x);
> + kn -= abs(x);
> + }
> + memset(&y[i], 0, (n - i)*sizeof(dctcoef)); /* Zero the rest */
> + if (!ran_delta) {
> + curr[DAALAENT_PVQ_COUNT] = DAALAENT_PVQ_NOVAL;
> + curr[DAALAENT_PVQ_COUNT_EX] = DAALAENT_PVQ_NOVAL;
> + }
> + curr[DAALAENT_PVQ_K] = k - kn;
> + curr[DAALAENT_PVQ_SUM_EX] = sum_ex;
> +}
> +
> +/* Expectation value is in Q16 */
> +static inline int daalaent_decode_generic(DaalaEntropy *e, DaalaCDF *c, int *ex,
> + int max, int integrate)
> +{
> + int rval, lsb = 0, log_ex = daalaent_log_ex(*ex);
> + const int shift = FFMAX(0, (log_ex - 5) >> 1);
> + const int id = FFMIN(DAALAENT_MODEL_TAB - 1, log_ex);
> + const int ms = (max + (1 << shift >> 1)) >> shift;
> + int xs = (max == -1) ? 16 : FFMIN(ms + 1, 16);
> + ent_rng *cdf = &c->cdf[id*c->y];
> + if (!max)
> + return 0;
> + if ((xs = daalaent_decode_cdf(e, cdf, xs, 0, CDF_UNSCALED)) == 15) {
> + int g = ((2*(*ex) >> 8) + (1 << shift >> 1)) >> shift;
This multiplication can overflow, causing g to become negative, e.g. -256.
> + ent_win decay = FFMAX(2, FFMIN(254, 256*g/(g + 256)));
In that case (g == -256), g + 256 is zero, so this division crashes with a division by zero.
> +static inline void daalaent_decode_init(DaalaEntropy *e, const uint8_t *buf,
> + int buf_size)
> +{
> + e->rbuf = buf;
> + e->erbuf = buf + buf_size;
This doesn't check if buf_size is 0. In that case e->end_window is always 0,
causing daalaent_decode_bits to always return 0, leading to the infinite loop
in daalaent_decode_unary.
> + e->buf = buf;
> + e->ebuf = buf + buf_size;
> + e->err = 0;
> + e->diff = 0;
> + e->range = 32768;
> + e->count = -15;
> + e->eos_offset = 18 - DAALAENT_WSIZE;
> + e->end_window = 0;
> + e->end_window_size = 0;
> + daalaent_fillup(e);
> +}
[...]
> diff --git a/libavcodec/daala_pvq.h b/libavcodec/daala_pvq.h
> new file mode 100644
> index 0000000..ea8a86b
> --- /dev/null
> +++ b/libavcodec/daala_pvq.h
> @@ -0,0 +1,369 @@
[...]
> +static inline void daalapvq_synth(dctcoef *xcoeff, dctcoef *ypulse, dctcoef *ref,
> + int n, double gr, uint8_t ref_p, double gain,
> + double theta, const int16_t *qmatrix,
> + const int16_t *qmatrix_inv)
> +{
> + int i, m, nn = n - ref_p, s = 0, yy = 0;
> + double scale, r[DAALAPVQ_MAX_PART_SIZE], x[DAALAPVQ_MAX_PART_SIZE];
> + if (ref_p) {
> + for (i = 0; i < n; i++)
> + r[i] = ref[i]*qmatrix[i]*DAALA_QM_SCALE_UNIT;
The 'ref[i]*qmatrix[i]' multiplication can overflow.
> + }
> + m = !ref_p ? 0 : daalapvq_householder_c(r, n, gr, &s);
> + for (i = 0; i < nn; i++)
> + yy += ypulse[i]*ypulse[i];
> + scale = !yy ? 0 : gain/sqrt(yy);
> + if (!ref_p) {
> + for (i = 0; i < n; i++)
> + xcoeff[i] = lrint((ypulse[i]*scale)*(qmatrix_inv[i]*DAALA_QM_INV_SCALE_UNIT));
> + } else {
> + scale *= sin(theta);
> + for (i = 0; i < m; i++)
> + x[i] = ypulse[i]*scale;
> + x[m] = -s*gain*cos(theta);
> + for (i = m; i < nn; i++)
> + x[i+1] = ypulse[i]*scale;
> + daalapvq_householder_a(x, r, n);
> + for (i = 0; i < n; i++)
> + xcoeff[i] = lrint(x[i]*qmatrix_inv[i]*DAALA_QM_INV_SCALE_UNIT);
> + }
> +}
> +
> +static av_always_inline void daalapvq_adapt_shuffle(int *dst, int *src, int spd,
> + int idx, int mul)
> +{
> + if (src[idx] < 1)
> + return;
> + dst[idx+0] += (mul*src[idx+0] - dst[idx+0]) >> spd;
This multiplication can overflow.
[...]
> +static inline void daalapvq_decode_vector(DaalaEntropy *e, DaalaPVQ *pvq,
> + dctcoef *out, dctcoef *ref,
> + const double beta,
> + uint8_t key_frame, int p,
> + uint8_t *skip_rest,
> + uint8_t has_err, int band_idx,
> + int qm_off, enum DaalaBsize bsize)
> +{
> + int i, k;
> + int qg = 0, skip = 0, itheta = (!!key_frame), has_ref = !key_frame;
> + double qcg, gain, theta = 0.0f, gr = 0.0f, gain_off = 0.0f;
> + dctcoef tmp[DAALAPVQ_MAX_PART_SIZE] = {0};
> +
> + const int robust = has_err || key_frame;
> + const int band_len = pvq->size[band_idx];
> + const int16_t *qmatrix = &pvq->qmatrix[qm_off];
> + const int16_t *qmatrix_inv = &pvq->qmatrix_inv[qm_off];
> +
> + if (!skip_rest[(band_idx + 2) % 3]) {
> + int iloc = (!!p)*DAALA_NBSIZES*DAALAPVQ_PARTITIONS_MAX + bsize*DAALAPVQ_PARTITIONS_MAX + band_idx;
> + i = daalaent_decode_cdf_adapt(e, &pvq->pvqtheta_gain_cdf, iloc, 8 + 7*pvq->skip[band_idx]);
> + if (!key_frame && i >= 10)
> + i++;
> + if (key_frame && i >= 8)
> + i++;
> + if (i >= 8) {
> + i -= 8;
> + skip_rest[0] = skip_rest[1] = skip_rest[2] = 1;
> + }
> + qg = i & 1;
> + itheta = (i >> 1) - 1;
> + has_ref = !(itheta == -1);
> + }
> + if (qg) {
> + int *ex = pvq->pvqgain_ex[p][bsize] + band_idx, ex_tmp = *ex;
> + DaalaCDF *mcdf = has_ref ? &pvq->pvqgain_ref_mcdf : &pvq->pvqgain_noref_mcdf;
> + qg = 1 + daalaent_decode_generic(e, mcdf, &ex_tmp, -1, 2);
> + *ex += ((qg << 16) - *ex) >> 2;
The left shift, the subtraction and the final addition can all overflow.
[...]
> diff --git a/libavcodec/daaladec.c b/libavcodec/daaladec.c
> new file mode 100644
> index 0000000..3501a6b
> --- /dev/null
> +++ b/libavcodec/daaladec.c
> @@ -0,0 +1,804 @@
[...]
> +/* Get DC level */
> +static void get_haar_dc_sb(DaalaContext *s, HaarGradient *g, dctcoef *d, int x, int y,
> + uint8_t p, uint8_t lim_pass)
> +{
> + int q, q_dc;
> + int xdec = s->fmt->dec[p][0];
> + const int aw = s->width >> xdec;
> + const int ln = DAALA_LOG_BSIZE_MAX - xdec;
> + dctcoef dc_pred = 0, **dc_buf = s->haar_dc_buf[p];
> + if (!s->quantizer[p])
> + q_dc = 1;
> + else
> + q_dc = FFMAX(1, s->quantizer[p]*s->s.h.pvq_qm[p][daala_get_qm_idx(DAALA_NBSIZES - 1, 0)] >> 4);
> + if (x > 0 && y > 0) {
> + if (lim_pass) { /* ALERT: coeffs could change */
> + dc_pred = 22*dc_buf[x-1][y-0] - 9*dc_buf[x-1][y-1] +
> + 15*dc_buf[x+0][y-1] + 4*dc_buf[x+1][y-1];
> + } else {
> + dc_pred = 23*dc_buf[x-1][y-0] - 10*dc_buf[x-1][y-1] + 19*dc_buf[x-0][y-1];
> + }
These two dc_pred calculations can overflow in various ways.
> + dc_pred = (dc_pred + 16) >> 5;
> + } else {
> + dc_pred += x > 0 ? dc_buf[x-1][y-0] : 0;
> + dc_pred += y > 0 ? dc_buf[x-0][y-1] : 0;
> + }
> + q = daalaent_decode_generic(&s->e, &s->haar_dc_mcdf[p], &s->haar_sb_ex[p], -1, 2);
> + q *= daalaent_cphase(&s->e, q);
> + q = q*q_dc + dc_pred;
This multiplication can overflow.
[...]
> +/* Haar block decoding and transform */
> +static void decode_block_haar(DaalaContext *s, int x, int y, int p, enum DaalaBsize bsize)
> +{
> + int i, j, k, l, n = 1 << (bsize + 2);
> + const int dx = x << bsize, dy = y << bsize;
> + const int aw = s->width >> s->fmt->dec[p][0];
> + const int boffset = (dy << 2)*aw + (dx << 2);
> +
> + dctcoef tree[4][4];
> + dctcoef pred[DAALA_BSIZE_MAX*DAALA_BSIZE_MAX];
> + dctcoef tpred[DAALA_BSIZE_MAX*DAALA_BSIZE_MAX];
> +
> + daala_calc_prediction(s, pred, s->dcoef[p], dx, dy, p, bsize);
> + memcpy(tpred, pred, n*n*sizeof(dctcoef));
> +
> + tree[0][0] = daalaent_decode_cdf_adapt(&s->e, &s->haar_bit_cdf, p, 16);
> + if (tree[0][0] == 15)
> + tree[0][0] += daalaent_decode_unary(&s->e);
> +
> + if (tree[0][0] > 24) {
> + s->e.err = 1;
> + return;
> + } else if (tree[0][0] > 1) {
> + int tmp = daalaent_decode_bits(&s->e, tree[0][0] - 1);
> + tree[0][0] = (1 << (tree[0][0] - 1)) | tmp;
> + }
> +
> + tree[1][1] = decode_haar_coeff_tree_split(s, tree[0][0], 3, 0);
> + tree[0][1] = decode_haar_coeff_tree_split(s, tree[0][0] - tree[1][1], 4, 0);
> + tree[1][0] = tree[0][0] - tree[1][1] - tree[0][1];
> +
> + decode_tree_sum(s, pred, 1, 0, tree[0][1], bsize + 2, 0);
> + decode_tree_sum(s, pred, 0, 1, tree[1][0], bsize + 2, 1);
> + decode_tree_sum(s, pred, 1, 1, tree[1][1], bsize + 2, 2);
> +
> + for (i = 0; i < n; i++) {
> + for (j = (i == 0); j < n; j++)
> + pred[i*n + j] *= daalaent_cphase(&s->e, pred[i*n + j]);
> + }
> + for (i = 0; i < 3; i++) { /* Direction */
> + for (j = 0; j < bsize+2; j++) { /* Level */
> + int bo = (((i + 1) >> 1) << j)*n + (((i + 1) & 1) << j);
> + int q = !s->quantizer[p] ? 1 : s->quantizer[p]*daala_haar_qm[i == 2][j] >> 4;
> + for (k = 0; k < 1 << j; k++)
> + for (l = 0; l < 1 << j; l++)
> + pred[bo + k*n + l] = q*pred[bo + k*n + l] + tpred[bo + k*n + l];
The q*pred[] multiplication can overflow.
[...]
> diff --git a/libavcodec/daaladsp.c b/libavcodec/daaladsp.c
> new file mode 100644
> index 0000000..ba03520
> --- /dev/null
> +++ b/libavcodec/daaladsp.c
> @@ -0,0 +1,2123 @@
[...]
> +static av_always_inline void idct_1D_4(pixel *x, int xstride, const pixel y[4])
> +{
> + int t2h, t0 = y[0], t1 = y[1], t2 = y[2], t3 = y[3];
> + t3 += (t1*18293 + 8192) >> 14;
> + t1 -= (t3*21407 + 16384) >> 15;
> + t3 += (t1*23013 + 16384) >> 15;
> + t2 = t0 - t2;
These four lines can overflow.
> + t2h = DAALA_DCT_RSHIFT(t2, 1);
> + t0 -= t2h - DAALA_DCT_RSHIFT(t3, 1);
> + t1 = t2h - t1;
These two lines can overflow as well.
> + *(x + 0*xstride) = (pixel)t0;
> + *(x + 1*xstride) = (pixel)(t2 - t1);
This subtraction (t2 - t1) can overflow.
> + *(x + 2*xstride) = (pixel)t1;
> + *(x + 3*xstride) = (pixel)(t0 - t3);
This subtraction (t0 - t3) can overflow, too.
> +}
> +
> +static av_always_inline void idct_1D_8(pixel *x, int xstride, const pixel y[16])
> +{
> + int t1h, t4h, t6h, t0 = y[0], t1 = y[1], t2 = y[2], t3 = y[3], t4 = y[4];
> + int t5 = y[5], t6 = y[6], t7 = y[7];
> + t5 -= (t3*2485 + 4096) >> 13;
> + t3 += (t5*18205 + 16384) >> 15;
> + t5 -= (t3*2485 + 4096) >> 13;
> + t7 -= (t1*3227 + 16384) >> 15;
> + t1 += (t7*6393 + 16384) >> 15;
> + t7 -= (t1*3227 + 16384) >> 15;
> + t1 += t3;
These seven lines can overflow.
> + t1h = DAALA_DCT_RSHIFT(t1, 1);
> + t3 = t1h - t3;
This line can overflow, too.
> + t5 += t7;
> + t7 = DAALA_DCT_RSHIFT(t5, 1) - t7;
> + t3 += (t5*7489 + 4096) >> 13;
> + t5 -= (t3*11585 + 8192) >> 14;
> + t3 -= (t5*19195 + 16384) >> 15;
> + t6 += (t2*21895 + 16384) >> 15;
> + t2 -= (t6*15137 + 8192) >> 14;
> + t6 += (t2*21895 + 16384) >> 15;
> + t0 += (t4*13573 + 16384) >> 15;
> + t4 -= (t0*11585 + 8192) >> 14;
> + t0 += (t4*13573 + 16384) >> 15;
These nine lines can also overflow.
> + t4 = t2 - t4;
> + t4h = DAALA_DCT_RSHIFT(t4, 1);
> + t2 = t4h - t2;
> + t6 = t0 - t6;
> + t6h = DAALA_DCT_RSHIFT(t6, 1);
> + t0 -= t6h;
> + t7 = t6h - t7;
> + t6 -= t7;
> + t2 += DAALA_DCT_RSHIFT(t3, 1);
> + t3 = t2 - t3;
> + t5 += t4h;
> + t4 -= t5;
This can overflow.
> + t0 += t1h;
> + t1 = t0 - t1;
> + *(x + 0*xstride) = (pixel)t0;
> + *(x + 1*xstride) = (pixel)t4;
> + *(x + 2*xstride) = (pixel)t2;
> + *(x + 3*xstride) = (pixel)t6;
> + *(x + 4*xstride) = (pixel)t7;
> + *(x + 5*xstride) = (pixel)t3;
> + *(x + 6*xstride) = (pixel)t5;
> + *(x + 7*xstride) = (pixel)t1;
> +}
> +
> +static av_always_inline void idct_1D_16(pixel *x, int xstride, const pixel y[16])
> +{
> + int tfh, tbh, t1h, tdh, t8h, tch, tah, t2h;
> + int t0 = y[0], t1 = y[1], t2 = y[2], t3 = y[3], t4 = y[4], t5 = y[5];
> + int t6 = y[6], t7 = y[7], t8 = y[8], t9 = y[9], ta = y[10], tb = y[11];
> + int tc = y[12], td = y[13], te = y[14], tf = y[15];
> + t1 += (tf*13573 + 16384) >> 15;
> + tf -= (t1*11585 + 8192) >> 14;
> + t1 += ((tf*13573 + 16384) >> 15)+t7;
> + td -= (t3*10947 + 8192) >> 14;
> + t3 += (td*15137 + 8192) >> 14;
> + t5 += (tb*10947 + 8192) >> 14;
> + tb -= (t5*15137 + 8192) >> 14;
> + t5 += (tb*10947 + 8192) >> 14;
> + td += t5 - ((t3*21895 + 16384) >> 15);
These nine lines can overflow.
> + tf = t9 - tf;
> + tb += t3;
> + tfh = DAALA_DCT_RSHIFT(tf, 1);
> + t9 -= tfh;
> + tbh = DAALA_DCT_RSHIFT(tb, 1);
> + t3 += tfh - tbh;
> + t1h = DAALA_DCT_RSHIFT(t1, 1);
> + t7 = t1h - t7 + tbh;
> + tdh = DAALA_DCT_RSHIFT(td, 1);
> + t5 += t1h - tdh;
> + t9 = tdh - t9;
> + td -= t9;
> + tf = t3 - tf;
> + t1 -= t5 + ((tf*20055 + 16384) >> 15);
> + tf += (t1*23059 + 8192) >> 14;
> + t1 -= (tf*5417 + 4096) >> 13;
These three lines can overflow, too.
> + tb = t7 - tb;
> + t9 += (t7*14101 + 8192) >> 14;
This line can overflow, too.
> + t7 += (t9*3363 + 4096) >> 13;
> + t9 -= (t7*12905 + 8192) >> 14;
> + tb -= (td*4379 + 8192) >> 14;
> + td += (tb*20435 + 8192) >> 14;
> + tb -= (td*17515 + 16384) >> 15;
> + t3 += (t5*851 + 4096) >> 13;
> + t5 += (t3*14699 + 8192) >> 14;
> + t3 -= (t5*1035 + 1024) >> 11;
> + t6 -= (ta*7335 + 16384) >> 15;
> + ta -= (t6*12873 + 8192) >> 14;
> + te += (t2*2873 + 1024) >> 11;
> + t2 += (te*9041 + 16384) >> 15;
> + t6 = DAALA_DCT_RSHIFT(t2, 1) - t6 - ((ta*8593 + 8192) >> 14);
> + te = DAALA_DCT_RSHIFT(ta, 1) - te + ((t2*2275 + 1024) >> 11);
These thirteen lines can overflow, too.
> + t2 -= t6;
> + ta -= te;
> + t6 -= (ta*13573 + 16384) >> 15;
> + ta += (t6*11585 + 8192) >> 14;
> + t6 -= (ta*13573 + 16384) >> 15;
> + tc += (t4*9147 + 4096) >> 13;
> + t4 -= (tc*10703 + 8192) >> 14;
> + tc += (t4*23013 + 16384) >> 15;
These six lines can overflow, too.
[...]
> +static av_always_inline void idct_1D_32(pixel *x, int xstride, const pixel y[32])
> +{
> + int t0 = y[0], tg = y[1], t8 = y[2], to = y[3], t4 = y[4], tk = y[5];
> + int tc = y[6], ts = y[7], t2 = y[8], ti = y[9], ta = y[10], tq = y[11];
> + int t6 = y[12], tm = y[13], te = y[14], tu = y[15], t1 = y[16], th = y[17];
> + int t9 = y[18], tp = y[19], t5 = y[20], tl = y[21], td = y[22], tt = y[23];
> + int t3 = y[24], tj = y[25], tb = y[26], tr = y[27], t7 = y[28], tn = y[29];
> + int tf = y[30], tv = y[31];
> + OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
> + t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
This can overflow.
[...]
> +static av_always_inline void idct_1D_64(pixel *x, int xstride, const pixel y[64])
> +{
> + int t0 = y[0], tw = y[1], tg = y[2], tM = y[3], t8 = y[4], tE = y[5];
> + int to = y[6], tU = y[7], t4 = y[8], tA = y[9], tk = y[10], tQ = y[11];
> + int tc = y[12], tI = y[13], ts = y[14], tY = y[15], t2 = y[16], ty = y[17];
> + int ti = y[18], tO = y[19], ta = y[20], tG = y[21], tq = y[22], tW = y[23];
> + int t6 = y[24], tC = y[25], tm = y[26], tS = y[27], te = y[28], tK = y[29];
> + int tu = y[30], t_ = y[31], t1 = y[32], tx = y[33], th = y[34], tN = y[35];
> + int t9 = y[36], tF = y[37], tp = y[38], tV = y[39], t5 = y[40], tB = y[41];
> + int tl = y[42], tR = y[43], td = y[44], tJ = y[45], tt = y[46], tZ = y[47];
> + int t3 = y[48], tz = y[49], tj = y[50], tP = y[51], tb = y[52], tH = y[53];
> + int tr = y[54], tX = y[55], t7 = y[56], tD = y[57], tn = y[58], tT = y[59];
> + int tf = y[60], tL = y[61], tv = y[62], t = y[63];
> + OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
> + t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
> + th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
> + tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
This can overflow.
> +static void postfilter_4x4(pixel *dst, const pixel *src)
> +{
> + int t[4];
> + t[3] = src[0]-src[3];
> + t[2] = src[1]-src[2];
> + t[1] = src[1]-(t[2]>>1);
These two lines can overflow.
> + t[0] = src[0]-(t[3]>>1);
> + t[2] -= (t[3]*FILTER_PARAM_4_3+32)>>6;
> + t[3] -= (t[2]*FILTER_PARAM_4_2+32)>>6;
> + #if FILTER_PARAM_4_1 != 64
> + t[3] = t[3]*(1 << 6)/FILTER_PARAM_4_1;
> + #endif
> + #if FILTER_PARAM_4_0 != 64
> + t[2] = t[2]*(1 << 6)/FILTER_PARAM_4_0;
> + #endif
These four multiplications can overflow.
[...]
> +/* Chroma from luma */
> +static void daala_cfl_resample(uint8_t *_dst, int dstride, const uint8_t *_src,
> + int istride, int xdec, int ydec, int bs, int chroma_bs)
> +{
> + int i, j;
> + const int n = 4 << bs;
> + pixel *dst = (pixel *)_dst;
> + pixel *src = (pixel *)_src;
> + if (!chroma_bs && (xdec || ydec)) {
> + if (xdec) {
> + if (ydec) {
> + daala_sf_ful_up(dst, dstride, src, istride, n, n, n);
> + for (i = 0; i < 4; i++) {
> + for (j = 0; j < 4; j++) {
> + dst[i*dstride + j] = (daaladsp_cfl_scale[j][i]*dst[i*dstride + j] + 64) >> 7;
The daaladsp_cfl_scale * dst multiplication can overflow, as well.
Best regards,
Andreas
More information about the ffmpeg-devel mailing list