[FFmpeg-cvslog] vp9: increase buffer sizes for non-420 chroma subsamplings.
Ronald S. Bultje
git at videolan.org
Fri May 1 22:30:45 CEST 2015
ffmpeg | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Fri Apr 24 11:04:00 2015 -0400| [2d0bea4719588aa9caa3f452596b9748ba13059e] | committer: Ronald S. Bultje
vp9: increase buffer sizes for non-420 chroma subsamplings.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2d0bea4719588aa9caa3f452596b9748ba13059e
---
libavcodec/vp9.c | 67 ++++++++++++++++++++++++++++++------------------------
1 file changed, 37 insertions(+), 30 deletions(-)
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index bd852d4..0b11bdc 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -215,7 +215,7 @@ typedef struct VP9Context {
DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
- DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
+ DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
@@ -248,8 +248,8 @@ typedef struct VP9Context {
int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
struct { int x, y; } min_mv, max_mv;
- DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
- DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
+ DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
+ DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64];
uint16_t mvscale[3][2];
uint8_t mvstep[3][2];
} VP9Context;
@@ -307,39 +307,42 @@ static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
return 0;
}
-static int update_size(AVCodecContext *ctx, int w, int h)
+static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
{
VP9Context *s = ctx->priv_data;
uint8_t *p;
av_assert0(w > 0 && h > 0);
- if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
+ if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
return 0;
- ctx->width = w;
- ctx->height = h;
- s->sb_cols = (w + 63) >> 6;
- s->sb_rows = (h + 63) >> 6;
- s->cols = (w + 7) >> 3;
- s->rows = (h + 7) >> 3;
+ ctx->width = w;
+ ctx->height = h;
+ ctx->pix_fmt = fmt;
+ s->sb_cols = (w + 63) >> 6;
+ s->sb_rows = (h + 63) >> 6;
+ s->cols = (w + 7) >> 3;
+ s->rows = (h + 7) >> 3;
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
av_freep(&s->intra_pred_data[0]);
- p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
+ // FIXME we slightly over-allocate here for subsampled chroma, but a little
+ // bit of padding shouldn't affect performance...
+ p = av_malloc(s->sb_cols * (320 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
if (!p)
return AVERROR(ENOMEM);
assign(s->intra_pred_data[0], uint8_t *, 64);
- assign(s->intra_pred_data[1], uint8_t *, 32);
- assign(s->intra_pred_data[2], uint8_t *, 32);
+ assign(s->intra_pred_data[1], uint8_t *, 64);
+ assign(s->intra_pred_data[2], uint8_t *, 64);
assign(s->above_y_nnz_ctx, uint8_t *, 16);
assign(s->above_mode_ctx, uint8_t *, 16);
assign(s->above_mv_ctx, VP56mv(*)[2], 16);
+ assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
+ assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
assign(s->above_partition_ctx, uint8_t *, 8);
assign(s->above_skip_ctx, uint8_t *, 8);
assign(s->above_txfm_ctx, uint8_t *, 8);
- assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
- assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
assign(s->above_segpred_ctx, uint8_t *, 8);
assign(s->above_intra_ctx, uint8_t *, 8);
assign(s->above_comp_ctx, uint8_t *, 8);
@@ -358,34 +361,39 @@ static int update_size(AVCodecContext *ctx, int w, int h)
static int update_block_buffers(AVCodecContext *ctx)
{
VP9Context *s = ctx->priv_data;
+ int chroma_blocks, chroma_eobs;
if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
return 0;
av_free(s->b_base);
av_free(s->block_base);
+ chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
+ chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
if (s->frames[CUR_FRAME].uses_2pass) {
int sbs = s->sb_cols * s->sb_rows;
s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
- s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
+ s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
+ 16 * 16 + 2 * chroma_eobs) * sbs);
if (!s->b_base || !s->block_base)
return AVERROR(ENOMEM);
s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
- s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
- s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
- s->uveob_base[0] = s->eob_base + 256 * sbs;
- s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
+ s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks;
+ s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks);
+ s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
+ s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
} else {
s->b_base = av_malloc(sizeof(VP9Block));
- s->block_base = av_mallocz((64 * 64 + 128) * 3);
+ s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
+ 16 * 16 + 2 * chroma_eobs);
if (!s->b_base || !s->block_base)
return AVERROR(ENOMEM);
s->uvblock_base[0] = s->block_base + 64 * 64;
- s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
- s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
- s->uveob_base[0] = s->eob_base + 256;
- s->uveob_base[1] = s->uveob_base[0] + 64;
+ s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks;
+ s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks);
+ s->uveob_base[0] = s->eob_base + 16 * 16;
+ s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
}
s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
@@ -772,8 +780,8 @@ static int decode_frame_header(AVCodecContext *ctx,
}
/* tiling info */
- if ((res = update_size(ctx, w, h)) < 0) {
- av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
+ if ((res = update_size(ctx, w, h, fmt)) < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
return res;
}
for (s->tiling.log2_tile_cols = 0;
@@ -3960,7 +3968,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
memset(s->left_mode_ctx, NEARESTMV, 8);
}
memset(s->left_y_nnz_ctx, 0, 16);
- memset(s->left_uv_nnz_ctx, 0, 16);
+ memset(s->left_uv_nnz_ctx, 0, 32);
memset(s->left_segpred_ctx, 0, 8);
memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
@@ -4089,7 +4097,6 @@ static av_cold int vp9_decode_init(AVCodecContext *ctx)
VP9Context *s = ctx->priv_data;
ctx->internal->allocate_progress = 1;
- ctx->pix_fmt = AV_PIX_FMT_YUV420P;
ff_vp9dsp_init(&s->dsp);
ff_videodsp_init(&s->vdsp, 8);
s->filter.sharpness = -1;
More information about the ffmpeg-cvslog mailing list