[FFmpeg-devel] [PATCH] Mimic encoder

Michael Niedermayer michaelni
Tue Apr 29 14:45:49 CEST 2008


On Tue, Apr 29, 2008 at 03:53:44AM +0100, Ramiro Polla wrote:
> Hello,
>
> Attached files add a Mimic encoder to lavc, divided in two parts:
>
> The 01_* files split the current mimic.c into mimic.c, mimicdec.c and 
> mimic.h. The files attached are not diff'd against their ancient (except 
> for mimicdec.c) to ease review, but on commit it'll be done with svn cp.

The split looks ok


[...]
> static VlcSymbol vlc_alphabet_gen[16][8] = {
> { {  3, 0x00000001,  0, 0x00000000, }, {  4, 0x00000007,  0, 0x00000000, },
>   {  6, 0x00000027,  0, 0x00000000, }, {  8, 0x000000cf,  0, 0x00000000, },
>   { 10, 0x0000035f,  0, 0x00000000, }, { 12, 0x00000eff,  0, 0x00000000, },
>   { 17, 0x0001fd7f,  0, 0x00000000, }, { 17, 0x0001fd01,  0, 0x00000000, }, },
> { {  5, 0x00000017,  0, 0x00000000, }, {  8, 0x000000e7,  0, 0x00000000, },
>   {  9, 0x000001d7,  0, 0x00000000, }, { 12, 0x00000f8f,  0, 0x00000000, },
>   { 15, 0x00007f1f,  0, 0x00000000, }, { 16, 0x0000fe7f,  0, 0x00000000, },
>   { 27, 0x07fffff9,  7, 0x0000007f, }, { 27, 0x07fffff9,  7, 0x00000001, }, },
> { {  6, 0x00000037,  0, 0x00000000, }, {  9, 0x000001ef,  0, 0x00000000, },
>   { 12, 0x00000fd7,  0, 0x00000000, }, { 13, 0x00001fbf,  0, 0x00000000, },
>   { 25, 0x01ffff7f,  0, 0x00000000, }, { 30, 0x3ffffe3f,  0, 0x00000000, },
>   { 27, 0x07fffffa,  7, 0x0000007f, }, { 27, 0x07fffffa,  7, 0x00000001, }, },
> { {  7, 0x00000071,  0, 0x00000000, }, { 10, 0x000003ef,  0, 0x00000000, },
>   { 17, 0x0001ffdf,  0, 0x00000000, }, { 21, 0x001fffbf,  0, 0x00000000, },
>   { 26, 0x03ffff1f,  0, 0x00000000, }, { 30, 0x3ffffe7f,  0, 0x00000000, },
>   { 27, 0x07fffffb,  7, 0x0000007f, }, { 27, 0x07fffffb,  7, 0x00000001, }, },
> { {  8, 0x000000f1,  0, 0x00000000, }, { 11, 0x000007e3,  0, 0x00000000, },
>   { 18, 0x0003ffc7,  0, 0x00000000, }, { 22, 0x003fff8f,  0, 0x00000000, },
>   { 26, 0x03ffff3f,  0, 0x00000000, }, { 30, 0x3ffffebf,  0, 0x00000000, },
>   { 28, 0x0ffffff8,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { {  8, 0x000000f3,  0, 0x00000000, }, { 11, 0x000007e7,  0, 0x00000000, },
>   { 18, 0x0003ffcf,  0, 0x00000000, }, { 22, 0x003fff9f,  0, 0x00000000, },
>   { 26, 0x03ffff5f,  0, 0x00000000, }, { 30, 0x3ffffeff,  0, 0x00000000, },
>   { 28, 0x0ffffff9,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { {  8, 0x000000f5,  0, 0x00000000, }, { 14, 0x00003feb,  0, 0x00000000, },
>   { 18, 0x0003ffd7,  0, 0x00000000, }, { 22, 0x003fffaf,  0, 0x00000000, },
>   { 26, 0x03ffff7f,  0, 0x00000000, }, { 31, 0x7ffffe3f,  0, 0x00000000, },
>   { 28, 0x0ffffffa,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { {  9, 0x000001f3,  0, 0x00000000, }, { 14, 0x00003fef,  0, 0x00000000, },
>   { 18, 0x0003ffdf,  0, 0x00000000, }, { 22, 0x003fffbf,  0, 0x00000000, },
>   { 27, 0x07ffff1f,  0, 0x00000000, }, { 31, 0x7ffffe7f,  0, 0x00000000, },
>   { 28, 0x0ffffffb,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { {  9, 0x000001f5,  0, 0x00000000, }, { 15, 0x00007fe3,  0, 0x00000000, },
>   { 19, 0x0007ffc7,  0, 0x00000000, }, { 23, 0x007fff8f,  0, 0x00000000, },
>   { 27, 0x07ffff3f,  0, 0x00000000, }, { 31, 0x7ffffebf,  0, 0x00000000, },
>   { 29, 0x1ffffff8,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { { 11, 0x000007f7,  0, 0x00000000, }, { 15, 0x00007fe7,  0, 0x00000000, },
>   { 19, 0x0007ffcf,  0, 0x00000000, }, { 23, 0x007fff9f,  0, 0x00000000, },
>   { 27, 0x07ffff5f,  0, 0x00000000, }, { 31, 0x7ffffeff,  0, 0x00000000, },
>   { 29, 0x1ffffff9,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { { 12, 0x00000ff1,  0, 0x00000000, }, { 15, 0x00007feb,  0, 0x00000000, },
>   { 19, 0x0007ffd7,  0, 0x00000000, }, { 23, 0x007fffaf,  0, 0x00000000, },
>   { 27, 0x07ffff7f,  0, 0x00000000, }, { 32, 0xfffffe3f,  0, 0x00000000, },
>   { 29, 0x1ffffffa,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { { 12, 0x00000ff3,  0, 0x00000000, }, { 15, 0x00007fef,  0, 0x00000000, },
>   { 19, 0x0007ffdf,  0, 0x00000000, }, { 23, 0x007fffbf,  0, 0x00000000, },
>   { 28, 0x0fffff1f,  0, 0x00000000, }, { 32, 0xfffffe7f,  0, 0x00000000, },
>   { 29, 0x1ffffffb,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { { 12, 0x00000ff5,  0, 0x00000000, }, { 16, 0x0000ffe3,  0, 0x00000000, },
>   { 20, 0x000fffc7,  0, 0x00000000, }, { 24, 0x00ffff8f,  0, 0x00000000, },
>   { 28, 0x0fffff3f,  0, 0x00000000, }, { 32, 0xfffffebf,  0, 0x00000000, },
>   { 30, 0x3ffffff8,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { { 12, 0x00000ff7,  0, 0x00000000, }, { 16, 0x0000ffe7,  0, 0x00000000, },
>   { 20, 0x000fffcf,  0, 0x00000000, }, { 24, 0x00ffff9f,  0, 0x00000000, },
>   { 28, 0x0fffff5f,  0, 0x00000000, }, { 32, 0xfffffeff,  0, 0x00000000, },
>   { 30, 0x3ffffff9,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { { 13, 0x00001ff1,  0, 0x00000000, }, { 16, 0x0000ffeb,  0, 0x00000000, },
>   { 20, 0x000fffd7,  0, 0x00000000, }, { 24, 0x00ffffaf,  0, 0x00000000, },
>   { 28, 0x0fffff7f,  0, 0x00000000, }, { 27, 0x07fffff8,  6, 0x0000003f, },
>   { 30, 0x3ffffffa,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
> { { 13, 0x00001ff3,  0, 0x00000000, }, {  2, 0x00000003,  0, 0x00000000, },
>   {  3, 0x00000007,  0, 0x00000000, }, { 31, 0x7ffffffb,  4, 0x0000000f, },
>   {  5, 0x0000001f,  0, 0x00000000, }, {  6, 0x0000003f,  0, 0x00000000, },
>   {  7, 0x0000007f,  0, 0x00000000, }, {  0, 0x00000000,  0, 0x00000000, }, },
> };

This looks duplicated from mimic.c

[...]
> static int mimic_encode_init(AVCodecContext *avctx)
> {
>     MimicContext *ctx = avctx->priv_data;
>     int i;
> 
>     if(!(avctx->width == 160 && avctx->height == 120) &&
>        !(avctx->width == 320 && avctx->height == 240)) {
>         av_log(avctx, AV_LOG_ERROR, "size must be 320x240 or 160x120\n");
>         return -1;
>     }
> 
>     ctx->avctx = avctx;
> 
>     for (i = 0 ; i < 3 ; i++) {
>         ctx->num_vblocks[i] = -((-avctx->height) >> (3 + !!i));
>         ctx->num_hblocks[i] =     avctx->width   >> (3 + !!i) ;
>     }
> 
>     ctx->cur_index = 15;
>     ctx->num_coeffs = 28;
> 
>     for (i = 0; i < 16; i++) {
>         if(avctx->get_buffer(avctx, &ctx->buf_ptrs[i])) {
>             av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
>             return -1;
>         }
>         ff_mimic_prepare_avpic(ctx, &ctx->flipped_ptrs[i],
>               (AVPicture*) &ctx->buf_ptrs    [i]);
>     }
> 
>     /* TODO Add a way to get quality per frame from the context. */
>     ctx->quality = ENCODER_QUALITY_DEFAULT;
> 

>     avcodec_get_frame_defaults((AVFrame*)&ctx->picture);
>     avctx->coded_frame = (AVFrame*)&ctx->picture;

senseless casts

[...]
> static void vlc_encode_block(MimicContext *ctx,
>                               DCTELEM *idct_block, const DCTELEM *dct_block,
>                               int num_coeffs, int qscale)
> {
>     const int qscale_dec = qscale << 2;
>     int num_zeroes = 0;
>     int value;
>     int i;
> 
>     memset(idct_block, 0, sizeof(DCTELEM)*64);
> 
>     value = shift_rnd(dct_block[0], 6);
>     idct_block[0] = value << 3;
> 
>     /* The DC value is written out as is. */
>     put_bits(&ctx->pb, 8, value);
> 
>     for (i = 1; i < num_coeffs && num_zeroes <= 14; i++) {
                                               ^^ ^^
If you cannot encode >14 zeros but there are >14 zeros then you should
check if it's better to encode the element most different from zero as
non zero or to encode the rest of the block as non zero.


>         int coeff;
>         value = dct_block[ff_zigzag_transposed[i]];
> 
>         if(i < 3) {
>             value = av_clip(shift_rnd(value, 7), -120, 120);
>             coeff = value << 4;
>         } else {

>             value = av_rescale(shift_rnd(value,5),1001,qscale);

no, double rounding/quantization is not acceptable

Also, we have existing dct_quantize() code, which also exists in MMX & co.;
that should be used and, if needed, cleaned up so that it is cleanly usable.


>             value = av_clip(value, -120, 120);
>             coeff = (value * qscale_dec) / 1001;
>         }
> 
>         idct_block[ctx->scantable.permutated[i]] = coeff;
> 
>         if(value) {
>             VlcSymbol sym = vlc_alphabet[num_zeroes][FFABS(value) - 1];
> 

>             if(sym.length1 <= 0)
>                 break;

When can this be true?


[...]
> static uint8_t *encode_diff(MimicContext *ctx, int plane, int is_chroma,
>                             uint8_t *cur, uint8_t *prev, int offset,
>                             int stride, int rows)
> {
>     uint8_t *ret = NULL;
>     int match;
>     int i;
> 
>     match = ctx->dsp.sse[1](NULL, prev, cur, stride, rows);

This should use the function selected with mb_cmp


> 
>     if(match < threshold[is_chroma]) {
>         put_bits(&ctx->pb, 1, !is_chroma);
>         ret = prev;
>     } else if(!is_chroma) {
>         int num_backrefs = av_clip(ctx->avctx->frame_number - 1, 0, 15);
>         int best_match = threshold[is_chroma];
>         int best_index = 0;
> 
>         put_bits(&ctx->pb, 1, 0);
> 
>         for (i = 1; i <= num_backrefs; i++) {
>             int backref = (ctx->cur_index + i) & 15;
>             uint8_t *backbuf = ctx->flipped_ptrs[backref].data[plane] + offset;
> 
>             match = ctx->dsp.sse[1](NULL, backbuf, cur, stride, rows);
> 
>             if(match < best_match) {
>                 best_index = i;
>                 best_match = match;
>             }
>         }
> 
>         if(best_index) {
>             int backref = (ctx->cur_index + best_index) & 15;
> 
>             ret = ctx->flipped_ptrs[backref].data[plane] + offset;
>             put_bits(&ctx->pb, 1, 1);
>             put_bits(&ctx->pb, 4, best_index);
>         }
>     }

You should encode the block in all possible choices and select the one
which minimizes SSE + lambda2*bitrate, where lambda2 is a constant set
based on quality. Do this at least when mb_decision == FF_MB_DECISION_RD;
if mb_decision is something else then you can use a heuristic such
as the one above ...


lambda= AVFrame.quality;
lambda2= (lambda*lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;

and then minimize (SSE<<FF_LAMBDA_SHIFT) + lambda2*bitrate

Why is this better than a simple threshold?
Well, if you have 2 independent blocks and you choose for each the encoding
which minimizes SSE[block_i] + C*bitrate[block_i] then it's obvious that
both together will be encoded so that SSE + C*bitrate of both is at its
global minimum.

Why is this minimum a good choice?
Because each such minimum for each specific constant corresponds to an
encoding which maximizes the quality (minimizes SSE) for a given bitrate.


> 
>     return ret;
> }
> 

> static void encode_plane(MimicContext *ctx, int plane, int is_pframe)
> {
>     const int is_chroma = !!plane;
>     const int stride = ctx->flipped_ptrs[ctx->cur_index ].linesize[plane];
>     uint8_t *cur     = ctx->flipped_ptrs[ctx->cur_index ].data    [plane];
>     uint8_t *prev    = ctx->flipped_ptrs[ctx->prev_index].data    [plane];
>     const int qscale = av_clip(10000-ctx->quality,is_chroma?1000:2000,10000);
>     int rows_shift = 0;
>     int offset = 0;
>     int x, y;
> 

>     /* Bleed bottom line for 160x120 videos */
>     if(plane && ctx->avctx->height & 15) {
>         ctx->dsp.draw_edges(cur, stride,
>                             ctx->avctx->width>>1, ctx->avctx->height>>1, 4);
>         rows_shift = 8;
>     }

Reflection is better than duplication for "invisible pixels" quality-wise,
IIRC.

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The educated differ from the uneducated as much as the living from the
dead. -- Aristotle 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080429/bb3c1831/attachment.pgp>



More information about the ffmpeg-devel mailing list