[Ffmpeg-devel] [RFC] [PATCH] FLC/FLX/DTA encoder

Thu Feb 22 17:17:00 CET 2007

Hi

On Sun, Feb 18, 2007 at 02:19:11AM +0100, Alex Beregszaszi wrote:
> Hi,
> 
> attached is a huge patch adding three decoders: FLC, FLX and DTA.
> 
> FLC is the 8bit RLE based format, FLX supports 15bits, while DTA adds
> new compression methods to FLX.
> 
> The IFR (interframe reorder) code in palette.c reorders a frame in order
> to achieve only palette changes, thus possibly never emitting any newly
> coded frames, but only palette differences. It is used in the encoder.
> 
> Known:
> * current patch has hard tabs and maybe indentation issues (but any
> cosmetic recommendations are welcome)
> 
> Question:
> * should it be split into flicenc.c ?
> 
> Applying: patch is made against current SVN. You need to place palette.c
> and h into libavcodec/.
> 
> --
> Alex Beregszaszi
> 

[...]

> +    uint32_t *pal = (uint32_t*)p->data[1], *oldpal = (uint32_t*)s->prev_frame.data[1];

I'd add a newline in the middle for readability

[...]
> +static int encode_black(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> +    int i;
> +
> +    // FIXME: make it faster
> +    for (i = 0; i < avctx->height*p->linesize[0]; i++)
> +        if (p->data[0][i] != 0)
> +            break;
> +
> +    if (i != avctx->height*p->linesize[0])
> +        return -1; // not full black

for (i = 0; i < avctx->height*p->linesize[0]; i++)
    if (p->data[0][i])
        return -1; // not full black

> +
> +    AV_WL32(buf, 6); // size field
> +    AV_WL16(buf+4, FLI_BLACK); // type
> +
> +    return 6;
> +}
> +
> +static int encode_copy(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> +    int pos = 6, i;
> +    const int stride = avctx->width*((avctx->bits_per_sample+7)/8);

As avctx->width*((avctx->bits_per_sample+7)/8) occurs several times, maybe a
bytes_per_line field in FlicEncodeContext would be a good idea?

[...]
> +static int encode_brun(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> +    int lines, pos = 6, y_ptr = 0, pixel_ptr;
> +    unsigned char *pixels = p->data[0];
> +    const int stride = avctx->width*((avctx->bits_per_sample+7)/8);
> +//    const int stride = avctx->width;
> +
> +    AV_WL16(buf+4, FLI_BRUN); // type
> +    
> +    for (lines = 0; lines < avctx->height; lines++) {
> +	int nppos = pos, npackets = 0;
> +	int pixel_countdown = stride;
> +	int copy_pos = pos; 
> +	unsigned int run_count = 0;
> +
> +	pixel_ptr = y_ptr;
> +
> +	pos++; // number of packets, patched later
> +
> +	while (pixel_countdown > 0) {
> +	    // no point checking for a run if only 1 byte left
> +	    if ((pixel_countdown > 2) && 
> +		(pixels[pixel_ptr] == pixels[pixel_ptr+1]) &&
> +		(pixels[pixel_ptr] == pixels[pixel_ptr+2])) {
> +		// 3 bytes the same in a row, so start counting the run
> +		// 3 is the threshold, because a run of two forces and extra byte
> +		
> +		// flush old run
> +		if (run_count != 0) {
> +		    AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> +		    run_count = 0;
> +		}
> +
> +		run_count = 3;
> +		while ((pixels[pixel_ptr] == pixels[pixel_ptr+run_count]) && 
> +		       (run_count < pixel_countdown) && 
> +		       (run_count < 127)) {
> +		    run_count++;
> +		}
> +		npackets++;
> +		AV_WL8(buf+pos, (signed char)(run_count));

what does the signed char cast do here?

> +		pos++;
> +		AV_WL8(buf+pos, pixels[pixel_ptr]);
> +		pos++;
> +		pixel_ptr       += run_count;
> +		pixel_countdown -= run_count;
> +		run_count = 0;
> +	    } else {
> +	        // 3 bytes in a row not able to be RLE'd, so emit the copy
> +
> +		// flush old run
> +	        if (run_count == 128) {
> +		    AV_WL8(buf+copy_pos, (signed char)(0-run_count));

AV_WL8(buf+copy_pos, 128);

> +		    run_count = 0;
> +                }
> +
> +		// start of a copy run
> +		if (run_count == 0)
> +		{
> +		    copy_pos = pos; // written later 
> +		    pos++;
> +		    npackets++;
> +		}
> +		run_count++;
> +		AV_WL8(buf+pos, pixels[pixel_ptr]);
> +		pos++;
> +		pixel_ptr++;
> +		pixel_countdown--;
> +	    }
> +        }
> +
> +	// flush old run
> +	if (run_count != 0) {
> +	    AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> +	    run_count = 0;
> +        }
> +
> +	// this frame would be invalid this way, so just report failed
> +	// FIXME: try solvin it in a better way
> +	if (npackets > 255) 
> +	    return -1;
> +
> +	AV_WL8(buf+nppos, npackets);

what about

while (pixel < end) {
    int run=1;

    while(pixel+1 < end && run < 127 && pixel[0] == pixel[1]){
        run++;
        pixel++;
    }
    if(run > limit){
        *buf++= run;
        *buf++= *pixel++;
    }else{
        pixel -= run - 2;
        run=1;
        while(pixel+1 < end && run < 127 && pixel[0] != pixel[1]){
            run++;
            pixel++;
        }
        *buf++= - run;
        memcpy(buf, pixel-run, run);
        buf+= run;
    }
    npackets++;
    if(npackets>255){
        limit++;
        goto retry_from_begin;
    }
}

> +	
> +	y_ptr += p->linesize[0];
> +    }
> +
> +    // padding
> +    if ((pos % 2) == 1) {
> +	AV_WL8(buf+pos, 0);
> +	pos++;
> +    }
> +    
> +    AV_WL32(buf, pos); // size field
> +
> +    return pos;
> +}
> +
> +static int encode_dta_brun(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> +    int lines, pos = 6, y_ptr = 0, pixel_ptr;
> +    unsigned short *pixels = (unsigned short*)p->data[0];
> +
> +    // FIXME: fix this to handle 24bit input too
> +    if (avctx->codec_id != CODEC_ID_FLIDTA ||
> +	avctx->bits_per_sample == 8 ||
> +	avctx->bits_per_sample == 24)
> +	return -1;
> +
> +    AV_WL16(buf+4, FLI_DTA_BRUN); // type
> +    
> +    for (lines = 0; lines < avctx->height; lines++) {
> +	int nppos = pos, npackets = 0;
> +	int pixel_countdown = avctx->width;
> +	int copy_pos = pos; 
> +	unsigned int run_count = 0;
> +
> +	pixel_ptr = y_ptr;
> +
> +	pos++; // number of packets, patched later
> +
> +	while (pixel_countdown > 0) {
> +	    // no point checking for a run if only 1 byte left
> +	    if ((pixel_countdown > 1) && 
> +		(pixels[pixel_ptr] == pixels[pixel_ptr+1])) {
> +		// 2 bytes the same in a row, so start counting the run
> +		
> +		// flush old run
> +		if (run_count != 0) {
> +		    AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> +		    run_count = 0;
> +		}
> +
> +		run_count = 2;
> +		while ((pixels[pixel_ptr] == pixels[pixel_ptr+run_count]) && 
> +		       (run_count < pixel_countdown) && 
> +		       (run_count < 127)) {
> +		    run_count++;
> +		}
> +		npackets++;
> +		AV_WL8(buf+pos, (signed char)(run_count));
> +		pos++;
> +		AV_WL16(buf+pos, pixels[pixel_ptr]);
> +		pos += 2;
> +		pixel_ptr       += run_count;
> +		pixel_countdown -= run_count;
> +		run_count = 0;
> +	    } else {
> +		// 2 bytes in a row not able to be RLE'd, so emit the copy
> +		
> +		// flush old run
> +	        if (run_count == 128) {
> +		    AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> +		    run_count = 0;
> +                }
> +
> +                // start of a copy run
> +		if (run_count == 0)
> +		{
> +		    copy_pos = pos; // written later 
> +		    pos++;
> +		    npackets++;
> +		}
> +		run_count++;
> +		AV_WL16(buf+pos, pixels[pixel_ptr]);
> +		pos += 2;
> +		pixel_ptr++;
> +		pixel_countdown--;
> +	    }
> +        }
> +
> +	// flush old run
> +	if (run_count != 0) {
> +	    AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> +	    run_count = 0;
> +        }
> +
> +	// this frame would be invalid this way, so just report failed
> +	// FIXME: try solvin it in a better way
> +	if (npackets > 255) 
> +	    return -1;
> +
> +	AV_WL8(buf+nppos, npackets);
> +	
> +	y_ptr += p->linesize[0]/((avctx->bits_per_sample+7)/8);
> +    }
> +
> +    // padding
> +    if ((pos % 2) == 1) {
> +	AV_WL8(buf+pos, 0);
> +	pos++;
> +    }
> +        
> +    AV_WL32(buf, pos); // size field
> +
> +    return pos;
> +}

very similar to encode_brun, this should be merged

> +
> +static int encode_lc(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {

this too and several other functions are very similar to the previous rle
encoders and must be merged

[...]

> +
> +// 8 bit FLC DELTA
> +static int encode_delta8(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
[...]
> +			    while (run_count > 254) {
> +				npackets++;
> +				AV_WL8(buf+pos, 254); // skip
> +				pos++;
> +				AV_WL8(buf+pos, 0x00); 
> +				pos++;

These really should be *buf++ = X, or bytestream_put_be16() could be used;
both should be more readable.

also the function is VERY messy and full of duplicated code

[...]
> +    // FIXME: this is rather unoptimal, but simple
> +    for (i = 0; flic_encoders[i>>1].encode != NULL; i++) {
> +        if ((avctx->codec_id == CODEC_ID_FLIC && !flic_encoders[i>>1].is_flc) ||
> +            (avctx->codec_id == CODEC_ID_FLIX && !flic_encoders[i>>1].is_flx) ||
> +            (avctx->codec_id == CODEC_ID_FLIDTA && !flic_encoders[i>>1].is_dta))
> +            continue;
> +        if (i & 1) {
> +            if (avctx->bits_per_sample == 8 && remapped)
> +                sizes[i] = flic_encoders[i>>1].encode(avctx, &(s->remap_frame), buf+chunkpos);
> +            else
> +                sizes[i] = -1;

using INT_MAX should avoid a few checks below

> +        } else
> +            sizes[i] = flic_encoders[i>>1].encode(avctx, p, buf+chunkpos);
> +//        av_log(avctx, AV_LOG_INFO, "Encoder %s (%s) size: %d + palette: %d\n",
> +//            flic_encoders[i>>1].name, (i&1) ? "remapped" : "original",
> +//            sizes[i], palette_sizes[i&1]);
> +        if (sizes[i] != -1)
> +            sizes[i] += palette_sizes[i&1];
> +        if (sizes[i] != -1 && ((sizes[i] < cur) || cur == -1)) {
> +            cur = sizes[i];
> +            best = i;
> +        }
> +    }
[...]

> /*
>  * Palette optimisations
>  * Copyright (c) 2006 Sakura Industries Ltd.
>  * Author: Steven Johnson

The palette stuff belongs in a separate patch!

[...]
> typedef struct IFR_Match {
>     uint32_t CurIdx;			  /// Index in Current
>     uint32_t PrevIdx;			  /// that matches this Index in previous
> 
>     void *AsocState;                      /// Associated state -needed for qsort.
> } IFR_Match;

IIRC doxygen wants ///< for comments placed like that

[...]
> static int ifr_matchcompare(const void* a, const void* b) {
>     // We want qsort to sort in ascending order, so comparison test is reversed.
>     IFR_Match *f = *(IFR_Match**) a; 
>     IFR_Match *s = *(IFR_Match**) b;
>     
>     // note: f->AsocState->ConcordanceMatrix must == s->AsocState->ConcordanceMatrix
>     // so we use the same matrix, from f as it should generate more efficient code.
>     IFRPaletteOptimiseState *state = (IFRPaletteOptimiseState *)f->AsocState;
> 
>     if (state->ConcordanceMatrix[f->PrevIdx][f->CurIdx] <
>         state->ConcordanceMatrix[s->PrevIdx][s->CurIdx]) {
> 	return 1;
>     } else if (state->ConcordanceMatrix[f->PrevIdx][f->CurIdx] >
>                state->ConcordanceMatrix[s->PrevIdx][s->CurIdx]) {
> 	return -1;
>     }

return value0-value1; should do

> 
>     return 0; // Must be equal.
> }
> 
> int ff_ifr_optimise(AVFrame *cur, AVFrame *prev, AVFrame *remap, int width, int height)
> {

very very messy

concordance[256*256][2];
for all pixels
    concordance[256*prevpix + pixel][0]++;
for all x
    concordance[x][1]= x;

sort concordance[2] elements per [0]

for(x=0; x<256*256; x++){
    int match= concordance[x][1];
    int m0= match & 255;
    int m1= match >> 8;
    if(mapped[0][m0]>=0 || mapped[1][m1]>=0)
        continue;
    mapped[0][m0]= m1;
    mapped[1][m1]= m0;
}

PS: the solution is of course not optimal, but I see no obvious way to
find the optimal solution in polynomial time

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Concerning the gods, I have no means of knowing whether they exist or not
or of what sort they may be, because of the obscurity of the subject, and
the brevity of human life -- Protagoras
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070222/b034b24c/attachment.pgp>