[Ffmpeg-devel] [RFC] [PATCH] FLC/FLX/DTA encoder
Michael Niedermayer
michaelni
Thu Feb 22 17:17:00 CET 2007
Hi
On Sun, Feb 18, 2007 at 02:19:11AM +0100, Alex Beregszaszi wrote:
> Hi,
>
> attached is a huge patch adding three decoders: FLC, FLX and DTA.
>
> FLC is the 8bit RLE based format, FLX supports 15bits, while DTA adds
> new compression methods to FLX.
>
> The IFR (interframe reoder) code in palette.c reorders a frame in order
> to achieve only palette changes, thus possibly never emit any newly
> coded frames, but only palette differences. It is used in the encoder.
>
> Known:
> * current patch has hard tabs and maybe identation issues (but any
> cosmetic recommendations are welcomed)
>
> Question:
> * should it be splitted into flicenc.c ?
>
> Applying: patch is made against current SVN. You need to place palette.c
> and h into libavcodec/.
>
> --
> Alex Beregszaszi
>
[...]
> + uint32_t *pal = (uint32_t*)p->data[1], *oldpal = (uint32_t*)s->prev_frame.data[1];
id add a newline in the middle for readability
[...]
> +static int encode_black(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> + int i;
> +
> + // FIXME: make it faster
> + for (i = 0; i < avctx->height*p->linesize[0]; i++)
> + if (p->data[0][i] != 0)
> + break;
> +
> + if (i != avctx->height*p->linesize[0])
> + return -1; // not full black
for (i = 0; i < avctx->height*p->linesize[0]; i++)
if (p->data[0][i])
return -1; // not full black
> +
> + AV_WL32(buf, 6); // size field
> + AV_WL16(buf+4, FLI_BLACK); // type
> +
> + return 6;
> +}
> +
> +static int encode_copy(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> + int pos = 6, i;
> + const int stride = avctx->width*((avctx->bits_per_sample+7)/8);
as avctx->width*((avctx->bits_per_sample+7)/8) occurs several times maybe a
bytes_per_line in FlicEncodeContext would be a good idea?
[...]
> +static int encode_brun(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> + int lines, pos = 6, y_ptr = 0, pixel_ptr;
> + unsigned char *pixels = p->data[0];
> + const int stride = avctx->width*((avctx->bits_per_sample+7)/8);
> +// const int stride = avctx->width;
> +
> + AV_WL16(buf+4, FLI_BRUN); // type
> +
> + for (lines = 0; lines < avctx->height; lines++) {
> + int nppos = pos, npackets = 0;
> + int pixel_countdown = stride;
> + int copy_pos = pos;
> + unsigned int run_count = 0;
> +
> + pixel_ptr = y_ptr;
> +
> + pos++; // number of packets, patched later
> +
> + while (pixel_countdown > 0) {
> + // no point checking for a run if only 1 byte left
> + if ((pixel_countdown > 2) &&
> + (pixels[pixel_ptr] == pixels[pixel_ptr+1]) &&
> + (pixels[pixel_ptr] == pixels[pixel_ptr+2])) {
> + // 3 bytes the same in a row, so start counting the run
> + // 3 is the threshold, because a run of two forces and extra byte
> +
> + // flush old run
> + if (run_count != 0) {
> + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> + run_count = 0;
> + }
> +
> + run_count = 3;
> + while ((pixels[pixel_ptr] == pixels[pixel_ptr+run_count]) &&
> + (run_count < pixel_countdown) &&
> + (run_count < 127)) {
> + run_count++;
> + }
> + npackets++;
> + AV_WL8(buf+pos, (signed char)(run_count));
what does the signed char cast do here?
> + pos++;
> + AV_WL8(buf+pos, pixels[pixel_ptr]);
> + pos++;
> + pixel_ptr += run_count;
> + pixel_countdown -= run_count;
> + run_count = 0;
> + } else {
> + // 3 bytes in a row not able to be RLE'd, so emit the copy
> +
> + // flush old run
> + if (run_count == 128) {
> + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
AV_WL8(buf+copy_pos, 128);
> + run_count = 0;
> + }
> +
> + // start of a copy run
> + if (run_count == 0)
> + {
> + copy_pos = pos; // written later
> + pos++;
> + npackets++;
> + }
> + run_count++;
> + AV_WL8(buf+pos, pixels[pixel_ptr]);
> + pos++;
> + pixel_ptr++;
> + pixel_countdown--;
> + }
> + }
> +
> + // flush old run
> + if (run_count != 0) {
> + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> + run_count = 0;
> + }
> +
> + // this frame would be invalid this way, so just report failed
> + // FIXME: try solvin it in a better way
> + if (npackets > 255)
> + return -1;
> +
> + AV_WL8(buf+nppos, npackets);
what about
while (pixel < end) {
int run=1;
while(pixel+1 < end && run < 127 && pixel[0] == pixel[1]){
run++;
pixel++;
}
if(run > limit){
*buf++= run;
*buf++= *pixel++;
}else{
pixel -= run - 2;
run=1;
while(pixel+1 < end && run < 127 && pixel[0] != pixel[1]){
run++;
pixel++;
}
*buf++= - run;
memcpy(buf, pixel-run, run);
buf+= run;
}
npackets++;
if(npackets>255){
limit++;
goto retry_from_begin;
}
}
> +
> + y_ptr += p->linesize[0];
> + }
> +
> + // padding
> + if ((pos % 2) == 1) {
> + AV_WL8(buf+pos, 0);
> + pos++;
> + }
> +
> + AV_WL32(buf, pos); // size field
> +
> + return pos;
> +}
> +
> +static int encode_dta_brun(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> + int lines, pos = 6, y_ptr = 0, pixel_ptr;
> + unsigned short *pixels = (unsigned short*)p->data[0];
> +
> + // FIXME: fix this to handle 24bit input too
> + if (avctx->codec_id != CODEC_ID_FLIDTA ||
> + avctx->bits_per_sample == 8 ||
> + avctx->bits_per_sample == 24)
> + return -1;
> +
> + AV_WL16(buf+4, FLI_DTA_BRUN); // type
> +
> + for (lines = 0; lines < avctx->height; lines++) {
> + int nppos = pos, npackets = 0;
> + int pixel_countdown = avctx->width;
> + int copy_pos = pos;
> + unsigned int run_count = 0;
> +
> + pixel_ptr = y_ptr;
> +
> + pos++; // number of packets, patched later
> +
> + while (pixel_countdown > 0) {
> + // no point checking for a run if only 1 byte left
> + if ((pixel_countdown > 1) &&
> + (pixels[pixel_ptr] == pixels[pixel_ptr+1])) {
> + // 2 bytes the same in a row, so start counting the run
> +
> + // flush old run
> + if (run_count != 0) {
> + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> + run_count = 0;
> + }
> +
> + run_count = 2;
> + while ((pixels[pixel_ptr] == pixels[pixel_ptr+run_count]) &&
> + (run_count < pixel_countdown) &&
> + (run_count < 127)) {
> + run_count++;
> + }
> + npackets++;
> + AV_WL8(buf+pos, (signed char)(run_count));
> + pos++;
> + AV_WL16(buf+pos, pixels[pixel_ptr]);
> + pos += 2;
> + pixel_ptr += run_count;
> + pixel_countdown -= run_count;
> + run_count = 0;
> + } else {
> + // 2 bytes in a row not able to be RLE'd, so emit the copy
> +
> + // flush old run
> + if (run_count == 128) {
> + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> + run_count = 0;
> + }
> +
> + // start of a copy run
> + if (run_count == 0)
> + {
> + copy_pos = pos; // written later
> + pos++;
> + npackets++;
> + }
> + run_count++;
> + AV_WL16(buf+pos, pixels[pixel_ptr]);
> + pos += 2;
> + pixel_ptr++;
> + pixel_countdown--;
> + }
> + }
> +
> + // flush old run
> + if (run_count != 0) {
> + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> + run_count = 0;
> + }
> +
> + // this frame would be invalid this way, so just report failed
> + // FIXME: try solvin it in a better way
> + if (npackets > 255)
> + return -1;
> +
> + AV_WL8(buf+nppos, npackets);
> +
> + y_ptr += p->linesize[0]/((avctx->bits_per_sample+7)/8);
> + }
> +
> + // padding
> + if ((pos % 2) == 1) {
> + AV_WL8(buf+pos, 0);
> + pos++;
> + }
> +
> + AV_WL32(buf, pos); // size field
> +
> + return pos;
> +}
very similar to encode_brun, this should be merged
> +
> +static int encode_lc(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
this too and several other functions are very similar to the previous rle
encoders and must be merged
[...]
> +
> +// 8 bit FLC DELTA
> +static int encode_delta8(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
[...]
> + while (run_count > 254) {
> + npackets++;
> + AV_WL8(buf+pos, 254); // skip
> + pos++;
> + AV_WL8(buf+pos, 0x00);
> + pos++;
these really should be *bug++= X or bytestream_put_be16() could be used both
should be more readable
also the function is VERY messy and full of duplicated code
[...]
> + // FIXME: this is rather unoptimal, but simple
> + for (i = 0; flic_encoders[i>>1].encode != NULL; i++) {
> + if ((avctx->codec_id == CODEC_ID_FLIC && !flic_encoders[i>>1].is_flc) ||
> + (avctx->codec_id == CODEC_ID_FLIX && !flic_encoders[i>>1].is_flx) ||
> + (avctx->codec_id == CODEC_ID_FLIDTA && !flic_encoders[i>>1].is_dta))
> + continue;
> + if (i & 1) {
> + if (avctx->bits_per_sample == 8 && remapped)
> + sizes[i] = flic_encoders[i>>1].encode(avctx, &(s->remap_frame), buf+chunkpos);
> + else
> + sizes[i] = -1;
using INT_MAX should avoid a few checks below
> + } else
> + sizes[i] = flic_encoders[i>>1].encode(avctx, p, buf+chunkpos);
> +// av_log(avctx, AV_LOG_INFO, "Encoder %s (%s) size: %d + palette: %d\n",
> +// flic_encoders[i>>1].name, (i&1) ? "remapped" : "original",
> +// sizes[i], palette_sizes[i&1]);
> + if (sizes[i] != -1)
> + sizes[i] += palette_sizes[i&1];
> + if (sizes[i] != -1 && ((sizes[i] < cur) || cur == -1)) {
> + cur = sizes[i];
> + best = i;
> + }
> + }
[...]
> /*
> * Palette optimisations
> * Copyright (c) 2006 Sakura Industries Ltd.
> * Author: Steven Johnson
the palette stuff belongs into a seperate patch!
[...]
> typedef struct IFR_Match {
> uint32_t CurIdx; /// Index in Current
> uint32_t PrevIdx; /// that matches this Index in previous
>
> void *AsocState; /// Associated state -needed for qsort.
> } IFR_Match;
IIRC doxygen wants ///< for comments placed like that
[...]
> static int ifr_matchcompare(const void* a, const void* b) {
> // We want qsort to sort in ascending order, so comparison test is reversed.
> IFR_Match *f = *(IFR_Match**) a;
> IFR_Match *s = *(IFR_Match**) b;
>
> // note: f->AsocState->ConcordanceMatrix must == s->AsocState->ConcordanceMatrix
> // so we use the same matrix, from f as it should generate more efficient code.
> IFRPaletteOptimiseState *state = (IFRPaletteOptimiseState *)f->AsocState;
>
> if (state->ConcordanceMatrix[f->PrevIdx][f->CurIdx] <
> state->ConcordanceMatrix[s->PrevIdx][s->CurIdx]) {
> return 1;
> } else if (state->ConcordanceMatrix[f->PrevIdx][f->CurIdx] >
> state->ConcordanceMatrix[s->PrevIdx][s->CurIdx]) {
> return -1;
> }
return value0-value1; should do
>
> return 0; // Must be equal.
> }
>
> int ff_ifr_optimise(AVFrame *cur, AVFrame *prev, AVFrame *remap, int width, int height)
> {
very very messy
concordance[256*256][2];
for all pixels
concordance[256*prevpix + pixel][0]++;
for all x
concordance[x][1]= x;
sort concordance[2] elements per [0]
for(x=0; x<256*256; x++){
int match= concordance[x][1];
int m0= match & 255;
int m1= match >> 8;
if(mapped[0][m0]>=0 || mapped[1][m1]>=0)
continue;
mapped[0][m0]= m1;
mapped[1][m1]= m0;
}
PS: the solution is of course not optimal but i see no obvious way how to
find the optimal solution in P time
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Concerning the gods, I have no means of knowing whether they exist or not
or of what sort they may be, because of the obscurity of the subject, and
the brevity of human life -- Protagoras
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070222/b034b24c/attachment.pgp>
More information about the ffmpeg-devel
mailing list