[Ffmpeg-devel] [RFC] [PATCH] FLC/FLX/DTA encoder
Alex Beregszaszi
alex
Tue Feb 27 01:48:05 CET 2007
Hi,
> > +static int encode_black(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> > + int i;
> > +
> > + // FIXME: make it faster
> > + for (i = 0; i < avctx->height*p->linesize[0]; i++)
> > + if (p->data[0][i] != 0)
> > + break;
> > +
> > + if (i != avctx->height*p->linesize[0])
> > + return -1; // not full black
>
> for (i = 0; i < avctx->height*p->linesize[0]; i++)
> if (p->data[0][i])
> return -1; // not full black
ok
> > +static int encode_copy(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> > + int pos = 6, i;
> > + const int stride = avctx->width*((avctx->bits_per_sample+7)/8);
>
> as avctx->width*((avctx->bits_per_sample+7)/8) occurs several times maybe a
> bytes_per_line in FlicEncodeContext would be a good idea?
I may add it.
> [...]
> > +static int encode_brun(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> > + int lines, pos = 6, y_ptr = 0, pixel_ptr;
> > + unsigned char *pixels = p->data[0];
> > + const int stride = avctx->width*((avctx->bits_per_sample+7)/8);
> > +// const int stride = avctx->width;
> > +
> > + AV_WL16(buf+4, FLI_BRUN); // type
> > +
> > + for (lines = 0; lines < avctx->height; lines++) {
> > + int nppos = pos, npackets = 0;
> > + int pixel_countdown = stride;
> > + int copy_pos = pos;
> > + unsigned int run_count = 0;
> > +
> > + pixel_ptr = y_ptr;
> > +
> > + pos++; // number of packets, patched later
> > +
> > + while (pixel_countdown > 0) {
> > + // no point checking for a run if only 1 byte left
> > + if ((pixel_countdown > 2) &&
> > + (pixels[pixel_ptr] == pixels[pixel_ptr+1]) &&
> > + (pixels[pixel_ptr] == pixels[pixel_ptr+2])) {
> > + // 3 bytes the same in a row, so start counting the run
> > + // 3 is the threshold, because a run of two forces and extra byte
> > +
> > + // flush old run
> > + if (run_count != 0) {
> > + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> > + run_count = 0;
> > + }
> > +
> > + run_count = 3;
> > + while ((pixels[pixel_ptr] == pixels[pixel_ptr+run_count]) &&
> > + (run_count < pixel_countdown) &&
> > + (run_count < 127)) {
> > + run_count++;
> > + }
> > + npackets++;
> > + AV_WL8(buf+pos, (signed char)(run_count));
>
> what does the signed char cast do here?
The field must be signed, as negative run_count means copy, while
positive means run. Or vice versa. Better if I add AV_WL8S ? I dont
think.
> > + pos++;
> > + AV_WL8(buf+pos, pixels[pixel_ptr]);
> > + pos++;
> > + pixel_ptr += run_count;
> > + pixel_countdown -= run_count;
> > + run_count = 0;
> > + } else {
> > + // 3 bytes in a row not able to be RLE'd, so emit the copy
> > +
> > + // flush old run
> > + if (run_count == 128) {
> > + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
>
> AV_WL8(buf+copy_pos, 128);
-128
> > + run_count = 0;
> > + }
> > +
> > + // start of a copy run
> > + if (run_count == 0)
> > + {
> > + copy_pos = pos; // written later
> > + pos++;
> > + npackets++;
> > + }
> > + run_count++;
> > + AV_WL8(buf+pos, pixels[pixel_ptr]);
> > + pos++;
> > + pixel_ptr++;
> > + pixel_countdown--;
> > + }
> > + }
> > +
> > + // flush old run
> > + if (run_count != 0) {
> > + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> > + run_count = 0;
> > + }
> > +
> > + // this frame would be invalid this way, so just report failed
> > + // FIXME: try solvin it in a better way
> > + if (npackets > 255)
> > + return -1;
> > +
> > + AV_WL8(buf+nppos, npackets);
>
> what about
>
> while (pixel < end) {
> int run=1;
>
> while(pixel+1 < end && run < 127 && pixel[0] == pixel[1]){
> run++;
> pixel++;
> }
> if(run > limit){
> *buf++= run;
> *buf++= *pixel++;
> }else{
> pixel -= run - 2;
> run=1;
> while(pixel+1 < end && run < 127 && pixel[0] != pixel[1]){
> run++;
> pixel++;
> }
> *buf++= - run;
> memcpy(buf, pixel-run, run);
> buf+= run;
> }
> npackets++;
> if(npackets>255){
> limit++;
> goto retry_from_begin;
> }
> }
I was not using gotos.
> > +static int encode_dta_brun(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> > + int lines, pos = 6, y_ptr = 0, pixel_ptr;
> > + unsigned short *pixels = (unsigned short*)p->data[0];
> > +
> > + // FIXME: fix this to handle 24bit input too
> > + if (avctx->codec_id != CODEC_ID_FLIDTA ||
> > + avctx->bits_per_sample == 8 ||
> > + avctx->bits_per_sample == 24)
> > + return -1;
> > +
> > + AV_WL16(buf+4, FLI_DTA_BRUN); // type
> > +
> > + for (lines = 0; lines < avctx->height; lines++) {
> > + int nppos = pos, npackets = 0;
> > + int pixel_countdown = avctx->width;
> > + int copy_pos = pos;
> > + unsigned int run_count = 0;
> > +
> > + pixel_ptr = y_ptr;
> > +
> > + pos++; // number of packets, patched later
> > +
> > + while (pixel_countdown > 0) {
> > + // no point checking for a run if only 1 byte left
> > + if ((pixel_countdown > 1) &&
> > + (pixels[pixel_ptr] == pixels[pixel_ptr+1])) {
> > + // 2 bytes the same in a row, so start counting the run
> > +
> > + // flush old run
> > + if (run_count != 0) {
> > + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> > + run_count = 0;
> > + }
> > +
> > + run_count = 2;
> > + while ((pixels[pixel_ptr] == pixels[pixel_ptr+run_count]) &&
> > + (run_count < pixel_countdown) &&
> > + (run_count < 127)) {
> > + run_count++;
> > + }
> > + npackets++;
> > + AV_WL8(buf+pos, (signed char)(run_count));
> > + pos++;
> > + AV_WL16(buf+pos, pixels[pixel_ptr]);
> > + pos += 2;
> > + pixel_ptr += run_count;
> > + pixel_countdown -= run_count;
> > + run_count = 0;
> > + } else {
> > + // 2 bytes in a row not able to be RLE'd, so emit the copy
> > +
> > + // flush old run
> > + if (run_count == 128) {
> > + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> > + run_count = 0;
> > + }
> > +
> > + // start of a copy run
> > + if (run_count == 0)
> > + {
> > + copy_pos = pos; // written later
> > + pos++;
> > + npackets++;
> > + }
> > + run_count++;
> > + AV_WL16(buf+pos, pixels[pixel_ptr]);
> > + pos += 2;
> > + pixel_ptr++;
> > + pixel_countdown--;
> > + }
> > + }
> > +
> > + // flush old run
> > + if (run_count != 0) {
> > + AV_WL8(buf+copy_pos, (signed char)(0-run_count));
> > + run_count = 0;
> > + }
> > +
> > + // this frame would be invalid this way, so just report failed
> > + // FIXME: try solvin it in a better way
> > + if (npackets > 255)
> > + return -1;
> > +
> > + AV_WL8(buf+nppos, npackets);
> > +
> > + y_ptr += p->linesize[0]/((avctx->bits_per_sample+7)/8);
> > + }
> > +
> > + // padding
> > + if ((pos % 2) == 1) {
> > + AV_WL8(buf+pos, 0);
> > + pos++;
> > + }
> > +
> > + AV_WL32(buf, pos); // size field
> > +
> > + return pos;
> > +}
>
> very similar to encode_brun, this should be merged
With two exceptions: the run_count field is exchanged (- means X, +
means Y) and it works on pixels and not bytes. Its splitted for speed
reason.
> > +static int encode_lc(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
>
> this too and several other functions are very similar to the previous rle
> encoders and must be merged
Similar to encode_brun in principle, as all the RLE is based on the same
principle. I may add put_run() and put_copy() if that factorizing
satisfies you.
> > +// 8 bit FLC DELTA
> > +static int encode_delta8(AVCodecContext *avctx, AVFrame *p, unsigned char *buf) {
> [...]
> > + while (run_count > 254) {
> > + npackets++;
> > + AV_WL8(buf+pos, 254); // skip
> > + pos++;
> > + AV_WL8(buf+pos, 0x00);
> > + pos++;
>
> these really should be *bug++= X or bytestream_put_be16() could be used both
> should be more readable
I thought about moving to bytestream_ already, but the time I wrote it
there was no such layer yet.
> also the function is VERY messy and full of duplicated code
Yep, this function should be cleaned up in a way.
> > + // FIXME: this is rather unoptimal, but simple
> > + for (i = 0; flic_encoders[i>>1].encode != NULL; i++) {
> > + if ((avctx->codec_id == CODEC_ID_FLIC && !flic_encoders[i>>1].is_flc) ||
> > + (avctx->codec_id == CODEC_ID_FLIX && !flic_encoders[i>>1].is_flx) ||
> > + (avctx->codec_id == CODEC_ID_FLIDTA && !flic_encoders[i>>1].is_dta))
> > + continue;
> > + if (i & 1) {
> > + if (avctx->bits_per_sample == 8 && remapped)
> > + sizes[i] = flic_encoders[i>>1].encode(avctx, &(s->remap_frame), buf+chunkpos);
> > + else
> > + sizes[i] = -1;
>
> using INT_MAX should avoid a few checks below
You prefer it that way?
> > /*
> > * Palette optimisations
> > * Copyright (c) 2006 Sakura Industries Ltd.
> > * Author: Steven Johnson
>
> the palette stuff belongs into a seperate patch!
Anyway, would you accept this (palette.c) theoretically? If yes, where
should it reside? swscaler? libavcodec?
> [...]
> > typedef struct IFR_Match {
> > uint32_t CurIdx; /// Index in Current
> > uint32_t PrevIdx; /// that matches this Index in previous
> >
> > void *AsocState; /// Associated state -needed for qsort.
> > } IFR_Match;
>
> IIRC doxygen wants ///< for comments placed like that
Ok
> [...]
> > static int ifr_matchcompare(const void* a, const void* b) {
> > // We want qsort to sort in ascending order, so comparison test is reversed.
> > IFR_Match *f = *(IFR_Match**) a;
> > IFR_Match *s = *(IFR_Match**) b;
> >
> > // note: f->AsocState->ConcordanceMatrix must == s->AsocState->ConcordanceMatrix
> > // so we use the same matrix, from f as it should generate more efficient code.
> > IFRPaletteOptimiseState *state = (IFRPaletteOptimiseState *)f->AsocState;
> >
> > if (state->ConcordanceMatrix[f->PrevIdx][f->CurIdx] <
> > state->ConcordanceMatrix[s->PrevIdx][s->CurIdx]) {
> > return 1;
> > } else if (state->ConcordanceMatrix[f->PrevIdx][f->CurIdx] >
> > state->ConcordanceMatrix[s->PrevIdx][s->CurIdx]) {
> > return -1;
> > }
>
> return value0-value1; should do
done
> > return 0; // Must be equal.
> > }
> >
> > int ff_ifr_optimise(AVFrame *cur, AVFrame *prev, AVFrame *remap, int width, int height)
> > {
>
> very very messy
>
> concordance[256*256][2];
> for all pixels
> concordance[256*prevpix + pixel][0]++;
> for all x
> concordance[x][1]= x;
>
> sort concordance[2] elements per [0]
>
> for(x=0; x<256*256; x++){
> int match= concordance[x][1];
> int m0= match & 255;
> int m1= match >> 8;
> if(mapped[0][m0]>=0 || mapped[1][m1]>=0)
> continue;
> mapped[0][m0]= m1;
> mapped[1][m1]= m0;
> }
>
> PS: the solution is of course not optimal but i see no obvious way how to
> find the optimal solution in P time
I never reviewed that code and I am not familiar with it, I would ask
the author.
Steve, could you comment on this?
--
Alex Beregszaszi
More information about the ffmpeg-devel
mailing list