[Ffmpeg-devel] tiff encoder (qualification task for GSoC)

Sat Mar 31 14:42:46 CEST 2007

Hi

On Sat, Mar 31, 2007 at 12:20:46PM +0200, Bartlomiej Wolowiec wrote:
> On Friday 30 March 2007 23:09, Michael Niedermayer wrote:
[...]
> > > +
> > > +/**
> > > + * Put n values to buffer
> > > + *
> > > + * @param p Pointer to pointer to output buffer
> > > + * @param n Number of values
> > > + * @param val Pointer to values
> > > + * @param type type of values
> > > + */
> > > +static void tnput(uint8_t ** p, int n, uint8_t * val, enum TiffTypes
> > > type) +{
> > > +    int i;
> > > +    for (i = 0; i < n; i++) {
> > > +        switch (type) {
> > > +        case TIFF_STRING:
> > > +        case TIFF_BYTE:
> > > +            bytestream_put_byte(p, *val);
> > > +            val++;
> > > +            break;
> > > +        case TIFF_SHORT:
> > > +            bytestream_put_le16(p, *((uint16_t *) val));
> > > +            val += 2;
> > > +            break;
> > > +        case TIFF_LONG:
> > > +            bytestream_put_le32(p, *((uint32_t *) val));
> > > +            val += 4;
> > > +            break;
> > > +        case TIFF_RATIONAL:
> > > +            bytestream_put_le32(p, *((uint32_t *) val));
> > > +            val += 4;
> > > +            bytestream_put_le32(p, *((uint32_t *) val));
> > > +            val += 4;
> > > +            break;
> > > +        default:
> > > +            assert(0 && "not implemented");
> > > +        }
> > > +    }
> > > +}
> >
> > this function can easily be implemented in a quarter of the code
> 
> hmm.. I remove val+=, but I don't know how can I simplify it more.

something like:

static void tnput(uint8_t ** p, int n, uint8_t * val, enum TiffTypes type, int flip){
    int i;
#ifdef WORDS_BIGENDIAN
    flip ^= ((int[]){0,0,0,1,3,3})[type];
#endif
    for(i=0; i < n*type_sizes2[type]; i++)
        *(*p)++ = val[i ^ flip];
}

> 
> >
> >
> > [...]
> >
> > > +            //reverse (LE)
> > > +            for (i = 0; i < entry->count / 2; i++) {
> > > +                for (j = 0; j < ts; j++) {
> > > +                    tmp = *(ptr + i * ts + j);
> > > +                    *(ptr + i * ts + j) =
> > > +                        *(ptr + ((entry->count - i - 1) * ts) + j);
> > > +                    *(ptr + ((entry->count - i - 1) * ts) + j) = tmp;
> > > +                }
> > > +            }
> > > +            tnput(buf, entry->count, entry->val, entry->type);
> >
> > you reshuffle the data twice here
> 
> I don't exactly understand what do you mean. If data size is less than four 
> bytes, the data is written in offset field, but firstly I should reverse it.

its odd that its not stored in the same order as in the >4 case, anyway it can
be done in a single shuffle operation with my suggested tnput()

> 
> > [...]
> >
> > > +    case TIFF_PACKBITS:
> > > +
> > > +        from = src;
> > > +        val = *(src++);
> > > +        while (src < end) {
> > > +            // max 128 bytes in direct copy
> > > +            if (src - from >= 127) {
> > > +                *(dst++) = src - from - 1;
> > > +                memcpy(dst, from, src - from);
> > > +                dst += src - from;
> > > +                from = src;
> > > +            }
> > > +            if (val == *src) {
> > > +                // how long is run ?
> > > +                run_end = src;
> > > +                while (run_end < end && *run_end == val
> > > +                       && run_end - from < 128)
> > > +                    run_end++;
> > > +
> > > +                if (run_end - src == 1 && src - from >= 2) {
> > > +                    src++;
> > > +                } else {
> > > +
> > > +                    // write bytes from buffer
> > > +                    if (src - from >= 2) {
> > > +                        *(dst++) = src - from - 2;
> > > +                        memcpy(dst, from, src - from - 1);
> > > +                        dst += src - from - 1;
> > > +                        from = src - 1;
> > > +                    }
> > > +
> > > +                    src = run_end;
> > > +
> > > +                    *(dst++) = from - src + 1;
> > > +                    *(dst++) = val;
> > > +                    from = src;
> > > +                }
> > > +            }
> > > +            val = *src;
> > > +            src++;
> > > +        }
> > > +        // write buffer
> > > +        src = FFMIN(src, end);
> > > +        if (src - from > 0) {
> > > +            *(dst++) = src - from - 1;
> > > +            for (; from != src; from++) {
> > > +                *(dst++) = *from;
> > > +            }
> > > +        }
> >
> > this is duplicate relative to targaenc.c
> 
> But targa_encode_rle use a little different coding. It takes the bpp argument 
> and my tests showed that it is slower for bpp=1...

well, merge both codes into a generic rle encoder in rle.c/h and use that
there are many codecs which use rle duplicating it for each is not ok
and dont hesitate to optimize rle.c/h if you want though iam not insisting
on this, what i insist on though is that code duplication is not ok

> 
> > [...]
> >
> > > +    if (buf_size <
> > > +        (((s->height * s->width * s->bpp >> 3) * 129) >> 7) + 128 +
> > > +        TIFF_MAX_ENTRY * 12 + strips * 8) {
> > > +        av_log(s->avctx, AV_LOG_ERROR, "Buffer is too small\n");
> > > +        return -1;
> > > +    }
> >
> > this check is buggy
> 
> Ok, I added check_size to my code.

which is more complex and too looks buggy though i didnt check it
carefull so i could be wrong ...

> 
> > > +        for (i = 0; i < strips; i++) {
> > > +            strip_offsets[i] = s->offset + ptr - buf;
> > > +            zn = 0;
> > > +            for (j = 0; j < s->rps && i * s->rps + j < s->height; j++) {
> > > +                memcpy(zbuf + j * bpr,
> > > +                       p->data[0] + (i * s->rps + j) * p->linesize[0],
> > > +                       bpr);
> > > +                zn += bpr;
> > > +            }
> > > +            if ((n = encode_strip(s, zbuf, ptr, zn, s->compr)) < 0) {
> > > +                av_log(s->avctx, AV_LOG_ERROR, "Encode strip failed\n");
> > > +                av_free(strip_sizes);
> > > +                av_free(strip_offsets);
> > > +                av_free(zbuf);
> > > +                return -1;
> > > +            }
> > > +            ptr += n;
> > > +            strip_sizes[i] = ptr - buf - strip_offsets[i] + s->offset;
> > > +        }
> >
> > whats the strip loop good for? deflate is encoded as single strip
> >
> > also the av_free(strip*) return -1 occurs 3 times in the code
> 
> I moved strip* to TiffContext and now av_free is in tiff_encoder_end.

a simple goto fail:

fail:
av_free(strip_sizes);
av_free(strip_offsets);
return -1;

seems simpler to me

[...]
> +    int rps;                    ///< row per strip
> +    uint8_t *entries;                   ///< entires in header
> +    int num_entries;            ///< number of entires
> +        uint8_t **buf;                          ///< actual position in buffer
> +        uint8_t *buf_start;                     ///< pointer to first byte in buffer
> +        int buf_size;                           ///< buffer size
> +        uint32_t *strip_offsets;        ///< strips offsets in file
> +        uint32_t *strip_sizes;          ///< strips sizes

indention looks wrong
also the ///< are not aligned nicely

[...]
> +/**
> + * Put entry to stream
> + *
> + * @param buf Pointer to pointer to output buffer
> + * @param tag Tag that identifies the entry
> + * @param type Entry type
> + * @param count The number of values
> + * @param ptr_val Pointer to values
> + * @return 0 - all data writed, 1 - need write set offset
> + */
> +static int put_tag(uint8_t ** buf,
> +                   enum TiffTags tag, enum TiffTypes type, int count,
> +                   void *ptr_val)
> +{
> +    uint8_t *buf_tmp;
> +
> +    bytestream_put_le16(buf, tag);
> +    bytestream_put_le16(buf, type);
> +    bytestream_put_le32(buf, count);
> +
> +    buf_tmp = *buf;

> +    if ((type == TIFF_LONG && count == 1)
> +        || (type == TIFF_SHORT && count <= 2)
> +        || (type == TIFF_BYTE && count <= 4)) {

these 3 lines can be simplified

> +        int i, j, ts;
> +        uint8_t *ptr = ptr_val;
> +        uint8_t tmp;
> +        ts = type_sizes[type];
> +        //reverse (LE)
> +        for (i = 0; i < count / 2; i++) {
> +            for (j = 0; j < ts; j++) {
> +                tmp = *(ptr + i * ts + j);
> +                *(ptr + i * ts + j) = *(ptr + ((count - i - 1) * ts) + j);
> +                *(ptr + ((count - i - 1) * ts) + j) = tmp;
> +            }
> +        }
> +        tnput(buf, count, ptr_val, type);
> +        *buf = buf_tmp + 4;
> +        return 0;
> +    } else {
> +        bytestream_put_le32(buf, 0);

> +        *buf = buf_tmp + 4;

whats this line good for?

> +        return 1;
> +    }
> +}
> +
> +/**
> + * Add entry to directory in tiff header.
> + * @param s Tiff context
> + * @param tag Tag that identifies the entry
> + * @param type Entry type
> + * @param count The number of values
> + * @param ptr_val Pointer to values
> + */
> +static void add_entry(TiffEncoderContext * s,
> +                      enum TiffTags tag, enum TiffTypes type, int count,
> +                      void *ptr_val)
> +{
> +    uint8_t *entries_ptr = s->entries + 12 * s->num_entries;
> +
> +    if (s->num_entries == TIFF_MAX_ENTRY) {
> +        av_log(s->avctx, AV_LOG_ERROR, "Too many entries in tiff dir!!\n");
> +    }

as you dont prevent writing in this (impossible) case you could as well
remove this check or change it to a assert()

> +
> +
> +    if (check_size(s, 12))
> +        return;
> +
> +    if (put_tag(&entries_ptr, tag, type, count, ptr_val)) {
> +        if (check_size(s, count * type_sizes2[type]))
> +            return;
> +        entries_ptr -= 4;
> +        bytestream_put_le32(&entries_ptr, *s->buf - s->buf_start);      // write offset to dir
> +        tnput(s->buf, count, ptr_val, type);
> +    }

this should be simpler if put_tag() where not a seperate function

[...]

> +    default:
> +        av_log(s->avctx, AV_LOG_ERROR,
> +               "Chosen compression is not supported\n");
> +        return -1;

how can this be reached?

> +    }
> +}
> +
> +/**
> + * Write yuv line in tiffs order.
> + *
> + * @param s Tiff context
> + * @param line Line buffer
> + * @param y Line position
> + * @pram yuv_y Number of bytes of luma corresponding to chrominance
> + */
> +
> +static void make_yuv_line(TiffEncoderContext * s, uint8_t * line, int y,
> +                          int yuv_y)
> +{
> +    uint8_t *line_ptr = line;
> +    AVFrame *const p = (AVFrame *) & s->picture;
> +    int j, k;
> +
> +    for (j = 0; j < s->width / yuv_y; j++) {
> +        memcpy(line_ptr, p->data[0] + y * p->linesize[0] + j * yuv_y,
> +               yuv_y);
> +        line_ptr += yuv_y;
> +        *line_ptr++ = p->data[1][y * p->linesize[1] + j];
> +        *line_ptr++ = p->data[2][y * p->linesize[2] + j];
> +
> +    }
> +
> +    // write end bytes
> +    for (k = 0; k < yuv_y; k++) {
> +        if (j * yuv_y + k < s->width)
> +            *line_ptr++ = p->data[0][y * p->linesize[0] + j * yuv_y + k];
> +        else
> +            *line_ptr++ = 0;
> +    }
> +    *line_ptr++ = p->data[1][y * p->linesize[1] + j - 1];
> +    *line_ptr = p->data[2][y * p->linesize[2] + j - 1];

thinking about this a little, this looks wrong, its converting planar
yuv to packed, you rather should support the packed format used in tiff
directly and not support the planar ones, and if ffmpeg happens not to
support the exact packed format (i didnt check) then the correct action
would be either add support in a seperate patch or drop yuv in tiff
support for now

[...]
> +    memcpy(s->bpp_tab, (uint16_t[]) {8, 8, 8, 8}, 8);

now as bpp_tab is always constant it doesnt need to be in the context anymore
unless its clear that it will have to be changed in the future for other
pixel formats? (which i dont know so maybe its better like it is)

[...]
> +    if (!isYUV(avctx->pix_fmt)) {
> +        s->bpp_tab_size = (s->bpp >> 3);
> +    }
> +
> +    if (s->compr == TIFF_DEFLATE || s->compr == TIFF_ADOBE_DEFLATE)
> +        //best choose for DEFLATE
> +        s->rps = s->height;
> +    else
> +        s->rps = FFMAX(8192 / (((s->width * s->bpp) >> 3) + 1), 1);     // suggest size of strip
> +
> +
> +    strips = (s->height - 1) / s->rps + 1;
> +
> +    if (check_size(s, 8))
> +        return -1;
> +
> +    // write header
> +    bytestream_put_le16(&ptr, 0x4949);
> +    bytestream_put_le16(&ptr, 42);
> +
> +    offset = ptr;
> +    bytestream_put_le32(&ptr, 0);
> +
> +    s->strip_sizes = av_mallocz(4 * strips);
> +    s->strip_offsets = av_mallocz(4 * strips);
> +
> +    b_width = (isYUV(avctx->pix_fmt)) ? // bytes per row
> +        ((s->width + yuv_y - 1) / yuv_y) * (yuv_y + 2)
> +        : ((s->width * s->bpp + 7) >> 3);

if the variable contains the bytes per row why not call it bytes_per_row?

> +
> +    if (isYUV(avctx->pix_fmt)) {
> +        line = av_malloc(b_width + yuv_y + 2);
> +    }

i see several isYUV checks, these can be merged

> +#ifdef CONFIG_ZLIB
> +    if (s->compr == TIFF_DEFLATE || s->compr == TIFF_ADOBE_DEFLATE) {
> +        uint8_t *zbuf;
> +        int zlen, zn;
> +        int j;

> +        int bpr = b_width;

why not use b_width directly?

> +
> +        zlen = bpr * s->rps;
> +        zbuf = av_malloc(zlen);
> +        s->strip_offsets[0] = ptr - buf;
> +        zn = 0;
> +        for (j = 0; j < s->rps; j++) {
> +            if (isYUV(avctx->pix_fmt)) {
> +                make_yuv_line(s, line, j, yuv_y);
> +            } else {
> +                line = p->data[0] + j * p->linesize[0];
> +            }
> +
> +            memcpy(zbuf + j * bpr, line, bpr);
> +            zn += bpr;
> +        }
> +        if ((n = encode_strip(s, zbuf, ptr, zn, s->compr)) < 0) {
> +            av_log(s->avctx, AV_LOG_ERROR, "Encode strip failed\n");
> +            av_free(zbuf);
> +            if (isYUV(avctx->pix_fmt)) {
> +                av_free(line);
> +            }
> +            return -1;
> +        }
> +        ptr += n;
> +        s->strip_sizes[0] = ptr - buf - s->strip_offsets[0];
> +        av_free(zbuf);
> +    } else
> +#endif
> +    {
> +        start_strip = ptr;
> +
> +
> +        for (i = 0; i < s->height; i++) {
> +            if (s->strip_sizes[i / s->rps] == 0) {
> +                s->strip_offsets[i / s->rps] = ptr - buf;
> +                start_strip = ptr;
> +            }
> +            if (isYUV(avctx->pix_fmt)) {
> +                make_yuv_line(s, line, i, yuv_y);
> +            } else {
> +                line = p->data[0] + i * p->linesize[0];
> +            }
> +
> +            if ((n = encode_strip(s, line, ptr, b_width, s->compr)) < 0) {
> +                av_log(s->avctx, AV_LOG_ERROR, "Encode strip failed\n");
> +                if (isYUV(avctx->pix_fmt)) {
> +                    av_free(line);
> +                }
> +                return -1;
> +            }
> +            ptr += n;

the code here and in the deflate case above looks somewhat similar, cant
they be merged?

[...]
> +        static const uint32_t yuvt1[6] = { 299, 1000, 587, 1000, 114, 1000 };   //CCIR recomendation 600-1 (used in ffmpeg?)
> +        uint16_t yuv_subsampling[2] = { yuv_v, yuv_h };
> +
> +        add_entry(s, TIFF_YCBCR_COEFFICIENTS, TIFF_RATIONAL, 3,
> +                  (uint32_t *) yuvt1);

whats the cast good for?

[...]
> +    if (avctx->pix_fmt == PIX_FMT_PAL8) {
> +        int rgb;
> +        uint16_t pal[256 * 3];
> +        for (i = 0; i < 256; i++) {
> +            rgb = *(int *) (p->data[1] + i * 4);

shouldnt that be uint32_t ?

and
for (i = 0; i < 256; i++) {
    int rgb= ...
is a little simpler IMHO

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In a rich man's house there is no place to spit but his face.
-- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070331/858d4840/attachment.pgp>