[FFmpeg-devel] [PATCH 1/2] apng: Support inter-frame compression
Donny Yang
work at kota.moe
Mon Jul 13 10:17:39 CEST 2015
The current algorithm is simply "try every combination of dispose_op and
blend_op, and pick the one that encodes smallest". It is not very fast,
probably because of all the frame copying, but it will do for an initial
implementation.
---
libavcodec/pngenc.c | 423 +++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 387 insertions(+), 36 deletions(-)
diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
index 7a9d0b0..c78c8dc 100644
--- a/libavcodec/pngenc.c
+++ b/libavcodec/pngenc.c
@@ -36,6 +36,14 @@
#define IOBUF_SIZE 4096
+typedef struct APNGFctlChunk { // fields that encode_apng() serialises, big-endian, into the 26-byte 'fcTL' chunk payload
+ uint32_t sequence_number; // payload offset 0
+ uint32_t width, height; // size of the encoded frame region; payload offsets 4 and 8
+ uint32_t x_offset, y_offset; // position of that region; payload offsets 12 and 16
+ uint16_t delay_num, delay_den; // payload offsets 20 and 22; the encoder sets both to 0 and leaves the delay to be filled in during muxing
+ uint8_t dispose_op, blend_op; // APNG_DISPOSE_OP_* and APNG_BLEND_OP_* values; payload offsets 24 and 25
+} APNGFctlChunk;
+
typedef struct PNGEncContext {
AVClass *class;
HuffYUVEncDSPContext hdsp;
@@ -59,6 +67,12 @@ typedef struct PNGEncContext {
// APNG
uint32_t palette_checksum; // Used to ensure a single unique palette
uint32_t sequence_number;
+
+ AVFrame *prev_frame;
+ AVFrame *last_frame;
+ APNGFctlChunk last_frame_fctl;
+ uint8_t *last_frame_packet;
+ size_t last_frame_packet_size;
} PNGEncContext;
static void png_get_interlaced_row(uint8_t *dst, int row_size,
@@ -403,7 +417,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
uint8_t *progressive_buf = NULL;
uint8_t *top_buf = NULL;
- row_size = (avctx->width * s->bits_per_pixel + 7) >> 3;
+ row_size = (pict->width * s->bits_per_pixel + 7) >> 3;
crow_base = av_malloc((row_size + 32) << (s->filter_type == PNG_FILTER_VALUE_MIXED));
if (!crow_base) {
@@ -430,16 +444,16 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
for (pass = 0; pass < NB_PASSES; pass++) {
/* NOTE: a pass is completely omitted if no pixels would be
* output */
- pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, avctx->width);
+ pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, pict->width);
if (pass_row_size > 0) {
top = NULL;
- for (y = 0; y < avctx->height; y++)
+ for (y = 0; y < pict->height; y++)
if ((ff_png_pass_ymask[pass] << (y & 7)) & 0x80) {
ptr = p->data[0] + y * p->linesize[0];
FFSWAP(uint8_t *, progressive_buf, top_buf);
png_get_interlaced_row(progressive_buf, pass_row_size,
s->bits_per_pixel, pass,
- ptr, avctx->width);
+ ptr, pict->width);
crow = png_choose_filter(s, crow_buf, progressive_buf,
top, pass_row_size, s->bits_per_pixel >> 3);
png_write_row(avctx, crow, pass_row_size + 1);
@@ -449,7 +463,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
}
} else {
top = NULL;
- for (y = 0; y < avctx->height; y++) {
+ for (y = 0; y < pict->height; y++) {
ptr = p->data[0] + y * p->linesize[0];
crow = png_choose_filter(s, crow_buf, ptr, top,
row_size, s->bits_per_pixel >> 3);
@@ -530,6 +544,275 @@ static int encode_png(AVCodecContext *avctx, AVPacket *pkt,
return 0;
}
+static int apng_do_inverse_blend(AVFrame *output, const AVFrame *input,
+ APNGFctlChunk *fctl_chunk, uint8_t bpp)
+{
+ /* Inverse-blends `input` against `output`, writing the result in place.
+  *
+  * output:     on entry the background; on success it holds only the
+  *             cropped image to encode, packed from the start of data[0],
+  *             and its width, height and linesize[0] are overwritten to
+  *             describe that cropped image
+  * input:      the foreground
+  * fctl_chunk: blend_op is read to select the blend mode; width, height,
+  *             x_offset and y_offset are set to the bounding box of the
+  *             pixels that differ between the two frames
+  * bpp:        number of bytes compared/copied per pixel
+  *
+  * Returns 0 on success, or -1 when APNG_BLEND_OP_OVER cannot be used (pixel
+  * format without alpha, no fully transparent palette entry, or an alpha
+  * combination that is not handled). On failure `output` may already have
+  * been partially overwritten.
+  */
+ // i.e. output the image such that when blended with the background, will produce the foreground
+
+ uint32_t x, y;
+ uint32_t leftmost_x = input->width; // bounding box starts out empty: left/top at the maximum, right/bottom at 0
+ uint32_t rightmost_x = 0; // exclusive bound
+ uint32_t topmost_y = input->height;
+ uint32_t bottommost_y = 0; // exclusive bound
+ const uint8_t *input_data = input->data[0];
+ uint8_t *output_data = output->data[0];
+ size_t input_linesize = input->linesize[0]; // also used below as the row stride of the background held in output
+ size_t output_linesize; // row stride of the cropped result written into output
+
+ // Find bounding box of changes
+ for (y = 0; y < input->height; ++y) {
+ for (x = 0; x < input->width; ++x) {
+ if (!memcmp(input_data + bpp * x, output_data + bpp * x, bpp)) // pixel is identical in both frames
+ continue;
+
+ if (x < leftmost_x)
+ leftmost_x = x;
+ if (x >= rightmost_x)
+ rightmost_x = x + 1;
+ if (y < topmost_y)
+ topmost_y = y;
+ if (y >= bottommost_y)
+ bottommost_y = y + 1;
+ }
+
+ input_data += input_linesize;
+ output_data += input_linesize; // NOTE(review): output is stepped by the input stride; presumably both frames share one linesize - confirm against caller
+ }
+
+ if (leftmost_x == input->width && rightmost_x == 0) {
+ // Empty frame
+ // APNG does not support empty frames, so we make it a 1x1 frame
+ leftmost_x = topmost_y = 0;
+ rightmost_x = bottommost_y = 1;
+ }
+
+ output_linesize = FFALIGN(output->width * bpp, 32); // derived from output's current width, not from output->linesize[0]
+
+ // Do actual inverse blending
+ if (fctl_chunk->blend_op == APNG_BLEND_OP_SOURCE) {
+ output_data = output->data[0]; // cropped rows are packed from the start of the output buffer
+ for (y = topmost_y; y < bottommost_y; ++y) {
+ memcpy(output_data,
+ input->data[0] + input_linesize * y + bpp * leftmost_x,
+ bpp * (rightmost_x - leftmost_x));
+ output_data += output_linesize;
+ }
+ } else { // APNG_BLEND_OP_OVER
+ size_t transparent_palette_index; // only assigned and read for AV_PIX_FMT_PAL8
+ uint32_t *palette; // only assigned and read for AV_PIX_FMT_PAL8
+
+ switch (input->format) {
+ case AV_PIX_FMT_RGBA64BE:
+ case AV_PIX_FMT_YA16BE:
+ case AV_PIX_FMT_RGBA:
+ case AV_PIX_FMT_GRAY8A:
+ break;
+
+ case AV_PIX_FMT_PAL8:
+ palette = (uint32_t*)input->data[1];
+ for (transparent_palette_index = 0; transparent_palette_index < 256; ++transparent_palette_index) // first entry whose top byte is 0; stays 256 if there is none
+ if (palette[transparent_palette_index] >> 24 == 0)
+ break;
+ break;
+
+ default:
+ // No alpha, so blending not possible
+ return -1;
+ }
+
+ for (y = topmost_y; y < bottommost_y; ++y) {
+ uint8_t *foreground = input->data[0] + input_linesize * y + bpp * leftmost_x;
+ uint8_t *background = output->data[0] + input_linesize * y + bpp * leftmost_x; // background rows are addressed with the input stride as well
+ output_data = output->data[0] + output_linesize * (y - topmost_y); // destination row in the packed result; NOTE(review): same buffer as background, presumably writes never overtake reads - confirm
+ for (x = leftmost_x; x < rightmost_x; ++x, foreground += bpp, background += bpp, output_data += bpp) {
+ if (!memcmp(foreground, background, bpp)) { // unchanged pixel: emit a fully transparent one
+ if (input->format == AV_PIX_FMT_PAL8) {
+ if (transparent_palette_index == 256) {
+ // Need fully transparent colour, but none exists
+ return -1;
+ }
+
+ *output_data = transparent_palette_index;
+ } else {
+ memset(output_data, 0, bpp);
+ }
+ continue;
+ }
+
+ // Check for special alpha values, since full inverse
+ // alpha-on-alpha blending is rarely possible, and when
+ // possible, doesn't compress much better than
+ // APNG_BLEND_OP_SOURCE blending
+ // Accepted: foreground alpha at its maximum, or background alpha 0;
+ // any other combination makes the function give up with -1
+ switch (input->format) {
+ case AV_PIX_FMT_RGBA64BE:
+ if (((uint16_t*)foreground)[3] == 0xffff ||
+ ((uint16_t*)background)[3] == 0)
+ break;
+ return -1;
+
+ case AV_PIX_FMT_YA16BE:
+ if (((uint16_t*)foreground)[1] == 0xffff ||
+ ((uint16_t*)background)[1] == 0)
+ break;
+ return -1;
+
+ case AV_PIX_FMT_RGBA:
+ if (foreground[3] == 0xff || background[3] == 0)
+ break;
+ return -1;
+
+ case AV_PIX_FMT_GRAY8A:
+ if (foreground[1] == 0xff || background[1] == 0)
+ break;
+ return -1;
+
+ case AV_PIX_FMT_PAL8:
+ if (palette[*foreground] >> 24 == 0xff ||
+ palette[*background] >> 24 == 0)
+ break;
+ return -1;
+ }
+
+ memmove(output_data, foreground, bpp); // store the foreground pixel as is
+ }
+ }
+ }
+
+ output->width = rightmost_x - leftmost_x; // from here on output describes only the cropped image
+ output->height = bottommost_y - topmost_y;
+ output->linesize[0] = output_linesize;
+ fctl_chunk->width = output->width;
+ fctl_chunk->height = output->height;
+ fctl_chunk->x_offset = leftmost_x;
+ fctl_chunk->y_offset = topmost_y;
+
+ return 0;
+}
+
+/**
+ * Encode one APNG frame, choosing the cheapest dispose_op / blend_op pair.
+ *
+ * The first frame (avctx->frame_number == 0) is encoded whole. For every later
+ * frame, each combination of dispose_op for the previous frame and blend_op
+ * for this frame is tried: the canvas left after disposal is inverse-blended
+ * against pict, the result is encoded, and the smallest output is kept.
+ * Candidates are written alternately into the caller's buffer and a scratch
+ * buffer so that the best one so far is never overwritten.
+ *
+ * On success the chosen data starts where s->bytestream pointed on entry,
+ * s->bytestream points just past it and s->sequence_number is the value the
+ * chosen encoding produced. On failure s->bytestream and s->bytestream_end
+ * are reset to their values on entry.
+ *
+ * @param avctx                codec context; its PNGEncContext supplies the
+ *                             output bytestream, last_frame and prev_frame
+ * @param pict                 frame to encode
+ * @param best_fctl_chunk      in: template for this frame's fcTL fields;
+ *                             out: region and blend_op of the chosen encoding
+ * @param best_last_fctl_chunk in: fcTL fields of the previous frame;
+ *                             out: the same with the chosen dispose_op
+ *                             (left untouched for the first frame)
+ * @return 0 on success (for the first frame, whatever encode_frame()
+ *         returns), a negative value on failure
+ */
+static int apng_encode_frame(AVCodecContext *avctx, const AVFrame *pict,
+                             APNGFctlChunk *best_fctl_chunk, APNGFctlChunk *best_last_fctl_chunk)
+{
+    PNGEncContext *s = avctx->priv_data;
+    int ret;
+    uint32_t y;
+    AVFrame *diff_frame;
+    uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
+    // Remember the caller's buffer up front so every exit path can restore it
+    uint8_t *original_bytestream = s->bytestream;
+    uint8_t *original_bytestream_end = s->bytestream_end;
+    uint8_t *temp_bytestream = NULL, *temp_bytestream_end;
+    uint32_t best_sequence_number = s->sequence_number;
+    uint8_t *best_bytestream = NULL;
+    size_t best_bytestream_size = SIZE_MAX;
+    APNGFctlChunk last_fctl_chunk = *best_last_fctl_chunk;
+    APNGFctlChunk fctl_chunk = *best_fctl_chunk;
+
+    if (avctx->frame_number == 0) {
+        // Nothing to diff against: the first frame covers the whole canvas
+        best_fctl_chunk->width = pict->width;
+        best_fctl_chunk->height = pict->height;
+        best_fctl_chunk->x_offset = 0;
+        best_fctl_chunk->y_offset = 0;
+        best_fctl_chunk->blend_op = APNG_BLEND_OP_SOURCE;
+        return encode_frame(avctx, pict);
+    }
+
+    diff_frame = av_frame_alloc();
+    if (!diff_frame)
+        return AVERROR(ENOMEM);
+
+    diff_frame->format = pict->format;
+    diff_frame->width = pict->width;
+    diff_frame->height = pict->height;
+    if ((ret = av_frame_get_buffer(diff_frame, 32)) < 0)
+        goto fail;
+
+    // Scratch buffer of the same capacity as the caller's buffer
+    temp_bytestream = av_malloc(original_bytestream_end - original_bytestream);
+    if (!temp_bytestream) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    // Only form the end pointer once the allocation is known to be valid
+    temp_bytestream_end = temp_bytestream + (original_bytestream_end - original_bytestream);
+
+    for (last_fctl_chunk.dispose_op = 0; last_fctl_chunk.dispose_op < 3; ++last_fctl_chunk.dispose_op) {
+        // 0: APNG_DISPOSE_OP_NONE
+        // 1: APNG_DISPOSE_OP_BACKGROUND
+        // 2: APNG_DISPOSE_OP_PREVIOUS
+
+        for (fctl_chunk.blend_op = 0; fctl_chunk.blend_op < 2; ++fctl_chunk.blend_op) {
+            // 0: APNG_BLEND_OP_SOURCE
+            // 1: APNG_BLEND_OP_OVER
+
+            uint32_t original_sequence_number = s->sequence_number, sequence_number;
+            uint8_t *bytestream_start = s->bytestream;
+            size_t bytestream_size;
+
+            // Do disposal
+            if (last_fctl_chunk.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+                memcpy(diff_frame->data[0], s->last_frame->data[0],
+                       s->last_frame->linesize[0] * s->last_frame->height);
+
+                if (last_fctl_chunk.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
+                    // Zero the region the previous frame covered
+                    for (y = last_fctl_chunk.y_offset; y < last_fctl_chunk.y_offset + last_fctl_chunk.height; ++y) {
+                        size_t row_start = s->last_frame->linesize[0] * y + bpp * last_fctl_chunk.x_offset;
+                        memset(diff_frame->data[0] + row_start, 0, bpp * last_fctl_chunk.width);
+                    }
+                }
+            } else {
+                if (!s->prev_frame)
+                    continue;
+
+                memcpy(diff_frame->data[0], s->prev_frame->data[0],
+                       s->prev_frame->linesize[0] * s->prev_frame->height);
+            }
+
+            // Do inverse blending
+            if (apng_do_inverse_blend(diff_frame, pict, &fctl_chunk, bpp) < 0)
+                continue;
+
+            // Do encoding
+            ret = encode_frame(avctx, diff_frame);
+            // Rewind the sequence number and write position so the next
+            // candidate starts from the same state
+            sequence_number = s->sequence_number;
+            s->sequence_number = original_sequence_number;
+            bytestream_size = s->bytestream - bytestream_start;
+            s->bytestream = bytestream_start;
+            if (ret < 0)
+                goto fail;
+
+            if (bytestream_size < best_bytestream_size) {
+                *best_fctl_chunk = fctl_chunk;
+                *best_last_fctl_chunk = last_fctl_chunk;
+
+                best_sequence_number = sequence_number;
+                best_bytestream = s->bytestream;
+                best_bytestream_size = bytestream_size;
+
+                // Keep the new best where it is and send later candidates
+                // to the other buffer
+                if (best_bytestream == original_bytestream) {
+                    s->bytestream = temp_bytestream;
+                    s->bytestream_end = temp_bytestream_end;
+                } else {
+                    s->bytestream = original_bytestream;
+                    s->bytestream_end = original_bytestream_end;
+                }
+            }
+        }
+    }
+
+    if (!best_bytestream) {
+        // Defensive: no combination produced any output
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    s->sequence_number = best_sequence_number;
+    s->bytestream = original_bytestream + best_bytestream_size;
+    s->bytestream_end = original_bytestream_end;
+    if (best_bytestream != original_bytestream)
+        memcpy(original_bytestream, best_bytestream, best_bytestream_size);
+
+    ret = 0;
+
+fail:
+    if (ret < 0) {
+        // Never leave the context pointing into temp_bytestream, which is
+        // freed just below
+        s->bytestream = original_bytestream;
+        s->bytestream_end = original_bytestream_end;
+    }
+    av_freep(&temp_bytestream);
+    av_frame_free(&diff_frame);
+    return ret;
+}
+
static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
const AVFrame *pict, int *got_packet)
{
@@ -537,9 +820,9 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
int ret;
int enc_row_size;
size_t max_packet_size;
- uint8_t buf[26];
+ APNGFctlChunk fctl_chunk;
- if (avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
+ if (pict && avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
uint32_t checksum = ~av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), ~0U, pict->data[1], 256 * sizeof(uint32_t));
if (avctx->frame_number == 0) {
@@ -560,47 +843,111 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
);
if (max_packet_size > INT_MAX)
return AVERROR(ENOMEM);
- ret = ff_alloc_packet2(avctx, pkt, max_packet_size);
- if (ret < 0)
- return ret;
-
- s->bytestream_start =
- s->bytestream = pkt->data;
- s->bytestream_end = pkt->data + pkt->size;
if (avctx->frame_number == 0) {
+ s->bytestream = avctx->extradata = av_malloc(FF_MIN_BUFFER_SIZE);
+ if (!avctx->extradata)
+ return AVERROR(ENOMEM);
+
ret = encode_headers(avctx, pict);
if (ret < 0)
return ret;
- avctx->extradata = av_malloc(s->bytestream - s->bytestream_start);
- if (!avctx->extradata)
+ avctx->extradata_size = s->bytestream - avctx->extradata;
+
+ s->last_frame_packet = av_malloc(max_packet_size);
+ if (!s->last_frame_packet)
return AVERROR(ENOMEM);
- avctx->extradata_size = s->bytestream - s->bytestream_start;
- memcpy(avctx->extradata, s->bytestream_start, s->bytestream - s->bytestream_start);
+ } else if (s->last_frame) {
+ ret = ff_alloc_packet2(avctx, pkt, max_packet_size);
+ if (ret < 0)
+ return ret;
- s->bytestream = s->bytestream_start;
+ memcpy(pkt->data, s->last_frame_packet, s->last_frame_packet_size);
+ pkt->size = s->last_frame_packet_size;
+ pkt->pts = pkt->dts = s->last_frame->pts;
}
- AV_WB32(buf, s->sequence_number);
- AV_WB32(buf + 4, avctx->width);
- AV_WB32(buf + 8, avctx->height);
- AV_WB32(buf + 12, 0); // x offset
- AV_WB32(buf + 16, 0); // y offset
- AV_WB16(buf + 20, 0); // delay numerator (filled in during muxing)
- AV_WB16(buf + 22, 0); // delay denominator
- buf[24] = APNG_DISPOSE_OP_BACKGROUND;
- buf[25] = APNG_BLEND_OP_SOURCE;
- png_write_chunk(&s->bytestream, MKTAG('f', 'c', 'T', 'L'), buf, 26);
- ++s->sequence_number;
+ if (pict) {
+ s->bytestream_start =
+ s->bytestream = s->last_frame_packet;
+ s->bytestream_end = s->bytestream + max_packet_size;
- ret = encode_frame(avctx, pict);
- if (ret < 0)
- return ret;
+ // We're encoding the frame first, so we have to do a bit of shuffling around
+ // to have the image data write to the correct place in the buffer
+ fctl_chunk.sequence_number = s->sequence_number;
+ ++s->sequence_number;
+ s->bytestream += 26 + 12;
- pkt->size = s->bytestream - s->bytestream_start;
- pkt->flags |= AV_PKT_FLAG_KEY;
- *got_packet = 1;
+ ret = apng_encode_frame(avctx, pict, &fctl_chunk, &s->last_frame_fctl);
+ if (ret < 0)
+ return ret;
+
+ fctl_chunk.delay_num = 0; // delay filled in during muxing
+ fctl_chunk.delay_den = 0;
+ } else {
+ s->last_frame_fctl.dispose_op = APNG_DISPOSE_OP_NONE;
+ }
+
+ if (s->last_frame) {
+ uint8_t* last_fctl_chunk_start = pkt->data;
+ uint8_t buf[26];
+
+ AV_WB32(buf + 0, s->last_frame_fctl.sequence_number);
+ AV_WB32(buf + 4, s->last_frame_fctl.width);
+ AV_WB32(buf + 8, s->last_frame_fctl.height);
+ AV_WB32(buf + 12, s->last_frame_fctl.x_offset);
+ AV_WB32(buf + 16, s->last_frame_fctl.y_offset);
+ AV_WB16(buf + 20, s->last_frame_fctl.delay_num);
+ AV_WB16(buf + 22, s->last_frame_fctl.delay_den);
+ buf[24] = s->last_frame_fctl.dispose_op;
+ buf[25] = s->last_frame_fctl.blend_op;
+ png_write_chunk(&last_fctl_chunk_start, MKTAG('f', 'c', 'T', 'L'), buf, 26);
+
+ *got_packet = 1;
+ }
+
+ if (pict) {
+ if (!s->last_frame) {
+ s->last_frame = av_frame_alloc();
+ if (!s->last_frame)
+ return AVERROR(ENOMEM);
+ } else if (s->last_frame_fctl.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+ if (!s->prev_frame) {
+ s->prev_frame = av_frame_alloc();
+ if (!s->prev_frame)
+ return AVERROR(ENOMEM);
+
+ s->prev_frame->format = pict->format;
+ s->prev_frame->width = pict->width;
+ s->prev_frame->height = pict->height;
+ if ((ret = av_frame_get_buffer(s->prev_frame, 32)) < 0)
+ return ret;
+ }
+
+ // Do disposal, but not blending
+ memcpy(s->prev_frame->data[0], s->last_frame->data[0],
+ s->last_frame->linesize[0] * s->last_frame->height);
+ if (s->last_frame_fctl.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
+ uint32_t y;
+ uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
+ for (y = s->last_frame_fctl.y_offset; y < s->last_frame_fctl.y_offset + s->last_frame_fctl.height; ++y) {
+ size_t row_start = s->last_frame->linesize[0] * y + bpp * s->last_frame_fctl.x_offset;
+ memset(s->prev_frame->data[0] + row_start, 0, bpp * s->last_frame_fctl.width);
+ }
+ }
+ }
+
+ av_frame_unref(s->last_frame);
+ ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
+ if (ret < 0)
+ return ret;
+
+ s->last_frame_fctl = fctl_chunk;
+ s->last_frame_packet_size = s->bytestream - s->bytestream_start;
+ } else {
+ av_frame_free(&s->last_frame);
+ }
return 0;
}
@@ -714,6 +1061,9 @@ static av_cold int png_enc_close(AVCodecContext *avctx)
deflateEnd(&s->zstream);
av_frame_free(&avctx->coded_frame);
+ av_frame_free(&s->last_frame);
+ av_frame_free(&s->prev_frame);
+ av_freep(&s->last_frame_packet);
return 0;
}
@@ -769,6 +1119,7 @@ AVCodec ff_apng_encoder = {
.init = png_enc_init,
.close = png_enc_close,
.encode2 = encode_apng,
+ .capabilities = CODEC_CAP_DELAY,
.pix_fmts = (const enum AVPixelFormat[]) {
AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE,
--
2.4.5
More information about the ffmpeg-devel
mailing list