[FFmpeg-devel] [PATCH 11/25] avcodec/magicyuv: Avoid AV_QSORT when creating Huffman table
Paul B Mahol
onemda at gmail.com
Sat Sep 26 13:57:35 EEST 2020
On Sat, Sep 26, 2020 at 12:27:50PM +0200, Andreas Rheinhardt wrote:
> The MagicYUV format stores Huffman tables in its bitstream by coding
> the length of a given symbol; it does not code the actual code directly,
> instead this is to be inferred by the rule that a symbol is to the left
> of every shorter symbol in the Huffman tree and that for symbols of the
> same length the symbol is ascending from left to right.
>
> Our decoder implemented this by first sorting the array containing
> length and symbol of each element according to descending length and
> for equal length, according to ascending symbol. Afterwards, the current
> state in the tree got encoded in a variable code; if the next array entry
> had length len, then the len most significant bits of code contained
> the code of this entry. Whenever an entry of the array of length
> len was processed, code was incremented by 1U << (32 - len). So two
> entries of length len have the same effect as incrementing code by
> 1U << (32 - (len - 1)), which corresponds to the parent node of length
> len - 1 of the two nodes of length len etc.
>
> This commit modifies this to avoid sorting the entries before
> calculating the codes. This is done by calculating how many non-leaf
> nodes there are on each level of the tree before calculating the codes.
> Afterwards every leaf node on this level gets assigned the number of
> nodes already on this level as code. This of course works only because
> the entries are already sorted by their symbol initially, so that this
> algorithm indeed gives ascending symbols from left to right on every
> level.
>
> This offers both speed- as well as (obvious) codesize advantages. With
> Clang 10 the number of decicycles for build_huffman decreased from
> 1561987 to 1228405; for GCC 9 it went from 1825096 decicyles to 1429921.
> These tests were carried out with a sample with 150 frames that was
> looped 13 times; and this was iterated 10 times. The earlier reference
> point here is from the point when the loop generating the codes was
> traversed in reverse order (as the patch reversing the order led to
> performance penalties).
ok
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
> ---
> libavcodec/magicyuv.c | 36 ++++++++++++++++++------------------
> 1 file changed, 18 insertions(+), 18 deletions(-)
>
> diff --git a/libavcodec/magicyuv.c b/libavcodec/magicyuv.c
> index 17dea69d76..7dded9b457 100644
> --- a/libavcodec/magicyuv.c
> +++ b/libavcodec/magicyuv.c
> @@ -25,7 +25,6 @@
> #define CACHED_BITSTREAM_READER !ARCH_X86_32
>
> #include "libavutil/pixdesc.h"
> -#include "libavutil/qsort.h"
>
> #include "avcodec.h"
> #include "bytestream.h"
> @@ -74,26 +73,24 @@ typedef struct MagicYUVContext {
> LLVidDSPContext llviddsp;
> } MagicYUVContext;
>
> -static int huff_cmp_len(const void *a, const void *b)
> +static int huff_build(HuffEntry he[], uint16_t codes_count[33],
> + VLC *vlc, int nb_elems)
> {
> - const HuffEntry *aa = a, *bb = b;
> - return (bb->len - aa->len) * 4096 + aa->sym - bb->sym;
> -}
> -
> -static int huff_build(HuffEntry he[], VLC *vlc, int nb_elems)
> -{
> - uint32_t code;
> -
> - AV_QSORT(he, nb_elems, HuffEntry, huff_cmp_len);
> + unsigned nb_codes = 0, max = 0;
> +
> + for (int i = 32; i > 0; i--) {
> + uint16_t curr = codes_count[i]; // # of leafs of length i
> + codes_count[i] = nb_codes / 2; // # of non-leaf nodes on level i
> + nb_codes = codes_count[i] + curr; // # of nodes on level i
> + if (curr && !max)
> + max = i;
> + }
>
> - code = 0;
> for (unsigned i = 0; i < nb_elems; i++) {
> - he[i].code = code >> (32 - he[i].len);
> - code += 0x80000000u >> (he[i].len - 1);
> + he[i].code = codes_count[he[i].len];
> + codes_count[he[i].len]++;
> }
> -
> - ff_free_vlc(vlc);
> - return ff_init_vlc_sparse(vlc, FFMIN(he[0].len, 12), nb_elems,
> + return ff_init_vlc_sparse(vlc, FFMIN(max, 12), nb_elems,
> &he[0].len, sizeof(he[0]), sizeof(he[0].len),
> &he[0].code, sizeof(he[0]), sizeof(he[0].code),
> &he[0].sym, sizeof(he[0]), sizeof(he[0].sym), 0);
> @@ -389,6 +386,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
> MagicYUVContext *s = avctx->priv_data;
> GetByteContext gb;
> HuffEntry he[4096];
> + uint16_t length_count[33] = { 0 };
> int i = 0, j = 0, k;
>
> bytestream2_init(&gb, table, table_size);
> @@ -409,6 +407,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
> return AVERROR_INVALIDDATA;
> }
>
> + length_count[x] += l;
> for (; j < k; j++) {
> he[j].sym = j;
> he[j].len = x;
> @@ -416,7 +415,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
>
> if (j == max) {
> j = 0;
> - if (huff_build(he, &s->vlc[i], max)) {
> + if (huff_build(he, length_count, &s->vlc[i], max)) {
> av_log(avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n");
> return AVERROR_INVALIDDATA;
> }
> @@ -424,6 +423,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
> if (i == s->planes) {
> break;
> }
> + memset(length_count, 0, sizeof(length_count));
> }
> }
>
> --
> 2.25.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel
mailing list