[FFmpeg-devel] [PATCH v2] avfilter: compress CUDA PTX code if possible
Philip Langdale
philipl at overt.org
Fri Jun 18 23:45:59 EEST 2021
On Sat, 12 Jun 2021 18:47:50 +0200
Timo Rothenpieler <timo at rothenpieler.org> wrote:
> ---
> .gitignore | 1 +
> compat/cuda/ptx2c.sh | 34 ------------
> configure | 17 ++++++
> ffbuild/.gitignore | 1 +
> ffbuild/bin2c.c | 76 ++++++++++++++++++++++++++
> ffbuild/common.mak | 28 ++++++++--
> libavfilter/Makefile | 14 +++--
> libavfilter/cuda/load_helper.c | 96
> +++++++++++++++++++++++++++++++++ libavfilter/cuda/load_helper.h |
> 28 ++++++++++ libavfilter/vf_format_cuda.c | 7 ++-
> libavfilter/vf_overlay_cuda.c | 8 +--
> libavfilter/vf_scale_cuda.c | 24 ++++++---
> libavfilter/vf_thumbnail_cuda.c | 7 ++-
> libavfilter/vf_yadif_cuda.c | 7 ++-
> 14 files changed, 287 insertions(+), 61 deletions(-)
> delete mode 100755 compat/cuda/ptx2c.sh
> create mode 100644 ffbuild/bin2c.c
> create mode 100644 libavfilter/cuda/load_helper.c
> create mode 100644 libavfilter/cuda/load_helper.h
I just had comments about one file:
> diff --git a/libavfilter/cuda/load_helper.c
> b/libavfilter/cuda/load_helper.c new file mode 100644
> index 0000000000..62d644c29a
> --- /dev/null
> +++ b/libavfilter/cuda/load_helper.c
> @@ -0,0 +1,96 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#include "config.h"
> +
> +#include "libavutil/hwcontext.h"
> +#include "libavutil/hwcontext_cuda_internal.h"
> +#include "libavutil/cuda_check.h"
> +
> +#if CONFIG_PTX_COMPRESSION
> +#include <zlib.h>
> +#define CHUNK_SIZE 1024 * 64
> +#endif
> +
> +#include "load_helper.h"
> +
> +#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, cu, x)
> +
> +int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx,
> CUmodule *cu_module,
> + const unsigned char *data, const unsigned
> int length) +{
> + CudaFunctions *cu = hwctx->internal->cuda_dl;
> +
> +#if CONFIG_PTX_COMPRESSION
> + z_stream stream = { 0 };
> + uint8_t *buf, *tmp;
> + uint64_t buf_size;
> + int ret;
> +
> + if (inflateInit2(&stream, 32 + 15) != Z_OK) {
Can you add a comment explaining the magic numbers?
> + av_log(avctx, AV_LOG_ERROR, "Error during zlib
> initialisation: %s\n", stream.msg);
> + return AVERROR(ENOSYS);
> + }
> +
> + buf_size = CHUNK_SIZE * 4;
> + buf = av_realloc(NULL, buf_size);
> + if (!buf) {
> + inflateEnd(&stream);
> + return AVERROR(ENOMEM);
> + }
> +
> + stream.next_in = data;
> + stream.avail_in = length;
> +
> + do {
> + stream.avail_out = buf_size - stream.total_out;
> + stream.next_out = buf + stream.total_out;
> +
> + ret = inflate(&stream, Z_FINISH);
> + if (ret != Z_OK && ret != Z_STREAM_END) {
> + av_log(avctx, AV_LOG_ERROR, "zlib inflate error: %s\n",
> stream.msg);
> + inflateEnd(&stream);
> + av_free(buf);
> + return AVERROR(EINVAL);
> + }
> +
> + if (stream.avail_out == 0) {
> + buf_size += CHUNK_SIZE;
> + tmp = av_realloc(buf, buf_size);
> + if (!tmp) {
> + inflateEnd(&stream);
> + av_free(buf);
> + return AVERROR(ENOMEM);
> + }
> + buf = tmp;
> + }
> + } while (ret != Z_STREAM_END);
> +
> + // NULL-terminate string
> + // there is guaranteed to be space for this, due to condition in
> loop
This is because it will still grow the buffer if avail_out is zero at
the time you hit Z_STREAM_END?
> + buf[stream.total_out] = 0;
> +
> + inflateEnd(&stream);
> +
> + ret = CHECK_CU(cu->cuModuleLoadData(cu_module, buf));
> + av_free(buf);
> + return ret;
> +#else
> + return CHECK_CU(cu->cuModuleLoadData(cu_module, data));
> +#endif
> +}
Otherwise, LGTM.
Thanks,
--phil
More information about the ffmpeg-devel
mailing list