[FFmpeg-devel] [PATCH v2] avfilter: compress CUDA PTX code if possible
Timo Rothenpieler
timo at rothenpieler.org
Sat Jun 19 00:07:58 EEST 2021
On 18.06.2021 22:45, Philip Langdale wrote:
> On Sat, 12 Jun 2021 18:47:50 +0200
> Timo Rothenpieler <timo at rothenpieler.org> wrote:
>
>> ---
>> .gitignore | 1 +
>> compat/cuda/ptx2c.sh | 34 ------------
>> configure | 17 ++++++
>> ffbuild/.gitignore | 1 +
>> ffbuild/bin2c.c | 76 ++++++++++++++++++++++++++
>> ffbuild/common.mak | 28 ++++++++--
>> libavfilter/Makefile | 14 +++--
>> libavfilter/cuda/load_helper.c | 96 +++++++++++++++++++++++++++++++++
>> libavfilter/cuda/load_helper.h | 28 ++++++++++
>> libavfilter/vf_format_cuda.c | 7 ++-
>> libavfilter/vf_overlay_cuda.c | 8 +--
>> libavfilter/vf_scale_cuda.c | 24 ++++++---
>> libavfilter/vf_thumbnail_cuda.c | 7 ++-
>> libavfilter/vf_yadif_cuda.c | 7 ++-
>> 14 files changed, 287 insertions(+), 61 deletions(-)
>> delete mode 100755 compat/cuda/ptx2c.sh
>> create mode 100644 ffbuild/bin2c.c
>> create mode 100644 libavfilter/cuda/load_helper.c
>> create mode 100644 libavfilter/cuda/load_helper.h
>
> I just had comments about one file:
>
>> diff --git a/libavfilter/cuda/load_helper.c
>> b/libavfilter/cuda/load_helper.c new file mode 100644
>> index 0000000000..62d644c29a
>> --- /dev/null
>> +++ b/libavfilter/cuda/load_helper.c
>> @@ -0,0 +1,96 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "config.h"
>> +
>> +#include "libavutil/hwcontext.h"
>> +#include "libavutil/hwcontext_cuda_internal.h"
>> +#include "libavutil/cuda_check.h"
>> +
>> +#if CONFIG_PTX_COMPRESSION
>> +#include <zlib.h>
>> +#define CHUNK_SIZE 1024 * 64
>> +#endif
>> +
>> +#include "load_helper.h"
>> +
>> +#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, cu, x)
>> +
>> +int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx,
>> CUmodule *cu_module,
>> + const unsigned char *data, const unsigned
>> int length) +{
>> + CudaFunctions *cu = hwctx->internal->cuda_dl;
>> +
>> +#if CONFIG_PTX_COMPRESSION
>> + z_stream stream = { 0 };
>> + uint8_t *buf, *tmp;
>> + uint64_t buf_size;
>> + int ret;
>> +
>> + if (inflateInit2(&stream, 32 + 15) != Z_OK) {
>
> Can you add a comment explaining the magic numbers?
I have no idea what those numbers do; I copied them from http.c, and
they work.
>> + av_log(avctx, AV_LOG_ERROR, "Error during zlib
>> initialisation: %s\n", stream.msg);
>> + return AVERROR(ENOSYS);
>> + }
>> +
>> + buf_size = CHUNK_SIZE * 4;
>> + buf = av_realloc(NULL, buf_size);
>> + if (!buf) {
>> + inflateEnd(&stream);
>> + return AVERROR(ENOMEM);
>> + }
>> +
>> + stream.next_in = data;
>> + stream.avail_in = length;
>> +
>> + do {
>> + stream.avail_out = buf_size - stream.total_out;
>> + stream.next_out = buf + stream.total_out;
>> +
>> + ret = inflate(&stream, Z_FINISH);
>> + if (ret != Z_OK && ret != Z_STREAM_END) {
>> + av_log(avctx, AV_LOG_ERROR, "zlib inflate error: %s\n",
>> stream.msg);
>> + inflateEnd(&stream);
>> + av_free(buf);
>> + return AVERROR(EINVAL);
>> + }
>> +
>> + if (stream.avail_out == 0) {
>> + buf_size += CHUNK_SIZE;
>> + tmp = av_realloc(buf, buf_size);
>> + if (!tmp) {
>> + inflateEnd(&stream);
>> + av_free(buf);
>> + return AVERROR(ENOMEM);
>> + }
>> + buf = tmp;
>> + }
>> + } while (ret != Z_STREAM_END);
>> +
>> + // NULL-terminate string
>> + // there is guaranteed to be space for this, due to condition in
>> loop
>
> This is because it will still grow the buffer if avail_out is zero at
> the time you hit Z_STREAM_END?
If avail_out was 0, the condition right above would already have grown
the buffer, so there is always at least one byte free.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 4494 bytes
Desc: S/MIME Cryptographic Signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20210618/774ba603/attachment.bin>
More information about the ffmpeg-devel mailing list