[FFmpeg-devel] [PATCH 1/2] libavutil/libavfilter: opencl wrapper based on comments on 20130327
Michael Niedermayer
michaelni at gmx.at
Wed Mar 27 18:14:20 CET 2013
On Wed, Mar 27, 2013 at 09:21:09PM +0800, Wei Gao wrote:
>
> configure | 4
> libavutil/Makefile | 3
> libavutil/opencl.c | 674 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> libavutil/opencl.h | 197 +++++++++++++++
> 4 files changed, 878 insertions(+)
> dfbeaa4af370343e8f62d4b398c49c1f9531d242 0001-opencl-wrapper-based-on-comments-on-20130327.patch
> From 544da77d67c6c27e415363c3ebd2f1894f98932e Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Wed, 27 Mar 2013 21:17:29 +0800
> Subject: [PATCH] opencl wrapper based on comments on 20130327
>
> ---
> configure | 4 +
> libavutil/Makefile | 3 +
> libavutil/opencl.c | 674 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> libavutil/opencl.h | 197 ++++++++++++++++
> 4 files changed, 878 insertions(+)
> create mode 100644 libavutil/opencl.c
> create mode 100644 libavutil/opencl.h
>
> diff --git a/configure b/configure
> index 8443db4..9c42a85 100755
> --- a/configure
> +++ b/configure
> @@ -233,6 +233,7 @@ External library support:
> --enable-libxvid enable Xvid encoding via xvidcore,
> native MPEG-4/Xvid encoder exists [no]
> --enable-openal enable OpenAL 1.1 capture support [no]
> + --enable-opencl enable OpenCL code
> --enable-openssl enable openssl [no]
> --enable-x11grab enable X11 grabbing [no]
> --enable-zlib enable zlib [autodetect]
> @@ -1178,6 +1179,7 @@ EXTERNAL_LIBRARY_LIST="
> libxavs
> libxvid
> openal
> + opencl
> openssl
> x11grab
> zlib
> @@ -3982,6 +3984,7 @@ enabled openal && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32
> die "ERROR: openal not found"; } &&
> { check_cpp_condition "AL/al.h" "defined(AL_VERSION_1_1)" ||
> die "ERROR: openal must be installed and version must be 1.1 or compatible"; }
> +enabled opencl && require2 opencl CL/cl.h clEnqueueNDRangeKernel -lOpenCL
> enabled openssl && { check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto ||
> check_lib openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
> check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
> @@ -4350,6 +4353,7 @@ echo "network support ${network-no}"
> echo "threading support ${thread_type-no}"
> echo "safe bitstream reader ${safe_bitstream_reader-no}"
> echo "SDL support ${sdl-no}"
> +echo "opencl enabled ${opencl-no}"
> echo "texi2html enabled ${texi2html-no}"
> echo "perl enabled ${perl-no}"
> echo "pod2man enabled ${pod2man-no}"
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 103ce5e..6375e10 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -52,6 +52,8 @@ HEADERS = adler32.h \
>
> HEADERS-$(CONFIG_LZO) += lzo.h
>
> +HEADERS-$(CONFIG_OPENCL) += opencl.h
> +
> ARCH_HEADERS = bswap.h \
> intmath.h \
> intreadwrite.h \
> @@ -115,6 +117,7 @@ SKIPHEADERS-$(HAVE_MACHINE_RW_BARRIER) += atomic_suncc.h
> SKIPHEADERS-$(HAVE_MEMORYBARRIER) += atomic_win32.h
> SKIPHEADERS-$(HAVE_SYNC_VAL_COMPARE_AND_SWAP) += atomic_gcc.h
>
> +OBJS-$(CONFIG_OPENCL) += opencl.o
> TESTPROGS = adler32 \
> aes \
> atomic \
This should be placed where the other OBJS-$(...) entries are.
> diff --git a/libavutil/opencl.c b/libavutil/opencl.c
> new file mode 100644
> index 0000000..1888a34
> --- /dev/null
> +++ b/libavutil/opencl.c
> @@ -0,0 +1,674 @@
> +/*
> + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> + * Copyright (C) 2012 Li Cao <li at multicorewareinc.com>
> + * Copyright (C) 2012 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "opencl.h"
> +#include "avstring.h"
> +#include "log.h"
> +#include "avassert.h"
> +
> +#if !HAVE_MEMORYBARRIER && !HAVE_SYNC_VAL_COMPARE_AND_SWAP && !HAVE_MACHINE_RW_BARRIER
> +#if HAVE_PTHREADS
> +
> +#include <pthread.h>
> +static pthread_mutex_t atomic_opencl_lock = PTHREAD_MUTEX_INITIALIZER;
> +
> +static void lock_opencl(void)
> +{
> + pthread_mutex_lock(&atomic_opencl_lock);
> +}
> +
> +static void unlock_opencl(void)
> +{
> + pthread_mutex_unlock(&atomic_opencl_lock);
> +}
> +
> +#elif !HAVE_THREADS
> +
> +static void lock_opencl(void)
> +{
> +}
> +
> +static void unlock_opencl(void)
> +{
> +}
> +
> +#endif
> +#else
> +static void lock_opencl(void)
> +{
> +}
> +
> +static void unlock_opencl(void)
> +{
> +}
Empty functions will not prevent thread-safety issues.
[...]
[...]
> +static int compile_kernel_file(GPUEnv *gpu_env, const char *build_option)
> +{
> + cl_int status;
> + char *temp, *source_str = NULL;
> + size_t source_str_len = 0;
> + int i, ret = 0;
> +
> + if (gpu_env->program)
> + return ret;
> +
> + for (i = 0; i < gpu_env->kernel_function_count; i++) {
> + source_str_len += strlen(gpu_env->kernel_code[i]);
> + }
> + source_str = av_mallocz(source_str_len + 1);
> + if (!source_str) {
> + return AVERROR(ENOMEM);
> + }
> + temp = source_str;
> + for (i = 0; i < gpu_env->kernel_function_count; i++) {
> + memcpy(temp, gpu_env->kernel_code[i], strlen(gpu_env->kernel_code[i]));
> + temp += strlen(gpu_env->kernel_code[i]);
> + }
If I understand this correctly, all registered filter source is
concatenated together and then compiled.
This is quite wasteful considering that only a small subset of
filters will be used in most use cases.
Even in the future, when things are precompiled, it would be wasteful
to load everything when only 5% is used.
[...]
> +
> +int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr)
> +{
> + cl_int status;
> + *cl_buf = clCreateBuffer(gpu_env.context, flags, cl_buf_size, host_ptr, &status);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL buffer: %s\n", opencl_errstr(status));
> + return AVERROR_EXTERNAL;
> + }
> + return 0;
> +}
> +
> +void av_opencl_buffer_release(cl_mem cl_buf)
> +{
> + cl_int status = 0;
> + if (!cl_buf)
> + return;
> + status = clReleaseMemObject(cl_buf);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL buffer: %s\n", opencl_errstr(status));
> + }
> +}
> +
> +int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size)
> +{
> + cl_int status;
> + void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, dst_cl_buf,
> + CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size,
> + 0, NULL, NULL, &status);
gpu_env.command_queue is a global field; how is this supposed to work?
There can be multiple filters from multiple threads accessing it.
I do not really know OpenCL, but shouldn't each filter instance have its
own command_queue?
Also, the function is set to blocking mode, which does not seem ideal
from a performance point of view.
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
When you are offended at any man's fault, turn to yourself and study your
own failings. Then you will forget your anger. -- Epictetus
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130327/a542e06d/attachment.asc>
More information about the ffmpeg-devel
mailing list