[FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn: DNNAsyncExecModule Execution Failure Handling
Fu, Ting
ting.fu at intel.com
Mon Aug 9 13:12:49 EEST 2021
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of
> Shubhanshu Saxena
> Sent: Sunday, August 8, 2021 18:56
> To: ffmpeg-devel at ffmpeg.org
> Cc: Shubhanshu Saxena <shubhanshu.e01 at gmail.com>
> Subject: [FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn:
> DNNAsyncExecModule Execution Failure Handling
>
> This commit adds handling for the case where the asynchronous execution of a
> request fails: the exit status of the worker thread is checked when it is
> joined, before another execution is started. On failure, the required cleanup
> is performed as well.
>
> Signed-off-by: Shubhanshu Saxena <shubhanshu.e01 at gmail.com>
> ---
>  libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++++++----
>  libavfilter/dnn/dnn_backend_tf.c     | 10 +++++++++-
>  2 files changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/libavfilter/dnn/dnn_backend_common.c b/libavfilter/dnn/dnn_backend_common.c
> index 470fffa2ae..426683b73d 100644
> --- a/libavfilter/dnn/dnn_backend_common.c
> +++ b/libavfilter/dnn/dnn_backend_common.c
> @@ -23,6 +23,9 @@
>
>  #include "dnn_backend_common.h"
>
> +#define DNN_ASYNC_SUCCESS (void *)0
> +#define DNN_ASYNC_FAIL (void *)-1
> +
>  int ff_check_exec_params(void *ctx, DNNBackendType backend, DNNFunctionType func_type, DNNExecBaseParams *exec_params)
>  {
>      if (!exec_params) {
> @@ -79,18 +82,25 @@ static void *async_thread_routine(void *args)
>      DNNAsyncExecModule *async_module = args;
>      void *request = async_module->args;
>
> -    async_module->start_inference(request);
> +    if (async_module->start_inference(request) != DNN_SUCCESS) {
> +        return DNN_ASYNC_FAIL;
> +    }
>      async_module->callback(request);
> -    return NULL;
> +    return DNN_ASYNC_SUCCESS;
>  }
>
>  DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
>  {
> +    void *status = 0;
>      if (!async_module) {
>          return DNN_ERROR;
>      }
>  #if HAVE_PTHREAD_CANCEL
> -    pthread_join(async_module->thread_id, NULL);
> +    pthread_join(async_module->thread_id, &status);
> +    if (status == DNN_ASYNC_FAIL) {
> +        av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n");
> +        return DNN_ERROR;
> +    }
>  #endif
>      async_module->start_inference = NULL;
>      async_module->callback = NULL;
> @@ -101,6 +111,7 @@ DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
>  DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_module)
>  {
>      int ret;
> +    void *status = 0;
>
>      if (!async_module) {
>          av_log(ctx, AV_LOG_ERROR, "async_module is null when starting async inference.\n");
> @@ -108,7 +119,11 @@ DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_
>      }
>
>  #if HAVE_PTHREAD_CANCEL
> -    pthread_join(async_module->thread_id, NULL);
> +    pthread_join(async_module->thread_id, &status);
> +    if (status == DNN_ASYNC_FAIL) {
> +        av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous inference failed.\n");
> +        return DNN_ERROR;
> +    }
>      ret = pthread_create(&async_module->thread_id, NULL, async_thread_routine, async_module);
>      if (ret != 0) {
>          av_log(ctx, AV_LOG_ERROR, "Unable to start async inference.\n");
> diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
> index fb3f6f5ea6..ffec1b1328 100644
> --- a/libavfilter/dnn/dnn_backend_tf.c
> +++ b/libavfilter/dnn/dnn_backend_tf.c
> @@ -91,6 +91,7 @@ AVFILTER_DEFINE_CLASS(dnn_tensorflow);
>
>  static DNNReturnType execute_model_tf(TFRequestItem *request, Queue *inference_queue);
>  static void infer_completion_callback(void *args);
> +static inline void destroy_request_item(TFRequestItem **arg);
>
>  static void free_buffer(void *data, size_t length)
>  {
> @@ -172,6 +173,10 @@ static DNNReturnType tf_start_inference(void *args)
>                    request->status);
>      if (TF_GetCode(request->status) != TF_OK) {
>          av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
> +        tf_free_request(infer_request);
> +        if (ff_safe_queue_push_back(tf_model->request_queue, request) < 0) {
> +            destroy_request_item(&request);
> +        }
>          return DNN_ERROR;
>      }
>      return DNN_SUCCESS;
> @@ -1095,7 +1100,10 @@ static DNNReturnType execute_model_tf(TFRequestItem *request, Queue *inference_q
>      }
>
>      if (task->async) {
> -        return ff_dnn_start_inference_async(ctx, &request->exec_module);
> +        if (ff_dnn_start_inference_async(ctx, &request->exec_module) != DNN_SUCCESS) {
> +            goto err;
> +        }
> +        return DNN_SUCCESS;
>      } else {
>          if (tf_start_inference(request) != DNN_SUCCESS) {
>              goto err;
> --
> 2.25.1
LGTM. These patches function well, and the TensorFlow backend performs much better with them applied.
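
For reference, the mechanism the commit message describes can be sketched
standalone roughly as below: the worker thread reports failure through its
exit status, and the caller inspects that status at the next pthread_join()
before launching another execution. The names here (ASYNC_FAIL,
start_inference, ...) are hypothetical stand-ins, not the actual FFmpeg
symbols:

    #include <pthread.h>
    #include <stdio.h>

    /* Sentinel exit values; (void *)-1 is conventionally never a valid
     * pointer (compare MAP_FAILED), so it cannot collide with a real
     * result. This mirrors the patch's DNN_ASYNC_SUCCESS/DNN_ASYNC_FAIL. */
    #define ASYNC_SUCCESS ((void *)0)
    #define ASYNC_FAIL    ((void *)-1)

    /* Hypothetical stand-in for the backend's start_inference(request). */
    static int start_inference(void *request)
    {
        return request ? 0 : -1; /* fail when there is no request */
    }

    static void *async_thread_routine(void *arg)
    {
        if (start_inference(arg) != 0)
            return ASYNC_FAIL;   /* surfaced later via pthread_join() */
        /* the completion callback would run here */
        return ASYNC_SUCCESS;
    }

    int main(void)
    {
        pthread_t tid;
        void *status = NULL;

        if (pthread_create(&tid, NULL, async_thread_routine, NULL))
            return 1;
        /* Join before starting the next execution; the exit status
         * says whether the previous inference failed. */
        pthread_join(tid, &status);
        if (status == ASYNC_FAIL)
            fprintf(stderr, "last inference failed, not reusing request\n");
        return 0;
    }

In the patch itself, DNN_ASYNC_FAIL plays this role: once a joined thread
reports it, ff_dnn_start_inference_async() logs the error and returns
DNN_ERROR instead of spawning a new thread, and execute_model_tf() then
takes its error path to clean up the request.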