[FFmpeg-devel] [PATCH] checkasm: Generalize crash handling

Rémi Denis-Courmont remi at remlab.net
Thu Dec 21 22:16:09 EET 2023


Le tiistaina 19. joulukuuta 2023, 14.02.00 EET Martin Storsjö a écrit :
> This replaces the riscv specific handling from
> 7212466e735aa187d82f51dadbce957fe3da77f0 (which essentially is
> reverted, together with 286d6742218ba0235c32876b50bf593cb1986353)
> with a different implementation of the same (plus a bit more), based
> on the corresponding feature in dav1d's checkasm, supporting both Unix
> and Windows.
> 
> See in particular dav1d commits
> 0b6ee30eab2400e4f85b735ad29a68a842c34e21 and
> 0421f787ea592fd2cc74c887f20b8dc31393788b, authored by
> Henrik Gramner.
> 
> The overall approach is the same; set up a signal handler,
> store the state with setjmp/sigsetjmp, jump out of the crashing
> function with longjmp/siglongjmp.
> 
> The main difference is in what happens when the signal handler
> is invoked. In the previous implementation, it would resume from
> right before calling the crashing function, and then skip that call
> based on the setjmp return value.
> 
> In the imported implementation from dav1d, we return to right before
> the check_func() call, which will skip testing the current function
> (as the pointer is the same as it was before).
> 
> Other differences are:
> - Support for other signal handling mechanisms (Windows
>   AddVectoredExceptionHandler)
> - Using RtlCaptureContext/RtlRestoreContext instead of setjmp/longjmp
>   on Windows with SEH (which adds the design limitation that it doesn't
>   return a value like setjmp does)
> - Only catching signals once per function - if more than one
>   signal is delivered before signal handling is reenabled, any
>   signal is handled as it would without our handler
> - Not using an arch specific signal handler written in assembly
> ---
>  tests/checkasm/checkasm.c       | 100 ++++++++++++++++++++++++++------
>  tests/checkasm/checkasm.h       |  79 ++++++++++++++++++-------
>  tests/checkasm/riscv/checkasm.S |  12 ----
>  3 files changed, 140 insertions(+), 51 deletions(-)
> 
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index 6318d9296b..668034c67f 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -23,8 +23,10 @@
>  #include "config.h"
>  #include "config_components.h"
> 
> -#ifndef _GNU_SOURCE
> -# define _GNU_SOURCE // for syscall (performance monitoring API),
> strsignal() +#if CONFIG_LINUX_PERF
> +# ifndef _GNU_SOURCE
> +#  define _GNU_SOURCE // for syscall (performance monitoring API)
> +# endif
>  #endif
> 
>  #include <signal.h>
> @@ -326,6 +328,7 @@ static struct {
>      const char *cpu_flag_name;
>      const char *test_name;
>      int verbose;
> +    int catch_signals;

AFAICT, this needs to be volatile sigatomic_t

>  } state;
> 
>  /* PRNG state */
> @@ -627,6 +630,64 @@ static CheckasmFunc *get_func(CheckasmFunc **root,
> const char *name) return f;
>  }
> 
> +checkasm_context checkasm_context_buf;
> +
> +/* Crash handling: attempt to catch crashes and handle them
> + * gracefully instead of just aborting abruptly. */
> +#ifdef _WIN32
> +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
> +static LONG NTAPI signal_handler(EXCEPTION_POINTERS *const e) {
> +    const char *err;
> +
> +    if (!state.catch_signals)
> +        return EXCEPTION_CONTINUE_SEARCH;
> +
> +    switch (e->ExceptionRecord->ExceptionCode) {
> +    case EXCEPTION_FLT_DIVIDE_BY_ZERO:
> +    case EXCEPTION_INT_DIVIDE_BY_ZERO:
> +        err = "fatal arithmetic error";
> +        break;
> +    case EXCEPTION_ILLEGAL_INSTRUCTION:
> +    case EXCEPTION_PRIV_INSTRUCTION:
> +        err = "illegal instruction";
> +        break;
> +    case EXCEPTION_ACCESS_VIOLATION:
> +    case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
> +    case EXCEPTION_DATATYPE_MISALIGNMENT:
> +    case EXCEPTION_STACK_OVERFLOW:
> +        err = "segmentation fault";
> +        break;
> +    case EXCEPTION_IN_PAGE_ERROR:
> +        err = "bus error";
> +        break;
> +    default:
> +        return EXCEPTION_CONTINUE_SEARCH;
> +    }
> +    state.catch_signals = 0;
> +    checkasm_fail_func("%s", err);
> +    checkasm_load_context();
> +    return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc
> */ +}
> +#endif
> +#else
> +static void signal_handler(const int s) {
> +    if (state.catch_signals) {
> +        state.catch_signals = 0;
> +        checkasm_fail_func("%s",
> +                           s == SIGFPE ? "fatal arithmetic error" :
> +                           s == SIGILL ? "illegal instruction" :
> +                           s == SIGBUS ? "bus error" :
> +                                         "segmentation fault");

The current code for the error print-out is both simpler and more versatile, 
so I don't get this.

> +        checkasm_load_context();
> +    } else {
> +        /* fall back to the default signal handler */
> +        static const struct sigaction default_sa = { .sa_handler = SIG_DFL
> }; +        sigaction(s, &default_sa, NULL);
> +        raise(s);

Why raise here? Returning from the handler will reevaluate the same code with 
the same thread state, and trigger the default signal handler anyway (since 
you don't modify the user context).

> +    }
> +}
> +#endif
> +
>  /* Perform tests and benchmarks for the specified cpu flag if supported by
> the host */ static void check_cpu_flag(const char *name, int flag)
>  {
> @@ -737,18 +798,24 @@ int main(int argc, char *argv[])
>      unsigned int seed = av_get_random_seed();
>      int i, ret = 0;
> 
> +#ifdef _WIN32
> +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
> +    AddVectoredExceptionHandler(0, signal_handler);
> +#endif
> +#else
> +    const struct sigaction sa = {
> +        .sa_handler = signal_handler,
> +        .sa_flags = SA_NODEFER,

That does not look very sane to me. If a recursive signal occurs, processing 
it recursively is NOT a good idea. This would cause an infinite loop, 
eventually a literal stack overflow after maxing out the processor for a while. 
I'd rather let the OS kernel deal with the problem, by killing the process or 
whatever the last resort is.

> +    };
> +    sigaction(SIGBUS,  &sa, NULL);
> +    sigaction(SIGFPE,  &sa, NULL);
> +    sigaction(SIGILL,  &sa, NULL);
> +    sigaction(SIGSEGV, &sa, NULL);
> +#endif
>  #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
>      if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
>          checkasm_checked_call = checkasm_checked_call_vfp;
>  #endif
> -#if ARCH_RISCV && HAVE_RV
> -    struct sigaction act = {
> -        .sa_handler = checkasm_handle_signal,
> -        .sa_flags = 0,
> -    };
> -
> -    sigaction(SIGILL, &act, NULL);
> -#endif
> 
>      if (!tests[0].func || !cpus[0].flag) {
>          fprintf(stderr, "checkasm: no tests to perform\n");
> @@ -876,15 +943,6 @@ void checkasm_fail_func(const char *msg, ...)
>      }
>  }
> 
> -void checkasm_fail_signal(int signum)
> -{
> -#ifdef __GLIBC__
> -    checkasm_fail_func("fatal signal %d: %s", signum, strsignal(signum));
> -#else
> -    checkasm_fail_func("fatal signal %d", signum);
> -#endif
> -}
> -
>  /* Get the benchmark context of the current function */
>  CheckasmPerf *checkasm_get_perf_context(void)
>  {
> @@ -932,6 +990,10 @@ void checkasm_report(const char *name, ...)
>      }
>  }
> 
> +void checkasm_set_signal_handler_state(const int enabled) {
> +    state.catch_signals = enabled;
> +}
> +
>  #define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
>  int checkasm_check_##type(const char *const file, const int line, \
>                            const type *buf1, ptrdiff_t stride1, \
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index e3c19510fa..3d1d16d5bb 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -23,7 +23,6 @@
>  #ifndef TESTS_CHECKASM_CHECKASM_H
>  #define TESTS_CHECKASM_CHECKASM_H
> 
> -#include <setjmp.h>
>  #include <stdint.h>
>  #include "config.h"
> 
> @@ -43,6 +42,27 @@
>  #include "libavutil/lfg.h"
>  #include "libavutil/timer.h"
> 
> +#if !ARCH_X86_32 && defined(_WIN32)
> +/* setjmp/longjmp on Windows on architectures using SEH (all except x86_32)
> + * will try to use SEH to unwind the stack, which doesn't work for
> assembly + * functions without unwind information. */
> +#include <windows.h>
> +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
> +#define checkasm_context CONTEXT
> +#define checkasm_save_context() RtlCaptureContext(&checkasm_context_buf)
> +#define checkasm_load_context() RtlRestoreContext(&checkasm_context_buf,
> NULL) +#else
> +#define checkasm_context void*
> +#define checkasm_save_context() do {} while (0)
> +#define checkasm_load_context() do {} while (0)
> +#endif
> +#else
> +#include <setjmp.h>
> +#define checkasm_context jmp_buf
> +#define checkasm_save_context() setjmp(checkasm_context_buf)
> +#define checkasm_load_context() longjmp(checkasm_context_buf, 1)
> +#endif

Been there done that and it did not end well.
sigsetjmp() & co are necessary here.

> +
>  void checkasm_check_aacencdsp(void);
>  void checkasm_check_aacpsdsp(void);
>  void checkasm_check_ac3dsp(void);
> @@ -105,9 +125,10 @@ struct CheckasmPerf;
>  void *checkasm_check_func(void *func, const char *name, ...)
> av_printf_format(2, 3); int checkasm_bench_func(void);
>  void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2);
> -void checkasm_fail_signal(int signum);
>  struct CheckasmPerf *checkasm_get_perf_context(void);
>  void checkasm_report(const char *name, ...) av_printf_format(1, 2);
> +void checkasm_set_signal_handler_state(int enabled);
> +extern checkasm_context checkasm_context_buf;
> 
>  /* float compare utilities */
>  int float_near_ulp(float a, float b, unsigned max_ulp);
> @@ -131,7 +152,7 @@ static av_unused void *func_ref, *func_new;
>  #define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
> 
>  /* Decide whether or not the specified function needs to be tested */
> -#define check_func(func, ...) (func_ref = checkasm_check_func((func_new =
> func), __VA_ARGS__))
> +#define check_func(func, ...)
> (checkasm_save_context(), func_ref = checkasm_check_func((func_new = func),
> __VA_ARGS__))
> 
>  /* Declare the function prototype. The first argument is the return value,
> the remaining * arguments are the function parameters. Naming parameters is
> optional. */ @@ -146,7 +167,10 @@ static av_unused void *func_ref,
> *func_new;
>  #define report checkasm_report
> 
>  /* Call the reference function */
> -#define call_ref(...) ((func_type *)func_ref)(__VA_ARGS__)
> +#define call_ref(...)\
> +    (checkasm_set_signal_handler_state(1),\
> +     ((func_type *)func_ref)(__VA_ARGS__));\
> +    checkasm_set_signal_handler_state(0)
> 
>  #if ARCH_X86 && HAVE_X86ASM
>  /* Verifies that clobbered callee-saved registers are properly saved and
> restored @@ -179,16 +203,22 @@ void checkasm_stack_clobber(uint64_t
> clobber, ...); ((cpu_flags) & av_get_cpu_flags()) ? (void
> *)checkasm_checked_call_emms : \ (void *)checkasm_checked_call; #define
> CLOB (UINT64_C(0xdeadbeefdeadbeef))
> -#define call_new(...)
> (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,C
> LOB,\ +#define call_new(...) (checkasm_set_signal_handler_state(1),\
> +                      
> checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CL
> OB,\ CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ -                 
>     checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__)) +                  
>     checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__));\ +                
>      checkasm_set_signal_handler_state(0)
>  #elif ARCH_X86_32
>  #define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) =
> (void *)checkasm_checked_call; #define declare_new_float(ret, ...) ret
> (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call_float;
> #define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *,
> __VA_ARGS__) = \ ((cpu_flags) & av_get_cpu_flags()) ? (void
> *)checkasm_checked_call_emms :        \ (void *)checkasm_checked_call;
> -#define call_new(...) checked_call(func_new, __VA_ARGS__)
> +#define call_new(...)\
> +    (checkasm_set_signal_handler_state(1),\
> +     checked_call(func_new, __VA_ARGS__, 15, 14, 13, 12,\
> +                  11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1));\
> +    checkasm_set_signal_handler_state(0)
>  #endif
>  #elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
>  /* Use a dummy argument, to offset the real parameters by 2, not only 1.
> @@ -200,7 +230,10 @@ extern void (*checkasm_checked_call)(void *func, int
> dummy, ...); #define declare_new(ret, ...) ret (*checked_call)(void *, int
> dummy, __VA_ARGS__, \ int, int, int, int, int, int, int, int, \ int, int,
> int, int, int, int, int) = (void *)checkasm_checked_call; -#define
> call_new(...) checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5,
> 4, 3, 2, 1, 0, 0, 0, 0) +#define call_new(...) \
> +    (checkasm_set_signal_handler_state(1),\
> +     checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2,
> 1, 0, 0, 0, 0));\ +    checkasm_set_signal_handler_state(0)
>  #elif ARCH_AARCH64 && !defined(__APPLE__)
>  void checkasm_stack_clobber(uint64_t clobber, ...);
>  void checkasm_checked_call(void *func, ...);
> @@ -209,35 +242,39 @@ void checkasm_checked_call(void *func, ...);
>                                                    int, int, int, int, int,
> int, int)\ = (void *)checkasm_checked_call;
>  #define CLOB (UINT64_C(0xdeadbeefdeadbeef))
> -#define call_new(...)
> (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,C
> LOB,CLOB,\ +#define call_new(...) (checkasm_set_signal_handler_state(1),\
> +                      
> checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CL
> OB,CLOB,\ CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
> checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\ -                
>                   7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0)) +          
>                         7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\ +  
>                   checkasm_set_signal_handler_state(0)
>  #elif ARCH_RISCV
> -void checkasm_set_function(void *, sigjmp_buf);
> +void checkasm_set_function(void *);
>  void *checkasm_get_wrapper(void);
> -void checkasm_handle_signal(int signum);
> 
>  #if HAVE_RV && (__riscv_xlen == 64) && defined (__riscv_d)
>  #define declare_new(ret, ...) \
> -    int checked_call_signum = 0; \
> -    sigjmp_buf checked_call_jb; \
>      ret (*checked_call)(__VA_ARGS__) = checkasm_get_wrapper();
>  #define call_new(...) \
> -    (checkasm_set_function(func_new, checked_call_jb), \
> -     (checked_call_signum = sigsetjmp(checked_call_jb, 1)) == 0 \
> -        ? checked_call(__VA_ARGS__) \
> -        : (checkasm_fail_signal(checked_call_signum), 0))
> +    (checkasm_set_signal_handler_state(1),\
> +     checkasm_set_function(func_new), checked_call(__VA_ARGS__));\
> +    checkasm_set_signal_handler_state(0)
>  #else
>  #define declare_new(ret, ...)
> -#define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
> +#define call_new(...)\
> +    (checkasm_set_signal_handler_state(1),\
> +     ((func_type *)func_new)(__VA_ARGS__));\
> +    checkasm_set_signal_handler_state(0)
>  #endif
>  #else
>  #define declare_new(ret, ...)
>  #define declare_new_float(ret, ...)
>  #define declare_new_emms(cpu_flags, ret, ...)
>  /* Call the function */
> -#define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
> +#define call_new(...)\
> +    (checkasm_set_signal_handler_state(1),\
> +     ((func_type *)func_new)(__VA_ARGS__));\
> +    checkasm_set_signal_handler_state(0)
>  #endif
> 
>  #ifndef declare_new_emms
> @@ -284,6 +321,7 @@ typedef struct CheckasmPerf {
>              uint64_t tsum = 0;\
>              int ti, tcount = 0;\
>              uint64_t t = 0; \
> +            checkasm_set_signal_handler_state(1);\
>              for (ti = 0; ti < BENCH_RUNS; ti++) {\
>                  PERF_START(t);\
>                  tfunc(__VA_ARGS__);\
> @@ -299,6 +337,7 @@ typedef struct CheckasmPerf {
>              emms_c();\
>              perf->cycles += t;\
>              perf->iterations++;\
> +            checkasm_set_signal_handler_state(0);\
>          }\
>      } while (0)
>  #else
> diff --git a/tests/checkasm/riscv/checkasm.S
> b/tests/checkasm/riscv/checkasm.S index 971d881157..73ca85f344 100644
> --- a/tests/checkasm/riscv/checkasm.S
> +++ b/tests/checkasm/riscv/checkasm.S
> @@ -41,7 +41,6 @@ endconst
> 
>  checked_func:
>          .quad   0
> -        .quad   0
> 
>  saved_regs:
>          /* Space to spill RA, SP, GP, TP, S0-S11 and FS0-FS11 */
> @@ -53,7 +52,6 @@ func checkasm_set_function
>          la.tls.ie t0, checked_func
>          add     t0, tp, t0
>          sd      a0, (t0)
> -        sd      a1, 8(t0)
>          ret
>  endfunc
> 
> @@ -177,14 +175,4 @@ func checkasm_get_wrapper, v
>          call    checkasm_fail_func
>          j       4b
>  endfunc
> -
> -func checkasm_handle_signal
> -        mv      a1, a0
> -        la.tls.ie a0, checked_func
> -        add     a0, tp, a0
> -        ld      a0, 8(a0)
> -        beqz    a0, 8f
> -        tail    siglongjmp
> -8:      tail    abort /* No jump buffer to go to */
> -endfunc
>  #endif


-- 
レミ・デニ-クールモン
http://www.remlab.net/





More information about the ffmpeg-devel mailing list