// candle-flash-attn 0.10.2
//
// Flash attention layer for the candle ML framework.
#pragma once

// Compatibility shim for the vendored flash-attention sources, which expect
// torch/c10's C10_CUDA_CHECK macro. Here it deliberately evaluates EXPR once
// (for its side effects) and discards the resulting cudaError_t instead of
// throwing — presumably error handling happens on the Rust side of this
// crate; NOTE(review): confirm callers do not rely on failures surfacing here.
//
// Fixes vs. the previous version:
//  - `__err` renamed to `err_`: identifiers containing a double underscore
//    are reserved for the implementation in C++ (undefined behavior to use).
//  - explicit `(void)` cast silences the unused-variable warning emitted on
//    every expansion.
#define C10_CUDA_CHECK(EXPR)                                        \
  do {                                                              \
    const cudaError_t err_ = EXPR;                                  \
    (void)err_;                                                     \
  } while (0)

// Placed after a kernel launch to pick up launch-configuration errors.
// Note: cudaGetLastError() reads AND clears the sticky error state, and
// C10_CUDA_CHECK discards the result here — so this macro's net effect is to
// clear any pending launch error rather than report it.
#define C10_CUDA_KERNEL_LAUNCH_CHECK() C10_CUDA_CHECK(cudaGetLastError())