#pragma once
#include <cstddef>
#include <cstdint>
/*
 * EXPORT marks a declaration as visible outside the shared library.
 *   - When _WIN32 is defined: expands to __declspec(dllexport).
 *   - Otherwise: expands to the GCC/Clang default-visibility attribute.
 *
 * NOTE(review): on Windows this always expands to dllexport, never dllimport.
 * That is fine when only the library itself includes this header; confirm
 * whether client code that links against the DLL includes it too.
 */
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#else
#define EXPORT __attribute__((visibility("default")))
#endif
extern "C"
{
/**
 * CPU entry point for an add over strided float (f32) operands.
 *
 * Declaration only; the implementation is not in this header. Going by the
 * name and parameter list, this presumably writes c = a + b element-wise over
 * an ndim-dimensional index space -- confirm against the implementation.
 *
 * @param a              Read-only float input.
 * @param a_strides      Stride array for a; presumably ndim entries.
 *                       NOTE(review): stride units (elements vs. bytes) are
 *                       not visible in this header -- confirm.
 * @param b              Read-only float input.
 * @param b_strides      Stride array for b (same caveats as a_strides).
 * @param c              Writable float output.
 * @param c_strides      Stride array for c (same caveats as a_strides).
 * @param shape          Extent array; presumably ndim entries.
 * @param ndim           Number of dimensions.
 * @param total_elements Element count; presumably the product of the shape
 *                       extents -- confirm.
 */
EXPORT void cpu_strided_add_f32(const float *a, const size_t *a_strides,
const float *b, const size_t *b_strides,
float *c, const size_t *c_strides,
const size_t *shape, int ndim,
size_t total_elements);
/**
 * GPU entry point for an add over strided float (f32) operands.
 *
 * Declaration only; the implementation is not in this header. Going by the
 * name and parameter list, this presumably writes c = a + b element-wise over
 * an ndim-dimensional index space -- confirm against the implementation.
 *
 * NOTE(review): the header does not say whether a, b, c and the stride/shape
 * arrays must be device-accessible or host pointers -- confirm.
 *
 * @param a              Read-only float input.
 * @param a_strides      Stride array for a; presumably ndim entries.
 *                       Stride units (elements vs. bytes) are not visible
 *                       here -- confirm.
 * @param b              Read-only float input.
 * @param b_strides      Stride array for b (same caveats as a_strides).
 * @param c              Writable float output.
 * @param c_strides      Stride array for c (same caveats as a_strides).
 * @param shape          Extent array; presumably ndim entries.
 * @param ndim           Number of dimensions.
 * @param total_elements Element count; presumably the product of the shape
 *                       extents -- confirm.
 * @param stream         Opaque pointer; presumably a device stream handle
 *                       (e.g. a CUDA stream) -- confirm. Whether a null
 *                       pointer is accepted is not stated here.
 * @return int; presumably a status code. The header defines no values, so
 *         check the implementation for the success/failure convention.
 */
EXPORT int gpu_strided_add_f32(const float *a, const size_t *a_strides,
const float *b, const size_t *b_strides,
float *c, const size_t *c_strides,
const size_t *shape, int ndim,
size_t total_elements, void *stream);
/**
 * CPU entry point for an add over strided int32_t operands.
 *
 * Declaration only; the implementation is not in this header. Going by the
 * name and parameter list, this presumably writes c = a + b element-wise over
 * an ndim-dimensional index space -- confirm against the implementation.
 *
 * NOTE(review): overflow behaviour of the int32_t addition is not specified
 * by this header -- confirm.
 *
 * @param a              Read-only int32_t input.
 * @param a_strides      Stride array for a; presumably ndim entries.
 *                       Stride units (elements vs. bytes) are not visible
 *                       here -- confirm.
 * @param b              Read-only int32_t input.
 * @param b_strides      Stride array for b (same caveats as a_strides).
 * @param c              Writable int32_t output.
 * @param c_strides      Stride array for c (same caveats as a_strides).
 * @param shape          Extent array; presumably ndim entries.
 * @param ndim           Number of dimensions.
 * @param total_elements Element count; presumably the product of the shape
 *                       extents -- confirm.
 */
EXPORT void cpu_strided_add_i32(const int32_t *a, const size_t *a_strides,
const int32_t *b, const size_t *b_strides,
int32_t *c, const size_t *c_strides,
const size_t *shape, int ndim,
size_t total_elements);
/**
 * GPU entry point for an add over strided int32_t operands.
 *
 * Declaration only; the implementation is not in this header. Going by the
 * name and parameter list, this presumably writes c = a + b element-wise over
 * an ndim-dimensional index space -- confirm against the implementation.
 *
 * NOTE(review): the header does not say whether a, b, c and the stride/shape
 * arrays must be device-accessible or host pointers -- confirm.
 *
 * @param a              Read-only int32_t input.
 * @param a_strides      Stride array for a; presumably ndim entries.
 *                       Stride units (elements vs. bytes) are not visible
 *                       here -- confirm.
 * @param b              Read-only int32_t input.
 * @param b_strides      Stride array for b (same caveats as a_strides).
 * @param c              Writable int32_t output.
 * @param c_strides      Stride array for c (same caveats as a_strides).
 * @param shape          Extent array; presumably ndim entries.
 * @param ndim           Number of dimensions.
 * @param total_elements Element count; presumably the product of the shape
 *                       extents -- confirm.
 * @param stream         Opaque pointer; presumably a device stream handle
 *                       (e.g. a CUDA stream) -- confirm. Whether a null
 *                       pointer is accepted is not stated here.
 * @return int; presumably a status code. The header defines no values, so
 *         check the implementation for the success/failure convention.
 */
EXPORT int gpu_strided_add_i32(const int32_t *a, const size_t *a_strides,
const int32_t *b, const size_t *b_strides,
int32_t *c, const size_t *c_strides,
const size_t *shape, int ndim,
size_t total_elements, void *stream);
/**
 * CPU entry point for a strided copy between byte-addressed buffers.
 *
 * Declaration only; the implementation is not in this header. Both buffers
 * are typed as uint8_t and an explicit elem_size is passed, so the routine
 * is presumably element-type agnostic -- confirm against the implementation.
 *
 * @param src            Read-only source buffer.
 * @param src_offset     Starting offset into src.
 *                       NOTE(review): units (bytes vs. elements) are not
 *                       visible in this header -- confirm.
 * @param src_strides    Stride array for the source; presumably ndim
 *                       entries. Units not visible here -- confirm.
 * @param ndim           Number of dimensions.
 * @param shape          Extent array; presumably ndim entries.
 * @param dst            Writable destination buffer.
 * @param dst_offset     Starting offset into dst (same caveat as
 *                       src_offset).
 * @param dst_strides    Stride array for the destination (same caveats as
 *                       src_strides).
 * @param elem_size      Size of one element; presumably in bytes.
 * @param total_elements Element count; presumably the product of the shape
 *                       extents -- confirm.
 */
EXPORT void cpu_strided_copy(const uint8_t *src, size_t src_offset,
const size_t *src_strides, int ndim,
const size_t *shape,
uint8_t *dst, size_t dst_offset,
const size_t *dst_strides,
size_t elem_size, size_t total_elements);
/**
 * GPU entry point for a strided copy between byte-addressed buffers.
 *
 * Declaration only; the implementation is not in this header. Both buffers
 * are typed as uint8_t and an explicit elem_size is passed, so the routine
 * is presumably element-type agnostic -- confirm against the implementation.
 *
 * NOTE(review): the header does not say whether src, dst and the
 * stride/shape arrays must be device-accessible or host pointers -- confirm.
 *
 * @param src            Read-only source buffer.
 * @param src_offset     Starting offset into src. Units (bytes vs.
 *                       elements) are not visible here -- confirm.
 * @param src_strides    Stride array for the source; presumably ndim
 *                       entries. Units not visible here -- confirm.
 * @param ndim           Number of dimensions.
 * @param shape          Extent array; presumably ndim entries.
 * @param dst            Writable destination buffer.
 * @param dst_offset     Starting offset into dst (same caveat as
 *                       src_offset).
 * @param dst_strides    Stride array for the destination (same caveats as
 *                       src_strides).
 * @param elem_size      Size of one element; presumably in bytes.
 * @param total_elements Element count; presumably the product of the shape
 *                       extents -- confirm.
 * @param stream         Opaque pointer; presumably a device stream handle
 *                       (e.g. a CUDA stream) -- confirm. Whether a null
 *                       pointer is accepted is not stated here.
 * @return int; presumably a status code. The header defines no values, so
 *         check the implementation for the success/failure convention.
 */
EXPORT int gpu_strided_copy(const uint8_t *src, size_t src_offset,
const size_t *src_strides, int ndim,
const size_t *shape,
uint8_t *dst, size_t dst_offset,
const size_t *dst_strides,
size_t elem_size, size_t total_elements,
void *stream);
/**
 * CPU entry point that reads a strided source and writes to dst.
 *
 * Declaration only; the implementation is not in this header. The
 * destination takes no offset or stride arguments, so, going by the name,
 * this presumably packs the strided source into a densely laid-out
 * destination. NOTE(review): the destination element order (e.g. row-major)
 * is not visible here -- confirm against the implementation.
 *
 * @param src            Read-only source buffer, typed as bytes.
 * @param src_offset     Starting offset into src. Units (bytes vs.
 *                       elements) are not visible here -- confirm.
 * @param src_strides    Stride array for the source; presumably ndim
 *                       entries. Units not visible here -- confirm.
 * @param ndim           Number of dimensions.
 * @param shape          Extent array; presumably ndim entries.
 * @param dst            Writable destination buffer, typed as bytes;
 *                       presumably must hold at least
 *                       elem_size * total_elements bytes -- confirm.
 * @param elem_size      Size of one element; presumably in bytes.
 * @param total_elements Element count; presumably the product of the shape
 *                       extents -- confirm.
 */
EXPORT void cpu_contiguous(const uint8_t *src, size_t src_offset,
const size_t *src_strides, int ndim,
const size_t *shape,
uint8_t *dst, size_t elem_size,
size_t total_elements);
/**
 * GPU entry point that reads a strided source and writes to dst.
 *
 * Declaration only; the implementation is not in this header. The
 * destination takes no offset or stride arguments, so, going by the name,
 * this presumably packs the strided source into a densely laid-out
 * destination. NOTE(review): the destination element order (e.g. row-major)
 * is not visible here -- confirm against the implementation.
 *
 * NOTE(review): the header does not say whether src, dst and the
 * stride/shape arrays must be device-accessible or host pointers -- confirm.
 *
 * @param src            Read-only source buffer, typed as bytes.
 * @param src_offset     Starting offset into src. Units (bytes vs.
 *                       elements) are not visible here -- confirm.
 * @param src_strides    Stride array for the source; presumably ndim
 *                       entries. Units not visible here -- confirm.
 * @param ndim           Number of dimensions.
 * @param shape          Extent array; presumably ndim entries.
 * @param dst            Writable destination buffer, typed as bytes;
 *                       presumably must hold at least
 *                       elem_size * total_elements bytes -- confirm.
 * @param elem_size      Size of one element; presumably in bytes.
 * @param total_elements Element count; presumably the product of the shape
 *                       extents -- confirm.
 * @param stream         Opaque pointer; presumably a device stream handle
 *                       (e.g. a CUDA stream) -- confirm. Whether a null
 *                       pointer is accepted is not stated here.
 * @return int; presumably a status code. The header defines no values, so
 *         check the implementation for the success/failure convention.
 */
EXPORT int gpu_contiguous(const uint8_t *src, size_t src_offset,
const size_t *src_strides, int ndim,
const size_t *shape,
uint8_t *dst, size_t elem_size,
size_t total_elements,
void *stream);
/**
 * CPU entry point for a float (f32) matrix multiply over strided 2-D operands.
 *
 * Declaration only; the implementation is not in this header. Going by the
 * name and the M/N/K parameters, this presumably computes C = A * B with
 * A of size M x K, B of size K x N and C of size M x N -- confirm against
 * the implementation. Whether C is overwritten or accumulated into is not
 * visible here.
 *
 * Each operand carries separate row and column strides, so transposed or
 * otherwise non-dense layouts can be described without copying.
 * NOTE(review): stride units (elements vs. bytes) are not visible in this
 * header -- confirm.
 *
 * @param A             Read-only float input (left operand, presumably).
 * @param a_stride_row  Stride between consecutive rows of A.
 * @param a_stride_col  Stride between consecutive columns of A.
 * @param B             Read-only float input (right operand, presumably).
 * @param b_stride_row  Stride between consecutive rows of B.
 * @param b_stride_col  Stride between consecutive columns of B.
 * @param C             Writable float output.
 * @param c_stride_row  Stride between consecutive rows of C.
 * @param c_stride_col  Stride between consecutive columns of C.
 * @param M             Matrix dimension; presumably rows of A and C.
 * @param N             Matrix dimension; presumably columns of B and C.
 * @param K             Matrix dimension; presumably the shared inner
 *                      dimension (columns of A, rows of B).
 */
EXPORT void cpu_matmul_strided_f32(
const float *A, size_t a_stride_row, size_t a_stride_col,
const float *B, size_t b_stride_row, size_t b_stride_col,
float *C, size_t c_stride_row, size_t c_stride_col,
int M, int N, int K);
/**
 * GPU entry point for a float (f32) matrix multiply over strided 2-D operands.
 *
 * Declaration only; the implementation is not in this header. Going by the
 * name and the M/N/K parameters, this presumably computes C = A * B with
 * A of size M x K, B of size K x N and C of size M x N -- confirm against
 * the implementation. Whether C is overwritten or accumulated into is not
 * visible here.
 *
 * Each operand carries separate row and column strides, so transposed or
 * otherwise non-dense layouts can be described without copying.
 * NOTE(review): stride units (elements vs. bytes) and whether A, B, C must
 * be device-accessible pointers are not visible in this header -- confirm.
 *
 * @param A             Read-only float input (left operand, presumably).
 * @param a_stride_row  Stride between consecutive rows of A.
 * @param a_stride_col  Stride between consecutive columns of A.
 * @param B             Read-only float input (right operand, presumably).
 * @param b_stride_row  Stride between consecutive rows of B.
 * @param b_stride_col  Stride between consecutive columns of B.
 * @param C             Writable float output.
 * @param c_stride_row  Stride between consecutive rows of C.
 * @param c_stride_col  Stride between consecutive columns of C.
 * @param M             Matrix dimension; presumably rows of A and C.
 * @param N             Matrix dimension; presumably columns of B and C.
 * @param K             Matrix dimension; presumably the shared inner
 *                      dimension (columns of A, rows of B).
 * @param stream        Opaque pointer; presumably a device stream handle
 *                      (e.g. a CUDA stream) -- confirm. Whether a null
 *                      pointer is accepted is not stated here.
 * @return int; presumably a status code. The header defines no values, so
 *         check the implementation for the success/failure convention.
 */
EXPORT int gpu_matmul_strided_f32(
const float *A, size_t a_stride_row, size_t a_stride_col,
const float *B, size_t b_stride_row, size_t b_stride_col,
float *C, size_t c_stride_row, size_t c_stride_col,
int M, int N, int K, void *stream);
}