use core::ffi::c_void;
// Raw C-ABI declarations for kernels implemented outside of Rust.
//
// Everything declared here is a foreign function and is therefore `unsafe` to
// call: the caller is responsible for every pointer, size and stream handle it
// passes. Nothing in this file validates arguments.
//
// NOTE(review): the meaning of the individual parameters (tensor shapes,
// element types behind the `c_void` pointers, valid values of the dtype tags)
// is not visible from these declarations — confirm against the native sources
// before relying on any name-based interpretation.
#[allow(dead_code)]
#[allow(improper_ctypes)]
extern "C" {
    /// Foreign entry point `moe_gemm_wmma`.
    ///
    /// Input, weights and output are passed as type-erased pointers; the
    /// routing arrays are `i32` and the top-k weights are `f32`.
    /// `expert_counts` and `expert_offsets` are mutable `i32` pointers.
    /// Note that `stream` is a 64-bit integer here, whereas the `launch_*`
    /// declarations in this file take the stream as `*mut c_void`.
    pub fn moe_gemm_wmma(
        input: *const c_void,
        weights: *const c_void,
        sorted_token_ids: *const i32,
        expert_ids: *const i32,
        topk_weights: *const f32,
        output: *mut c_void,
        expert_counts: *mut i32,
        expert_offsets: *mut i32,
        num_experts: i32,
        topk: i32,
        size_m: i32,
        size_n: i32,
        size_k: i32,
        dtype: i32,
        is_prefill: bool,
        stream: i64,
    );

    /// Foreign entry point `moe_gemm_gguf`.
    ///
    /// Unlike the other two `moe_gemm_*` declarations, `input` is typed as
    /// `*const f32`. The weight format is selected by the integer tag
    /// `gguf_dtype`. `stream` is passed as a 64-bit integer.
    pub fn moe_gemm_gguf(
        input: *const f32,
        weights: *const c_void,
        sorted_token_ids: *const i32,
        expert_ids: *const i32,
        topk_weights: *const f32,
        output: *mut c_void,
        num_experts: i32,
        topk: i32,
        size_m: i32,
        size_n: i32,
        size_k: i32,
        gguf_dtype: i32,
        stream: i64,
    );

    /// Foreign entry point `moe_gemm_gguf_prefill`.
    ///
    /// `input` is type-erased and described by the integer tag `input_dtype`;
    /// `weights` is a byte pointer described by `gguf_dtype`. `stream` is
    /// passed as a 64-bit integer.
    pub fn moe_gemm_gguf_prefill(
        input: *const c_void,
        weights: *const u8,
        sorted_token_ids: *const i32,
        expert_ids: *const i32,
        topk_weights: *const f32,
        output: *mut c_void,
        num_experts: i32,
        topk: i32,
        size_m: i32,
        size_n: i32,
        size_k: i32,
        input_dtype: i32,
        gguf_dtype: i32,
        stream: i64,
    );
}

// Declares a family of foreign functions that all share one parameter list.
//
// The parenthesised parameter list is captured as a single token tree and
// pasted after every listed name, so the signature is written exactly once per
// family and the members cannot drift apart. The generated `extern "C"` block
// carries the same lint attributes as the hand-written block above.
macro_rules! declare_kernel_family {
    ($params:tt => $($name:ident),+ $(,)?) => {
        #[allow(dead_code)]
        #[allow(improper_ctypes)]
        extern "C" {
            $(pub fn $name $params;)+
        }
    };
}

// `launch_mmvq_gguf_<quant>_<dtype>_plain`: one launcher per quantisation tag
// (q4_0 … q6_k) and per dtype suffix (bf16, f32, f16). All thirty take two
// read-only buffers, one output buffer, five `i32` size/stride arguments and an
// opaque stream pointer.
declare_kernel_family! {
    (
        vx: *const c_void,
        vy: *const c_void,
        dst: *mut c_void,
        ncols_x: i32,
        nrows_x: i32,
        stride_col_y: i32,
        stride_col_dst: i32,
        b_size: i32,
        stream: *mut c_void,
    ) =>
    // bf16
    launch_mmvq_gguf_q4_0_bf16_plain,
    launch_mmvq_gguf_q4_1_bf16_plain,
    launch_mmvq_gguf_q5_0_bf16_plain,
    launch_mmvq_gguf_q5_1_bf16_plain,
    launch_mmvq_gguf_q8_0_bf16_plain,
    launch_mmvq_gguf_q2_k_bf16_plain,
    launch_mmvq_gguf_q3_k_bf16_plain,
    launch_mmvq_gguf_q4_k_bf16_plain,
    launch_mmvq_gguf_q5_k_bf16_plain,
    launch_mmvq_gguf_q6_k_bf16_plain,
    // f32
    launch_mmvq_gguf_q4_0_f32_plain,
    launch_mmvq_gguf_q4_1_f32_plain,
    launch_mmvq_gguf_q5_0_f32_plain,
    launch_mmvq_gguf_q5_1_f32_plain,
    launch_mmvq_gguf_q8_0_f32_plain,
    launch_mmvq_gguf_q2_k_f32_plain,
    launch_mmvq_gguf_q3_k_f32_plain,
    launch_mmvq_gguf_q4_k_f32_plain,
    launch_mmvq_gguf_q5_k_f32_plain,
    launch_mmvq_gguf_q6_k_f32_plain,
    // f16
    launch_mmvq_gguf_q4_0_f16_plain,
    launch_mmvq_gguf_q4_1_f16_plain,
    launch_mmvq_gguf_q5_0_f16_plain,
    launch_mmvq_gguf_q5_1_f16_plain,
    launch_mmvq_gguf_q8_0_f16_plain,
    launch_mmvq_gguf_q2_k_f16_plain,
    launch_mmvq_gguf_q3_k_f16_plain,
    launch_mmvq_gguf_q4_k_f16_plain,
    launch_mmvq_gguf_q5_k_f16_plain,
    launch_mmvq_gguf_q6_k_f16_plain,
}

// `launch_mmvq_gguf_quantize_q8_1_<dtype>`: read-only source `x`, writable
// destination `vy`, three `i32` sizes and an opaque stream pointer.
declare_kernel_family! {
    (
        x: *const c_void,
        vy: *mut c_void,
        kx: i32,
        kx_padded: i32,
        num_rows: i32,
        stream: *mut c_void,
    ) =>
    launch_mmvq_gguf_quantize_q8_1_bf16,
    launch_mmvq_gguf_quantize_q8_1_f16,
    launch_mmvq_gguf_quantize_q8_1_f32,
}

// `launch_mmq_quantize_q8_1_<layout>`: read-only source `x`, an `i32` id
// array, writable destination `vy`, an `i32` type tag, eight `i64`
// extent/stride arguments and an opaque stream pointer.
declare_kernel_family! {
    (
        x: *const c_void,
        ids: *const i32,
        vy: *mut c_void,
        type_x: i32,
        ne00: i64,
        s01: i64,
        s02: i64,
        s03: i64,
        ne0: i64,
        ne1: i64,
        ne2: i64,
        ne3: i64,
        stream: *mut c_void,
    ) =>
    launch_mmq_quantize_q8_1_D4,
    launch_mmq_quantize_q8_1_DS4,
    launch_mmq_quantize_q8_1_D2S6,
}

// `launch_mmq_gguf_<quant>`: one launcher per quantisation tag (q4_0 … q6_k).
// Each takes a writable `tmp_fixup` buffer, two read-only buffers, an output
// buffer, five `i64` sizes/strides, the `i32` values `cc`, `nsm` and
// `warp_size`, the `i64` value `smpbo`, and an opaque stream pointer.
// NOTE(review): `cc`, `nsm` and `smpbo` look like device properties — confirm
// their exact meaning against the native launcher.
declare_kernel_family! {
    (
        tmp_fixup: *mut c_void,
        x: *const c_void,
        y: *const c_void,
        dst: *mut c_void,
        ncols_x: i64,
        nrows_x: i64,
        ncols_y: i64,
        stride_row_x: i64,
        stride_col_dst: i64,
        cc: i32,
        nsm: i32,
        smpbo: i64,
        warp_size: i32,
        stream: *mut c_void,
    ) =>
    launch_mmq_gguf_q4_0,
    launch_mmq_gguf_q4_1,
    launch_mmq_gguf_q5_0,
    launch_mmq_gguf_q5_1,
    launch_mmq_gguf_q8_0,
    launch_mmq_gguf_q2_k,
    launch_mmq_gguf_q3_k,
    launch_mmq_gguf_q4_k,
    launch_mmq_gguf_q5_k,
    launch_mmq_gguf_q6_k,
}