#[cfg(test)]
#[cfg(feature = "cuda")]
mod tests {
use super::*;
use crate::cuda::executor::test_fixtures::{
generate_q4_0_weights, generate_q5_0_weights, generate_q8_0_weights,
};
/// Tries to build a `CudaExecutor`, returning `None` if construction fails.
///
/// Tests use the `None` case as a signal to return early instead of failing.
fn create_executor() -> Option<CudaExecutor> {
    // NOTE(review): the argument is presumably the CUDA device index — confirm
    // against `CudaExecutor::new`.
    let ordinal = 0;
    let attempt = CudaExecutor::new(ordinal);
    attempt.ok()
}
/// Smoke test: invokes `q8_0_gemv_into` with n = 64, k = 256 and a ramp input.
///
/// Returns early when no executor can be created. The value returned by the
/// kernel call is not inspected; the test only exercises the call path.
#[test]
fn test_q8_0_gemv_into_basic() {
    let mut exec = match create_executor() {
        Some(executor) => executor,
        None => return,
    };

    let (n, k): (u32, u32) = (64, 256);
    let rows = n as usize;
    let cols = k as usize;

    // One quantized block per 32 input elements, repeated for each of the n rows.
    let quantized = generate_q8_0_weights(rows * (cols / 32));
    let weight_buf = GpuBuffer::from_host(&exec.context, &quantized).unwrap();
    let weight_ptr = weight_buf.as_ptr();

    // Input ramp: 0.00, 0.01, 0.02, ...
    let ramp: Vec<f32> = (0..cols).map(|idx| (idx as f32) * 0.01).collect();
    let zeros = vec![0.0f32; rows];
    let input_buf = GpuBuffer::from_host(&exec.context, &ramp).unwrap();
    let output_buf = GpuBuffer::from_host(&exec.context, &zeros).unwrap();

    let _outcome = exec.q8_0_gemv_into(weight_ptr, &input_buf, &output_buf, n, k);
}
/// Smoke test: invokes `q8_0_gemv_into` with n = 128, k = 512 and a constant
/// 0.5 input vector.
///
/// Does nothing when no executor can be created. The kernel call's return
/// value is bound but never checked.
#[test]
fn test_q8_0_gemv_into_large() {
    if let Some(mut exec) = create_executor() {
        let n: u32 = 128;
        let k: u32 = 512;
        let blocks_per_row = k as usize / 32;

        // n rows, each made of k / 32 quantized blocks.
        let host_weights = generate_q8_0_weights((n as usize) * blocks_per_row);
        let weight_buf = GpuBuffer::from_host(&exec.context, &host_weights).unwrap();

        let host_input: Vec<f32> = vec![0.5f32; k as usize];
        let host_output = vec![0.0f32; n as usize];
        let input_buf = GpuBuffer::from_host(&exec.context, &host_input).unwrap();
        let output_buf = GpuBuffer::from_host(&exec.context, &host_output).unwrap();

        let _outcome =
            exec.q8_0_gemv_into(weight_buf.as_ptr(), &input_buf, &output_buf, n, k);
    }
}
/// Smoke test: invokes `q5_0_gemv_into` with n = 64, k = 256 and a ramp input.
///
/// Returns early when no executor can be created. The result of the kernel
/// call is not asserted on.
#[test]
fn test_q5_0_gemv_into_basic() {
    let Some(mut exec) = create_executor() else {
        return;
    };

    let n: u32 = 64;
    let k: u32 = 256;
    let out_len = n as usize;
    let in_len = k as usize;

    // Weight fixture: (k / 32) blocks for each of the n outputs.
    let block_count = out_len * (in_len / 32);
    let weight_bytes = generate_q5_0_weights(block_count);
    let weight_buf = GpuBuffer::from_host(&exec.context, &weight_bytes).unwrap();

    // Input values step by 0.01 starting from 0.
    let activations: Vec<f32> = (0..in_len)
        .map(|position| position as f32)
        .map(|value| value * 0.01)
        .collect();
    let results = vec![0.0f32; out_len];
    let input_buf = GpuBuffer::from_host(&exec.context, &activations).unwrap();
    let output_buf = GpuBuffer::from_host(&exec.context, &results).unwrap();

    let _status = exec.q5_0_gemv_into(weight_buf.as_ptr(), &input_buf, &output_buf, n, k);
}
/// Smoke test: invokes `q5_0_gemv_into` with n = 128, k = 512 and every input
/// element set to 0.5.
///
/// Returns early when no executor can be created. The kernel call's return
/// value is kept only until the end of the test and never examined.
#[test]
fn test_q5_0_gemv_into_large() {
    let mut exec = match create_executor() {
        None => return,
        Some(ready) => ready,
    };

    let (n, k) = (128u32, 512u32);

    // Total block count: n * (k / 32).
    let total_blocks = (k as usize / 32) * (n as usize);
    let packed = generate_q5_0_weights(total_blocks);
    let weight_buf = GpuBuffer::from_host(&exec.context, &packed).unwrap();

    let halves: Vec<f32> = vec![0.5f32; k as usize];
    let blank = vec![0.0f32; n as usize];
    let input_buf = GpuBuffer::from_host(&exec.context, &halves).unwrap();
    let output_buf = GpuBuffer::from_host(&exec.context, &blank).unwrap();

    let device_weights = weight_buf.as_ptr();
    let _status = exec.q5_0_gemv_into(device_weights, &input_buf, &output_buf, n, k);
}
/// Smoke test: invokes `q4_0_gemv_into` with n = 64, k = 256 and a ramp input.
///
/// Does nothing when no executor can be created. The kernel call's return
/// value is not checked.
#[test]
fn test_q4_0_gemv_into_basic() {
    if let Some(mut exec) = create_executor() {
        let (n, k): (u32, u32) = (64, 256);
        let (n_len, k_len) = (n as usize, k as usize);

        // n rows of k / 32 blocks each.
        let fixture = generate_q4_0_weights(n_len * (k_len / 32));
        let weight_buf = GpuBuffer::from_host(&exec.context, &fixture).unwrap();

        // Ramp input: element i holds i * 0.01.
        let ramp: Vec<f32> = (0..k_len).map(|i| (i as f32) * 0.01).collect();
        let sink = vec![0.0f32; n_len];
        let input_buf = GpuBuffer::from_host(&exec.context, &ramp).unwrap();
        let output_buf = GpuBuffer::from_host(&exec.context, &sink).unwrap();

        let _outcome =
            exec.q4_0_gemv_into(weight_buf.as_ptr(), &input_buf, &output_buf, n, k);
    }
}
/// Smoke test: invokes `q4_0_gemv_into` for the n = 1 edge case (k = 256) with
/// an all-ones input.
///
/// Returns early when no executor can be created. The kernel call's return
/// value is not asserted on.
#[test]
fn test_q4_0_gemv_into_single_row() {
    let Some(mut exec) = create_executor() else {
        return;
    };

    let n: u32 = 1;
    let k: u32 = 256;
    let width = k as usize;

    // With a single row, the weight matrix is just k / 32 blocks.
    let row_blocks = width / 32;
    let row_weights = generate_q4_0_weights(row_blocks);
    let weight_buf = GpuBuffer::from_host(&exec.context, &row_weights).unwrap();

    let ones: Vec<f32> = vec![1.0f32; width];
    let scalar_out = vec![0.0f32; n as usize];
    let input_buf = GpuBuffer::from_host(&exec.context, &ones).unwrap();
    let output_buf = GpuBuffer::from_host(&exec.context, &scalar_out).unwrap();

    let weight_ptr = weight_buf.as_ptr();
    let _status = exec.q4_0_gemv_into(weight_ptr, &input_buf, &output_buf, n, k);
}
/// Smoke test: invokes `q4_1_gemv_into` with n = 64, k = 256 and a ramp input.
///
/// The weight buffer is sized for the Q4_1 block format rather than being
/// built from the Q4_0 fixture generator: Q4_1 blocks carry an extra f16
/// minimum, so Q4_0-sized data (presumably 18 bytes per block — confirm
/// against `generate_q4_0_weights`) would leave the device buffer shorter than
/// what a Q4_1 kernel reads. Zero-filled weights are used, as in the Q5_K and
/// Q6_K tests in this module.
///
/// Returns early when no executor can be created. The kernel call's return
/// value is not asserted on.
#[test]
fn test_q4_1_gemv_into_basic() {
    // GGML Q4_1 block: f16 scale (2) + f16 min (2) + 16 bytes of packed 4-bit
    // quants covering 32 weights.
    const Q4_1_BLOCK_BYTES: usize = 20;
    const WEIGHTS_PER_BLOCK: usize = 32;

    let Some(mut exec) = create_executor() else {
        return;
    };

    let k = 256u32;
    let n = 64u32;

    // n rows, each made of k / 32 blocks.
    let blocks = (n as usize) * (k as usize / WEIGHTS_PER_BLOCK);
    let weights = vec![0u8; blocks * Q4_1_BLOCK_BYTES];
    let weight_buf = GpuBuffer::from_host(&exec.context, &weights).unwrap();

    // Ramp input: element i holds i * 0.01.
    let input: Vec<f32> = (0..k as usize).map(|i| (i as f32) * 0.01).collect();
    let output = vec![0.0f32; n as usize];
    let input_buf = GpuBuffer::from_host(&exec.context, &input).unwrap();
    let output_buf = GpuBuffer::from_host(&exec.context, &output).unwrap();

    let _result = exec.q4_1_gemv_into(weight_buf.as_ptr(), &input_buf, &output_buf, n, k);
}
/// Smoke test: invokes `q5k_gemv_into` with n = 64, k = 256, zero-filled
/// weights and a ramp input.
///
/// Returns early when no executor can be created. The kernel call's return
/// value is not checked.
#[test]
fn test_q5k_gemv_into_basic() {
    let mut exec = match create_executor() {
        Some(executor) => executor,
        None => return,
    };

    let (n, k): (u32, u32) = (64, 256);

    // The weight buffer holds 176 bytes per 256-element super-block,
    // with k / 256 super-blocks for each of the n rows.
    let superblock_count = (n as usize) * (k as usize / 256);
    let zero_weights = vec![0u8; superblock_count * 176];
    let weight_buf = GpuBuffer::from_host(&exec.context, &zero_weights).unwrap();

    let ramp: Vec<f32> = (0..k as usize).map(|idx| (idx as f32) * 0.01).collect();
    let cleared = vec![0.0f32; n as usize];
    let input_buf = GpuBuffer::from_host(&exec.context, &ramp).unwrap();
    let output_buf = GpuBuffer::from_host(&exec.context, &cleared).unwrap();

    let _outcome = exec.q5k_gemv_into(weight_buf.as_ptr(), &input_buf, &output_buf, n, k);
}
/// Smoke test: invokes `q6k_gemv_into` with n = 64, k = 256, zero-filled
/// weights and a ramp input.
///
/// Does nothing when no executor can be created. The kernel call's return
/// value is not asserted on.
#[test]
fn test_q6k_gemv_into_basic() {
    if let Some(mut exec) = create_executor() {
        let n: u32 = 64;
        let k: u32 = 256;
        let out_len = n as usize;
        let in_len = k as usize;

        // 210 bytes per 256-element super-block; k / 256 super-blocks per row.
        let weight_len = out_len * (in_len / 256) * 210;
        let zero_weights = vec![0u8; weight_len];
        let weight_buf = GpuBuffer::from_host(&exec.context, &zero_weights).unwrap();

        let ramp: Vec<f32> = (0..in_len).map(|i| (i as f32) * 0.01).collect();
        let cleared = vec![0.0f32; out_len];
        let input_buf = GpuBuffer::from_host(&exec.context, &ramp).unwrap();
        let output_buf = GpuBuffer::from_host(&exec.context, &cleared).unwrap();

        let _outcome =
            exec.q6k_gemv_into(weight_buf.as_ptr(), &input_buf, &output_buf, n, k);
    }
}
/// Smoke test: invokes `coalesced_q6k_gemv_into` with n = 64, k = 256,
/// zero-filled weights and a ramp input.
///
/// Returns early when no executor can be created. The kernel call's return
/// value is not inspected.
#[test]
fn test_coalesced_q6k_gemv_into_basic() {
    let Some(mut exec) = create_executor() else {
        return;
    };

    let (n, k) = (64u32, 256u32);

    // Same weight sizing as the plain Q6_K test: 210 bytes per super-block,
    // n * (k / 256) super-blocks in total.
    let superblock_total = (n as usize) * (k as usize / 256);
    let weight_bytes = vec![0u8; superblock_total * 210];
    let weight_buf = GpuBuffer::from_host(&exec.context, &weight_bytes).unwrap();

    let ramp: Vec<f32> = (0..k as usize)
        .map(|position| (position as f32) * 0.01)
        .collect();
    let cleared = vec![0.0f32; n as usize];
    let input_buf = GpuBuffer::from_host(&exec.context, &ramp).unwrap();
    let output_buf = GpuBuffer::from_host(&exec.context, &cleared).unwrap();

    let weight_ptr = weight_buf.as_ptr();
    let _status = exec.coalesced_q6k_gemv_into(weight_ptr, &input_buf, &output_buf, n, k);
}
/// Smoke test: invokes `batched_q6k_gemv_into` with m = 4, n = 64, k = 256,
/// zero-filled weights and a ramp input of m * k elements.
///
/// Returns early when no executor can be created. The kernel call's return
/// value is not asserted on.
#[test]
fn test_batched_q6k_gemv_into_basic() {
    let mut exec = match create_executor() {
        Some(executor) => executor,
        None => return,
    };

    let m: u32 = 4;
    let k: u32 = 256;
    let n: u32 = 64;

    // Weights do not scale with m: n * (k / 256) super-blocks of 210 bytes.
    let superblock_total = (n as usize) * (k as usize / 256);
    let zero_weights = vec![0u8; superblock_total * 210];
    let weight_buf = GpuBuffer::from_host(&exec.context, &zero_weights).unwrap();

    // Input and output are sized m * k and m * n respectively.
    let input_len = (m * k) as usize;
    let output_len = (m * n) as usize;
    let ramp: Vec<f32> = (0..input_len).map(|idx| (idx as f32) * 0.001).collect();
    let cleared = vec![0.0f32; output_len];
    let input_buf = GpuBuffer::from_host(&exec.context, &ramp).unwrap();
    let output_buf = GpuBuffer::from_host(&exec.context, &cleared).unwrap();

    let _outcome =
        exec.batched_q6k_gemv_into(weight_buf.as_ptr(), &input_buf, &output_buf, m, n, k);
}
/// Smoke test: invokes `batched_q6k_gemv_into` with m = 8, n = 32, k = 256,
/// zero-filled weights and a constant 0.5 input of m * k elements.
///
/// Does nothing when no executor can be created. The kernel call's return
/// value is not checked.
#[test]
fn test_batched_q6k_gemv_into_m8() {
    if let Some(mut exec) = create_executor() {
        let (m, n, k): (u32, u32, u32) = (8, 32, 256);

        // n * (k / 256) super-blocks, 210 bytes each.
        let weight_len = (n as usize) * (k as usize / 256) * 210;
        let zero_weights = vec![0u8; weight_len];
        let weight_buf = GpuBuffer::from_host(&exec.context, &zero_weights).unwrap();

        let halves: Vec<f32> = vec![0.5f32; (m * k) as usize];
        let cleared = vec![0.0f32; (m * n) as usize];
        let input_buf = GpuBuffer::from_host(&exec.context, &halves).unwrap();
        let output_buf = GpuBuffer::from_host(&exec.context, &cleared).unwrap();

        let weight_ptr = weight_buf.as_ptr();
        let _outcome =
            exec.batched_q6k_gemv_into(weight_ptr, &input_buf, &output_buf, m, n, k);
    }
}
}