use super::*;
use crate::tensor::Tensor;
use std::time::Duration;
#[test]
fn test_cpu_matmul_1x1() {
    // Smallest possible product: [2] * [3] == [6].
    let lhs = vec![2.0f32];
    let rhs = vec![3.0f32];
    let out = cpu_matmul(&lhs, &rhs, 1, 1, 1);
    assert_eq!(out.len(), 1);
    assert!((out[0] - 6.0).abs() < 1e-5);
}
#[test]
fn test_cpu_matmul_identity() {
    // Multiplying by a 3x3 identity must reproduce the input matrix.
    let eye = vec![1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0];
    let mat = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
    let out = cpu_matmul(&eye, &mat, 3, 3, 3);
    assert_eq!(out.len(), 9);
    for (got, want) in out.iter().zip(mat.iter()) {
        assert!((got - want).abs() < 1e-5);
    }
}
#[test]
fn test_cpu_matmul_rectangular() {
    // (2x3) * (3x4) -> 2x4; verify size and the top-left element.
    let lhs = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    let rhs: Vec<f32> = (1..=12).map(|v| v as f32).collect();
    let out = cpu_matmul(&lhs, &rhs, 2, 3, 4);
    assert_eq!(out.len(), 8);
    // c[0][0] = 1*1 + 2*5 + 3*9 = 38
    assert!((out[0] - 38.0).abs() < 1e-5);
}
#[test]
fn test_cpu_matmul_m1_triggers_vector_path() {
    // m == 1 should exercise the vector-times-matrix code path.
    let row = vec![1.0, 2.0, 3.0, 4.0];
    let mat = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let out = cpu_matmul(&row, &mat, 1, 4, 2);
    assert_eq!(out.len(), 2);
    // c[0] = 1*1 + 2*3 + 3*5 + 4*7 = 50
    assert!((out[0] - 50.0).abs() < 1e-5);
}
#[test]
fn test_cpu_matmul_large_parallel_path() {
    // n is large enough that the implementation should take its
    // parallel branch; we only assert the output is sane.
    let (k, n) = (64, 2048);
    let a: Vec<f32> = (0..k).map(|i| (i % 10) as f32 * 0.1).collect();
    let b: Vec<f32> = (0..k * n).map(|i| (i % 7) as f32 * 0.1).collect();
    let out = cpu_matmul(&a, &b, 1, k, n);
    assert_eq!(out.len(), n);
    assert!(out.iter().all(|v| v.is_finite()));
}
#[test]
fn test_cpu_matmul_transpose_b_basic() {
    // B is the 2x2 identity, so A * B^T == A.
    let lhs = vec![1.0, 2.0, 3.0, 4.0];
    let eye = vec![1.0, 0.0, 0.0, 1.0];
    let out = cpu_matmul_transpose_b(&lhs, &eye, 2, 2, 2);
    assert_eq!(out.len(), 4);
    assert!((out[0] - 1.0).abs() < 1e-5);
    assert!((out[3] - 4.0).abs() < 1e-5);
}
#[test]
fn test_cpu_matmul_transpose_b_rectangular() {
    // A is 2x3; B is stored row-major as 4 rows of length 3 (n x k).
    let lhs = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    let rhs = vec![
        1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0,
    ];
    let out = cpu_matmul_transpose_b(&lhs, &rhs, 2, 3, 4);
    assert_eq!(out.len(), 8);
    // Row 0 of A dotted with row 0 of B: 1*1 + 2*0 + 3*0 = 1
    assert!((out[0] - 1.0).abs() < 1e-5);
}
#[test]
fn test_cpu_matmul_transpose_b_attention_style() {
    // Shapes mirror a Q * K^T attention-score computation.
    let (seq, head_dim) = (4, 8);
    let ramp = |len: usize| (0..len).map(|i| i as f32 * 0.01).collect::<Vec<f32>>();
    let q = ramp(seq * head_dim);
    let k = ramp(seq * head_dim);
    let scores = cpu_matmul_transpose_b(&q, &k, seq, head_dim, seq);
    assert_eq!(scores.len(), seq * seq);
    assert!(scores.iter().all(|s| s.is_finite()));
}
#[test]
fn test_transpose_2x2() {
    // Columns of the input become rows of the output.
    let src = vec![1.0, 2.0, 3.0, 4.0];
    let transposed = transpose(&src, 2, 2);
    assert_eq!(transposed, vec![1.0, 3.0, 2.0, 4.0]);
}
#[test]
fn test_transpose_2x3() {
    // A non-square (2x3) transpose interleaves the two rows.
    let src = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    let transposed = transpose(&src, 2, 3);
    assert_eq!(transposed, vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
}
#[test]
fn test_transpose_identity_square() {
    // A square identity matrix is its own transpose.
    let eye = vec![1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0];
    assert_eq!(transpose(&eye, 3, 3), eye);
}
#[test]
fn test_transpose_round_trip() {
    // Transposing twice (with swapped dims) must restore the input.
    let original = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    let once = transpose(&original, 2, 3);
    assert_eq!(transpose(&once, 3, 2), original);
}
#[test]
fn test_cpu_matmul_transposed_simd_basic() {
    // The weight rows select the first three input components; bias is zero,
    // so the output equals input[0..3].
    let input = vec![1.0, 2.0, 3.0, 4.0];
    let weight_t = vec![
        1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,
    ];
    let bias = vec![0.0, 0.0, 0.0];
    let out = cpu_matmul_transposed_simd(&input, &weight_t, &bias, 4, 3);
    assert_eq!(out.len(), 3);
    for (got, want) in out.iter().zip([1.0f32, 2.0, 3.0]) {
        assert!((got - want).abs() < 1e-5);
    }
}
#[test]
fn test_cpu_matmul_transposed_simd_with_bias() {
    // Each output lane is its dot product plus the matching bias term:
    // (1+1)+10 = 12 and (2+2)+20 = 24.
    let input = vec![1.0, 1.0];
    let weight_t = vec![1.0, 1.0, 2.0, 2.0];
    let bias = vec![10.0, 20.0];
    let out = cpu_matmul_transposed_simd(&input, &weight_t, &bias, 2, 2);
    assert!((out[0] - 12.0).abs() < 1e-5);
    assert!((out[1] - 24.0).abs() < 1e-5);
}
#[test]
fn test_cpu_matmul_transposed_simd_large_parallel() {
    // n is large enough to push the kernel into its parallel path;
    // every lane is k products of 0.1 * 0.1, i.e. k * 0.01.
    let (k, n) = (64, 8192);
    let input = vec![0.1f32; k];
    let weight_t = vec![0.1f32; n * k];
    let bias = vec![0.0f32; n];
    let out = cpu_matmul_transposed_simd(&input, &weight_t, &bias, k, n);
    assert_eq!(out.len(), n);
    let expected = k as f32 * 0.01;
    // Spot-check both ends of the output buffer.
    assert!((out[0] - expected).abs() < 1e-4);
    assert!((out[n - 1] - expected).abs() < 1e-4);
}
#[test]
fn test_gpu_compute_matmul_tensor_basic() {
    // (2x3) * (3x2) -> 2x2; c[0][0] = 1*1 + 2*3 + 3*5 = 22.
    let mut compute = GpuCompute::new(ComputeBackend::Cpu).expect("CPU backend");
    let lhs = Tensor::from_vec(vec![2, 3], vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
    let rhs = Tensor::from_vec(vec![3, 2], vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
    let prod = compute.matmul_tensor(&lhs, &rhs).expect("matmul_tensor");
    assert_eq!(prod.shape(), &[2, 2]);
    assert!((prod.data()[0] - 22.0).abs() < 1e-5);
}
#[test]
fn test_gpu_compute_matmul_tensor_non_2d_error() {
    // 1-D tensors must be rejected by matmul_tensor.
    let mut compute = GpuCompute::new(ComputeBackend::Cpu).expect("CPU backend");
    let lhs = Tensor::from_vec(vec![4], vec![1.0, 2.0, 3.0, 4.0]).unwrap();
    let rhs = Tensor::from_vec(vec![4], vec![1.0, 2.0, 3.0, 4.0]).unwrap();
    assert!(compute.matmul_tensor(&lhs, &rhs).is_err());
}
#[test]
fn test_gpu_compute_matmul_tensor_dim_mismatch() {
    // Inner dimensions disagree (3 vs 4), so the multiply must fail.
    let mut compute = GpuCompute::new(ComputeBackend::Cpu).expect("CPU backend");
    let lhs = Tensor::from_vec(vec![2, 3], vec![1.0; 6]).unwrap();
    let rhs = Tensor::from_vec(vec![4, 2], vec![1.0; 8]).unwrap();
    assert!(compute.matmul_tensor(&lhs, &rhs).is_err());
}
#[test]
fn test_gpu_compute_matmul_a_size_mismatch() {
    // `a` holds 3 elements but a 2x2 matrix requires 4.
    let mut compute = GpuCompute::new(ComputeBackend::Cpu).expect("CPU backend");
    let a = vec![1.0, 2.0, 3.0];
    let b = vec![1.0, 2.0, 3.0, 4.0];
    assert!(compute.matmul(&a, &b, 2, 2, 2).is_err());
}
#[test]
fn test_gpu_compute_matmul_b_size_mismatch() {
    // `b` holds 3 elements but a 2x2 matrix requires 4.
    let mut compute = GpuCompute::new(ComputeBackend::Cpu).expect("CPU backend");
    let a = vec![1.0, 2.0, 3.0, 4.0];
    let b = vec![1.0, 2.0, 3.0];
    assert!(compute.matmul(&a, &b, 2, 2, 2).is_err());
}
#[test]
fn test_gpu_compute_dot_length_mismatch() {
    // Vectors of different lengths are an error for dot().
    let mut compute = GpuCompute::new(ComputeBackend::Cpu).expect("CPU backend");
    let lhs = vec![1.0, 2.0, 3.0];
    let rhs = vec![1.0, 2.0];
    assert!(compute.dot(&lhs, &rhs).is_err());
}
#[test]
fn test_gpu_compute_dot_success() {
    // 1*4 + 2*5 + 3*6 = 32.
    let mut compute = GpuCompute::new(ComputeBackend::Cpu).expect("CPU backend");
    let lhs = vec![1.0, 2.0, 3.0];
    let rhs = vec![4.0, 5.0, 6.0];
    let dot = compute.dot(&lhs, &rhs).expect("dot product");
    assert!((dot - 32.0).abs() < 1e-5);
}
#[test]
fn test_gpu_compute_relu_values() {
    // Negatives clamp to zero; non-negatives pass through unchanged.
    let mut compute = GpuCompute::new(ComputeBackend::Cpu).expect("CPU backend");
    let xs = vec![-2.0, -1.0, 0.0, 1.0, 2.0];
    let ys = compute.relu(&xs).expect("relu");
    assert_eq!(ys, vec![0.0, 0.0, 0.0, 1.0, 2.0]);
}
#[test]
fn test_gpu_compute_sigmoid_values() {
    // sigmoid(0) = 0.5; sigmoid(1) ~ 0.731; sigmoid(-1) ~ 0.269.
    let mut compute = GpuCompute::new(ComputeBackend::Cpu).expect("CPU backend");
    let xs = vec![0.0, 1.0, -1.0];
    let ys = compute.sigmoid(&xs).expect("sigmoid");
    assert!((ys[0] - 0.5).abs() < 1e-5);
    assert!((ys[1] - 0.731).abs() < 0.01);
    assert!((ys[2] - 0.269).abs() < 0.01);
}
#[test]
fn test_hybrid_scheduler_matmul_pooled() {
    // Multiply, return the buffer to the pool, then multiply again
    // so the second call can reuse the released buffer.
    let mut scheduler = HybridScheduler::new().expect("scheduler");
    let lhs = vec![1.0, 2.0, 3.0, 4.0];
    let rhs = vec![5.0, 6.0, 7.0, 8.0];
    let first = scheduler.matmul_pooled(&lhs, &rhs, 2, 2, 2).expect("matmul_pooled");
    assert_eq!(first.len(), 4);
    // c[0][0] = 1*5 + 2*7 = 19
    assert!((first[0] - 19.0).abs() < 1e-5);
    scheduler.release_buffer(first);
    let second = scheduler.matmul_pooled(&lhs, &rhs, 2, 2, 2).expect("matmul_pooled 2");
    assert_eq!(second.len(), 4);
}
#[test]
fn test_hybrid_scheduler_matmul_async() {
    // The async handle is expected to be ready immediately for this
    // small input; wait() then yields the finished product.
    let mut scheduler = HybridScheduler::new().expect("scheduler");
    let lhs = vec![1.0, 2.0, 3.0, 4.0];
    let rhs = vec![5.0, 6.0, 7.0, 8.0];
    let pending = scheduler.matmul_async(&lhs, &rhs, 2, 2, 2).expect("matmul_async");
    assert!(pending.is_ready());
    let data = pending.wait();
    assert_eq!(data.len(), 4);
    // c[0][0] = 1*5 + 2*7 = 19
    assert!((data[0] - 19.0).abs() < 1e-5);
}
#[test]
fn test_hybrid_scheduler_pool_stats() {
    let mut scheduler = HybridScheduler::new().expect("scheduler");
    // A fresh scheduler starts with an empty buffer cache.
    assert_eq!(scheduler.pool_stats().cached_buffers, 0);
    let lhs = vec![1.0; 1024];
    let rhs = vec![1.0; 1024];
    let buf = scheduler.matmul_pooled(&lhs, &rhs, 1, 1024, 1).expect("matmul");
    scheduler.release_buffer(buf);
    // After a release the pool should be caching something.
    let stats = scheduler.pool_stats();
    assert!(stats.cached_buffers > 0 || stats.cached_bytes > 0);
}
#[test]
fn test_hybrid_scheduler_matmul_transpose_b_cpu() {
    // A very high GPU threshold keeps this small multiply on the CPU path.
    let mut scheduler = HybridScheduler::with_threshold(1_000_000).expect("scheduler");
    let lhs = vec![1.0, 2.0, 3.0, 4.0];
    let rhs = vec![1.0, 2.0, 3.0, 4.0];
    let out = scheduler.matmul_transpose_b(&lhs, &rhs, 2, 2, 2).expect("transpose_b");
    assert_eq!(out.len(), 4);
}
#[test]
fn test_hybrid_scheduler_threshold_accessors() {
    // The threshold supplied at construction is reported back unchanged.
    let scheduler = HybridScheduler::with_threshold(12345).expect("scheduler");
    assert_eq!(scheduler.gpu_threshold(), 12345);
}
#[test]
fn test_gpu_buffer_pool_oversized_request() {
    // A request far beyond any bucket size must still be satisfied.
    let mut pool = GpuBufferPool::new();
    let big = pool.acquire(100_000_000);
    assert!(big.len() >= 100_000_000);
    pool.release(big);
}
#[test]
fn test_gpu_buffer_pool_exact_bucket_boundary() {
    // Acquire / release / acquire at an exact power-of-two size.
    let mut pool = GpuBufferPool::new();
    let first = pool.acquire(1024);
    assert!(first.len() >= 1024);
    pool.release(first);
    let second = pool.acquire(1024);
    assert!(second.len() >= 1024);
}
#[test]
fn test_gpu_buffer_pool_max_per_bucket() {
    // Repeated acquire/release cycles must not grow a bucket without bound.
    let mut pool = GpuBufferPool::new();
    for _ in 0..10 {
        let buf = pool.acquire(1024);
        pool.release(buf);
    }
    assert!(pool.stats().cached_buffers <= 4);
}
#[test]
fn test_inference_metrics_high_percentile() {
    // Record latencies 1..=100 ms so both percentile extremes are known.
    let mut metrics = InferenceMetrics::new();
    for ms in 1..=100 {
        metrics.record_inference(Duration::from_millis(ms), 1);
    }
    let p100 = metrics.latency_percentile(100).expect("p100");
    let p0 = metrics.latency_percentile(0).expect("p0");
    assert_eq!(p100, Duration::from_millis(100));
    assert_eq!(p0, Duration::from_millis(1));
}
#[test]
fn test_inference_metrics_throughput_immediate() {
    // Throughput queried on a brand-new metrics object is non-negative.
    let metrics = InferenceMetrics::new();
    assert!(metrics.throughput() >= 0.0);
}
include!("metrics_tests_compute_backend.rs");