use ghostflow_core::prelude::*;

#[cfg(test)]
mod phase1_tests {
    use super::*;

    // BLAS-backed matmul: output shape is correct and contains non-zero values.
    #[test]
    fn test_blas_matmul() {
        let a = Tensor::randn(&[100, 100]);
        let b = Tensor::randn(&[100, 100]);
        let c = a.matmul(&b).unwrap();
        assert_eq!(c.dims(), &[100, 100]);
        let data = c.data_f32();
        assert!(data.iter().any(|&x| x != 0.0));
    }

    // SIMD ReLU: negative inputs clamp to zero, non-negative inputs pass through.
    #[test]
    fn test_simd_relu() {
        let input = Tensor::from_slice(&[-2.0f32, -1.0, 0.0, 1.0, 2.0], &[5]).unwrap();
        let output = input.relu();
        let expected = vec![0.0f32, 0.0, 0.0, 1.0, 2.0];
        let result = output.data_f32();
        for (r, e) in result.iter().zip(expected.iter()) {
            assert!((r - e).abs() < 1e-6);
        }
    }

    // SIMD GELU: gelu(0) ≈ 0 and gelu(1) ≈ 0.84.
    #[test]
    fn test_simd_gelu() {
        let input = Tensor::from_slice(&[0.0f32, 1.0, -1.0], &[3]).unwrap();
        let output = input.gelu();
        assert!(output.data_f32()[0].abs() < 0.01);
        assert!((output.data_f32()[1] - 0.84).abs() < 0.1);
    }

    // Parallel softmax: outputs are positive and sum to 1 along the last axis.
    #[test]
    fn test_parallel_softmax() {
        let input = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0], &[4]).unwrap();
        let output = input.softmax(-1);
        let sum: f32 = output.data_f32().iter().sum();
        assert!((sum - 1.0).abs() < 1e-5);
        assert!(output.data_f32().iter().all(|&x| x > 0.0));
    }
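
    // Sketch (assumption): a numerically stable softmax should not overflow for
    // large logits. The max-subtraction behavior is not confirmed by the existing
    // tests, so this case is marked #[ignore] as an illustrative property check.
    #[test]
    #[ignore]
    fn test_softmax_large_logits_sketch() {
        let input = Tensor::from_slice(&[1000.0f32, 1001.0, 1002.0], &[3]).unwrap();
        let output = input.softmax(-1);
        let data = output.data_f32();
        // Finite probabilities summing to 1 imply max-subtraction (or equivalent)
        // is applied internally before exponentiation.
        assert!(data.iter().all(|&x| x.is_finite()));
        let sum: f32 = data.iter().sum();
        assert!((sum - 1.0).abs() < 1e-5);
    }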

    // 2x2 sanity check for the blocked matmul kernel:
    // [1 2; 3 4] * [5 6; 7 8] = [19 22; 43 50].
    #[test]
    fn test_blocked_matmul_correctness() {
        let a = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0], &[2, 2]).unwrap();
        let b = Tensor::from_slice(&[5.0f32, 6.0, 7.0, 8.0], &[2, 2]).unwrap();
        println!("A shape: {:?}, data: {:?}", a.shape().dims(), a.data_f32());
        println!("B shape: {:?}, data: {:?}", b.shape().dims(), b.data_f32());
        let c = a.matmul(&b).unwrap();
        println!("C shape: {:?}", c.shape().dims());
        let result = c.data_f32();
        println!("Result: {:?}", result);
        println!("Expected: [19, 22, 43, 50]");
        assert!((result[0] - 19.0).abs() < 1e-5, "result[0]={}, expected=19", result[0]);
        assert!((result[1] - 22.0).abs() < 1e-5, "result[1]={}, expected=22", result[1]);
        assert!((result[2] - 43.0).abs() < 1e-5, "result[2]={}, expected=43", result[2]);
        assert!((result[3] - 50.0).abs() < 1e-5, "result[3]={}, expected=50", result[3]);
    }
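
    // Sketch (assumption): matmul returns a Result, so mismatched inner dimensions
    // are expected to produce an Err rather than a panic. The exact error behavior
    // is not confirmed by the existing tests, hence #[ignore].
    #[test]
    #[ignore]
    fn test_matmul_shape_mismatch_sketch() {
        let a = Tensor::randn(&[2, 3]);
        let b = Tensor::randn(&[4, 2]); // inner dims 3 vs 4 do not match
        assert!(a.matmul(&b).is_err());
    }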
}

#[cfg(test)]
mod phase2_tests {
    use super::*;
    use ghostflow_core::ops::conv::conv2d_optimized;

    // im2col convolution: a 3x3 kernel with stride 1 and padding 1 preserves the
    // spatial size, so [2, 3, 32, 32] -> [2, 16, 32, 32].
    #[test]
    fn test_im2col_convolution() {
        let input = Tensor::randn(&[2, 3, 32, 32]);
        let weight = Tensor::randn(&[16, 3, 3, 3]);
        let bias = Some(Tensor::zeros(&[16]));
        let output = conv2d_optimized(&input, &weight, bias.as_ref(), (1, 1), (1, 1)).unwrap();
        assert_eq!(output.dims(), &[2, 16, 32, 32]);
        let data = output.data_f32();
        assert!(data.iter().any(|&x| x != 0.0));
    }

    // Stride 2 halves the spatial dimensions: 32x32 -> 16x16.
    #[test]
    fn test_conv_stride() {
        let input = Tensor::randn(&[1, 3, 32, 32]);
        let weight = Tensor::randn(&[16, 3, 3, 3]);
        let output = conv2d_optimized(&input, &weight, None, (2, 2), (1, 1)).unwrap();
        assert_eq!(output.dims(), &[1, 16, 16, 16]);
    }

    // Padding 1 keeps 32x32; without padding a 3x3 kernel shrinks it to 30x30.
    #[test]
    fn test_conv_padding() {
        let input = Tensor::randn(&[1, 3, 32, 32]);
        let weight = Tensor::randn(&[16, 3, 3, 3]);
        let output = conv2d_optimized(&input, &weight, None, (1, 1), (1, 1)).unwrap();
        assert_eq!(output.dims(), &[1, 16, 32, 32]);
        let output_no_pad = conv2d_optimized(&input, &weight, None, (1, 1), (0, 0)).unwrap();
        assert_eq!(output_no_pad.dims(), &[1, 16, 30, 30]);
    }
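
    // Sketch (assumption): combining stride 2 with no padding should follow the
    // usual output-size formula floor((H - K) / S) + 1 = floor((32 - 3) / 2) + 1 = 15.
    // Marked #[ignore] since this exact combination is not covered above.
    #[test]
    #[ignore]
    fn test_conv_stride_no_padding_sketch() {
        let input = Tensor::randn(&[1, 3, 32, 32]);
        let weight = Tensor::randn(&[16, 3, 3, 3]);
        let output = conv2d_optimized(&input, &weight, None, (2, 2), (0, 0)).unwrap();
        assert_eq!(output.dims(), &[1, 16, 15, 15]);
    }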
}

#[cfg(test)]
mod phase3_tests {
    use ghostflow_core::Tensor;

    // GPU memory pool: usage grows after an allocation and, after free, stays
    // within one block of the baseline (the pool may cache freed blocks).
    #[test]
    #[cfg(feature = "cuda")]
    fn test_gpu_memory_pool() {
        use ghostflow_cuda::get_global_gpu_pool;
        let pool = get_global_gpu_pool();
        let initial_usage = pool.current_usage();
        let ptr = pool.allocate(1024 * 1024).unwrap();
        assert!(pool.current_usage() > initial_usage);
        pool.free(ptr).unwrap();
        assert!(pool.current_usage() <= initial_usage + 1024 * 1024);
    }

    // GPU tensor creation reports the shape it was constructed with.
    #[test]
    #[cfg(feature = "cuda")]
    fn test_gpu_tensor_creation() {
        use ghostflow_cuda::{GpuTensor, get_global_gpu_pool};
        let pool = get_global_gpu_pool();
        let tensor = GpuTensor::new(vec![10, 10], pool).unwrap();
        assert_eq!(tensor.shape(), &[10, 10]);
    }

    // Host-to-device-to-host round trip preserves the data.
    #[test]
    #[cfg(feature = "cuda")]
    fn test_gpu_cpu_transfer() {
        use ghostflow_cuda::{GpuTensor, get_global_gpu_pool};
        let pool = get_global_gpu_pool();
        let mut tensor = GpuTensor::new(vec![5, 5], pool).unwrap();
        let data: Vec<f32> = (0..25).map(|i| i as f32).collect();
        tensor.copy_from_cpu(&data).unwrap();
        let result = tensor.copy_to_cpu().unwrap();
        for (a, b) in data.iter().zip(result.iter()) {
            assert!((a - b).abs() < 1e-5);
        }
    }
}

#[cfg(test)]
mod phase4_tests {
    use ghostflow_core::fusion::*;
    use ghostflow_core::jit::*;
    use ghostflow_core::layout::*;

    // Fusion engine: a Conv2d -> BatchNorm -> ReLU chain should fuse into at
    // most the original three nodes (ideally fewer).
    #[test]
    fn test_fusion_engine() {
        let engine = FusionEngine::new();
        let graph = ComputeGraph {
            nodes: vec![
                GraphNode {
                    id: 0,
                    op: Operation::Conv2d { channels: 64, kernel: (3, 3) },
                    inputs: vec![],
                    outputs: vec![1],
                },
                GraphNode {
                    id: 1,
                    op: Operation::BatchNorm { channels: 64 },
                    inputs: vec![0],
                    outputs: vec![2],
                },
                GraphNode {
                    id: 2,
                    op: Operation::ReLU,
                    inputs: vec![1],
                    outputs: vec![],
                },
            ],
            edges: vec![(0, 1), (1, 2)],
        };
        let optimized = engine.optimize(graph);
        assert!(optimized.nodes.len() <= 3);
    }

    // JIT compiler: the kernel cache starts empty and holds one entry after a
    // single graph is compiled.
    #[test]
    fn test_jit_compiler() {
        let mut compiler = JitCompiler::new();
        assert_eq!(compiler.cache_stats().0, 0);
        let graph = ComputeGraph {
            nodes: vec![
                GraphNode {
                    id: 0,
                    op: Operation::ReLU,
                    inputs: vec![],
                    outputs: vec![],
                },
            ],
            edges: vec![],
        };
        // Compilation may succeed or fail depending on the backend; the point is
        // that it does not panic and that the graph lands in the cache either way.
        let _result = compiler.compile(&graph);
        let stats_after = compiler.cache_stats();
        assert_eq!(stats_after.0, 1);
    }
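
    // Sketch (assumption): compiling the same graph twice should hit the cache,
    // leaving the entry count at 1. Whether cache_stats() counts entries or hits
    // is not confirmed by the existing tests, hence #[ignore].
    #[test]
    #[ignore]
    fn test_jit_cache_hit_sketch() {
        let mut compiler = JitCompiler::new();
        let graph = ComputeGraph {
            nodes: vec![GraphNode {
                id: 0,
                op: Operation::ReLU,
                inputs: vec![],
                outputs: vec![],
            }],
            edges: vec![],
        };
        let _ = compiler.compile(&graph);
        let _ = compiler.compile(&graph);
        assert_eq!(compiler.cache_stats().0, 1);
    }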

    // Layout optimizer: choose_layout returns a valid layout and caches its
    // decision, so repeated queries for the same op agree.
    #[test]
    fn test_layout_optimizer() {
        let mut optimizer = LayoutOptimizer::new();
        let conv_op = OperationType::Conv2d {
            kernel: (3, 3),
            stride: (1, 1),
        };
        let layout = optimizer.choose_layout(&conv_op);
        assert!(layout == MemoryLayout::NCHW || layout == MemoryLayout::NHWC);
        let layout2 = optimizer.choose_layout(&conv_op);
        assert_eq!(layout, layout2);
    }

    // NCHW -> NHWC -> NCHW round trip is lossless for a small [1, 2, 2, 4] tensor.
    #[test]
    fn test_layout_transformation() {
        let optimizer = LayoutOptimizer::new();
        let data: Vec<f32> = (0..16).map(|i| i as f32).collect();
        let shape = vec![1, 2, 2, 4];
        println!("Original data: {:?}", data);
        let nhwc = optimizer.transform_layout(
            &data,
            MemoryLayout::NCHW,
            MemoryLayout::NHWC,
            &shape,
        );
        println!("NHWC data: {:?}", nhwc);
        assert_eq!(nhwc.len(), data.len());
        let nchw_shape = vec![1, 2, 2, 4];
        let nchw = optimizer.transform_layout(
            &nhwc,
            MemoryLayout::NHWC,
            MemoryLayout::NCHW,
            &nchw_shape,
        );
        println!("Back to NCHW: {:?}", nchw);
        for (i, (a, b)) in data.iter().zip(nchw.iter()).enumerate() {
            println!("Index {}: original={}, transformed={}, diff={}", i, a, b, (a - b).abs());
            assert!((a - b).abs() < 1e-5, "Mismatch at index {}: {} vs {}", i, a, b);
        }
    }

    // Performance estimation returns a score of at least 1.0 for either layout.
    #[test]
    fn test_performance_estimation() {
        let optimizer = LayoutOptimizer::new();
        let conv_op = OperationType::Conv2d {
            kernel: (3, 3),
            stride: (1, 1),
        };
        let perf_nchw = optimizer.estimate_performance(&conv_op, MemoryLayout::NCHW);
        let perf_nhwc = optimizer.estimate_performance(&conv_op, MemoryLayout::NHWC);
        assert!(perf_nchw >= 1.0);
        assert!(perf_nhwc >= 1.0);
    }
}

#[cfg(test)]
mod integration_tests {
    use super::*;

    // End-to-end matmul: correct output shape, with a coarse timing guard
    // (a 512x512 matmul should comfortably finish in under a second).
    #[test]
    fn test_end_to_end_matmul() {
        let a = Tensor::randn(&[512, 512]);
        let b = Tensor::randn(&[512, 512]);
        let start = std::time::Instant::now();
        let c = a.matmul(&b).unwrap();
        let duration = start.elapsed();
        println!("512x512 matmul took: {:?}", duration);
        assert_eq!(c.dims(), &[512, 512]);
        assert!(duration.as_secs() < 1);
    }

    // End-to-end conv: a ResNet-style layer, [4, 64, 56, 56] with 128 3x3 filters;
    // stride 1 and padding 1 keep the 56x56 spatial size.
    #[test]
    fn test_end_to_end_conv() {
        use ghostflow_core::ops::conv::conv2d_optimized;
        let input = Tensor::randn(&[4, 64, 56, 56]);
        let weight = Tensor::randn(&[128, 64, 3, 3]);
        let bias = Some(Tensor::zeros(&[128]));
        let start = std::time::Instant::now();
        let output = conv2d_optimized(&input, &weight, bias.as_ref(), (1, 1), (1, 1)).unwrap();
        let duration = start.elapsed();
        println!("Conv2d took: {:?}", duration);
        assert_eq!(output.dims(), &[4, 128, 56, 56]);
    }

    // Chained activations: relu -> gelu -> sigmoid preserves the shape, and the
    // final sigmoid bounds every element to [0, 1].
    #[test]
    fn test_activation_chain() {
        let input = Tensor::randn(&[1000, 1000]);
        let start = std::time::Instant::now();
        let output = input.relu().gelu().sigmoid();
        let duration = start.elapsed();
        println!("Activation chain took: {:?}", duration);
        assert_eq!(output.dims(), &[1000, 1000]);
        assert!(output.data_f32().iter().all(|&x| x >= 0.0 && x <= 1.0));
    }
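
    // Sketch (assumption): ReLU is idempotent, so applying it twice should match a
    // single application elementwise. Marked #[ignore] as an illustrative property
    // check rather than a required regression test.
    #[test]
    #[ignore]
    fn test_relu_idempotent_sketch() {
        let x = [-3.0f32, -0.5, 0.0, 0.5, 3.0];
        let once = Tensor::from_slice(&x, &[5]).unwrap().relu();
        let twice = Tensor::from_slice(&x, &[5]).unwrap().relu().relu();
        for (a, b) in once.data_f32().iter().zip(twice.data_f32().iter()) {
            assert!((a - b).abs() < 1e-6);
        }
    }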
}