pub mod buffer;
pub mod cpu_fallback;
pub mod manager;
pub mod transfer;
pub mod cuda;
pub mod metal;
pub mod opencl;
pub use buffer::GpuBuffer;
pub use manager::GpuMemoryManager;
pub use cpu_fallback::CpuFallback;
pub use cuda::CudaOperations;
pub use metal::MetalOperations;
pub use opencl::OpenCLOperations;
#[cfg(test)]
mod tests {
    // Exercises the items re-exported by the parent module, using CPU-backed
    // buffers as inputs in every test.
    use std::sync::Arc;

    use crate::tensor::Tensor;

    use super::*;

    /// Builds a manager and a CPU buffer so that a broken re-export of either
    /// name shows up as a compile failure of this test module.
    #[test]
    fn test_module_structure() {
        let _mgr = GpuMemoryManager::<f32>::new();
        let _cpu_buf = GpuBuffer::Cpu(Arc::new(vec![1.0f32, 2.0, 3.0]));
    }

    /// Round-trips a 2x3 tensor of ones through `to_device` / `to_cpu` on the
    /// CPU device and checks that both shape and data are unchanged.
    #[test]
    fn test_backward_compatibility() {
        let original = Tensor::<f32>::ones(&[2, 3]);
        let cpu_device = crate::gpu::DeviceType::Cpu;

        let on_device = GpuMemoryManager::to_device(&original, &cpu_device).unwrap();
        let round_tripped = GpuMemoryManager::to_cpu(&on_device, &[2, 3]).unwrap();

        assert_eq!(original.shape(), round_tripped.shape());
        assert_eq!(original.data, round_tripped.data);
    }

    /// Adds two three-element CPU buffers element by element and compares the
    /// result against the expected sums.
    #[test]
    fn test_elementwise_operations() {
        let manager = GpuMemoryManager::<f32>::new();
        let left = GpuBuffer::Cpu(Arc::new(vec![1.0, 2.0, 3.0]));
        let right = GpuBuffer::Cpu(Arc::new(vec![4.0, 5.0, 6.0]));
        let expected = vec![5.0f32, 7.0, 9.0];

        match manager
            .execute_elementwise(&left, &right, |x, y| x + y)
            .unwrap()
        {
            GpuBuffer::Cpu(values) => assert_eq!(values.as_ref(), &expected),
            // This arm is only compiled when a GPU backend feature is enabled.
            #[cfg(any(feature = "cuda", feature = "metal", feature = "opencl"))]
            _ => panic!("Expected CPU buffer from integration test"),
        }
    }

    /// Normalizes five values and checks that the mean of the output is within
    /// 1e-6 of zero.
    ///
    /// Marked `ignore` unless at least one of the `cuda`, `metal` or `opencl`
    /// features is enabled.
    #[test]
    #[cfg_attr(
        not(any(feature = "cuda", feature = "metal", feature = "opencl")),
        ignore = "GPU features not enabled"
    )]
    fn test_batch_normalization() {
        let manager = GpuMemoryManager::<f32>::new();
        let input = GpuBuffer::Cpu(Arc::new(vec![1.0f32, 2.0, 3.0, 4.0, 5.0]));
        let eps = 1e-5f32;

        let normalized = match manager.execute_batch_normalize(&input, eps).unwrap() {
            GpuBuffer::Cpu(values) => values,
            // This arm is only compiled when a GPU backend feature is enabled.
            #[cfg(any(feature = "cuda", feature = "metal", feature = "opencl"))]
            _ => panic!("Expected CPU buffer from integration test"),
        };

        let total: f32 = normalized.iter().sum();
        let mean = total / normalized.len() as f32;
        assert!(
            mean.abs() < 1e-6,
            "Mean should be approximately zero, got {}",
            mean
        );
    }

    /// Runs `execute_attention` on two-element query/key/value buffers and
    /// checks that the output holds two finite values.
    #[test]
    fn test_attention_mechanism() {
        let manager = GpuMemoryManager::<f32>::new();
        let q = GpuBuffer::Cpu(Arc::new(vec![1.0f32, 0.5]));
        let k = GpuBuffer::Cpu(Arc::new(vec![0.8f32, 1.2]));
        let v = GpuBuffer::Cpu(Arc::new(vec![2.0f32, 3.0]));

        let output = match manager.execute_attention(&q, &k, &v).unwrap() {
            GpuBuffer::Cpu(values) => values,
            // This arm is only compiled when a GPU backend feature is enabled.
            #[cfg(any(feature = "cuda", feature = "metal", feature = "opencl"))]
            _ => panic!("Expected CPU buffer from integration test"),
        };

        assert_eq!(output.len(), 2);
        assert!(output.iter().all(|value| value.is_finite()));
    }
}