use crate::error::{RusTorchError, RusTorchResult};
/// Absolute error threshold for `f32` comparisons.
///
/// [`VerificationResult::with_error_metrics`] marks a result as passed only
/// when its maximum error is strictly below this value.
pub const FLOAT_TOLERANCE: f32 = 1e-5;
/// Absolute error threshold intended for `f64` comparisons.
///
/// Not referenced by the verification code in this part of the file.
pub const DOUBLE_TOLERANCE: f64 = 1e-10;
/// Outcome of a single CPU-vs-GPU verification test.
///
/// Built with [`VerificationResult::new`] and then refined through the
/// consuming `with_*` builder methods.
#[derive(Debug, Clone)]
#[allow(missing_docs)]
pub struct VerificationResult {
// Human-readable name shown in the summary table.
pub test_name: String,
// Whether the test is considered successful; starts out `false`.
pub passed: bool,
// Wall-clock time of the CPU reference in milliseconds, if recorded.
pub cpu_time_ms: Option<f64>,
// Wall-clock time of the GPU run in milliseconds, if recorded.
pub gpu_time_ms: Option<f64>,
// CPU time divided by GPU time, when a speedup has been recorded.
pub speedup: Option<f64>,
// Largest error reported for the comparison.
pub max_error: f32,
// Mean error reported for the comparison.
pub mean_error: f32,
// Failure description, set by `with_failure`.
pub error_message: Option<String>,
}
impl VerificationResult {
    /// Creates a result for `test_name` in its initial state: not passed, no
    /// timings, zeroed error metrics and no error message.
    pub fn new(test_name: String) -> Self {
        Self {
            test_name,
            passed: false,
            cpu_time_ms: None,
            gpu_time_ms: None,
            speedup: None,
            max_error: 0.0,
            mean_error: 0.0,
            error_message: None,
        }
    }

    /// Records the CPU and GPU timings (milliseconds) and derives the speedup
    /// `cpu_time / gpu_time`.
    ///
    /// The speedup is left as `None` when `gpu_time` is not strictly positive
    /// (zero, negative or NaN): the ratio would be infinite or NaN and would
    /// be rendered as a bogus value in the summary.
    pub fn with_timing(mut self, cpu_time: f64, gpu_time: f64) -> Self {
        self.cpu_time_ms = Some(cpu_time);
        self.gpu_time_ms = Some(gpu_time);
        self.speedup = if gpu_time > 0.0 {
            Some(cpu_time / gpu_time)
        } else {
            None
        };
        self
    }

    /// Stores the error metrics and updates `passed`.
    ///
    /// The result passes only when `max_error` is strictly below
    /// [`FLOAT_TOLERANCE`] and `mean_error` is not NaN. A NaN in either metric
    /// therefore always counts as a failure.
    pub fn with_error_metrics(mut self, max_error: f32, mean_error: f32) -> Self {
        self.max_error = max_error;
        self.mean_error = mean_error;
        // `NaN < tolerance` is already false; the explicit check covers a NaN
        // mean paired with a small maximum.
        self.passed = max_error < FLOAT_TOLERANCE && !mean_error.is_nan();
        self
    }

    /// Marks the result as failed and stores `error` as its message.
    pub fn with_failure(mut self, error: String) -> Self {
        self.passed = false;
        self.error_message = Some(error);
        self
    }
}
/// Runs the CPU-vs-GPU verification tests and collects their outcomes.
pub struct VerificationTestSuite {
// One entry per completed test, in the order the tests were run.
results: Vec<VerificationResult>,
}
impl VerificationTestSuite {
/// Creates a suite that has not recorded any results yet.
pub fn new() -> Self {
    let results = Vec::new();
    Self { results }
}
pub fn run_all_tests(&mut self) -> RusTorchResult<()> {
println!("🔍 Starting GPU vs CPU Verification Tests");
println!("==========================================");
self.test_elementwise_add()?;
self.test_elementwise_sub()?;
self.test_elementwise_mul()?;
self.test_elementwise_div()?;
self.test_matrix_multiplication_small()?;
self.test_matrix_multiplication_large()?;
self.test_reduction_sum()?;
self.test_reduction_mean()?;
self.test_relu_activation()?;
self.test_batch_normalization()?;
self.test_softmax()?;
self.test_conv2d_basic()?;
self.test_max_pool2d()?;
self.test_transpose()?;
self.test_gelu_activation()?;
self.print_summary();
Ok(())
}
/// Verifies element-wise addition on 1024 * 1024 constant-filled buffers.
///
/// The CPU reference is always computed and timed. With a GPU feature
/// (`cuda`, `opencl` or `metal`) compiled in, the GPU output is timed and
/// compared against it; otherwise the result is recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_elementwise_add(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Element-wise Addition"));
    let size = 1024 * 1024;
    let a = vec![1.5f32; size];
    let b = vec![2.5f32; size];

    let cpu_timer = std::time::Instant::now();
    let cpu_result: Vec<f32> = a.iter().zip(b.iter()).map(|(x, y)| x + y).collect();
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_elementwise_add(&a, &b)?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies element-wise subtraction on 512 * 512 constant-filled buffers.
///
/// The CPU reference is always computed and timed. With a GPU feature
/// compiled in, the GPU output is timed and compared against it; otherwise
/// the result is recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_elementwise_sub(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Element-wise Subtraction"));
    let size = 512 * 512;
    let a = vec![5.0f32; size];
    let b = vec![2.0f32; size];

    let cpu_timer = std::time::Instant::now();
    let cpu_result: Vec<f32> = a.iter().zip(b.iter()).map(|(x, y)| x - y).collect();
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_elementwise_sub(&a, &b)?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies element-wise multiplication on 256 * 256 * 64 constant-filled
/// buffers.
///
/// The CPU reference is always computed and timed. With a GPU feature
/// compiled in, the GPU output is timed and compared against it; otherwise
/// the result is recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_elementwise_mul(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Element-wise Multiplication"));
    let size = 256 * 256 * 64;
    let a = vec![1.5f32; size];
    let b = vec![2.0f32; size];

    let cpu_timer = std::time::Instant::now();
    let cpu_result: Vec<f32> = a.iter().zip(b.iter()).map(|(x, y)| x * y).collect();
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_elementwise_mul(&a, &b)?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies element-wise division on 128 * 128 * 32 constant-filled buffers.
///
/// The CPU reference is always computed and timed. With a GPU feature
/// compiled in, the GPU output is timed and compared against it; otherwise
/// the result is recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_elementwise_div(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Element-wise Division"));
    let size = 128 * 128 * 32;
    let a = vec![10.0f32; size];
    let b = vec![2.0f32; size];

    let cpu_timer = std::time::Instant::now();
    let cpu_result: Vec<f32> = a.iter().zip(b.iter()).map(|(x, y)| x / y).collect();
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_elementwise_div(&a, &b)?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies a 64 x 64 x 64 matrix multiplication against `cpu_matmul`.
///
/// Inputs are deterministic ramps (`i * 0.01` and `i * 0.02`). With a GPU
/// feature compiled in, the GPU product is timed and compared element by
/// element; otherwise the result is recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_matrix_multiplication_small(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Matrix Multiplication (Small)"));
    let (m, n, k) = (64, 64, 64);
    let a: Vec<f32> = (0..m * k).map(|i| (i as f32) * 0.01).collect();
    let b: Vec<f32> = (0..k * n).map(|i| (i as f32) * 0.02).collect();

    let cpu_timer = std::time::Instant::now();
    let cpu_result = self.cpu_matmul(&a, &b, m, n, k);
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_matmul(&a, &b, m, n, k)?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies a 1024 x 1024 x 1024 matrix multiplication on a sub-range of the
/// output.
///
/// Computing the whole product on the CPU is expensive, so only the first
/// 64 rows (64 * 1024 = 256 * 256 elements) are computed as the reference.
/// Those rows are the leading elements of the row-major GPU result, so the
/// two buffers line up element for element.
///
/// The recorded CPU time covers only the verified rows while the GPU time
/// covers the full product, so the reported speedup is not like-for-like.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_matrix_multiplication_large(&mut self) -> RusTorchResult<()> {
    let mut result = VerificationResult::new("Matrix Multiplication (Large)".to_string());
    let m = 1024;
    let n = 1024;
    let k = 1024;
    // Number of leading output rows checked on the CPU.
    let verified_rows = 64;
    let a = (0..m * k)
        .map(|i| ((i % 1000) as f32) * 0.001)
        .collect::<Vec<f32>>();
    let b = (0..k * n)
        .map(|i| ((i % 1000) as f32) * 0.001)
        .collect::<Vec<f32>>();

    let start = std::time::Instant::now();
    // The reference must keep the real strides (`k` for `a`, `n` for `b`);
    // shrinking all three dimensions would reinterpret the buffers as a
    // different, unrelated matrix product.
    let cpu_result = self.cpu_matmul(&a[..verified_rows * k], &b, verified_rows, n, k);
    let cpu_time = start.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    {
        let start = std::time::Instant::now();
        let gpu_result = self.gpu_matmul(&a, &b, m, n, k)?;
        let gpu_time = start.elapsed().as_secs_f64() * 1000.0;
        let expected_len = cpu_result.len();
        // `get` instead of slicing: a short GPU result is a test failure,
        // not a panic.
        result = match gpu_result.get(..expected_len) {
            Some(gpu_subset) => {
                let (max_error, mean_error) =
                    self.compute_error_metrics(&cpu_result, gpu_subset);
                result
                    .with_timing(cpu_time, gpu_time)
                    .with_error_metrics(max_error, mean_error)
            }
            None => result.with_timing(cpu_time, gpu_time).with_failure(format!(
                "GPU result has {} elements; at least {} are required for verification",
                gpu_result.len(),
                expected_len
            )),
        };
    }
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    {
        result = result.with_failure("No GPU backend available".to_string());
    }
    self.results.push(result);
    Ok(())
}
/// Verifies the GPU sum reduction over 1024 * 1024 values in `[0, 1)`.
///
/// The expected sum is roughly 5.2e5, where adjacent `f32` values are about
/// 0.03 apart. An absolute tolerance of `1e-5` could therefore only be met by
/// a bit-identical result. The reference is accumulated in `f64` and the
/// reported metric is the *relative* error
/// `|cpu - gpu| / max(|cpu|, 1)`, stored as both max and mean error.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_reduction_sum(&mut self) -> RusTorchResult<()> {
    let mut result = VerificationResult::new("Reduction Sum".to_string());
    let size = 1024 * 1024;
    let input = (0..size)
        .map(|i| (i % 1000) as f32 * 0.001)
        .collect::<Vec<f32>>();

    let start = std::time::Instant::now();
    // Wide accumulator so the reference itself does not carry f32 rounding
    // drift from a million sequential additions.
    let cpu_sum = input.iter().map(|&value| f64::from(value)).sum::<f64>();
    let cpu_time = start.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    {
        let start = std::time::Instant::now();
        let gpu_sum = self.gpu_reduce_sum(&input)?;
        let gpu_time = start.elapsed().as_secs_f64() * 1000.0;
        // Clamp the scale to 1 so a near-zero reference cannot blow up the
        // ratio.
        let scale = cpu_sum.abs().max(1.0);
        let error = ((cpu_sum - f64::from(gpu_sum)).abs() / scale) as f32;
        result = result
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(error, error);
    }
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    {
        result = result.with_failure("No GPU backend available".to_string());
    }
    self.results.push(result);
    Ok(())
}
/// Verifies the mean of 512 * 512 values of the form `sin(i % 100)`.
///
/// The GPU mean is obtained from `gpu_reduce_sum` divided by the element
/// count; the absolute difference to the CPU mean is stored as both the max
/// and the mean error. Without a GPU feature the result is recorded as a
/// failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_reduction_mean(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Reduction Mean"));
    let size = 512 * 512;
    let input: Vec<f32> = (0..size).map(|i| ((i % 100) as f32).sin()).collect();

    let cpu_timer = std::time::Instant::now();
    let cpu_total = input.iter().sum::<f32>();
    let cpu_mean = cpu_total / size as f32;
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_mean = self.gpu_reduce_sum(&input)? / size as f32;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let deviation = (cpu_mean - gpu_mean).abs();
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(deviation, deviation)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies ReLU on 256 * 256 values forming a ramp centred on zero.
///
/// The CPU reference is `max(x, 0)`. With a GPU feature compiled in, the
/// GPU output is timed and compared against it; otherwise the result is
/// recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_relu_activation(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("ReLU Activation"));
    let size: usize = 256 * 256;
    let input: Vec<f32> = (0..size)
        .map(|i| (i as f32 - size as f32 / 2.0) * 0.01)
        .collect();

    let cpu_timer = std::time::Instant::now();
    let cpu_result: Vec<f32> = input.iter().map(|value| value.max(0.0)).collect();
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_relu(&input)?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies batch normalisation with fixed statistics on 1024 values.
///
/// The CPU reference is `(x - mean) / sqrt(variance + epsilon)` with
/// `mean = 0.5`, `variance = 0.25` and `epsilon = 1e-5`. With a GPU feature
/// compiled in, the GPU output is timed and compared against it; otherwise
/// the result is recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_batch_normalization(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Batch Normalization"));
    let size: usize = 1024;
    let input: Vec<f32> = (0..size).map(|i| (i as f32) * 0.01).collect();
    let mean = 0.5f32;
    let variance = 0.25f32;
    let epsilon = 1e-5f32;

    let cpu_timer = std::time::Instant::now();
    let std_dev = (variance + epsilon).sqrt();
    let cpu_result: Vec<f32> = input.iter().map(|&x| (x - mean) / std_dev).collect();
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_batch_norm(&input, mean, variance, epsilon)?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies softmax over 1000 values ranging from -5.0 upwards in steps of
/// 0.01.
///
/// The CPU reference subtracts the maximum before exponentiating and then
/// divides by the sum of the exponentials. With a GPU feature compiled in,
/// the GPU output is timed and compared against it; otherwise the result is
/// recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_softmax(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Softmax"));
    let size: usize = 1000;
    let input: Vec<f32> = (0..size).map(|i| (i as f32) * 0.01 - 5.0).collect();

    let cpu_timer = std::time::Instant::now();
    let peak = input.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    let shifted_exp: Vec<f32> = input.iter().map(|&x| (x - peak).exp()).collect();
    let normaliser: f32 = shifted_exp.iter().sum();
    let cpu_result: Vec<f32> = shifted_exp.iter().map(|&e| e / normaliser).collect();
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_softmax(&input)?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies a 3 x 3 convolution over a 32 x 32 ramp input.
///
/// The kernel has the rows `[1, 0, -1]` repeated three times. The CPU
/// reference comes from `cpu_conv2d`; the GPU call receives `1, 1, 0, 0` as
/// its four trailing arguments. Without a GPU feature the result is recorded
/// as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_conv2d_basic(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Conv2D Basic"));
    let (input_h, input_w) = (32, 32);
    let (kernel_h, kernel_w) = (3, 3);
    let input: Vec<f32> = (0..input_h * input_w).map(|i| (i as f32) * 0.01).collect();
    let kernel: Vec<f32> = [1.0f32, 0.0, -1.0].repeat(3);

    let cpu_timer = std::time::Instant::now();
    let cpu_result = self.cpu_conv2d(&input, &kernel, input_h, input_w, kernel_h, kernel_w);
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_conv2d(
            &input, &kernel, input_h, input_w, kernel_h, kernel_w, 1, 1, 0, 0,
        )?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies 2 x 2 max pooling with stride 2 over a 16 x 16 ramp input.
///
/// The CPU reference comes from `cpu_max_pool2d`; the GPU call receives two
/// trailing zeros in addition to the pooling and stride sizes. Without a GPU
/// feature the result is recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_max_pool2d(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Max Pool2D"));
    let (input_h, input_w) = (16, 16);
    let (pool_h, pool_w) = (2, 2);
    let (stride_h, stride_w) = (2, 2);
    let input: Vec<f32> = (0..input_h * input_w).map(|i| (i as f32) * 0.1).collect();

    let cpu_timer = std::time::Instant::now();
    let cpu_result =
        self.cpu_max_pool2d(&input, input_h, input_w, pool_h, pool_w, stride_h, stride_w);
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_max_pool2d(
            &input, input_h, input_w, pool_h, pool_w, stride_h, stride_w, 0, 0,
        )?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies the transpose of a 256 x 512 row-major ramp matrix.
///
/// The CPU reference places input element `(r, c)` at output index
/// `c * rows + r`. With a GPU feature compiled in, the GPU output is timed
/// and compared against it; otherwise the result is recorded as a failure.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_transpose(&mut self) -> RusTorchResult<()> {
    let outcome = VerificationResult::new(String::from("Transpose"));
    let rows: usize = 256;
    let cols: usize = 512;
    let input: Vec<f32> = (0..rows * cols).map(|i| (i as f32) * 0.01).collect();
    let source: &[f32] = &input;

    let cpu_timer = std::time::Instant::now();
    // Emitting the output in its own order (column-major over the input)
    // yields exactly index `c * rows + r` for element `(r, c)`.
    let cpu_result: Vec<f32> = (0..cols)
        .flat_map(|c| (0..rows).map(move |r| source[r * cols + c]))
        .collect();
    let cpu_time = cpu_timer.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    let outcome = {
        let gpu_timer = std::time::Instant::now();
        let gpu_result = self.gpu_transpose(&input, rows, cols)?;
        let gpu_time = gpu_timer.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        outcome
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error)
    };
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    let outcome = outcome.with_failure(String::from("No GPU backend available"));

    self.results.push(outcome);
    Ok(())
}
/// Verifies GELU on 1024 values in `[-5.12, 5.11]`.
///
/// The CPU reference uses the standard tanh approximation
/// `0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))`.
/// The Rust standard library has no `erf`, so the exact form is not
/// available here.
///
/// The true error metrics are recorded, and the pass criterion is relaxed to
/// ten times [`FLOAT_TOLERANCE`] because the reference is an approximation.
///
/// NOTE(review): which GELU variant the GPU kernel implements is not visible
/// here; if it uses the exact erf form, the approximation gap will exceed
/// this tolerance — confirm against the kernel.
///
/// # Errors
///
/// Propagates the error of the GPU call; no result is recorded in that case.
fn test_gelu_activation(&mut self) -> RusTorchResult<()> {
    let mut result = VerificationResult::new("GELU Activation".to_string());
    let size: usize = 1024;
    let input = (0..size)
        .map(|i| (i as f32 - 512.0) * 0.01)
        .collect::<Vec<f32>>();

    let start = std::time::Instant::now();
    let sqrt_two_over_pi = (2.0f32 / std::f32::consts::PI).sqrt();
    let cpu_result: Vec<f32> = input
        .iter()
        .map(|&x| {
            let inner = sqrt_two_over_pi * (x + 0.044715 * x * x * x);
            0.5 * x * (1.0 + inner.tanh())
        })
        .collect();
    let cpu_time = start.elapsed().as_secs_f64() * 1000.0;

    #[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
    {
        let start = std::time::Instant::now();
        let gpu_result = self.gpu_gelu(&input)?;
        let gpu_time = start.elapsed().as_secs_f64() * 1000.0;
        let (max_error, mean_error) = self.compute_error_metrics(&cpu_result, &gpu_result);
        let mut scored = result
            .with_timing(cpu_time, gpu_time)
            .with_error_metrics(max_error, mean_error);
        // Relax the threshold rather than scaling the errors, so the summary
        // shows the real deviation.
        scored.passed = max_error < FLOAT_TOLERANCE * 10.0 && !mean_error.is_nan();
        result = scored;
    }
    #[cfg(not(any(feature = "cuda", feature = "opencl", feature = "metal")))]
    {
        result = result.with_failure("No GPU backend available".to_string());
    }
    self.results.push(result);
    Ok(())
}
/// Reference row-major matrix product.
///
/// `a` is read as an `m` x `k` matrix and `b` as a `k` x `n` matrix; the
/// returned vector holds the `m` x `n` product in row-major order. Each
/// output element is accumulated in `f32`, in increasing order of the shared
/// index.
///
/// # Panics
///
/// Panics if an index computed from `m`, `n` and `k` falls outside `a` or `b`.
fn cpu_matmul(&self, a: &[f32], b: &[f32], m: usize, n: usize, k: usize) -> Vec<f32> {
    (0..m)
        .flat_map(|row| {
            (0..n).map(move |col| {
                (0..k).fold(0.0f32, |acc, i| acc + a[row * k + i] * b[i * n + col])
            })
        })
        .collect()
}
/// Reference 2-D convolution without padding and with unit stride.
///
/// `input` is read as an `ih` x `iw` row-major image and `kernel` as a
/// `kh` x `kw` row-major filter. The kernel is applied without flipping and
/// the output has `(ih - kh + 1)` x `(iw - kw + 1)` elements.
///
/// # Panics
///
/// Panics if an index computed from the dimensions falls outside `input` or
/// `kernel`.
fn cpu_conv2d(
    &self,
    input: &[f32],
    kernel: &[f32],
    ih: usize,
    iw: usize,
    kh: usize,
    kw: usize,
) -> Vec<f32> {
    let oh = ih - kh + 1;
    let ow = iw - kw + 1;
    (0..oh)
        .flat_map(|out_y| {
            (0..ow).map(move |out_x| {
                // Same accumulation order as nested loops: kernel rows
                // outermost, kernel columns innermost.
                (0..kh).fold(0.0f32, |acc, ky| {
                    (0..kw).fold(acc, |acc, kx| {
                        acc + input[(out_y + ky) * iw + (out_x + kx)] * kernel[ky * kw + kx]
                    })
                })
            })
        })
        .collect()
}
/// Reference 2-D max pooling without padding.
///
/// `input` is read as an `ih` x `iw` row-major image. The output has
/// `(ih / sh)` x `(iw / sw)` elements; each is the maximum over a `ph` x `pw`
/// window anchored at `(out_y * sh, out_x * sw)`. Window positions outside
/// the image are skipped, and a window with no valid position yields
/// negative infinity.
///
/// # Panics
///
/// Panics if `sh` or `sw` is zero, or if an in-range position indexes past
/// the end of `input`.
#[allow(clippy::too_many_arguments)]
fn cpu_max_pool2d(
    &self,
    input: &[f32],
    ih: usize,
    iw: usize,
    ph: usize,
    pw: usize,
    sh: usize,
    sw: usize,
) -> Vec<f32> {
    let oh = ih / sh;
    let ow = iw / sw;
    let mut pooled = Vec::with_capacity(oh * ow);
    for out_y in 0..oh {
        for out_x in 0..ow {
            let window_max = (0..ph)
                .flat_map(|py| (0..pw).map(move |px| (out_y * sh + py, out_x * sw + px)))
                .filter(|&(in_y, in_x)| in_y < ih && in_x < iw)
                .fold(f32::NEG_INFINITY, |best, (in_y, in_x)| {
                    best.max(input[in_y * iw + in_x])
                });
            pooled.push(window_max);
        }
    }
    pooled
}
/// GPU element-wise addition hook.
///
/// Currently a stub: ignores its inputs and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_elementwise_add(&self, _a: &[f32], _b: &[f32]) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU element-wise subtraction hook.
///
/// Currently a stub: ignores its inputs and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_elementwise_sub(&self, _a: &[f32], _b: &[f32]) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU element-wise multiplication hook.
///
/// Currently a stub: ignores its inputs and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_elementwise_mul(&self, _a: &[f32], _b: &[f32]) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU element-wise division hook.
///
/// Currently a stub: ignores its inputs and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_elementwise_div(&self, _a: &[f32], _b: &[f32]) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU matrix multiplication hook.
///
/// Currently a stub: ignores its inputs and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_matmul(
    &self,
    _a: &[f32],
    _b: &[f32],
    _m: usize,
    _n: usize,
    _k: usize,
) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU sum-reduction hook.
///
/// Currently a stub: ignores its input and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_reduce_sum(&self, _input: &[f32]) -> RusTorchResult<f32> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU ReLU hook.
///
/// Currently a stub: ignores its input and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_relu(&self, _input: &[f32]) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU batch-normalisation hook.
///
/// Currently a stub: ignores its inputs and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_batch_norm(
    &self,
    _input: &[f32],
    _mean: f32,
    _variance: f32,
    _epsilon: f32,
) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU softmax hook.
///
/// Currently a stub: ignores its input and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_softmax(&self, _input: &[f32]) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU 2-D convolution hook.
///
/// Currently a stub: ignores its inputs and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_conv2d(
    &self,
    _input: &[f32],
    _kernel: &[f32],
    _ih: usize,
    _iw: usize,
    _kh: usize,
    _kw: usize,
    _sh: usize,
    _sw: usize,
    _ph: usize,
    _pw: usize,
) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU 2-D max-pooling hook.
///
/// Currently a stub: ignores its inputs and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_max_pool2d(
    &self,
    _input: &[f32],
    _ih: usize,
    _iw: usize,
    _ph: usize,
    _pw: usize,
    _sh: usize,
    _sw: usize,
    _pad_h: usize,
    _pad_w: usize,
) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU matrix-transpose hook.
///
/// Currently a stub: ignores its inputs and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_transpose(
    &self,
    _input: &[f32],
    _rows: usize,
    _cols: usize,
) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// GPU GELU hook.
///
/// Currently a stub: ignores its input and always returns a GPU error.
#[cfg(any(feature = "cuda", feature = "opencl", feature = "metal"))]
fn gpu_gelu(&self, _input: &[f32]) -> RusTorchResult<Vec<f32>> {
    let reason = String::from("GPU operation not implemented");
    Err(RusTorchError::gpu(reason))
}
/// Computes `(max_error, mean_error)` of the element-wise absolute difference
/// between a CPU reference and a GPU result.
///
/// Special cases, chosen so that a broken GPU result can never look like a
/// pass:
///
/// * Different lengths return `(INFINITY, INFINITY)`; comparing only a common
///   prefix would hide missing or extra output.
/// * Two empty slices return `(0.0, 0.0)` instead of a `0 / 0` NaN mean.
/// * A NaN difference makes `max_error` NaN (and therefore `mean_error` too).
///   `f32::max` alone would silently drop the NaN.
///
/// The sum for the mean is accumulated in `f64` to limit rounding drift on
/// large inputs.
// Only called from the feature-gated GPU branches, hence unused without them.
#[allow(dead_code)]
fn compute_error_metrics(&self, cpu_result: &[f32], gpu_result: &[f32]) -> (f32, f32) {
    if cpu_result.len() != gpu_result.len() {
        return (f32::INFINITY, f32::INFINITY);
    }
    if cpu_result.is_empty() {
        return (0.0, 0.0);
    }

    let mut max_error = 0.0f32;
    let mut sum_error = 0.0f64;
    for (&expected, &actual) in cpu_result.iter().zip(gpu_result) {
        let error = (expected - actual).abs();
        // Once NaN, `max_error` stays NaN: no later `error > max_error`
        // comparison can succeed.
        if error.is_nan() || error > max_error {
            max_error = error;
        }
        sum_error += f64::from(error);
    }

    let mean_error = (sum_error / cpu_result.len() as f64) as f32;
    (max_error, mean_error)
}
/// Prints the pass/fail counts, the success rate and one table row per
/// recorded result to standard output.
///
/// Each row shows status, timings, speedup and error metrics; a stored error
/// message is printed on the line below its row. When no results have been
/// recorded the success rate is reported as 0.0% (instead of a `0 / 0` NaN)
/// and a "no tests" notice replaces the "all passed" message.
fn print_summary(&self) {
    println!("\n📊 Verification Test Summary");
    println!("============================");
    let total_tests = self.results.len();
    let passed_tests = self.results.iter().filter(|r| r.passed).count();
    let failed_tests = total_tests - passed_tests;
    let success_rate = if total_tests == 0 {
        0.0
    } else {
        (passed_tests as f32 / total_tests as f32) * 100.0
    };
    println!("Total Tests: {}", total_tests);
    println!("✅ Passed: {}", passed_tests);
    println!("❌ Failed: {}", failed_tests);
    println!("Success Rate: {:.1}%", success_rate);
    println!("\n📈 Performance Results:");
    println!(
        "{:<30} {:<8} {:<10} {:<10} {:<10} {:<15} {:<15}",
        "Test Name", "Status", "CPU (ms)", "GPU (ms)", "Speedup", "Max Error", "Mean Error"
    );
    println!("{}", "-".repeat(120));

    let not_available = || "N/A".to_string();
    for result in &self.results {
        let status = if result.passed {
            "✅ PASS"
        } else {
            "❌ FAIL"
        };
        let cpu_time = result
            .cpu_time_ms
            .map_or_else(not_available, |t| format!("{:.2}", t));
        let gpu_time = result
            .gpu_time_ms
            .map_or_else(not_available, |t| format!("{:.2}", t));
        let speedup = result
            .speedup
            .map_or_else(not_available, |s| format!("{:.2}x", s));
        println!(
            "{:<30} {:<8} {:<10} {:<10} {:<10} {:<15.2e} {:<15.2e}",
            result.test_name,
            status,
            cpu_time,
            gpu_time,
            speedup,
            result.max_error,
            result.mean_error
        );
        if let Some(ref error_msg) = result.error_message {
            println!(" Error: {}", error_msg);
        }
    }

    if total_tests == 0 {
        println!("\nNo tests were run.");
    } else if failed_tests > 0 {
        println!(
            "\n⚠️ {} tests failed. Check error messages above.",
            failed_tests
        );
    } else {
        println!(
            "\n🎉 All tests passed! GPU implementations match CPU results within tolerance."
        );
    }
}
}
impl Default for VerificationTestSuite {
fn default() -> Self {
Self::new()
}
}