use rustygraph::*;
use rustygraph::performance::{GpuVisibilityGraph, GpuConfig, GpuCapabilities};
use std::time::Instant;
/// Prints a six-section GPU validation report for RustyGraph to stdout.
///
/// Sections, in order:
/// 1. Detect and print GPU capabilities via `GpuCapabilities::detect`.
/// 2. Time natural visibility graph construction through the CPU builder and
///    through `GpuVisibilityGraph` for several synthetic series sizes.
/// 3. Compare the CPU and GPU edge key sets on a small fixed series.
/// 4. Time and compare horizontal visibility on a 1000-point series.
/// 5. Summarise the platform, the best GPU-backed speedup, and the overall
///    correctness verdict.
/// 6. Print static usage recommendations.
///
/// The overall verdict in section 5 is "PASSED" only if every comparison made
/// in sections 2, 3 and 4 agreed.
///
/// # Errors
///
/// Propagates any error returned by `TimeSeries::from_raw`, the
/// `VisibilityGraph` builders, or the `GpuVisibilityGraph` builders.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("╔════════════════════════════════════════════════════╗");
    println!("║ RustyGraph GPU Performance Validation ║");
    println!("╚════════════════════════════════════════════════════╝\n");

    // ── Section 1: capability detection ────────────────────────────────
    println!("1️⃣ DETECTING GPU CAPABILITIES");
    println!("═══════════════════════════════════════════════════\n");
    let gpu_caps = GpuCapabilities::detect();
    gpu_caps.print_info();
    println!();
    if !gpu_caps.has_metal() {
        // The run continues regardless; presumably the GPU builder falls back
        // to a CPU path when Metal is missing — TODO confirm in rustygraph.
        println!("⚠️ Metal GPU not available on this platform");
        println!(" Running CPU-only benchmarks for comparison\n");
    }

    // ── Section 2: natural visibility benchmarks ───────────────────────
    // (label, number of points) for each synthetic series.
    let test_sizes = [
        ("Tiny", 100),
        ("Small", 500),
        ("Medium", 1000),
        ("Large", 2000),
        ("Very Large", 5000),
    ];
    println!("2️⃣ PERFORMANCE BENCHMARKS");
    println!("═══════════════════════════════════════════════════\n");
    println!("{:<15} {:>10} {:>10} {:>10} {:>10}",
        "Graph Size", "CPU (ms)", "GPU (ms)", "Speedup", "Edges");
    println!("{}", "─".repeat(60));

    // Speedups of the runs labelled "GPU"; only these feed the summary.
    let mut gpu_speedups: Vec<f64> = Vec::new();
    // Cleared as soon as any benchmark size reports differing edge counts.
    let mut benchmark_edges_match = true;

    for (name, size) in test_sizes {
        let data: Vec<f64> = (0..size)
            .map(|i| (i as f64 * 0.1).sin() + (i as f64 * 0.05).cos())
            .collect();
        // `data` is not needed afterwards, so it is moved rather than cloned.
        let series = TimeSeries::from_raw(data)?;

        let cpu_start = Instant::now();
        let cpu_graph = VisibilityGraph::from_series(&series)
            .natural_visibility()?;
        let cpu_time = cpu_start.elapsed();
        let cpu_edges = cpu_graph.edges().len();

        // Builder construction is deliberately outside the timed region.
        let gpu_config = GpuConfig::for_apple_silicon()
            .with_min_nodes(500);
        let gpu_builder = GpuVisibilityGraph::with_config(gpu_config);
        let gpu_start = Instant::now();
        let gpu_graph = gpu_builder.build_natural(&series)?;
        let gpu_time = gpu_start.elapsed();
        let gpu_edges = gpu_graph.edges().len();

        let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();
        // The "GPU (ms)" column is timed either way; this label records
        // whether the builder reports it would use the GPU for this size and
        // Metal is present.
        let backend = if gpu_builder.should_use_gpu(size) && gpu_caps.has_metal() {
            "GPU"
        } else {
            "CPU"
        };

        println!("{:<15} {:>10.2} {:>10.2} {:>9.2}x {:>10} ({})",
            name,
            cpu_time.as_secs_f64() * 1000.0,
            gpu_time.as_secs_f64() * 1000.0,
            speedup,
            cpu_edges,
            backend);

        if cpu_edges != gpu_edges {
            println!("⚠️ WARNING: Edge count mismatch! CPU: {}, GPU: {}", cpu_edges, gpu_edges);
            benchmark_edges_match = false;
        }
        if backend == "GPU" {
            gpu_speedups.push(speedup);
        }
    }
    println!();

    // ── Section 3: edge-by-edge comparison on a small series ───────────
    println!("3️⃣ CORRECTNESS VALIDATION");
    println!("═══════════════════════════════════════════════════\n");
    println!("Testing CPU vs GPU output consistency...\n");
    let test_data: Vec<f64> = vec![1.0, 3.0, 2.0, 4.0, 2.5, 3.5, 1.5];
    let test_series = TimeSeries::from_raw(test_data)?;
    let cpu_graph = VisibilityGraph::from_series(&test_series)
        .natural_visibility()?;
    // min_nodes = 1 so the 7-point series is not below the configured node
    // minimum. This builder is reused for the horizontal test below.
    let gpu_config = GpuConfig::for_apple_silicon().with_min_nodes(1);
    let gpu_builder = GpuVisibilityGraph::with_config(gpu_config);
    let gpu_graph = gpu_builder.build_natural(&test_series)?;
    let cpu_edges = cpu_graph.edges();
    let gpu_edges = gpu_graph.edges();
    println!("CPU edges: {}", cpu_edges.len());
    println!("GPU edges: {}", gpu_edges.len());
    let natural_counts_match = cpu_edges.len() == gpu_edges.len();
    if natural_counts_match {
        println!("✅ Edge count matches!");
    } else {
        println!("❌ Edge count mismatch!");
    }

    // Compare edge keys only; the value paired with each key is ignored.
    let cpu_keys: std::collections::HashSet<_> = cpu_edges.iter().map(|(k, _)| k).collect();
    let gpu_keys: std::collections::HashSet<_> = gpu_edges.iter().map(|(k, _)| k).collect();
    let all_match = cpu_keys == gpu_keys;
    if all_match {
        println!("✅ All edges match exactly!");
    } else {
        println!("❌ Some edges differ!");
        let missing_in_gpu: Vec<_> = cpu_keys.difference(&gpu_keys).collect();
        let extra_in_gpu: Vec<_> = gpu_keys.difference(&cpu_keys).collect();
        if !missing_in_gpu.is_empty() {
            println!(" Missing in GPU: {:?}", missing_in_gpu);
        }
        if !extra_in_gpu.is_empty() {
            println!(" Extra in GPU: {:?}", extra_in_gpu);
        }
    }
    println!();

    // ── Section 4: horizontal visibility ───────────────────────────────
    println!("4️⃣ HORIZONTAL VISIBILITY TEST");
    println!("═══════════════════════════════════════════════════\n");
    let hv_data: Vec<f64> = (0..1000)
        .map(|i| (i as f64 * 0.1).sin())
        .collect();
    let hv_series = TimeSeries::from_raw(hv_data)?;
    let cpu_start = Instant::now();
    let cpu_hv = VisibilityGraph::from_series(&hv_series)
        .horizontal_visibility()?;
    let cpu_hv_time = cpu_start.elapsed();
    let gpu_start = Instant::now();
    let gpu_hv = gpu_builder.build_horizontal(&hv_series)?;
    let gpu_hv_time = gpu_start.elapsed();
    // Only edge counts are compared here, not the individual edges.
    let hv_edges_match = cpu_hv.edges().len() == gpu_hv.edges().len();
    println!("1000-node horizontal visibility:");
    println!(" CPU time: {:.2} ms", cpu_hv_time.as_secs_f64() * 1000.0);
    println!(" GPU time: {:.2} ms", gpu_hv_time.as_secs_f64() * 1000.0);
    println!(" CPU edges: {}", cpu_hv.edges().len());
    println!(" GPU edges: {}", gpu_hv.edges().len());
    println!(" Match: {}", if hv_edges_match { "✅" } else { "❌" });
    println!();

    // ── Section 5: summary ─────────────────────────────────────────────
    println!("5️⃣ SUMMARY");
    println!("═══════════════════════════════════════════════════\n");
    println!("Platform: {}", if gpu_caps.has_metal() {
        "Apple Silicon with Metal GPU"
    } else {
        "CPU-only"
    });
    println!();
    if gpu_caps.has_metal() {
        println!("✅ Metal GPU detected and functional");
        println!("✅ GPU acceleration available");
        println!("✅ Automatic CPU/GPU selection working");
        // A NaN speedup (0/0 elapsed time) compares as Equal instead of panicking.
        let best_speedup = gpu_speedups
            .iter()
            .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        if let Some(speedup) = best_speedup {
            println!("✅ Best observed speedup: {:.2}x", speedup);
        }
    } else {
        println!("ℹ️ Metal GPU not available on this platform");
        println!("ℹ️ Using optimized CPU implementation");
    }
    println!();
    // The verdict covers every comparison made above, not just the key-set
    // check, so a mismatch reported in section 2 or 4 cannot end in "PASSED".
    let all_checks_passed =
        benchmark_edges_match && natural_counts_match && all_match && hv_edges_match;
    println!("All correctness tests: {}", if all_checks_passed { "✅ PASSED" } else { "⚠️ REVIEW" });
    println!();

    // ── Section 6: static recommendations ──────────────────────────────
    println!("6️⃣ RECOMMENDATIONS");
    println!("═══════════════════════════════════════════════════\n");
    if gpu_caps.has_metal() {
        println!("For Apple Silicon:");
        println!(" • Use GPU for graphs > 2000 nodes");
        println!(" • Unified memory reduces overhead");
        println!(" • Consider batch processing for multiple graphs");
    } else {
        println!("For this platform:");
        println!(" • CPU implementation is highly optimized");
        println!(" • Parallel + SIMD provide excellent performance");
        println!(" • Consider batching for processing multiple graphs");
    }
    Ok(())
}