use rustygraph::TimeSeries;
use std::time::Instant;
#[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
use rustygraph::performance::{GpuVisibilityGraph, GpuConfig, GpuCapabilities};
/// Brute-force CPU reference builder for the natural visibility graph in
/// single precision.
///
/// Every index pair `(i, j)` with `i < j` is tested with
/// `is_visible_natural_f32`; the pairs that pass are returned as edges,
/// ordered by `i` first and then by `j`.
fn build_natural_cpu_f32(data: &[f32]) -> Vec<(usize, usize)> {
    let len = data.len();
    (0..len)
        // Enumerate candidate pairs in the same order as a nested i/j loop.
        .flat_map(|src| ((src + 1)..len).map(move |dst| (src, dst)))
        .filter(|&(src, dst)| is_visible_natural_f32(data, src, dst))
        .collect()
}
/// Natural-visibility test between samples `i` and `j` of `data` (f32).
///
/// Returns `true` when no sample strictly between the two indices reaches the
/// straight line joining `(i, data[i])` and `(j, data[j])`. A sample lying
/// exactly on the line blocks visibility. When there are no samples in
/// between (including `j <= i`) the result is `true`.
///
/// # Panics
/// Panics if `i` or `j` is out of bounds for `data`.
fn is_visible_natural_f32(data: &[f32], i: usize, j: usize) -> bool {
    let left = data[i];
    let right = data[j];
    let rise = right - left;
    // The view is blocked as soon as one intermediate sample touches or
    // exceeds the interpolated line height at its position.
    let blocked = ((i + 1)..j).any(|k| {
        let fraction = (k - i) as f32 / (j - i) as f32;
        data[k] >= left + rise * fraction
    });
    !blocked
}
/// Brute-force CPU reference builder for the natural visibility graph in
/// double precision.
///
/// Every index pair `(i, j)` with `i < j` is tested with
/// `is_visible_natural_f64`; the pairs that pass are returned as edges,
/// ordered by `i` first and then by `j`.
fn build_natural_cpu_f64(data: &[f64]) -> Vec<(usize, usize)> {
    let len = data.len();
    (0..len)
        // Enumerate candidate pairs in the same order as a nested i/j loop.
        .flat_map(|src| ((src + 1)..len).map(move |dst| (src, dst)))
        .filter(|&(src, dst)| is_visible_natural_f64(data, src, dst))
        .collect()
}
/// Natural-visibility test between samples `i` and `j` of `data` (f64).
///
/// Returns `true` when no sample strictly between the two indices reaches the
/// straight line joining `(i, data[i])` and `(j, data[j])`. A sample lying
/// exactly on the line blocks visibility. When there are no samples in
/// between (including `j <= i`) the result is `true`.
///
/// # Panics
/// Panics if `i` or `j` is out of bounds for `data`.
fn is_visible_natural_f64(data: &[f64], i: usize, j: usize) -> bool {
    let left = data[i];
    let right = data[j];
    let rise = right - left;
    // The view is blocked as soon as one intermediate sample touches or
    // exceeds the interpolated line height at its position.
    let blocked = ((i + 1)..j).any(|k| {
        let fraction = (k - i) as f64 / (j - i) as f64;
        data[k] >= left + rise * fraction
    });
    !blocked
}
/// Benchmarks natural-visibility-graph construction on the Metal GPU backend
/// against the brute-force f32 and f64 CPU builders defined in this file.
///
/// On Apple Silicon builds with the `metal` feature the program:
/// 1. prints the detected GPU capabilities and exits early without Metal;
/// 2. sweeps a fixed list of series lengths, printing one table row per
///    length with the GPU, CPU(f32) and CPU(f64) wall-clock times and an
///    edge-count agreement label (the GPU is skipped below 100 samples);
/// 3. prints a detailed edge-count / timing breakdown for 500 samples.
///
/// On every other target it only prints how to enable the example.
fn main() {
    println!("🔬 GPU vs CPU Precision-Matched Performance Comparison\n");
    println!("{}", "=".repeat(70));

    #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
    {
        // Runs `work` once and returns its result together with the elapsed
        // wall-clock time.
        fn timed<T>(work: impl FnOnce() -> T) -> (T, std::time::Duration) {
            let start = Instant::now();
            let out = work();
            (out, start.elapsed())
        }

        // Generates the sine test signal with `len` samples, in f64 and as an
        // f32 down-cast of the very same values.
        fn sine_signal(len: usize) -> (Vec<f64>, Vec<f32>) {
            let wide: Vec<f64> = (0..len)
                .map(|i| (i as f64 * 0.1).sin() * 100.0 + 100.0)
                .collect();
            let narrow: Vec<f32> = wide.iter().map(|&x| x as f32).collect();
            (wide, narrow)
        }

        // Builds the natural visibility graph on the GPU and returns the time
        // spent inside `build_natural` plus the resulting edge count. Setup
        // (config, GPU handle, series) is deliberately kept outside the timer.
        fn time_gpu_natural(samples: &[f64]) -> (std::time::Duration, usize) {
            // NOTE(review): `with_min_nodes(0)` presumably removes the size
            // threshold below which the GPU path is bypassed — confirm against
            // the `GpuConfig` documentation.
            let config = GpuConfig::for_apple_silicon().with_min_nodes(0);
            let gpu = GpuVisibilityGraph::with_config(config);
            let series = TimeSeries::from_raw(samples.to_vec())
                .expect("generated sine signal should form a valid TimeSeries");
            let start = Instant::now();
            let graph = gpu
                .build_natural(&series)
                .expect("GPU natural visibility graph construction failed");
            let elapsed = start.elapsed();
            (elapsed, graph.edges().len())
        }

        // Absolute difference of two edge counts without lossy integer casts.
        fn count_gap(a: usize, b: usize) -> usize {
            if a > b {
                a - b
            } else {
                b - a
            }
        }

        let caps = GpuCapabilities::detect();
        println!("\n📊 Hardware Detection:");
        caps.print_info();
        println!();
        if !caps.has_metal() {
            println!("⚠️ Metal GPU not available. This comparison requires Apple Silicon.");
            return;
        }

        let test_sizes = [10usize, 50, 100, 200, 500, 1000, 2000, 5000];
        println!("\n📈 Performance Comparison (Natural Visibility)");
        println!("{}", "=".repeat(70));
        println!(
            "{:<10} {:>12} {:>12} {:>12} {:>12}",
            "Size", "GPU (f32)", "CPU (f32)", "CPU (f64)", "Match"
        );
        println!("{}", "-".repeat(70));

        for &size in &test_sizes {
            let (data_f64, data_f32) = sine_signal(size);

            // Untimed warm-up run of the f32 CPU builder.
            let _ = build_natural_cpu_f32(&data_f32);

            // The GPU is only exercised from 100 samples upwards.
            let gpu_run = if size >= 100 {
                Some(time_gpu_natural(&data_f64))
            } else {
                None
            };
            let (edges_f32, cpu_f32_time) = timed(|| build_natural_cpu_f32(&data_f32));
            let (_edges_f64, cpu_f64_time) = timed(|| build_natural_cpu_f64(&data_f64));

            // Every column uses the header's width (12) so rows stay aligned
            // with the header and with each other.
            match gpu_run {
                Some((gpu_elapsed, gpu_edges)) => {
                    // Agreement is judged on edge counts only, against the
                    // precision-matched f32 CPU result.
                    let gap = count_gap(gpu_edges, edges_f32.len());
                    let match_status = if gap == 0 {
                        "✅ Perfect"
                    } else if gap < size / 20 {
                        "✓ Close"
                    } else {
                        "⚠️ Diff"
                    };
                    println!(
                        "{:<10} {:>12.2?} {:>12.2?} {:>12.2?} {:>12}",
                        size, gpu_elapsed, cpu_f32_time, cpu_f64_time, match_status
                    );
                }
                None => {
                    println!(
                        "{:<10} {:>12} {:>12.2?} {:>12.2?} {:>12}",
                        size, "N/A", cpu_f32_time, cpu_f64_time, "N/A"
                    );
                }
            }
        }

        println!("\n{}", "=".repeat(70));
        println!("\n🔍 Detailed Analysis (size=500):");
        println!("{}", "=".repeat(70));

        // Must stay in sync with the size quoted in the heading above.
        let detail_len = 500;
        let (data_f64, data_f32) = sine_signal(detail_len);
        let (gpu_time, gpu_edge_count) = time_gpu_natural(&data_f64);
        let (edges_f32, cpu_f32_time) = timed(|| build_natural_cpu_f32(&data_f32));
        let (edges_f64, cpu_f64_time) = timed(|| build_natural_cpu_f64(&data_f64));
        let f32_edge_count = edges_f32.len();
        let f64_edge_count = edges_f64.len();

        println!("\nEdge Counts:");
        println!(" GPU (f32): {} edges", gpu_edge_count);
        println!(" CPU (f32): {} edges", f32_edge_count);
        println!(" CPU (f64): {} edges", f64_edge_count);

        println!("\nPrecision Differences:");
        let percent = |gap: usize, total: usize| gap as f64 / total as f64 * 100.0;
        let gpu_vs_f32 = count_gap(gpu_edge_count, f32_edge_count);
        let gpu_vs_f64 = count_gap(gpu_edge_count, f64_edge_count);
        let f32_vs_f64 = count_gap(f32_edge_count, f64_edge_count);
        println!(
            " GPU vs CPU(f32): {} edges difference ({:.2}%)",
            gpu_vs_f32,
            percent(gpu_vs_f32, f32_edge_count)
        );
        println!(
            " GPU vs CPU(f64): {} edges difference ({:.2}%)",
            gpu_vs_f64,
            percent(gpu_vs_f64, f64_edge_count)
        );
        println!(
            " CPU(f32) vs CPU(f64): {} edges difference ({:.2}%)",
            f32_vs_f64,
            percent(f32_vs_f64, f64_edge_count)
        );

        println!("\nPerformance (Apples-to-Apples f32 comparison):");
        // Values above 1.0 mean the GPU run was faster than the f32 CPU run.
        let speedup = cpu_f32_time.as_secs_f64() / gpu_time.as_secs_f64();
        println!(" GPU (f32): {:>8.2?}", gpu_time);
        println!(" CPU (f32): {:>8.2?} (baseline)", cpu_f32_time);
        println!(" CPU (f64): {:>8.2?}", cpu_f64_time);
        println!(
            "\n GPU Speedup vs CPU(f32): {:.2}x {}",
            speedup,
            if speedup > 1.0 {
                "🚀 GPU FASTER"
            } else {
                "⚠️ CPU FASTER"
            }
        );

        println!("\n💡 Key Findings:");
        println!(" • GPU uses float32 due to Metal limitations");
        println!(" • CPU(f32) provides fair performance comparison");
        println!(" • CPU(f64) shows precision impact on edge detection");
        println!(" • Small precision differences are expected and acceptable");

        println!("\n✅ Conclusion:");
        if speedup > 1.2 {
            println!(" GPU provides {:.1}x speedup over CPU when using same precision!", speedup);
        } else if speedup > 0.8 {
            println!(" GPU and CPU have comparable performance at this size.");
            println!(" GPU benefits emerge with larger graphs (>5000 nodes).");
        } else {
            println!(" CPU is faster at this size due to GPU overhead.");
            println!(" GPU excels with larger graphs where parallelism helps.");
        }
    }

    #[cfg(not(all(target_os = "macos", target_arch = "aarch64", feature = "metal")))]
    {
        println!("⚠️ This example requires Apple Silicon and the 'metal' feature.");
        println!(" Build with: cargo run --example gpu_precision_comparison --features metal");
    }
}