use ruviz::core::*;
use ruviz::data::*;
use ruviz::prelude::{Plot, Position};
use ruviz::render::gpu::{GpuRenderer, initialize_gpu_backend};
use ruviz::render::pooled::PooledRenderer;
use std::time::Instant;
/// Timing and throughput measurements for a single dataset size.
///
/// When the GPU path is unavailable or its transform fails, `gpu_time_us` and
/// `gpu_throughput` carry the CPU values, `gpu_speedup` is `1.0`, and
/// `gpu_success` is `false`; the reporting code checks `gpu_success` before
/// using the GPU numbers.
#[derive(Debug, Clone)]
struct BenchmarkResult {
/// Number of points in the benchmarked dataset.
point_count: usize,
/// Elapsed time of the CPU transform, in whole microseconds stored as `f64`.
cpu_time_us: f64,
/// Elapsed time of the GPU transform, in whole microseconds stored as `f64`
/// (the CPU value when `gpu_success` is `false`).
gpu_time_us: f64,
/// CPU throughput in points per second.
cpu_throughput: f64,
/// GPU throughput in points per second (the CPU value when `gpu_success` is `false`).
gpu_throughput: f64,
/// CPU elapsed time divided by GPU elapsed time (`1.0` when `gpu_success` is `false`).
gpu_speedup: f64,
/// `true` only when a GPU renderer was available and its transform returned `Ok`.
gpu_success: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();
std::fs::create_dir_all("generated/examples").ok();
println!("GPU vs CPU Scaling Analysis");
println!("===========================\n");
let cpu_renderer = PooledRenderer::new();
println!("CPU Renderer initialized");
let mut gpu_renderer = match initialize_gpu_backend().await {
Ok(_) => match GpuRenderer::new().await {
Ok(renderer) => {
println!(
"GPU Renderer initialized - threshold: {}",
renderer.gpu_threshold()
);
Some(renderer)
}
Err(e) => {
println!("GPU Renderer failed: {}", e);
None
}
},
Err(e) => {
println!("GPU Backend failed: {}", e);
None
}
};
let test_sizes = vec![
500, 1_000, 2_000, 5_000, 10_000, 20_000, 50_000, 100_000, 200_000, 500_000, 1_000_000,
2_000_000, 5_000_000,
];
let mut results = Vec::new();
for &point_count in &test_sizes {
println!("\nTesting {} points", format_number(point_count as u64));
let x_data: Vec<f64> = (0..point_count).map(|i| i as f64 * 0.001).collect();
let y_data: Vec<f64> = x_data
.iter()
.map(|&x| (x * 2.0 * std::f64::consts::PI).sin())
.collect();
let x_range = (0.0, point_count as f64 * 0.001);
let y_range = (-1.0, 1.0);
let viewport = (0.0, 0.0, 1920.0, 1080.0);
print!(" CPU: ");
let start = Instant::now();
let _cpu_result = cpu_renderer.transform_coordinates_pooled(
&x_data, &y_data, x_range.0, x_range.1, y_range.0, y_range.1, viewport.0, viewport.1,
viewport.2, viewport.3,
)?;
let cpu_time = start.elapsed();
let cpu_time_us = cpu_time.as_micros() as f64;
let cpu_throughput = point_count as f64 / cpu_time.as_secs_f64();
println!(
"{:>10.0} us ({:>12.0} pts/sec)",
cpu_time_us, cpu_throughput
);
let (gpu_time_us, gpu_throughput, gpu_speedup, gpu_success) = if let Some(ref mut gpu) =
gpu_renderer
{
print!(" GPU: ");
let start = Instant::now();
match gpu.transform_coordinates_optimal(&x_data, &y_data, x_range, y_range, viewport) {
Ok(_gpu_result) => {
let gpu_time = start.elapsed();
let gpu_time_us = gpu_time.as_micros() as f64;
let gpu_throughput = point_count as f64 / gpu_time.as_secs_f64();
let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();
println!(
"{:>10.0} us ({:>12.0} pts/sec) [{:.2}x speedup]",
gpu_time_us, gpu_throughput, speedup
);
(gpu_time_us, gpu_throughput, speedup, true)
}
Err(e) => {
println!("FAILED: {}", e);
(cpu_time_us, cpu_throughput, 1.0, false)
}
}
} else {
println!(" GPU: Not available");
(cpu_time_us, cpu_throughput, 1.0, false)
};
results.push(BenchmarkResult {
point_count,
cpu_time_us,
gpu_time_us,
cpu_throughput,
gpu_throughput,
gpu_speedup,
gpu_success,
});
let data_size = point_count * std::mem::size_of::<f64>() * 2;
println!(" Memory: {:.1} MB", data_size as f64 / 1_000_000.0);
if cpu_time.as_secs_f64() > 5.0 {
println!(" CPU time > 5s, skipping larger datasets");
break;
}
}
println!("\nPerformance Summary Table");
println!("=========================");
println!(
"{:>10} {:>12} {:>12} {:>12} {:>12} {:>10}",
"Points", "CPU (us)", "GPU (us)", "CPU (Mpts/s)", "GPU (Mpts/s)", "Speedup"
);
println!("{}", "-".repeat(80));
for result in &results {
let cpu_mpts = result.cpu_throughput / 1_000_000.0;
let gpu_mpts = if result.gpu_success {
result.gpu_throughput / 1_000_000.0
} else {
0.0
};
let speedup_str = if result.gpu_success {
format!("{:.2}x", result.gpu_speedup)
} else {
"FAIL".to_string()
};
println!(
"{:>10} {:>12.0} {:>12.0} {:>12.1} {:>12.1} {:>10}",
format_number(result.point_count as u64),
result.cpu_time_us,
if result.gpu_success {
result.gpu_time_us
} else {
0.0
},
cpu_mpts,
gpu_mpts,
speedup_str
);
}
create_performance_plot(&results)?;
if let Some(gpu) = &gpu_renderer {
let stats = gpu.get_stats();
println!("\nGPU Statistics:");
println!(" GPU Operations: {}", stats.gpu_operations);
println!(" CPU Fallbacks: {}", stats.cpu_operations);
println!(
" GPU Points: {}",
format_number(stats.gpu_points_processed)
);
println!(
" CPU Points: {}",
format_number(stats.cpu_points_processed)
);
}
println!("\nScaling analysis complete! Check generated/examples/ for plots");
Ok(())
}
/// Writes the throughput-scaling plot and, when at least one GPU run
/// succeeded, the speedup-scaling plot, then lists the files actually saved.
///
/// In the throughput plot, runs without a successful GPU measurement are drawn
/// with a GPU throughput of `0.0`. The speedup plot only contains successful
/// GPU runs and is skipped entirely when there are none.
///
/// # Errors
///
/// Propagates any error returned while saving either plot.
fn create_performance_plot(results: &[BenchmarkResult]) -> Result<()> {
    const THROUGHPUT_PLOT_PATH: &str = "generated/examples/gpu_throughput_scaling.png";
    const SPEEDUP_PLOT_PATH: &str = "generated/examples/gpu_speedup_scaling.png";

    let point_counts: Vec<f64> = results.iter().map(|r| r.point_count as f64).collect();
    // Throughputs are plotted in millions of points per second.
    let cpu_throughput: Vec<f64> = results
        .iter()
        .map(|r| r.cpu_throughput / 1_000_000.0)
        .collect();
    let gpu_throughput: Vec<f64> = results
        .iter()
        .map(|r| {
            if r.gpu_success {
                r.gpu_throughput / 1_000_000.0
            } else {
                0.0
            }
        })
        .collect();

    Plot::new()
        .title("GPU vs CPU Performance Scaling")
        .xlabel("Dataset Size (points)")
        .ylabel("Throughput (Million points/sec)")
        .legend(Position::TopLeft)
        .size(12.0, 6.0)
        .dpi(150)
        .line(&point_counts, &cpu_throughput)
        .label("CPU")
        .line(&point_counts, &gpu_throughput)
        .label("GPU")
        .save(THROUGHPUT_PLOT_PATH)?;

    // Only successful GPU runs have a meaningful speedup value.
    let (speedup_points, speedup_values): (Vec<f64>, Vec<f64>) = results
        .iter()
        .filter(|r| r.gpu_success)
        .map(|r| (r.point_count as f64, r.gpu_speedup))
        .unzip();

    let speedup_plot_saved = !speedup_points.is_empty();
    if speedup_plot_saved {
        Plot::new()
            .title("GPU Speedup vs Dataset Size")
            .xlabel("Dataset Size (points)")
            .ylabel("GPU Speedup (x)")
            .size(12.0, 6.0)
            .dpi(150)
            .scatter(&speedup_points, &speedup_values)
            .save(SPEEDUP_PLOT_PATH)?;
    }

    println!("\nPerformance plots saved:");
    println!(" {}", THROUGHPUT_PLOT_PATH);
    // Previously the speedup file was announced even when it was never written.
    if speedup_plot_saved {
        println!(" {}", SPEEDUP_PLOT_PATH);
    } else {
        println!(" (speedup plot skipped: no successful GPU runs)");
    }
    Ok(())
}
/// Renders `n` in decimal with a comma between each group of three digits,
/// counted from the right (e.g. `1234567` becomes `"1,234,567"`).
fn format_number(n: u64) -> String {
    let digits = n.to_string();
    // Split into groups of up to three digits starting from the least
    // significant end, then restore most-significant-first order.
    let groups: Vec<&str> = digits
        .as_bytes()
        .rchunks(3)
        .rev()
        .map(|group| std::str::from_utf8(group).expect("decimal digits are ASCII"))
        .collect();
    groups.join(",")
}