benchmark_backends/
benchmark_backends.rs1use std::time::Instant;
6
7use adele_ring::backend::ArithmeticBackend;
8use adele_ring::{executor, Channels, RnsBatch, RnsInt};
9
10fn time_backend<F: Fn() -> RnsBatch>(f: F, iters: u32) -> f64 {
11 for _ in 0..10 {
13 let _ = f();
14 }
15 let start = Instant::now();
16 for _ in 0..iters {
17 let _ = f();
18 }
19 start.elapsed().as_secs_f64() * 1e6 / iters as f64 }
21
22fn main() {
23 let exec = executor();
24 let ch = Channels::standard(32);
25 let has_gpu = exec.gpu().is_some();
26
27 println!("== adele-ring :: backend benchmark (32 channels) ==");
28 println!(
29 "GPU available: {}\n",
30 if has_gpu {
31 exec.gpu().map(|g| g.adapter_name().to_string()).unwrap_or_default()
32 } else {
33 "no (CPU-only)".to_string()
34 }
35 );
36
37 println!("{:>10} | {:>14} | {:>12} | winner", "batch_size", "cpu_rayon_us", "gpu_us");
38 println!("{}", "-".repeat(56));
39
40 for &size in &[1usize, 16, 128, 1024, 16_384, 65_536] {
41 let a = RnsBatch::from_rns_ints(&vec![RnsInt::from_i64(123, ch.clone()); size]);
42 let b = RnsBatch::from_rns_ints(&vec![RnsInt::from_i64(456, ch.clone()); size]);
43
44 let iters = if size <= 128 { 2000 } else { 100 };
45 let cpu_us = time_backend(|| exec.cpu().batch_rns_add(&a, &b), iters);
46
47 let (gpu_str, winner) = if let Some(gpu) = exec.gpu() {
48 let gpu_us = time_backend(|| gpu.batch_rns_add(&a, &b), iters);
49 let w = if cpu_us <= gpu_us { "CPU" } else { "GPU" };
50 (format!("{gpu_us:>12.2}"), w)
51 } else {
52 (" n/a".to_string(), "CPU")
53 };
54
55 println!("{size:>10} | {cpu_us:>14.2} | {gpu_str} | {winner}");
56 }
57
58 println!(
59 "\nNote: CPU wins for small batches (GPU upload/dispatch overhead ~100us);\n\
60 GPU pulls ahead once the batch is large enough to amortize that fixed cost."
61 );
62}