use batuta::backend::BackendSelector;
fn main() {
    // Runs one demo case: prints the heading, performs the selection, then
    // prints the chosen backend followed by a canned rationale line.
    // The selection is passed as a closure so that it executes *after* the
    // heading has been printed.
    // The rationale text is fixed per example; it is not derived from the
    // backend that was actually selected.
    fn run_example<B, F>(heading: &str, choose: F, rationale_line: &str)
    where
        B: std::fmt::Display,
        F: FnOnce() -> B,
    {
        println!("{}", heading);
        let chosen = choose();
        println!(" Selected backend: {}", chosen);
        println!("{}", rationale_line);
    }

    println!("🎯 Backend Selection Demo");
    println!("Based on sovereign-ai-spec.md section 2.2 (5× PCIe rule)\n");

    let selector = BackendSelector::new();

    // Square matrix multiplications of increasing size.
    run_example(
        "Example 1: Matrix Multiplication 64×64",
        || selector.select_for_matmul(64, 64, 64),
        " Rationale: Small matrix, PCIe overhead dominates\n",
    );
    run_example(
        "Example 2: Matrix Multiplication 512×512",
        || selector.select_for_matmul(512, 512, 512),
        " Rationale: Compute/transfer ratio still < 5×\n",
    );
    run_example(
        "Example 3: Matrix Multiplication 2048×2048",
        || selector.select_for_matmul(2048, 2048, 2048),
        " Rationale: O(n³) compute begins to justify GPU\n",
    );

    // Vector-style workloads.
    run_example(
        "Example 4: Dot Product (10K elements)",
        || selector.select_for_vector_op(10_000, 2),
        " Rationale: Memory-bound, GPU overhead too high\n",
    );
    run_example(
        "Example 5: Element-wise Add (1M elements)",
        || selector.select_for_elementwise(1_000_000),
        " Rationale: Minimal compute, uses SIMD\n",
    );

    // Custom workload: the caller supplies the data volume and FLOP count.
    println!("Example 6: Custom Workload");
    let payload_bytes = 1_000_000;
    let total_flops = 1_000_000_000;
    let custom_choice = selector.select_backend(payload_bytes, total_flops);
    println!(" Data: {} bytes", payload_bytes);
    println!(" FLOPs: {}", total_flops);
    println!(" Selected backend: {}", custom_choice);

    // Local illustration of the compute/transfer ratio for the workload above.
    // NOTE(review): these two constants live only in this demo; whether they
    // match the figures BackendSelector uses internally is not visible here.
    // Presumably bytes/second and FLOP/second respectively — confirm.
    let pcie_bytes_per_sec = 32e9;
    let gpu_flops_per_sec = 20e12;
    let transfer_secs = payload_bytes as f64 / pcie_bytes_per_sec;
    let compute_secs = total_flops as f64 / gpu_flops_per_sec;
    let compute_to_transfer = compute_secs / transfer_secs;
    println!(" Compute/Transfer ratio: {:.2}×", compute_to_transfer);
    println!(" (Need > 5× for GPU benefit)\n");

    println!("✅ Per Gregg & Hazelwood (2011): GPU dispatch when");
    println!(" compute_time > 5× transfer_time");
}