use crate::parallel_solver::{
AssemblyTask, CsrMatrix, ParallelAssembler, ParallelGmresSolver, ParallelPcgSolver,
};
use std::time::{Duration, Instant};
#[derive(Debug, Clone)]
pub struct BenchReport {
pub name: String,
pub iterations: u32,
pub total_time: Duration,
pub mean_time: Duration,
pub mflops: Option<f64>,
pub n: usize,
pub note: Option<String>,
}
impl std::fmt::Display for BenchReport {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"[{}] n={} iters={} mean={:.3}µs",
self.name,
self.n,
self.iterations,
self.mean_time.as_secs_f64() * 1e6
)?;
if let Some(mf) = self.mflops {
write!(f, " {:.1} MFLOPs", mf)?;
}
if let Some(ref note) = self.note {
write!(f, " ({})", note)?;
}
Ok(())
}
}
pub struct BenchHarness {
pub warmup: u32,
pub iterations: u32,
pub reports: Vec<BenchReport>,
}
impl BenchHarness {
pub fn new() -> Self {
Self {
warmup: 3,
iterations: 10,
reports: Vec::new(),
}
}
pub fn fast() -> Self {
Self {
warmup: 1,
iterations: 5,
reports: Vec::new(),
}
}
pub fn run<F>(&mut self, name: &str, mut f: F) -> BenchReport
where
F: FnMut() -> BenchResult,
{
let mut last = BenchResult::default();
for _ in 0..self.warmup {
last = f();
}
let start = Instant::now();
for _ in 0..self.iterations {
last = f();
}
let total = start.elapsed();
let mean = total / self.iterations;
let report = BenchReport {
name: name.to_string(),
iterations: self.iterations,
total_time: total,
mean_time: mean,
mflops: last.mflops,
n: last.n,
note: last.note,
};
self.reports.push(report.clone());
report
}
pub fn print_summary(&self) {
println!("\n{:=<70}", "");
println!(
"{:<30} {:>8} {:>12} {:>10}",
"Benchmark", "N", "Mean (µs)", "MFLOPs"
);
println!("{:=<70}", "");
for r in &self.reports {
let mf = r.mflops.map_or("—".to_string(), |m| format!("{:.1}", m));
println!(
"{:<30} {:>8} {:>12.3} {:>10}",
r.name,
r.n,
r.mean_time.as_secs_f64() * 1e6,
mf
);
}
println!("{:=<70}", "");
}
}
impl Default for BenchHarness {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Default, Clone)]
pub struct BenchResult {
pub n: usize,
pub mflops: Option<f64>,
pub note: Option<String>,
}
pub fn tridiagonal_csr(n: usize) -> CsrMatrix {
let nnz = if n == 1 { 1 } else { 3 * n - 2 };
let mut row_offsets = vec![0usize; n + 1];
let mut col_indices = Vec::with_capacity(nnz);
let mut values = Vec::with_capacity(nnz);
for i in 0..n {
if i > 0 {
col_indices.push(i - 1);
values.push(-1.0);
}
col_indices.push(i);
values.push(2.0);
if i < n - 1 {
col_indices.push(i + 1);
values.push(-1.0);
}
row_offsets[i + 1] = col_indices.len();
}
CsrMatrix {
nrows: n,
ncols: n,
row_offsets,
col_indices,
values,
}
}
pub fn banded_csr(n: usize, bw: usize) -> CsrMatrix {
let mut row_offsets = vec![0usize; n + 1];
let mut col_indices = Vec::new();
let mut values = Vec::new();
for i in 0..n {
let lo = i.saturating_sub(bw);
let hi = (i + bw).min(n - 1);
for j in lo..=hi {
col_indices.push(j);
if j == i {
values.push(2.0 * (hi - lo + 1) as f64);
}
else {
values.push(-1.0);
}
}
row_offsets[i + 1] = col_indices.len();
}
CsrMatrix {
nrows: n,
ncols: n,
row_offsets,
col_indices,
values,
}
}
pub fn example_assembly_task(element_id: usize, n_dof_per_node: usize) -> AssemblyTask {
let n_nodes = 3;
let n_dof = n_nodes * n_dof_per_node;
let global_start = element_id * n_dof_per_node;
let global_dofs: Vec<usize> = (0..n_nodes)
.flat_map(|node| (0..n_dof_per_node).map(move |d| global_start + node * n_dof_per_node + d))
.collect();
let scale = 1.0 + element_id as f64 * 0.01;
let mut ke = vec![0.0; n_dof * n_dof];
for d in 0..n_dof {
ke[d * n_dof + d] = scale;
}
AssemblyTask { global_dofs, ke }
}
pub fn bench_spmv(n: usize) -> BenchResult {
let a = tridiagonal_csr(n);
let x: Vec<f64> = (0..n).map(|i| (i + 1) as f64).collect();
let mut y = vec![0.0; n];
let t0 = Instant::now();
a.spmv(&x, &mut y);
let elapsed = t0.elapsed().as_secs_f64();
let nnz = a.nnz();
let flops = 2.0 * nnz as f64;
let mflops = if elapsed > 0.0 {
flops / elapsed / 1e6
} else {
f64::INFINITY
};
{
debug_assert!(y[0].abs() < 1e-6 || n == 1);
}
BenchResult {
n,
mflops: Some(mflops),
note: Some(format!("nnz={}", nnz)),
}
}
pub fn bench_spmv_parallel(n: usize) -> BenchResult {
let a = tridiagonal_csr(n);
let x: Vec<f64> = (0..n).map(|i| (i + 1) as f64).collect();
let mut y = vec![0.0; n];
let t0 = Instant::now();
a.spmv_par(&x, &mut y);
let elapsed = t0.elapsed().as_secs_f64();
let nnz = a.nnz();
let mflops = 2.0 * nnz as f64 / elapsed / 1e6;
BenchResult {
n,
mflops: Some(mflops),
note: Some(format!("rayon nnz={}", nnz)),
}
}
pub fn bench_pcg(n: usize, max_iter: usize, tol: f64) -> BenchResult {
let a = tridiagonal_csr(n);
let b = vec![1.0; n];
let mut x = vec![0.0; n];
let solver = ParallelPcgSolver::new(max_iter, tol);
let t0 = Instant::now();
let stats = solver.solve(&a, &b, &mut x);
let elapsed = t0.elapsed().as_secs_f64();
let iters = stats.iterations;
let nnz = a.nnz();
let flops = iters as f64 * 6.0 * nnz as f64;
let mflops = flops / elapsed / 1e6;
BenchResult {
n,
mflops: Some(mflops),
note: Some(format!("iters={} tol={:.0e}", iters, tol)),
}
}
pub fn bench_gmres(n: usize, restart: usize, max_iter: usize, tol: f64) -> BenchResult {
let a = tridiagonal_csr(n);
let b = vec![1.0; n];
let mut x = vec![0.0; n];
let solver = ParallelGmresSolver {
krylov_dim: restart,
max_restarts: max_iter,
tolerance: tol,
};
let t0 = Instant::now();
let stats = solver.solve(&a, &b, &mut x);
let elapsed = t0.elapsed().as_secs_f64();
let iters = stats.iterations;
let nnz = a.nnz();
let flops = iters as f64 * restart as f64 * 2.0 * nnz as f64;
let mflops = flops / elapsed.max(1e-9) / 1e6;
BenchResult {
n,
mflops: Some(mflops),
note: Some(format!(
"restart={} iters={} tol={:.0e}",
restart, iters, tol
)),
}
}
pub fn bench_assembly(n_elements: usize, ndofs: usize) -> BenchResult {
let dof_per_node = 2;
let tasks: Vec<AssemblyTask> = (0..n_elements)
.map(|e| example_assembly_task(e % (ndofs / (3 * dof_per_node)).max(1), dof_per_node))
.collect();
let assembler = ParallelAssembler { ndofs };
let t0 = Instant::now();
let _k = assembler.assemble(&tasks);
let elapsed = t0.elapsed().as_secs_f64();
let n_dof_elem = 3 * dof_per_node;
let flops = n_elements as f64 * (n_dof_elem * n_dof_elem) as f64;
let mflops = flops / elapsed.max(1e-9) / 1e6;
BenchResult {
n: n_elements,
mflops: Some(mflops),
note: Some(format!("ndofs={}", ndofs)),
}
}
pub fn bench_element_stiffness(n_elements: usize) -> BenchResult {
let n_dof = 12; let t0 = Instant::now();
let mut checksum = 0.0_f64;
for e in 0..n_elements {
let ke = compute_linear_tet_ke_stub(e, n_dof);
checksum += ke[0]; }
let elapsed = t0.elapsed().as_secs_f64();
let _ = checksum;
let flops = n_elements as f64 * 2.0 * (n_dof * n_dof) as f64;
let mflops = flops / elapsed.max(1e-9) / 1e6;
BenchResult {
n: n_elements,
mflops: Some(mflops),
note: Some(format!("n_dof_per_elem={}", n_dof)),
}
}
fn compute_linear_tet_ke_stub(element_id: usize, n_dof: usize) -> Vec<f64> {
let scale = 1.0 + element_id as f64 * 1e-4;
let mut ke = vec![0.0_f64; n_dof * n_dof];
for i in 0..n_dof {
for j in 0..n_dof {
ke[i * n_dof + j] = if i == j { 4.0 * scale } else { -scale };
}
}
ke
}
#[derive(Debug, Clone)]
pub struct SuiteConfig {
pub warmup: u32,
pub iterations: u32,
pub spmv_pcg_sizes: Vec<usize>,
pub pcg_max_iter: usize,
pub pcg_tol: f64,
pub gmres: Option<(usize, usize, usize, f64)>,
pub assembly: Vec<(usize, usize)>,
pub element_stiffness: Vec<usize>,
}
impl SuiteConfig {
pub fn full() -> Self {
Self {
warmup: 1,
iterations: 5,
spmv_pcg_sizes: vec![100, 500, 1000, 5000],
pcg_max_iter: 200,
pcg_tol: 1e-8,
gmres: Some((1000, 50, 500, 1e-8)),
assembly: vec![(1000, 600), (10000, 6000)],
element_stiffness: vec![1000, 10000],
}
}
pub fn smoke() -> Self {
Self {
warmup: 0,
iterations: 1,
spmv_pcg_sizes: vec![32],
pcg_max_iter: 16,
pcg_tol: 1e-4,
gmres: Some((32, 8, 4, 1e-3)),
assembly: vec![(16, 60)],
element_stiffness: vec![16],
}
}
}
pub fn run_suite(cfg: &SuiteConfig, verbose: bool) -> String {
let mut h = BenchHarness {
warmup: cfg.warmup,
iterations: cfg.iterations.max(1),
reports: Vec::new(),
};
for &n in &cfg.spmv_pcg_sizes {
h.run(&format!("spmv_seq_n{}", n), || bench_spmv(n));
h.run(&format!("spmv_par_n{}", n), || bench_spmv_parallel(n));
h.run(&format!("pcg_n{}", n), || {
bench_pcg(n, cfg.pcg_max_iter, cfg.pcg_tol)
});
}
if let Some((n, restart, max_iter, tol)) = cfg.gmres {
h.run(&format!("gmres_n{}_r{}", n, restart), || {
bench_gmres(n, restart, max_iter, tol)
});
}
for &(ne, nd) in &cfg.assembly {
h.run(&format!("assembly_{}_elem", ne), || bench_assembly(ne, nd));
}
for &ne in &cfg.element_stiffness {
h.run(&format!("ke_stiffness_{}", ne), || {
bench_element_stiffness(ne)
});
}
let mut out = String::new();
if verbose {
for r in &h.reports {
out.push_str(&format!("{}\n", r));
}
} else {
out = format!("{} benchmarks completed", h.reports.len());
}
out
}
pub fn run_full_suite(verbose: bool) -> String {
run_suite(&SuiteConfig::full(), verbose)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_tridiagonal_csr_shape() {
let a = tridiagonal_csr(5);
assert_eq!(a.nrows, 5);
assert_eq!(a.ncols, 5);
assert_eq!(a.col_indices.len(), 13);
}
#[test]
fn test_tridiagonal_csr_spmv() {
let a = tridiagonal_csr(5);
let x = vec![1.0; 5];
let mut y = vec![0.0; 5];
a.spmv(&x, &mut y);
assert!((y[0] - 1.0).abs() < 1e-10, "y[0]={}", y[0]);
assert!((y[2]).abs() < 1e-10, "y[2]={}", y[2]);
assert!((y[4] - 1.0).abs() < 1e-10, "y[4]={}", y[4]);
}
#[test]
fn test_bench_spmv_runs() {
let result = bench_spmv(100);
assert_eq!(result.n, 100);
assert!(result.mflops.is_some());
}
#[test]
fn test_bench_pcg_converges() {
let result = bench_pcg(50, 100, 1e-8);
assert!(result.note.is_some());
let note = result.note.unwrap();
assert!(note.contains("iters="), "note: {}", note);
}
#[test]
fn test_bench_gmres_runs() {
let result = bench_gmres(50, 20, 100, 1e-6);
assert_eq!(result.n, 50);
}
#[test]
fn test_bench_assembly_runs() {
let result = bench_assembly(100, 60);
assert_eq!(result.n, 100);
assert!(result.mflops.is_some());
}
#[test]
fn test_bench_element_stiffness() {
let result = bench_element_stiffness(100);
assert_eq!(result.n, 100);
}
#[test]
fn test_banded_csr() {
let a = banded_csr(10, 2);
assert_eq!(a.nrows, 10);
assert!(a.col_indices.len() > 10);
}
#[test]
fn test_bench_harness_warmup() {
let mut h = BenchHarness {
warmup: 1,
iterations: 3,
reports: Vec::new(),
};
let report = h.run("test", || bench_spmv(10));
assert_eq!(report.iterations, 3);
assert_eq!(h.reports.len(), 1);
}
#[test]
fn test_run_full_suite() {
let s = run_suite(&SuiteConfig::smoke(), false);
assert!(s.contains("benchmarks"), "output: {}", s);
}
}