#![allow(clippy::print_stdout, clippy::print_stderr)]
#![allow(clippy::needless_range_loop)]
use crate::linalg::ldl::factorize_with_par;
use crate::linalg::parallelism::solver_par_from_threads;
use crate::sparse::CscMatrix;
use std::time::Instant;
fn build_dense_psd_upper(n: usize, seed: u64) -> CscMatrix {
let mut state = seed.max(1);
let mut rows: Vec<usize> = Vec::with_capacity(n * (n + 1) / 2);
let mut cols: Vec<usize> = Vec::with_capacity(n * (n + 1) / 2);
let mut vals: Vec<f64> = Vec::with_capacity(n * (n + 1) / 2);
for j in 0..n {
let mut row_sum_abs = 0.0_f64;
for i in 0..j {
state = state.wrapping_mul(LCG_MUL).wrapping_add(LCG_ADD);
let raw = ((state >> 33) as f64) / (u32::MAX as f64);
let v = (raw - 0.5) * 2.0;
rows.push(i);
cols.push(j);
vals.push(v);
row_sum_abs += v.abs();
}
rows.push(j);
cols.push(j);
vals.push(row_sum_abs + (n as f64) * DIAG_BIAS_PER_DIM + DIAG_BIAS_FLOOR);
}
CscMatrix::from_triplets(&rows, &cols, &vals, n, n).unwrap()
}
const LCG_MUL: u64 = 6364136223846793005;
const LCG_ADD: u64 = 1442695040888963407;
const DIAG_BIAS_PER_DIM: f64 = 1.0e-2;
const DIAG_BIAS_FLOOR: f64 = 1.0;
const SOLVE_AGREEMENT_TOL: f64 = 1e-6;
fn measure_factorize_solve(mat: &CscMatrix, rhs: &[f64], par: faer::Par) -> (f64, Vec<f64>) {
let n = mat.nrows;
let t0 = Instant::now();
let factor = factorize_with_par(mat, par).expect("factorize_with_par failed");
let mut sol = vec![0.0_f64; n];
factor.solve(rhs, &mut sol);
(t0.elapsed().as_secs_f64(), sol)
}
fn max_abs_diff(a: &[f64], b: &[f64]) -> f64 {
a.iter()
.zip(b.iter())
.map(|(x, y)| (x - y).abs())
.fold(0.0_f64, f64::max)
}
#[test]
fn faer_par_speedup_supernodal_dense() {
let n = SPEEDUP_TEST_N;
let mat = build_dense_psd_upper(n, 0xCAFE_F00D_DEAD_BEEFu64);
let rhs: Vec<f64> = (0..n).map(|i| ((i % 7) as f64) - 3.5).collect();
let par8 = solver_par_from_threads(8);
let encoded = par_thread_count(par8);
assert_eq!(
encoded, 8,
"solver_par_from_threads(8) encodes {encoded} threads; expected 8 (helper no-op?)"
);
let (_, seq_sol) = measure_factorize_solve(&mat, &rhs, faer::Par::Seq);
let (_, par_sol) = measure_factorize_solve(&mat, &rhs, par8);
let diff = max_abs_diff(&seq_sol, &par_sol);
eprintln!(
"[dense PSD n={n}] threads={encoded} seq/par max_diff={:.3e}",
diff
);
assert!(
diff < SOLVE_AGREEMENT_TOL,
"seq/par sol mismatch: max_diff={:.3e} > {:.3e}",
diff,
SOLVE_AGREEMENT_TOL
);
}
const SPEEDUP_TEST_N: usize = 1500;
#[test]
fn faer_par_speedup_arrowhead_pd() {
let n = SPEEDUP_TEST_N;
let mat = build_arrowhead_pd_upper(n, ARROWHEAD_TIP_COLS, 0xDEAD_BEEF_BADC_0FFEu64);
let rhs: Vec<f64> = (0..n).map(|i| ((i % 5) as f64) - 2.0).collect();
let par8 = solver_par_from_threads(8);
let encoded = par_thread_count(par8);
assert_eq!(
encoded, 8,
"solver_par_from_threads(8) encodes {encoded} threads; expected 8 (helper no-op?)"
);
let (_, seq_sol) = measure_factorize_solve(&mat, &rhs, faer::Par::Seq);
let (_, par_sol) = measure_factorize_solve(&mat, &rhs, par8);
let diff = max_abs_diff(&seq_sol, &par_sol);
eprintln!(
"[arrowhead n={n} tip={}] threads={encoded} seq/par max_diff={:.3e}",
ARROWHEAD_TIP_COLS, diff
);
assert!(
diff < SOLVE_AGREEMENT_TOL,
"arrowhead seq/par mismatch: max_diff={:.3e}",
diff
);
}
fn build_arrowhead_pd_upper(n: usize, tip: usize, seed: u64) -> CscMatrix {
let mut state = seed.max(1);
let mut rows: Vec<usize> = Vec::new();
let mut cols: Vec<usize> = Vec::new();
let mut vals: Vec<f64> = Vec::new();
for j in 0..n {
if j < tip {
let mut sum_abs = 0.0;
for i in 0..j {
state = state.wrapping_mul(LCG_MUL).wrapping_add(LCG_ADD);
let raw = ((state >> 33) as f64) / (u32::MAX as f64);
let v = (raw - 0.5) * 2.0;
rows.push(i);
cols.push(j);
vals.push(v);
sum_abs += v.abs();
}
rows.push(j);
cols.push(j);
vals.push(sum_abs + (n as f64) * DIAG_BIAS_PER_DIM + DIAG_BIAS_FLOOR);
} else {
let mut sum_abs = 0.0;
for i in 0..tip {
state = state.wrapping_mul(LCG_MUL).wrapping_add(LCG_ADD);
let raw = ((state >> 33) as f64) / (u32::MAX as f64);
let v = (raw - 0.5) * 2.0;
rows.push(i);
cols.push(j);
vals.push(v);
sum_abs += v.abs();
}
rows.push(j);
cols.push(j);
vals.push(sum_abs + (n as f64) * DIAG_BIAS_PER_DIM + DIAG_BIAS_FLOOR);
}
}
CscMatrix::from_triplets(&rows, &cols, &vals, n, n).unwrap()
}
const ARROWHEAD_TIP_COLS: usize = 200;
fn par_thread_count(par: faer::Par) -> usize {
match par {
faer::Par::Seq => 1,
faer::Par::Rayon(n) => n.get(),
}
}
fn rayon_pool_active_count(n: usize) -> usize {
rayon::ThreadPoolBuilder::new()
.num_threads(n)
.build()
.expect("ThreadPool build")
.install(rayon::current_num_threads)
}
#[test]
fn faer_par_table_driven_size_sweep() {
let par8 = solver_par_from_threads(8);
let encoded = par_thread_count(par8);
assert_eq!(
encoded, 8,
"solver_par_from_threads(8) encodes {encoded} threads; expected 8 (helper no-op?)"
);
assert_eq!(
rayon_pool_active_count(encoded),
encoded,
"rayon pool with {encoded} threads reports wrong active count"
);
let cases: &[(&str, usize)] = &[
("small_n80", 80),
("mid_n800", 800),
("big_n1500", SPEEDUP_TEST_N),
];
for &(label, n) in cases {
let mat = build_dense_psd_upper(n, 0xAA00_BB11_CC22_DD33u64.wrapping_add(n as u64));
let rhs: Vec<f64> = (0..n).map(|i| ((i % 3) as f64) - 1.0).collect();
let (_, seq_sol) = measure_factorize_solve(&mat, &rhs, faer::Par::Seq);
let (_, par_sol) = measure_factorize_solve(&mat, &rhs, par8);
let diff = max_abs_diff(&seq_sol, &par_sol);
eprintln!(
"[{label}] n={n} threads={encoded} seq/par max_diff={:.3e}",
diff
);
assert!(
diff < SOLVE_AGREEMENT_TOL,
"{label} (n={n}): seq/par solution mismatch max_diff={:.3e} >= {:.3e}",
diff,
SOLVE_AGREEMENT_TOL
);
}
}
#[test]
fn faer_par_threads_sweep_monotone() {
let n = SPEEDUP_TEST_N;
let mat = build_dense_psd_upper(n, 0x1234_5678_9ABC_DEF0u64);
let rhs: Vec<f64> = (0..n).map(|i| (i % 11) as f64).collect();
let (_, ref_sol) = measure_factorize_solve(&mat, &rhs, faer::Par::Seq);
for &t in &[1_usize, 2, 4, 8] {
let par = solver_par_from_threads(t);
let encoded = par_thread_count(par);
let expected = t.max(1);
assert_eq!(
encoded, expected,
"solver_par_from_threads({t}) encodes {encoded} threads; expected {expected}"
);
if t >= 2 {
assert_eq!(
rayon_pool_active_count(encoded),
encoded,
"rayon pool with {encoded} threads (t={t}) reports wrong active count"
);
}
let (_, sol) = measure_factorize_solve(&mat, &rhs, par);
let diff = max_abs_diff(&ref_sol, &sol);
eprintln!(
"[sweep n={n}] threads={t} encoded={encoded} sol_diff={:.3e}",
diff
);
assert!(
diff < SOLVE_AGREEMENT_TOL,
"sweep t={t}: solution diverges from Par::Seq: max_diff={:.3e} >= {:.3e}",
diff,
SOLVE_AGREEMENT_TOL
);
}
}