use crate::dense::factor::{
factor, factor_frontal_blocked_in_place, BunchKaufmanParams, FrontalFactors,
};
use crate::dense::matrix::SymmetricMatrix;
use crate::error::FeralError;
use crate::inertia::Inertia;
use crate::scaling::{compute_scaling_dense_fast, compute_scaling_with_cache, ScalingStrategy};
use crate::sparse::csc::CscMatrix;
use crate::symbolic::SymbolicFactorization;
use std::sync::{Arc, Mutex};
use std::time::Instant;
#[derive(Debug, Clone)]
pub struct NumericParams {
pub bk: BunchKaufmanParams,
pub scaling: ScalingStrategy,
pub small_leaf: SmallLeafBatch,
pub profiler: Option<Arc<Mutex<Profiler>>>,
pub parallel_telemetry: Option<Arc<AtomicLockStats>>,
}
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum SmallLeafBatch {
#[default]
Off,
On,
}
#[derive(Debug, Clone, serde::Serialize)]
pub struct SupernodeTiming {
pub snode_idx: usize,
pub nrow: usize,
pub ncol: usize,
pub us: u64,
}
#[derive(Debug, Clone, Default)]
pub struct Profiler {
timings: Vec<SupernodeTiming>,
prologue_us: u64,
epilogue_us: u64,
total_us: u64,
}
impl Profiler {
pub fn new() -> Self {
Self::default()
}
pub fn len(&self) -> usize {
self.timings.len()
}
pub fn is_empty(&self) -> bool {
self.timings.is_empty()
}
pub fn timings(&self) -> &[SupernodeTiming] {
&self.timings
}
pub fn report(&self) -> ProfileReport {
const RANGES: &[(&str, usize, usize)] = &[
("<=8", 0, 8),
("9-16", 9, 16),
("17-32", 17, 32),
("33-64", 33, 64),
("65-128", 65, 128),
(">128", 129, usize::MAX),
];
let mut buckets: Vec<BucketStats> = RANGES
.iter()
.map(|&(range, _, _)| BucketStats {
range,
count: 0,
sum_us: 0,
pct_of_total: 0.0,
avg_us: 0.0,
})
.collect();
for t in &self.timings {
for (i, &(_, lo, hi)) in RANGES.iter().enumerate() {
if t.nrow >= lo && t.nrow <= hi {
buckets[i].count += 1;
buckets[i].sum_us += t.us;
break;
}
}
}
let loop_us: u64 = buckets.iter().map(|b| b.sum_us).sum();
let mut warnings: Vec<String> = Vec::new();
let count_sum: usize = buckets.iter().map(|b| b.count).sum();
if count_sum != self.timings.len() {
warnings.push(format!(
"bucket count sum {} != timings len {}",
count_sum,
self.timings.len()
));
}
if self.total_us > 0 && loop_us + self.prologue_us + self.epilogue_us > self.total_us {
warnings.push(format!(
"loop+prologue+epilogue ({}) exceeds total ({})",
loop_us + self.prologue_us + self.epilogue_us,
self.total_us
));
}
for b in &mut buckets {
if loop_us > 0 {
b.pct_of_total = (b.sum_us as f64) * 100.0 / (loop_us as f64);
}
if b.count > 0 {
b.avg_us = (b.sum_us as f64) / (b.count as f64);
}
}
let overhead_pct = if self.total_us > 0 {
((self.prologue_us + self.epilogue_us) as f64) * 100.0 / (self.total_us as f64)
} else {
0.0
};
ProfileReport {
n_supernodes: self.timings.len(),
prologue_us: self.prologue_us,
epilogue_us: self.epilogue_us,
loop_us,
total_us: self.total_us,
overhead_pct,
buckets,
validation_warnings: warnings,
}
}
}
#[derive(Debug, Clone, serde::Serialize)]
pub struct BucketStats {
pub range: &'static str,
pub count: usize,
pub sum_us: u64,
pub pct_of_total: f64,
pub avg_us: f64,
}
#[derive(Debug, Clone, serde::Serialize)]
pub struct ProfileReport {
pub n_supernodes: usize,
pub prologue_us: u64,
pub epilogue_us: u64,
pub loop_us: u64,
pub total_us: u64,
pub overhead_pct: f64,
pub buckets: Vec<BucketStats>,
pub validation_warnings: Vec<String>,
}
impl Default for NumericParams {
fn default() -> Self {
Self {
bk: BunchKaufmanParams {
pivot_threshold: 1e-8,
..BunchKaufmanParams::default()
},
scaling: ScalingStrategy::default(),
small_leaf: SmallLeafBatch::default(),
profiler: None,
parallel_telemetry: None,
}
}
}
impl NumericParams {
pub fn with_bk(bk: BunchKaufmanParams) -> Self {
Self {
bk,
scaling: ScalingStrategy::default(),
small_leaf: SmallLeafBatch::default(),
profiler: None,
parallel_telemetry: None,
}
}
}
#[derive(Default, Debug)]
pub struct AtomicLockStats {
pub contrib_wait_ns: std::sync::atomic::AtomicU64,
pub contrib_hold_ns: std::sync::atomic::AtomicU64,
pub node_factors_wait_ns: std::sync::atomic::AtomicU64,
pub node_factors_hold_ns: std::sync::atomic::AtomicU64,
pub factor_body_ns: std::sync::atomic::AtomicU64,
pub task_wall_ns: std::sync::atomic::AtomicU64,
pub ws_lock_wait_ns: std::sync::atomic::AtomicU64,
pub n_tasks: std::sync::atomic::AtomicU64,
pub phase_scaling_ns: std::sync::atomic::AtomicU64,
pub phase_permute_ns: std::sync::atomic::AtomicU64,
pub phase_symmetric_pattern_ns: std::sync::atomic::AtomicU64,
pub phase_tree_setup_ns: std::sync::atomic::AtomicU64,
pub phase_thread_ws_ns: std::sync::atomic::AtomicU64,
pub phase_leaves_ns: std::sync::atomic::AtomicU64,
pub phase_scope_ns: std::sync::atomic::AtomicU64,
pub phase_collect_ns: std::sync::atomic::AtomicU64,
}
#[derive(Default, Debug, Clone, Copy)]
pub struct ParallelLockStats {
pub contrib_wait_ns: u64,
pub contrib_hold_ns: u64,
pub node_factors_wait_ns: u64,
pub node_factors_hold_ns: u64,
pub factor_body_ns: u64,
pub task_wall_ns: u64,
pub ws_lock_wait_ns: u64,
pub n_tasks: u64,
pub phase_scaling_ns: u64,
pub phase_permute_ns: u64,
pub phase_symmetric_pattern_ns: u64,
pub phase_tree_setup_ns: u64,
pub phase_thread_ws_ns: u64,
pub phase_leaves_ns: u64,
pub phase_scope_ns: u64,
pub phase_collect_ns: u64,
}
impl AtomicLockStats {
pub fn snapshot(&self) -> ParallelLockStats {
use std::sync::atomic::Ordering;
ParallelLockStats {
contrib_wait_ns: self.contrib_wait_ns.load(Ordering::Relaxed),
contrib_hold_ns: self.contrib_hold_ns.load(Ordering::Relaxed),
node_factors_wait_ns: self.node_factors_wait_ns.load(Ordering::Relaxed),
node_factors_hold_ns: self.node_factors_hold_ns.load(Ordering::Relaxed),
factor_body_ns: self.factor_body_ns.load(Ordering::Relaxed),
task_wall_ns: self.task_wall_ns.load(Ordering::Relaxed),
ws_lock_wait_ns: self.ws_lock_wait_ns.load(Ordering::Relaxed),
n_tasks: self.n_tasks.load(Ordering::Relaxed),
phase_scaling_ns: self.phase_scaling_ns.load(Ordering::Relaxed),
phase_permute_ns: self.phase_permute_ns.load(Ordering::Relaxed),
phase_symmetric_pattern_ns: self.phase_symmetric_pattern_ns.load(Ordering::Relaxed),
phase_tree_setup_ns: self.phase_tree_setup_ns.load(Ordering::Relaxed),
phase_thread_ws_ns: self.phase_thread_ws_ns.load(Ordering::Relaxed),
phase_leaves_ns: self.phase_leaves_ns.load(Ordering::Relaxed),
phase_scope_ns: self.phase_scope_ns.load(Ordering::Relaxed),
phase_collect_ns: self.phase_collect_ns.load(Ordering::Relaxed),
}
}
}
#[derive(Debug, Clone)]
pub struct SchurBlock {
pub dim: usize,
pub data: Vec<f64>,
}
impl SchurBlock {
#[inline]
pub fn get(&self, i: usize, j: usize) -> f64 {
self.data[j * self.dim + i]
}
pub fn symv(&self, x: &[f64], y: &mut [f64]) -> Result<(), FeralError> {
if x.len() != self.dim || y.len() != self.dim {
return Err(FeralError::DimensionMismatch {
expected: self.dim,
got: x.len().max(y.len()),
});
}
for yi in y.iter_mut() {
*yi = 0.0;
}
for (j, &xj) in x.iter().enumerate().take(self.dim) {
let col = &self.data[j * self.dim..(j + 1) * self.dim];
for (i, &v) in col.iter().enumerate() {
y[i] += v * xj;
}
}
Ok(())
}
pub fn solve(&self, rhs: &[f64]) -> Result<Vec<f64>, FeralError> {
self.solve_with(rhs, &BunchKaufmanParams::default())
}
pub fn solve_with(
&self,
rhs: &[f64],
params: &BunchKaufmanParams,
) -> Result<Vec<f64>, FeralError> {
if rhs.len() != self.dim {
return Err(FeralError::DimensionMismatch {
expected: self.dim,
got: rhs.len(),
});
}
let s_mat = SymmetricMatrix::from_column_major(self.dim, self.data.clone())?;
let (factors, _inertia) = factor(&s_mat, params)?;
crate::dense::solve::solve(&factors, rhs)
}
}
#[derive(Debug)]
pub struct SparseFactors {
pub n: usize,
pub perm: Vec<usize>,
pub perm_inv: Vec<usize>,
pub node_factors: Vec<NodeFactors>,
pub needs_refinement: bool,
pub scaling: Vec<f64>,
pub scaling_info: crate::scaling::ScalingInfo,
pub resolved_method: crate::symbolic::OrderingMethod,
pub resolved_amalgamation: crate::symbolic::AmalgamationStrategy,
pub resolved_preprocess: crate::symbolic::OrderingPreprocess,
}
impl SparseFactors {
pub fn summary(&self) -> String {
let mut n_2x2 = 0usize;
let mut n_delayed = 0usize;
let mut nnz_l = 0usize;
let mut inertia = crate::inertia::Inertia::new(0, 0, 0);
for nf in &self.node_factors {
let ff = &nf.frontal_factors;
n_delayed += ff.n_delayed;
let trailing = ff.nrow.saturating_sub(ff.nelim) * ff.nelim;
nnz_l += ff.nelim * (ff.nelim + 1) / 2 + trailing;
let nelim = ff.nelim;
let mut k = 0;
while k < nelim {
let two_by_two = k + 1 < nelim && ff.d_subdiag[k] != 0.0;
if two_by_two {
n_2x2 += 1;
k += 2;
} else {
k += 1;
}
}
inertia.positive += nf.inertia.positive;
inertia.negative += nf.inertia.negative;
inertia.zero += nf.inertia.zero;
}
format!(
"n={} | ord={:?} | amalg={:?} | preproc={:?} | scaling={:?} | n_supernodes={} | nnz_L={} | n_2x2={} | n_delayed={} | inertia=({},{},{})",
self.n,
self.resolved_method,
self.resolved_amalgamation,
self.resolved_preprocess,
self.scaling_info,
self.node_factors.len(),
nnz_l,
n_2x2,
n_delayed,
inertia.positive,
inertia.negative,
inertia.zero,
)
}
pub fn factor_nnz(&self) -> usize {
self.node_factors
.iter()
.map(|nf| {
let nrow = nf.frontal_factors.nrow;
let nelim = nf.frontal_factors.nelim;
let trailing = nrow.saturating_sub(nelim) * nelim;
let eliminated_lower_with_diag = nelim * (nelim + 1) / 2;
eliminated_lower_with_diag + trailing
})
.sum()
}
pub fn min_diagonal(&self) -> Option<f64> {
let mut min_d = f64::INFINITY;
let mut any = false;
for nf in &self.node_factors {
let ff = &nf.frontal_factors;
let nelim = ff.nelim;
let mut k = 0;
while k < nelim {
let two_by_two = k + 1 < nelim && ff.d_subdiag[k] != 0.0;
let eig = if two_by_two {
let a = ff.d_diag[k];
let b = ff.d_subdiag[k];
let c = ff.d_diag[k + 1];
let trace = a + c;
let det = a * c - b * b;
let disc = (trace * trace - 4.0 * det).max(0.0).sqrt();
(trace - disc) * 0.5
} else {
ff.d_diag[k]
};
if eig < min_d {
min_d = eig;
}
any = true;
k += if two_by_two { 2 } else { 1 };
}
}
if any {
Some(min_d)
} else {
None
}
}
}
#[derive(Debug)]
pub struct NodeFactors {
pub first_col: usize,
pub ncol: usize,
pub nelim: usize,
pub n_delayed_in: usize,
pub nrow: usize,
pub row_indices: Vec<usize>,
pub frontal_factors: FrontalFactors,
pub inertia: Inertia,
}
#[derive(Debug, Default)]
pub struct FactorWorkspace {
row_map: Vec<usize>,
frontal_values: Vec<f64>,
build_delayed: Vec<usize>,
build_trailing: Vec<usize>,
build_seen: Vec<bool>,
dense_values: Vec<f64>,
local_contribs: Vec<Option<ContribBlock>>,
}
impl FactorWorkspace {
pub fn new() -> Self {
Self::default()
}
}
#[inline]
pub fn should_use_dense_fast_path(n: usize, nnz_lower: usize) -> bool {
const N_TINY: usize = 16;
const N_MAX: usize = 128;
const RHO_NUM: usize = 1;
const RHO_DEN: usize = 4;
if n == 0 {
return false;
}
if n <= N_TINY {
return true;
}
if n > N_MAX {
return false;
}
let lower_cells = n * (n + 1) / 2;
nnz_lower * RHO_DEN >= lower_cells * RHO_NUM
}
pub fn dense_fast_factor(
matrix: &CscMatrix,
params: &NumericParams,
) -> Result<(SparseFactors, Inertia), FeralError> {
let mut ws = FactorWorkspace::new();
dense_fast_factor_with_workspace(matrix, params, &mut ws)
}
pub fn dense_fast_factor_with_workspace(
matrix: &CscMatrix,
params: &NumericParams,
ws: &mut FactorWorkspace,
) -> Result<(SparseFactors, Inertia), FeralError> {
let n = matrix.n;
if n == 0 {
return Err(FeralError::InvalidInput(
"dense_fast_factor: matrix dimension is zero".to_string(),
));
}
let dense_buf = std::mem::take(&mut ws.dense_values);
let mut sym = matrix.to_dense_into(dense_buf);
let (scaling, scaling_info) = compute_scaling_dense_fast(matrix, &sym, ¶ms.scaling)?;
if let crate::scaling::ScalingInfo::PartialSingular { n_unmatched } = &scaling_info {
eprintln!(
"warning: MC64 matching left {} of {} variables unmatched; \
scaling is identity on those rows/columns",
n_unmatched, n
);
}
for (j, &s_j) in scaling.iter().enumerate() {
let col = j * n;
for (i, &s_i) in scaling.iter().enumerate().skip(j) {
sym.data[col + i] *= s_i * s_j;
}
}
let ff = factor_frontal_blocked_in_place(&mut sym, n, false, ¶ms.bk)?;
ws.dense_values = sym.data;
let inertia = ff.inertia.clone();
let needs_refinement = ff.needs_refinement;
let perm: Vec<usize> = (0..n).collect();
let perm_inv: Vec<usize> = (0..n).collect();
let row_indices: Vec<usize> = (0..n).collect();
let node = NodeFactors {
first_col: 0,
ncol: n,
nelim: ff.nelim,
n_delayed_in: 0,
nrow: n,
row_indices,
frontal_factors: ff,
inertia: inertia.clone(),
};
Ok((
SparseFactors {
n,
perm,
perm_inv,
node_factors: vec![node],
needs_refinement,
scaling,
scaling_info,
resolved_method: crate::symbolic::OrderingMethod::Amd,
resolved_amalgamation: crate::symbolic::AmalgamationStrategy::Adjacency,
resolved_preprocess: crate::symbolic::OrderingPreprocess::None,
},
inertia,
))
}
pub fn factorize_multifrontal_supernodal(
matrix: &CscMatrix,
symbolic: &SymbolicFactorization,
params: &NumericParams,
) -> Result<(SparseFactors, Inertia), FeralError> {
let mut ws = FactorWorkspace::new();
factorize_multifrontal_supernodal_with_workspace(matrix, symbolic, params, &mut ws)
}
pub fn factorize_multifrontal(
matrix: &CscMatrix,
symbolic: &SymbolicFactorization,
params: &NumericParams,
) -> Result<(SparseFactors, Inertia), FeralError> {
let mut ws = FactorWorkspace::new();
factorize_multifrontal_with_workspace(matrix, symbolic, params, &mut ws)
}
pub fn factorize_multifrontal_with_schur(
matrix: &CscMatrix,
symbolic: &SymbolicFactorization,
params: &NumericParams,
) -> Result<(SparseFactors, Inertia, SchurBlock), FeralError> {
let n_schur = symbolic.is_schur_tail.ok_or_else(|| {
FeralError::InvalidInput(
"factorize_multifrontal_with_schur requires symbolic produced by \
symbolic_factorize_with_schur (is_schur_tail is None)"
.to_string(),
)
})?;
if n_schur == 0 {
return Err(FeralError::InvalidInput(
"is_schur_tail = Some(0); use factorize_multifrontal instead".to_string(),
));
}
let n = symbolic.n;
let n_snodes = symbolic.supernodes.len();
let mut nvschur_per_snode = vec![0usize; n_snodes];
let schur_lo = n - n_schur;
for (s, snode) in symbolic.supernodes.iter().enumerate() {
let col_lo = snode.first_col;
let col_hi = col_lo + snode.ncol();
if col_hi <= schur_lo || col_lo >= n {
continue;
}
let lo = col_lo.max(schur_lo);
let hi = col_hi.min(n);
nvschur_per_snode[s] = hi - lo;
}
let last_snode = n_snodes
.checked_sub(1)
.ok_or_else(|| FeralError::InvalidInput("symbolic has zero supernodes".to_string()))?;
let last = &symbolic.supernodes[last_snode];
if last.first_col + last.ncol() != n {
return Err(FeralError::InvalidInput(
"Schur path expects last supernode to end at n-1".to_string(),
));
}
if nvschur_per_snode[last_snode] != n_schur {
return Err(FeralError::InvalidInput(format!(
"F3.2b scope: Schur tail must lie in a single root supernode \
(last snode covers {} of {} Schur columns); see \
dev/research/schur-complement.md F3.3",
nvschur_per_snode[last_snode], n_schur
)));
}
for &k in &nvschur_per_snode[..last_snode] {
debug_assert_eq!(k, 0, "nvschur > 0 outside last snode violates F3.2b scope");
}
let mut ws = FactorWorkspace::new();
factorize_multifrontal_with_schur_inner(matrix, symbolic, params, &mut ws, &nvschur_per_snode)
}
fn factorize_multifrontal_with_schur_inner(
matrix: &CscMatrix,
symbolic: &SymbolicFactorization,
params: &NumericParams,
ws: &mut FactorWorkspace,
nvschur_per_snode: &[usize],
) -> Result<(SparseFactors, Inertia, SchurBlock), FeralError> {
let n = symbolic.n;
let n_snodes = symbolic.supernodes.len();
ws.row_map.clear();
ws.row_map.resize(n, usize::MAX);
let (scaling_user, scaling_info) = crate::scaling::compute_scaling(matrix, ¶ms.scaling)?;
let scaling_pivot_order: Vec<f64> =
symbolic.perm.iter().map(|&old| scaling_user[old]).collect();
let permuted = permute_csc_values(matrix, &symbolic.perm, &symbolic.perm_inv)?;
let full_pattern = permuted.symmetric_pattern();
let mut is_root = vec![true; n_snodes];
for snode in &symbolic.supernodes {
for &child_idx in &snode.children {
if child_idx < n_snodes {
is_root[child_idx] = false;
}
}
}
let mut contrib_blocks: Vec<Option<ContribBlock>> = (0..n_snodes).map(|_| None).collect();
let mut node_factors: Vec<NodeFactors> = Vec::with_capacity(n_snodes);
let mut total_inertia = Inertia {
positive: 0,
negative: 0,
zero: 0,
};
let mut needs_refinement = false;
for (snode_idx, &nvschur) in nvschur_per_snode.iter().enumerate() {
let node = factor_one_supernode(
snode_idx,
symbolic,
&permuted,
&full_pattern,
&scaling_pivot_order,
&is_root,
params,
ws,
&mut contrib_blocks,
nvschur,
)?;
total_inertia.positive += node.inertia.positive;
total_inertia.negative += node.inertia.negative;
total_inertia.zero += node.inertia.zero;
if node.frontal_factors.needs_refinement {
needs_refinement = true;
}
node_factors.push(node);
}
let n_schur = nvschur_per_snode[n_snodes - 1];
debug_assert!(n_schur > 0);
let contrib = contrib_blocks[n_snodes - 1].take().ok_or_else(|| {
FeralError::InvalidInput(
"Schur path: root supernode produced no contribution block".to_string(),
)
})?;
if contrib.dim < n_schur {
return Err(FeralError::InvalidInput(format!(
"Schur extraction: root contrib dim {} < n_schur {}",
contrib.dim, n_schur
)));
}
let mut out = vec![0.0f64; n_schur * n_schur];
for j in 0..n_schur {
for i in 0..n_schur {
let val = if i >= j {
contrib.data[j * contrib.dim + i]
} else {
contrib.data[i * contrib.dim + j]
};
out[j * n_schur + i] = val;
}
}
let factors = SparseFactors {
n,
perm: symbolic.perm.clone(),
perm_inv: symbolic.perm_inv.clone(),
node_factors,
needs_refinement,
scaling: scaling_user,
scaling_info,
resolved_method: symbolic.resolved_method,
resolved_amalgamation: symbolic.resolved_amalgamation,
resolved_preprocess: symbolic.resolved_preprocess,
};
let schur = SchurBlock {
dim: n_schur,
data: out,
};
Ok((factors, total_inertia, schur))
}
pub fn factorize_multifrontal_with_workspace(
matrix: &CscMatrix,
symbolic: &SymbolicFactorization,
params: &NumericParams,
ws: &mut FactorWorkspace,
) -> Result<(SparseFactors, Inertia), FeralError> {
if should_use_dense_fast_path(matrix.n, matrix.row_idx.len()) {
return dense_fast_factor_with_workspace(matrix, params, ws);
}
factorize_multifrontal_supernodal_with_workspace(matrix, symbolic, params, ws)
}
pub fn factorize_multifrontal_supernodal_with_workspace(
matrix: &CscMatrix,
symbolic: &SymbolicFactorization,
params: &NumericParams,
ws: &mut FactorWorkspace,
) -> Result<(SparseFactors, Inertia), FeralError> {
let t_total = params.profiler.as_ref().map(|_| Instant::now());
let t_prologue = params.profiler.as_ref().map(|_| Instant::now());
let n = symbolic.n;
let n_snodes = symbolic.supernodes.len();
ws.row_map.clear();
ws.row_map.resize(n, usize::MAX);
let (scaling_user, scaling_info) =
compute_scaling_with_cache(matrix, ¶ms.scaling, symbolic.cached_mc64.as_ref())?;
if let crate::scaling::ScalingInfo::PartialSingular { n_unmatched } = &scaling_info {
eprintln!(
"warning: MC64 matching left {} of {} variables unmatched; \
scaling is identity on those rows/columns",
n_unmatched, n
);
}
let scaling_pivot_order: Vec<f64> =
symbolic.perm.iter().map(|&old| scaling_user[old]).collect();
debug_assert_eq!(scaling_pivot_order.len(), n);
let permuted = permute_csc_values(matrix, &symbolic.perm, &symbolic.perm_inv)?;
let full_pattern = permuted.symmetric_pattern();
let mut is_root = vec![true; n_snodes];
for snode in &symbolic.supernodes {
for &child_idx in &snode.children {
if child_idx < n_snodes {
is_root[child_idx] = false;
}
}
}
let mut contrib_blocks: Vec<Option<ContribBlock>> = (0..n_snodes).map(|_| None).collect();
let mut node_factors: Vec<NodeFactors> = Vec::with_capacity(n_snodes);
let mut total_inertia = Inertia {
positive: 0,
negative: 0,
zero: 0,
};
let mut needs_refinement = false;
let use_small_leaf =
params.small_leaf == SmallLeafBatch::On && !symbolic.small_leaf_groups.is_empty();
let mut snode_idx = 0usize;
let prologue_us = t_prologue.map(|t| t.elapsed().as_micros() as u64);
while snode_idx < n_snodes {
if use_small_leaf {
if let Some(gid) = symbolic.snode_group[snode_idx] {
let group = &symbolic.small_leaf_groups[gid];
debug_assert_eq!(
group.members.first(),
Some(&snode_idx),
"group members must start at current snode_idx"
);
for (i, &m) in group.members.iter().enumerate() {
let t_snode = params.profiler.as_ref().map(|_| Instant::now());
let node = factor_one_small_leaf(
m,
&group.member_rows[i],
symbolic,
&permuted,
&scaling_pivot_order,
&is_root,
params,
ws,
&mut contrib_blocks,
)?;
if let (Some(arc), Some(t)) = (params.profiler.as_ref(), t_snode) {
let snode = &symbolic.supernodes[m];
let timing = SupernodeTiming {
snode_idx: m,
nrow: snode.nrow,
ncol: snode.ncol,
us: t.elapsed().as_micros() as u64,
};
if let Ok(mut prof) = arc.lock() {
prof.timings.push(timing);
}
}
total_inertia.positive += node.inertia.positive;
total_inertia.negative += node.inertia.negative;
total_inertia.zero += node.inertia.zero;
if node.frontal_factors.needs_refinement {
needs_refinement = true;
}
node_factors.push(node);
}
snode_idx += group.members.len();
continue;
}
}
let t_snode = params.profiler.as_ref().map(|_| Instant::now());
let node = factor_one_supernode(
snode_idx,
symbolic,
&permuted,
&full_pattern,
&scaling_pivot_order,
&is_root,
params,
ws,
&mut contrib_blocks,
0, )?;
if let (Some(arc), Some(t)) = (params.profiler.as_ref(), t_snode) {
let snode = &symbolic.supernodes[snode_idx];
let timing = SupernodeTiming {
snode_idx,
nrow: snode.nrow,
ncol: snode.ncol,
us: t.elapsed().as_micros() as u64,
};
if let Ok(mut prof) = arc.lock() {
prof.timings.push(timing);
}
}
total_inertia.positive += node.inertia.positive;
total_inertia.negative += node.inertia.negative;
total_inertia.zero += node.inertia.zero;
if node.frontal_factors.needs_refinement {
needs_refinement = true;
}
node_factors.push(node);
snode_idx += 1;
}
let t_epilogue = params.profiler.as_ref().map(|_| Instant::now());
let result = Ok((
SparseFactors {
n,
perm: symbolic.perm.clone(),
perm_inv: symbolic.perm_inv.clone(),
node_factors,
needs_refinement,
scaling: scaling_user,
scaling_info,
resolved_method: symbolic.resolved_method,
resolved_amalgamation: symbolic.resolved_amalgamation,
resolved_preprocess: symbolic.resolved_preprocess,
},
total_inertia,
));
if let Some(arc) = params.profiler.as_ref() {
if let Ok(mut prof) = arc.lock() {
prof.prologue_us = prologue_us.unwrap_or(0);
prof.epilogue_us = t_epilogue
.map(|t| t.elapsed().as_micros() as u64)
.unwrap_or(0);
prof.total_us = t_total.map(|t| t.elapsed().as_micros() as u64).unwrap_or(0);
}
}
result
}
#[allow(clippy::too_many_arguments)]
fn factor_one_supernode(
snode_idx: usize,
symbolic: &SymbolicFactorization,
permuted: &CscMatrix,
full_pattern: &crate::sparse::csc::CscPattern,
scaling_pivot_order: &[f64],
is_root: &[bool],
params: &NumericParams,
ws: &mut FactorWorkspace,
contrib_blocks: &mut [Option<ContribBlock>],
nvschur: usize,
) -> Result<NodeFactors, FeralError> {
let snode = &symbolic.supernodes[snode_idx];
let own_ncol = snode.ncol();
let nrow = snode.nrow;
if nrow == 0 || own_ncol == 0 {
return Ok(NodeFactors {
first_col: snode.first_col,
ncol: 0,
nelim: 0,
n_delayed_in: 0,
nrow: 0,
row_indices: Vec::new(),
frontal_factors: FrontalFactors {
nrow: 0,
ncol: 0,
nelim: 0,
l: Vec::new(),
d_diag: Vec::new(),
d_subdiag: Vec::new(),
perm: Vec::new(),
perm_inv: Vec::new(),
contrib: Vec::new(),
contrib_dim: 0,
n_delayed: 0,
inertia: Inertia {
positive: 0,
negative: 0,
zero: 0,
},
needs_refinement: false,
n_rook_rescues: 0,
zero_tol: params.bk.zero_tol,
zero_tol_2x2: params.bk.zero_tol_2x2,
},
inertia: Inertia {
positive: 0,
negative: 0,
zero: 0,
},
});
}
let n_delayed_in: usize = snode
.children
.iter()
.filter_map(|&c| contrib_blocks[c].as_ref())
.map(|c| c.n_delayed)
.sum();
let expanded_ncol = own_ncol + n_delayed_in;
let mut row_indices = build_row_indices(
snode,
full_pattern,
contrib_blocks,
&mut ws.build_delayed,
&mut ws.build_trailing,
&mut ws.build_seen,
);
let actual_nrow = row_indices.len();
debug_assert!(
actual_nrow >= expanded_ncol,
"row_indices ({}) must cover the expanded fully-summed block ({})",
actual_nrow,
expanded_ncol
);
#[cfg(debug_assertions)]
{
let first_col = snode.first_col;
let own_last = first_col + own_ncol;
for (pos, &r) in row_indices.iter().enumerate().skip(expanded_ncol) {
debug_assert!(
r >= own_last,
"trailing row {} at position {} of supernode (first_col={}, own_ncol={}, expanded_ncol={}) is < first_col+own_ncol={}",
r, pos, first_col, own_ncol, expanded_ncol, own_last
);
}
}
let own_col_offset = if nvschur > 0 && n_delayed_in > 0 {
let mut swapped = Vec::with_capacity(actual_nrow);
swapped.extend_from_slice(&row_indices[own_ncol..expanded_ncol]);
swapped.extend_from_slice(&row_indices[..own_ncol]);
swapped.extend_from_slice(&row_indices[expanded_ncol..]);
row_indices = swapped;
n_delayed_in
} else {
0
};
for (local, &global) in row_indices.iter().enumerate() {
ws.row_map[global] = local;
}
let scaling = scaling_pivot_order;
let frontal_buf = std::mem::take(&mut ws.frontal_values);
let mut frontal = SymmetricMatrix::from_pooled_buf(actual_nrow, frontal_buf);
for (k_local, &gj) in row_indices[own_col_offset..own_col_offset + own_ncol]
.iter()
.enumerate()
{
let local_j = own_col_offset + k_local;
let s_j = scaling[gj];
for k in permuted.col_ptr[gj]..permuted.col_ptr[gj + 1] {
let gi = permuted.row_idx[k];
let local_i = ws.row_map[gi];
if local_i != usize::MAX {
let val = permuted.values[k] * scaling[gi] * s_j;
frontal.set(local_i, local_j, val);
}
}
}
for &child_idx in &snode.children {
if let Some(contrib) = contrib_blocks[child_idx].take() {
extend_add(&contrib, &ws.row_map, &mut frontal);
}
}
debug_assert!(
nvschur == 0 || is_root[snode_idx],
"nvschur > 0 only valid at root supernodes (Schur tail invariant)"
);
debug_assert!(nvschur <= expanded_ncol);
let may_delay = !is_root[snode_idx];
let eliminable = expanded_ncol - nvschur;
let mut ff = factor_frontal_blocked_in_place(&mut frontal, eliminable, may_delay, ¶ms.bk)?;
ws.frontal_values = frontal.data;
let node_inertia = ff.inertia.clone();
let node_nelim = ff.nelim;
let node_n_delayed = ff.n_delayed;
if ff.contrib_dim > 0 {
let cdim = ff.contrib_dim;
let mut contrib_row_indices = Vec::with_capacity(cdim);
for cj in 0..cdim {
contrib_row_indices.push(row_indices[ff.perm[node_nelim + cj]]);
}
let contrib_data = std::mem::take(&mut ff.contrib);
contrib_blocks[snode_idx] = Some(ContribBlock {
row_indices: contrib_row_indices,
data: contrib_data,
dim: cdim,
n_delayed: node_n_delayed,
});
}
for &global in &row_indices {
ws.row_map[global] = usize::MAX;
}
Ok(NodeFactors {
first_col: snode.first_col,
ncol: expanded_ncol,
nelim: node_nelim,
n_delayed_in,
nrow: actual_nrow,
row_indices,
frontal_factors: ff,
inertia: node_inertia,
})
}
#[allow(clippy::too_many_arguments)]
fn factor_one_small_leaf(
snode_idx: usize,
precomputed_rows: &[usize],
symbolic: &SymbolicFactorization,
permuted: &CscMatrix,
scaling_pivot_order: &[f64],
is_root: &[bool],
params: &NumericParams,
ws: &mut FactorWorkspace,
contrib_blocks: &mut [Option<ContribBlock>],
) -> Result<NodeFactors, FeralError> {
let snode = &symbolic.supernodes[snode_idx];
debug_assert!(
snode.children.is_empty(),
"factor_one_small_leaf called on non-leaf supernode {}",
snode_idx
);
let own_ncol = snode.ncol();
let nrow = snode.nrow;
if nrow == 0 || own_ncol == 0 {
return Ok(NodeFactors {
first_col: snode.first_col,
ncol: 0,
nelim: 0,
n_delayed_in: 0,
nrow: 0,
row_indices: Vec::new(),
frontal_factors: FrontalFactors {
nrow: 0,
ncol: 0,
nelim: 0,
l: Vec::new(),
d_diag: Vec::new(),
d_subdiag: Vec::new(),
perm: Vec::new(),
perm_inv: Vec::new(),
contrib: Vec::new(),
contrib_dim: 0,
n_delayed: 0,
inertia: Inertia {
positive: 0,
negative: 0,
zero: 0,
},
needs_refinement: false,
n_rook_rescues: 0,
zero_tol: params.bk.zero_tol,
zero_tol_2x2: params.bk.zero_tol_2x2,
},
inertia: Inertia {
positive: 0,
negative: 0,
zero: 0,
},
});
}
let row_indices = precomputed_rows.to_vec();
let actual_nrow = row_indices.len();
let expanded_ncol = own_ncol;
debug_assert!(actual_nrow >= expanded_ncol);
for (local, &global) in row_indices.iter().enumerate() {
ws.row_map[global] = local;
}
let scaling = scaling_pivot_order;
let frontal_buf = std::mem::take(&mut ws.frontal_values);
let mut frontal = SymmetricMatrix::from_pooled_buf(actual_nrow, frontal_buf);
for (local_j, &gj) in row_indices[..own_ncol].iter().enumerate() {
let s_j = scaling[gj];
for k in permuted.col_ptr[gj]..permuted.col_ptr[gj + 1] {
let gi = permuted.row_idx[k];
let local_i = ws.row_map[gi];
if local_i != usize::MAX {
let val = permuted.values[k] * scaling[gi] * s_j;
frontal.set(local_i, local_j, val);
}
}
}
let may_delay = !is_root[snode_idx];
let mut ff =
factor_frontal_blocked_in_place(&mut frontal, expanded_ncol, may_delay, ¶ms.bk)?;
ws.frontal_values = frontal.data;
let node_inertia = ff.inertia.clone();
let node_nelim = ff.nelim;
let node_n_delayed = ff.n_delayed;
if ff.contrib_dim > 0 {
let cdim = ff.contrib_dim;
let mut contrib_row_indices = Vec::with_capacity(cdim);
for cj in 0..cdim {
contrib_row_indices.push(row_indices[ff.perm[node_nelim + cj]]);
}
let contrib_data = std::mem::take(&mut ff.contrib);
contrib_blocks[snode_idx] = Some(ContribBlock {
row_indices: contrib_row_indices,
data: contrib_data,
dim: cdim,
n_delayed: node_n_delayed,
});
}
for &global in &row_indices {
ws.row_map[global] = usize::MAX;
}
Ok(NodeFactors {
first_col: snode.first_col,
ncol: expanded_ncol,
nelim: node_nelim,
n_delayed_in: 0,
nrow: actual_nrow,
row_indices,
frontal_factors: ff,
inertia: node_inertia,
})
}
pub const N_PAR_MIN: usize = 32;
pub fn should_parallelize_assembly(symbolic: &SymbolicFactorization) -> bool {
if symbolic.supernodes.len() < N_PAR_MIN {
return false;
}
symbolic.supernodes.iter().any(|s| s.children.len() >= 2)
}
pub fn factorize_multifrontal_parallel_with_workspace(
matrix: &CscMatrix,
symbolic: &SymbolicFactorization,
params: &NumericParams,
ws: &mut FactorWorkspace,
) -> Result<(SparseFactors, Inertia), FeralError> {
if should_use_dense_fast_path(matrix.n, matrix.row_idx.len()) {
return dense_fast_factor_with_workspace(matrix, params, ws);
}
if should_parallelize_assembly(symbolic) {
return factorize_multifrontal_supernodal_parallel(matrix, symbolic, params);
}
factorize_multifrontal_supernodal_with_workspace(matrix, symbolic, params, ws)
}
pub fn factorize_multifrontal_parallel(
matrix: &CscMatrix,
symbolic: &SymbolicFactorization,
params: &NumericParams,
) -> Result<(SparseFactors, Inertia), FeralError> {
let mut ws = FactorWorkspace::new();
factorize_multifrontal_parallel_with_workspace(matrix, symbolic, params, &mut ws)
}
pub fn factorize_multifrontal_supernodal_parallel(
matrix: &CscMatrix,
symbolic: &SymbolicFactorization,
params: &NumericParams,
) -> Result<(SparseFactors, Inertia), FeralError> {
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Mutex;
let n = symbolic.n;
let n_snodes = symbolic.supernodes.len();
let telemetry = params.parallel_telemetry.as_deref();
let t_phase = telemetry.map(|_| std::time::Instant::now());
let (scaling_user, scaling_info) =
compute_scaling_with_cache(matrix, ¶ms.scaling, symbolic.cached_mc64.as_ref())?;
if let crate::scaling::ScalingInfo::PartialSingular { n_unmatched } = &scaling_info {
eprintln!(
"warning: MC64 matching left {} of {} variables unmatched; \
scaling is identity on those rows/columns",
n_unmatched, n
);
}
let scaling_pivot_order: Vec<f64> =
symbolic.perm.iter().map(|&old| scaling_user[old]).collect();
if let (Some(t), Some(start)) = (telemetry, t_phase) {
t.phase_scaling_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let t_phase = telemetry.map(|_| std::time::Instant::now());
let permuted = permute_csc_values(matrix, &symbolic.perm, &symbolic.perm_inv)?;
if let (Some(t), Some(start)) = (telemetry, t_phase) {
t.phase_permute_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let t_phase = telemetry.map(|_| std::time::Instant::now());
let full_pattern = permuted.symmetric_pattern();
if let (Some(t), Some(start)) = (telemetry, t_phase) {
t.phase_symmetric_pattern_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let t_phase = telemetry.map(|_| std::time::Instant::now());
let mut is_root = vec![true; n_snodes];
for snode in &symbolic.supernodes {
for &child_idx in &snode.children {
if child_idx < n_snodes {
is_root[child_idx] = false;
}
}
}
let mut parents: Vec<Option<usize>> = vec![None; n_snodes];
for (i, snode) in symbolic.supernodes.iter().enumerate() {
for &c in &snode.children {
if c < n_snodes {
parents[c] = Some(i);
}
}
}
let pending: Vec<AtomicUsize> = symbolic
.supernodes
.iter()
.map(|s| {
let cnt = s.children.iter().filter(|&&c| c < n_snodes).count();
AtomicUsize::new(cnt)
})
.collect();
let contrib_blocks: Mutex<Vec<Option<ContribBlock>>> =
Mutex::new((0..n_snodes).map(|_| None).collect());
let node_factors_out: Mutex<Vec<Option<NodeFactors>>> =
Mutex::new((0..n_snodes).map(|_| None).collect());
let first_error: Mutex<Option<FeralError>> = Mutex::new(None);
if let (Some(t), Some(start)) = (telemetry, t_phase) {
t.phase_tree_setup_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let t_phase = telemetry.map(|_| std::time::Instant::now());
let num_threads = rayon::current_num_threads().max(1);
let thread_ws: Vec<Mutex<FactorWorkspace>> = (0..num_threads + 1)
.map(|_| {
let mut w = FactorWorkspace::new();
w.row_map.resize(n, usize::MAX);
w.build_seen.resize(n, false);
w.local_contribs.resize_with(n_snodes, || None);
Mutex::new(w)
})
.collect();
if let (Some(t), Some(start)) = (telemetry, t_phase) {
t.phase_thread_ws_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let t_phase = telemetry.map(|_| std::time::Instant::now());
let leaves: Vec<usize> = symbolic
.supernodes
.iter()
.enumerate()
.filter_map(|(i, s)| {
if s.children.iter().all(|&c| c >= n_snodes) {
Some(i)
} else {
None
}
})
.collect();
if let (Some(t), Some(start)) = (telemetry, t_phase) {
t.phase_leaves_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let t_phase = telemetry.map(|_| std::time::Instant::now());
rayon::scope(|scope| {
for &leaf_idx in &leaves {
run_parallel_task(
scope,
leaf_idx,
symbolic,
&permuted,
&full_pattern,
&scaling_pivot_order,
&is_root,
params,
&parents,
&pending,
&contrib_blocks,
&node_factors_out,
&first_error,
&thread_ws,
);
}
});
if let (Some(t), Some(start)) = (telemetry, t_phase) {
t.phase_scope_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let t_phase = telemetry.map(|_| std::time::Instant::now());
let err_opt = match first_error.into_inner() {
Ok(v) => v,
Err(p) => p.into_inner(),
};
if let Some(err) = err_opt {
return Err(err);
}
let nodes_vec = match node_factors_out.into_inner() {
Ok(v) => v,
Err(p) => p.into_inner(),
};
let mut final_nodes: Vec<NodeFactors> = Vec::with_capacity(n_snodes);
let mut total_inertia = Inertia {
positive: 0,
negative: 0,
zero: 0,
};
let mut needs_refinement = false;
for opt in nodes_vec.into_iter() {
let node = match opt {
Some(n) => n,
None => {
return Err(FeralError::InvalidInput(
"parallel driver: supernode was not processed (graph stall)".to_string(),
));
}
};
total_inertia.positive += node.inertia.positive;
total_inertia.negative += node.inertia.negative;
total_inertia.zero += node.inertia.zero;
if node.frontal_factors.needs_refinement {
needs_refinement = true;
}
final_nodes.push(node);
}
if let (Some(t), Some(start)) = (telemetry, t_phase) {
t.phase_collect_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
Ok((
SparseFactors {
n,
perm: symbolic.perm.clone(),
perm_inv: symbolic.perm_inv.clone(),
node_factors: final_nodes,
needs_refinement,
scaling: scaling_user,
scaling_info,
resolved_method: symbolic.resolved_method,
resolved_amalgamation: symbolic.resolved_amalgamation,
resolved_preprocess: symbolic.resolved_preprocess,
},
total_inertia,
))
}
#[allow(clippy::too_many_arguments)]
fn run_parallel_task<'a>(
scope: &rayon::Scope<'a>,
snode_idx: usize,
symbolic: &'a SymbolicFactorization,
permuted: &'a CscMatrix,
full_pattern: &'a crate::sparse::csc::CscPattern,
scaling_pivot_order: &'a [f64],
is_root: &'a [bool],
params: &'a NumericParams,
parents: &'a [Option<usize>],
pending: &'a [std::sync::atomic::AtomicUsize],
contrib_blocks: &'a std::sync::Mutex<Vec<Option<ContribBlock>>>,
node_factors_out: &'a std::sync::Mutex<Vec<Option<NodeFactors>>>,
first_error: &'a std::sync::Mutex<Option<FeralError>>,
thread_ws: &'a [std::sync::Mutex<FactorWorkspace>],
) {
use std::sync::atomic::Ordering;
scope.spawn(move |s| {
let telemetry = params.parallel_telemetry.as_deref();
let t_task_start = telemetry.map(|_| std::time::Instant::now());
if let Some(t) = telemetry {
t.n_tasks.fetch_add(1, Ordering::Relaxed);
}
{
let err_guard = match first_error.lock() {
Ok(g) => g,
Err(p) => p.into_inner(),
};
if err_guard.is_some() {
if let (Some(t), Some(start)) = (telemetry, t_task_start) {
t.task_wall_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
return;
}
}
let snode = &symbolic.supernodes[snode_idx];
let n_snodes = symbolic.supernodes.len();
let thread_idx = rayon::current_thread_index().unwrap_or(thread_ws.len() - 1);
let thread_idx = thread_idx.min(thread_ws.len() - 1);
let ws_mtx = &thread_ws[thread_idx];
let (result, own_contrib) = {
let t_ws_wait = telemetry.map(|_| std::time::Instant::now());
let mut ws_guard = match ws_mtx.lock() {
Ok(g) => g,
Err(p) => p.into_inner(),
};
if let (Some(t), Some(start)) = (telemetry, t_ws_wait) {
t.ws_lock_wait_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let mut local_contribs = std::mem::take(&mut ws_guard.local_contribs);
{
let t_wait_start = telemetry.map(|_| std::time::Instant::now());
let mut shared = match contrib_blocks.lock() {
Ok(g) => g,
Err(p) => p.into_inner(),
};
if let (Some(t), Some(start)) = (telemetry, t_wait_start) {
t.contrib_wait_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let hold_start = telemetry.map(|_| std::time::Instant::now());
for &c in &snode.children {
if c < n_snodes {
local_contribs[c] = shared[c].take();
}
}
if let (Some(t), Some(start)) = (telemetry, hold_start) {
t.contrib_hold_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
}
let factor_start = telemetry.map(|_| std::time::Instant::now());
let res = factor_one_supernode(
snode_idx,
symbolic,
permuted,
full_pattern,
scaling_pivot_order,
is_root,
params,
&mut ws_guard,
&mut local_contribs,
0, );
if let (Some(t), Some(start)) = (telemetry, factor_start) {
t.factor_body_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let own = local_contribs[snode_idx].take();
ws_guard.local_contribs = local_contribs;
(res, own)
};
match result {
Ok(node) => {
{
let t_wait_start = telemetry.map(|_| std::time::Instant::now());
let mut shared = match contrib_blocks.lock() {
Ok(g) => g,
Err(p) => p.into_inner(),
};
if let (Some(t), Some(start)) = (telemetry, t_wait_start) {
t.contrib_wait_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let hold_start = telemetry.map(|_| std::time::Instant::now());
shared[snode_idx] = own_contrib;
if let (Some(t), Some(start)) = (telemetry, hold_start) {
t.contrib_hold_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
}
{
let t_wait_start = telemetry.map(|_| std::time::Instant::now());
let mut nf = match node_factors_out.lock() {
Ok(g) => g,
Err(p) => p.into_inner(),
};
if let (Some(t), Some(start)) = (telemetry, t_wait_start) {
t.node_factors_wait_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
let hold_start = telemetry.map(|_| std::time::Instant::now());
nf[snode_idx] = Some(node);
if let (Some(t), Some(start)) = (telemetry, hold_start) {
t.node_factors_hold_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
}
if let Some(parent_idx) = parents[snode_idx] {
let prev = pending[parent_idx].fetch_sub(1, Ordering::AcqRel);
if prev == 1 {
run_parallel_task(
s,
parent_idx,
symbolic,
permuted,
full_pattern,
scaling_pivot_order,
is_root,
params,
parents,
pending,
contrib_blocks,
node_factors_out,
first_error,
thread_ws,
);
}
}
}
Err(e) => {
let mut err_guard = match first_error.lock() {
Ok(g) => g,
Err(p) => p.into_inner(),
};
if err_guard.is_none() {
*err_guard = Some(e);
}
}
}
if let (Some(t), Some(start)) = (telemetry, t_task_start) {
t.task_wall_ns
.fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed);
}
});
}
fn permute_csc_values(
matrix: &CscMatrix,
_perm: &[usize],
perm_inv: &[usize],
) -> Result<CscMatrix, FeralError> {
let n = matrix.n;
let mut triplets: Vec<(usize, usize, f64)> = Vec::with_capacity(matrix.nnz());
for old_j in 0..n {
let new_j = perm_inv[old_j];
for k in matrix.col_ptr[old_j]..matrix.col_ptr[old_j + 1] {
let old_i = matrix.row_idx[k];
let new_i = perm_inv[old_i];
let val = matrix.values[k];
if new_i >= new_j {
triplets.push((new_i, new_j, val));
} else {
triplets.push((new_j, new_i, val));
}
}
}
let rows: Vec<usize> = triplets.iter().map(|t| t.0).collect();
let cols: Vec<usize> = triplets.iter().map(|t| t.1).collect();
let vals: Vec<f64> = triplets.iter().map(|t| t.2).collect();
CscMatrix::from_triplets(n, &rows, &cols, &vals)
}
fn build_row_indices(
snode: &crate::symbolic::supernode::Supernode,
full_pattern: &crate::sparse::csc::CscPattern,
contrib_blocks: &[Option<ContribBlock>],
build_delayed: &mut Vec<usize>,
build_trailing: &mut Vec<usize>,
build_seen: &mut Vec<bool>,
) -> Vec<usize> {
let own_ncol = snode.ncol();
let first_col = snode.first_col;
let n = full_pattern.n;
if build_seen.len() < n {
build_seen.resize(n, false);
}
build_delayed.clear();
for &child_idx in &snode.children {
if let Some(contrib) = &contrib_blocks[child_idx] {
build_delayed.extend_from_slice(&contrib.row_indices[..contrib.n_delayed]);
}
}
for seen in build_seen.iter_mut().skip(first_col).take(own_ncol) {
*seen = true;
}
for &c in build_delayed.iter() {
build_seen[c] = true;
}
let own_last = first_col + own_ncol;
build_trailing.clear();
for j in first_col..first_col + own_ncol {
for k in full_pattern.col_ptr[j]..full_pattern.col_ptr[j + 1] {
let r = full_pattern.row_idx[k];
if r < own_last {
continue;
}
if !build_seen[r] {
build_seen[r] = true;
build_trailing.push(r);
}
}
}
for &child_idx in &snode.children {
if let Some(contrib) = &contrib_blocks[child_idx] {
for &r in &contrib.row_indices[contrib.n_delayed..] {
if r < own_last {
continue;
}
if !build_seen[r] {
build_seen[r] = true;
build_trailing.push(r);
}
}
}
}
build_trailing.sort_unstable();
let total = own_ncol + build_delayed.len() + build_trailing.len();
let mut result = Vec::with_capacity(total);
result.extend(first_col..first_col + own_ncol);
result.extend_from_slice(build_delayed);
result.extend_from_slice(build_trailing);
for seen in build_seen.iter_mut().skip(first_col).take(own_ncol) {
*seen = false;
}
for &c in build_delayed.iter() {
build_seen[c] = false;
}
for &r in build_trailing.iter() {
build_seen[r] = false;
}
result
}
#[derive(Debug)]
struct ContribBlock {
row_indices: Vec<usize>,
data: Vec<f64>,
dim: usize,
n_delayed: usize,
}
fn extend_add(contrib: &ContribBlock, parent_row_map: &[usize], frontal: &mut SymmetricMatrix) {
let cdim = contrib.dim;
for cj in 0..cdim {
let parent_j = parent_row_map[contrib.row_indices[cj]];
if parent_j == usize::MAX {
continue;
}
for ci in cj..cdim {
let parent_i = parent_row_map[contrib.row_indices[ci]];
if parent_i == usize::MAX {
continue;
}
let val = contrib.data[cj * cdim + ci];
if val == 0.0 {
continue;
}
if parent_i >= parent_j {
frontal.set(parent_i, parent_j, frontal.get(parent_i, parent_j) + val);
} else {
frontal.set(parent_j, parent_i, frontal.get(parent_j, parent_i) + val);
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::dense::factor::ZeroPivotAction;
use crate::symbolic::{symbolic_factorize, SupernodeParams};
fn make_params() -> NumericParams {
NumericParams::with_bk(BunchKaufmanParams {
on_zero_pivot: ZeroPivotAction::ForceAccept,
..BunchKaufmanParams::default()
})
}
#[test]
fn test_factorize_diagonal() {
let m = CscMatrix::from_triplets(3, &[0, 1, 2], &[0, 1, 2], &[2.0, 3.0, 5.0]).unwrap();
let sym = symbolic_factorize(&m, &SupernodeParams::default()).unwrap();
let (factors, inertia) = factorize_multifrontal(&m, &sym, &make_params()).unwrap();
assert_eq!(inertia.positive, 3);
assert_eq!(inertia.negative, 0);
assert_eq!(inertia.zero, 0);
assert_eq!(factors.n, 3);
}
#[test]
fn test_summary_one_liner() {
let n = 32usize;
let mut rows: Vec<usize> = Vec::new();
let mut cols: Vec<usize> = Vec::new();
let mut vals: Vec<f64> = Vec::new();
for i in 0..n {
rows.push(i);
cols.push(i);
vals.push(2.0);
if i + 1 < n {
rows.push(i + 1);
cols.push(i);
vals.push(-1.0);
}
}
let m = CscMatrix::from_triplets(n, &rows, &cols, &vals).unwrap();
let sym = symbolic_factorize(&m, &SupernodeParams::default()).unwrap();
let (factors, inertia) = factorize_multifrontal(&m, &sym, &make_params()).unwrap();
let s = factors.summary();
assert!(s.contains("ord="), "summary missing ord field: {}", s);
assert!(s.contains("amalg="), "summary missing amalg field: {}", s);
assert!(
s.contains("preproc="),
"summary missing preproc field: {}",
s
);
assert!(
s.contains("scaling="),
"summary missing scaling field: {}",
s
);
let nnz_l = factors.factor_nnz();
assert!(nnz_l > 0, "tridiagonal factor_nnz must be > 0");
assert!(
s.contains(&format!("nnz_L={}", nnz_l)),
"summary nnz_L mismatch: got {}, want nnz_L={}",
s,
nnz_l
);
let expected = format!(
"inertia=({},{},{})",
inertia.positive, inertia.negative, inertia.zero
);
assert!(
s.contains(&expected),
"summary inertia mismatch: got {}, want substring {}",
s,
expected
);
assert_ne!(
factors.resolved_amalgamation,
crate::symbolic::AmalgamationStrategy::Auto
);
assert_ne!(
factors.resolved_method,
crate::symbolic::OrderingMethod::Auto
);
assert_ne!(
factors.resolved_preprocess,
crate::symbolic::OrderingPreprocess::Auto
);
assert_eq!(factors.resolved_method, sym.resolved_method);
assert_eq!(factors.resolved_amalgamation, sym.resolved_amalgamation);
assert_eq!(factors.resolved_preprocess, sym.resolved_preprocess);
}
#[test]
fn test_factorize_tridiagonal() {
let m = CscMatrix::from_triplets(
3,
&[0, 1, 1, 2, 2],
&[0, 0, 1, 1, 2],
&[2.0, -1.0, 2.0, -1.0, 2.0],
)
.unwrap();
let sym = symbolic_factorize(&m, &SupernodeParams::default()).unwrap();
let (factors, inertia) = factorize_multifrontal(&m, &sym, &make_params()).unwrap();
assert_eq!(inertia.positive, 3);
assert_eq!(inertia.negative, 0);
assert_eq!(inertia.zero, 0);
assert_eq!(factors.n, 3);
}
#[test]
fn test_factorize_matches_dense() {
let m = CscMatrix::from_triplets(
3,
&[0, 1, 1, 2, 2],
&[0, 0, 1, 1, 2],
&[2.0, -1.0, 3.0, -1.0, 4.0],
)
.unwrap();
let dense_mat = m.to_dense();
let params = make_params();
let (_, dense_inertia) = factor(&dense_mat, ¶ms.bk).unwrap();
let sym = symbolic_factorize(&m, &SupernodeParams::default()).unwrap();
let (_, sparse_inertia) = factorize_multifrontal(&m, &sym, ¶ms).unwrap();
assert_eq!(sparse_inertia, dense_inertia);
}
#[test]
fn test_factorize_kkt() {
let m = CscMatrix::from_triplets(
3,
&[0, 1, 2, 2, 2],
&[0, 1, 0, 1, 2],
&[2.0, 3.0, 1.0, 1.0, -1e-8],
)
.unwrap();
let sym = symbolic_factorize(&m, &SupernodeParams::default()).unwrap();
let params = make_params();
let (_, inertia) = factorize_multifrontal(&m, &sym, ¶ms).unwrap();
assert_eq!(inertia.positive, 2);
assert_eq!(inertia.negative, 1);
assert_eq!(inertia.zero, 0);
}
#[test]
fn test_factorize_indefinite() {
let m = CscMatrix::from_triplets(2, &[0, 1, 1], &[0, 0, 1], &[1.0, 2.0, 1.0]).unwrap();
let sym = symbolic_factorize(&m, &SupernodeParams::default()).unwrap();
let params = make_params();
let (_, inertia) = factorize_multifrontal(&m, &sym, ¶ms).unwrap();
assert_eq!(inertia.positive, 1);
assert_eq!(inertia.negative, 1);
assert_eq!(inertia.zero, 0);
}
#[test]
fn factorize_multifrontal_with_two_strategies_on_one_symbolic() {
use crate::scaling::ScalingStrategy;
let m = CscMatrix::from_triplets(
3,
&[0, 2, 1, 2, 2],
&[0, 0, 1, 1, 2],
&[2.0, -1.0, 2.0, -1.0, 0.0],
)
.unwrap();
let sym = symbolic_factorize(&m, &SupernodeParams::default()).unwrap();
let infnorm = NumericParams {
bk: BunchKaufmanParams {
on_zero_pivot: ZeroPivotAction::ForceAccept,
..BunchKaufmanParams::default()
},
scaling: ScalingStrategy::InfNorm,
small_leaf: Default::default(),
profiler: None,
parallel_telemetry: None,
};
let identity = NumericParams {
bk: infnorm.bk.clone(),
scaling: ScalingStrategy::Identity,
small_leaf: Default::default(),
profiler: None,
parallel_telemetry: None,
};
let (_, i_inf) = factorize_multifrontal(&m, &sym, &infnorm).unwrap();
let (_, i_id) = factorize_multifrontal(&m, &sym, &identity).unwrap();
assert_eq!(i_inf.positive, 2);
assert_eq!(i_inf.negative, 1);
assert_eq!(i_id.positive, 2);
assert_eq!(i_id.negative, 1);
}
#[test]
fn dense_fast_factor_auto_matches_explicit_infnorm_bitwise() {
use crate::scaling::ScalingStrategy;
let n = 24usize;
let mut rows = Vec::new();
let mut cols = Vec::new();
let mut vals = Vec::new();
for j in 0..n {
rows.push(j);
cols.push(j);
vals.push(10.0 * (j as f64 + 1.0));
for i in (j + 1)..n {
rows.push(i);
cols.push(j);
vals.push(1.0 + 0.1 * (i - j) as f64);
}
}
let m = CscMatrix::from_triplets(n, &rows, &cols, &vals).unwrap();
assert!(
should_use_dense_fast_path(m.n, m.row_idx.len()),
"test setup: dense fast-path gate must fire"
);
let auto = NumericParams {
bk: BunchKaufmanParams {
on_zero_pivot: ZeroPivotAction::ForceAccept,
..BunchKaufmanParams::default()
},
scaling: ScalingStrategy::Auto,
small_leaf: Default::default(),
profiler: None,
parallel_telemetry: None,
};
let infnorm = NumericParams {
scaling: ScalingStrategy::InfNorm,
..auto.clone()
};
let (f_auto, i_auto) = dense_fast_factor(&m, &auto).unwrap();
let (f_inf, i_inf) = dense_fast_factor(&m, &infnorm).unwrap();
assert_eq!(i_auto, i_inf, "inertia diverged under Auto vs InfNorm");
assert_eq!(
f_auto.node_factors.len(),
f_inf.node_factors.len(),
"node count diverged",
);
for (k, (na, ni)) in f_auto
.node_factors
.iter()
.zip(f_inf.node_factors.iter())
.enumerate()
{
assert_eq!(
na.frontal_factors.d_diag.len(),
ni.frontal_factors.d_diag.len(),
"node {} D-diag length diverged",
k,
);
for (j, (da, di)) in na
.frontal_factors
.d_diag
.iter()
.zip(ni.frontal_factors.d_diag.iter())
.enumerate()
{
assert_eq!(
da.to_bits(),
di.to_bits(),
"node {} D_diag[{}] bits differ: auto={} infnorm={}",
k,
j,
da,
di,
);
}
}
}
fn small_kkt_6x6_for_schur() -> CscMatrix {
let mut rows = Vec::new();
let mut cols = Vec::new();
let mut vals = Vec::new();
for i in 0..4 {
rows.push(i);
cols.push(i);
vals.push((i + 1) as f64);
}
rows.push(4);
cols.push(0);
vals.push(0.5);
rows.push(4);
cols.push(2);
vals.push(0.7);
rows.push(5);
cols.push(1);
vals.push(0.3);
rows.push(5);
cols.push(3);
vals.push(0.9);
rows.push(4);
cols.push(4);
vals.push(1.5);
rows.push(5);
cols.push(4);
vals.push(0.2);
rows.push(5);
cols.push(5);
vals.push(2.5);
CscMatrix::from_triplets(6, &rows, &cols, &vals).unwrap()
}
fn hand_computed_schur_2x2() -> [[f64; 2]; 2] {
let s00 = 1.5 - (0.25 + 0.49 / 3.0);
let s11 = 2.5 - (0.045 + 0.2025);
let s01 = 0.2;
[[s00, s01], [s01, s11]]
}
#[test]
fn schur_block_matches_hand_computed_for_small_kkt() {
let m = small_kkt_6x6_for_schur();
let params = crate::symbolic::SupernodeParams::default();
let sym = crate::symbolic::symbolic_factorize_with_schur(&m, ¶ms, &[4, 5]).unwrap();
let nparams = NumericParams {
scaling: crate::scaling::ScalingStrategy::Identity,
..NumericParams::default()
};
let (_factors, _inertia, schur) =
factorize_multifrontal_with_schur(&m, &sym, &nparams).unwrap();
assert_eq!(schur.dim, 2);
let expected = hand_computed_schur_2x2();
let tol = 1e-12;
for i in 0..2 {
for j in 0..2 {
let got = schur.get(i, j);
let want = expected[i][j];
assert!(
(got - want).abs() < tol,
"S({},{}) got {} want {} diff {}",
i,
j,
got,
want,
(got - want).abs()
);
}
}
}
#[test]
fn schur_block_full_square_storage() {
let m = small_kkt_6x6_for_schur();
let params = crate::symbolic::SupernodeParams::default();
let sym = crate::symbolic::symbolic_factorize_with_schur(&m, ¶ms, &[4, 5]).unwrap();
let nparams = NumericParams {
scaling: crate::scaling::ScalingStrategy::Identity,
..NumericParams::default()
};
let (_, _, schur) = factorize_multifrontal_with_schur(&m, &sym, &nparams).unwrap();
for i in 0..schur.dim {
for j in 0..schur.dim {
assert!((schur.get(i, j) - schur.get(j, i)).abs() < 1e-15);
}
}
}
#[test]
fn schur_user_order_preserved_when_reversed() {
let m = small_kkt_6x6_for_schur();
let params = crate::symbolic::SupernodeParams::default();
let sym = crate::symbolic::symbolic_factorize_with_schur(&m, ¶ms, &[5, 4]).unwrap();
let nparams = NumericParams {
scaling: crate::scaling::ScalingStrategy::Identity,
..NumericParams::default()
};
let (_, _, schur) = factorize_multifrontal_with_schur(&m, &sym, &nparams).unwrap();
let hand = hand_computed_schur_2x2();
let map = [1usize, 0usize];
let tol = 1e-12;
for i in 0..2 {
for j in 0..2 {
let got = schur.get(i, j);
let want = hand[map[i]][map[j]];
assert!(
(got - want).abs() < tol,
"reversed S({},{}) got {} want {}",
i,
j,
got,
want
);
}
}
}
#[test]
fn schur_rejects_symbolic_without_schur_tail() {
let m = small_kkt_6x6_for_schur();
let params = crate::symbolic::SupernodeParams::default();
let sym = crate::symbolic::symbolic_factorize(&m, ¶ms).unwrap(); assert_eq!(sym.is_schur_tail, None);
let nparams = NumericParams::default();
let r = factorize_multifrontal_with_schur(&m, &sym, &nparams);
assert!(matches!(r, Err(FeralError::InvalidInput(_))));
}
#[test]
fn schur_multi_supernode_tail_matches_oracle() {
let half = 25usize;
let k_each = 40usize;
let n = 2 * half + 2 * k_each;
let mut rows: Vec<usize> = Vec::new();
let mut cols: Vec<usize> = Vec::new();
let mut vals: Vec<f64> = Vec::new();
for i in 0..n {
rows.push(i);
cols.push(i);
vals.push(2.0 + i as f64);
}
for i in 0..half {
for s in 0..k_each {
let j = 2 * half + s;
rows.push(j);
cols.push(i);
vals.push(0.1);
}
}
for i in half..2 * half {
for s in 0..k_each {
let j = 2 * half + k_each + s;
rows.push(j);
cols.push(i);
vals.push(0.1);
}
}
for s in 0..k_each {
for t in 0..s {
rows.push(2 * half + s);
cols.push(2 * half + t);
vals.push(0.05);
rows.push(2 * half + k_each + s);
cols.push(2 * half + k_each + t);
vals.push(0.05);
}
}
for s in (2 * half + 1)..n {
rows.push(s);
cols.push(s - 1);
vals.push(0.03);
}
let m = CscMatrix::from_triplets(n, &rows, &cols, &vals).unwrap();
let schur: Vec<usize> = (2 * half..n).collect();
let n_schur = schur.len();
let params = crate::symbolic::SupernodeParams::default();
let sym = crate::symbolic::symbolic_factorize_with_schur(&m, ¶ms, &schur).unwrap();
let nparams = NumericParams {
scaling: crate::scaling::ScalingStrategy::Identity,
..NumericParams::default()
};
let (_, _, schur_block) = factorize_multifrontal_with_schur(&m, &sym, &nparams).unwrap();
assert_eq!(schur_block.dim, n_schur);
let mut is_schur = vec![false; n];
for &i in &schur {
is_schur[i] = true;
}
let f_indices: Vec<usize> = (0..n).filter(|i| !is_schur[*i]).collect();
let nf = f_indices.len();
let mut f_inv = vec![usize::MAX; n];
for (k, &i) in f_indices.iter().enumerate() {
f_inv[i] = k;
}
let mut a = vec![0.0f64; n * n];
for j in 0..n {
for k in m.col_ptr[j]..m.col_ptr[j + 1] {
let i = m.row_idx[k];
a[j * n + i] = m.values[k];
if i != j {
a[i * n + j] = m.values[k];
}
}
}
let mut tr = (Vec::new(), Vec::new(), Vec::new());
for j in 0..n {
if is_schur[j] {
continue;
}
for k in m.col_ptr[j]..m.col_ptr[j + 1] {
let i = m.row_idx[k];
if !is_schur[i] {
tr.0.push(f_inv[i]);
tr.1.push(f_inv[j]);
tr.2.push(m.values[k]);
}
}
}
let a_ff = CscMatrix::from_triplets(nf, &tr.0, &tr.1, &tr.2).unwrap();
let sym_ff = crate::symbolic::symbolic_factorize(&a_ff, ¶ms).unwrap();
let (factors_ff, _) = factorize_multifrontal(&a_ff, &sym_ff, &nparams).unwrap();
let mut s_ref = vec![0.0f64; n_schur * n_schur];
for (si, &i) in schur.iter().enumerate() {
for (sj, &j) in schur.iter().enumerate() {
s_ref[sj * n_schur + si] = a[j * n + i];
}
}
for (sj, &j) in schur.iter().enumerate() {
let mut rhs = vec![0.0f64; nf];
for &fi in &f_indices {
rhs[f_inv[fi]] = a[j * n + fi];
}
let x = crate::numeric::solve::solve_sparse(&factors_ff, &rhs).unwrap();
for (si, &i) in schur.iter().enumerate() {
let mut acc = 0.0;
for &fi in &f_indices {
acc += a[i * n + fi] * x[f_inv[fi]];
}
s_ref[sj * n_schur + si] -= acc;
}
}
let mut max_rel = 0.0f64;
for sj in 0..n_schur {
for si in 0..n_schur {
let want = s_ref[sj * n_schur + si];
let got = schur_block.get(si, sj);
let denom = want.abs().max(1e-14);
let rel = (got - want).abs() / denom;
if rel > max_rel {
max_rel = rel;
}
}
}
assert!(
max_rel < 1e-10,
"Schur block max relative error {} exceeds 1e-10",
max_rel
);
}
#[test]
fn schur_block_solve_small_explicit() {
let dim = 3;
let s = vec![
4.0, -1.0, 0.0, -1.0, 3.0, -1.0, 0.0, -1.0, 2.0, ];
let block = SchurBlock {
dim,
data: s.clone(),
};
let rhs = vec![2.0, 2.0, 4.0];
let x = block.solve(&rhs).expect("solve must succeed");
let want = [1.0, 2.0, 3.0];
for i in 0..dim {
assert!(
(x[i] - want[i]).abs() < 1e-12,
"x[{i}] = {} != {} (rel diff {:.3e})",
x[i],
want[i],
(x[i] - want[i]).abs()
);
}
let mut rhs_check = vec![0.0; dim];
block.symv(&x, &mut rhs_check).expect("symv");
for i in 0..dim {
assert!((rhs_check[i] - rhs[i]).abs() < 1e-12);
}
}
#[test]
fn schur_block_solve_roundtrip_after_factorize() {
let entries = [
(0, 0, 2.0),
(1, 1, 3.0),
(2, 2, 4.0),
(3, 0, 1.0),
(3, 1, 1.0),
(3, 2, 1.0),
];
let n = 4;
let mut rows = Vec::new();
let mut cols = Vec::new();
let mut vals = Vec::new();
for &(r, c, v) in &entries {
rows.push(r);
cols.push(c);
vals.push(v);
}
let m = CscMatrix::from_triplets(n, &rows, &cols, &vals).unwrap();
let schur_indices = vec![3];
let snode = crate::symbolic::SupernodeParams::default();
let sym =
crate::symbolic::symbolic_factorize_with_schur(&m, &snode, &schur_indices).unwrap();
let nparams = NumericParams {
scaling: crate::scaling::ScalingStrategy::Identity,
..NumericParams::default()
};
let (_, _, schur_block) = factorize_multifrontal_with_schur(&m, &sym, &nparams).unwrap();
let want_s = -(1.0 / 2.0 + 1.0 / 3.0 + 1.0 / 4.0);
assert!((schur_block.get(0, 0) - want_s).abs() < 1e-12);
let x_target = vec![1.5_f64];
let mut rhs_s = vec![0.0; 1];
schur_block.symv(&x_target, &mut rhs_s).unwrap();
let x_solved = schur_block.solve(&rhs_s).unwrap();
assert!((x_solved[0] - x_target[0]).abs() < 1e-12);
}
#[test]
fn schur_block_solve_dim_mismatch() {
let block = SchurBlock {
dim: 2,
data: vec![1.0, 0.0, 0.0, 1.0],
};
let rhs = vec![1.0, 2.0, 3.0];
let r = block.solve(&rhs);
assert!(matches!(r, Err(FeralError::DimensionMismatch { .. })));
}
fn issue5_mss1_inertia_sweep_with(
zero_tol: f64,
pivot_threshold: f64,
) -> Option<Vec<(f64, Inertia)>> {
let path = std::path::Path::new("data/matrices/kkt/MSS1/MSS1_0000.mtx");
let mtx = match crate::io::mtx::read_mtx(path) {
Ok(m) => m,
Err(_) => return None, };
let n = mtx.n;
let n_x = 90usize;
assert_eq!(n, 163, "MSS1_0000 expected n=163");
let mut bk = BunchKaufmanParams {
on_zero_pivot: ZeroPivotAction::ForceAccept,
zero_tol,
zero_tol_2x2: zero_tol * zero_tol,
..BunchKaufmanParams::default()
};
bk.pivot_threshold = pivot_threshold;
let params = NumericParams {
bk,
scaling: ScalingStrategy::Identity,
small_leaf: SmallLeafBatch::default(),
profiler: None,
parallel_telemetry: None,
};
let deltas = [0.0, 1e-4, 1e-2, 1.0, 1e2, 1e4, 1e6, 1e8, 1e10, 1e12];
let mut results = Vec::with_capacity(deltas.len());
for &dw in &deltas {
let mut rows: Vec<usize> = Vec::with_capacity(mtx.entries.len() + n_x);
let mut cols: Vec<usize> = Vec::with_capacity(mtx.entries.len() + n_x);
let mut vals: Vec<f64> = Vec::with_capacity(mtx.entries.len() + n_x);
for &(r, c, v) in &mtx.entries {
if r >= n_x && c >= n_x && r == c {
continue;
}
rows.push(r);
cols.push(c);
vals.push(v);
}
for i in 0..n_x {
rows.push(i);
cols.push(i);
vals.push(dw);
}
let csc = CscMatrix::from_triplets(n, &rows, &cols, &vals)
.expect("MSS1_0000 + δ_w·I CSC build");
let sym = symbolic_factorize(&csc, &SupernodeParams::default())
.expect("symbolic factorize MSS1_0000");
let (_factors, inertia) = factorize_multifrontal(&csc, &sym, ¶ms)
.expect("numeric factorize MSS1_0000 + δ_w·I");
results.push((dw, inertia));
}
Some(results)
}
fn issue5_mss1_inertia_sweep() -> Option<Vec<(f64, Inertia)>> {
issue5_mss1_inertia_sweep_with(1e-10, 0.0)
}
#[test]
fn issue_5_mss1_iter0_inertia_wanders_under_delta_w_sweep() {
let Some(results) = issue5_mss1_inertia_sweep() else {
return;
};
for (dw, inertia) in &results {
eprintln!(
"δ_w = {:>8.0e} → inertia(+{:3}, -{:3}, 0:{:>3})",
dw, inertia.positive, inertia.negative, inertia.zero
);
}
let positives: Vec<usize> = results.iter().map(|(_, i)| i.positive).collect();
let any_decrease = positives.windows(2).any(|w| w[1] < w[0]);
assert!(
any_decrease,
"issue #5 regression: expected non-monotone positive count, got monotone: \
{:?}. If this assertion fails, the in-kernel pivot behavior has changed; \
revisit the disposition in dev/research/issue-5-mss1-inertia-monotonicity.md \
§9 and flip the assertion to monotone non-decreasing.",
positives
);
}
#[test]
fn issue_5_mss1_zero_tol_sweep_diagnostic() {
let tols = [1e-10, 1e-8, 1e-6, 1e-4, 1e-2];
for &tol in &tols {
let Some(results) = issue5_mss1_inertia_sweep_with(tol, 0.0) else {
return;
};
let positives: Vec<usize> = results.iter().map(|(_, i)| i.positive).collect();
let negatives: Vec<usize> = results.iter().map(|(_, i)| i.negative).collect();
let zeros: Vec<usize> = results.iter().map(|(_, i)| i.zero).collect();
let monotone = positives.windows(2).all(|w| w[1] >= w[0]);
eprintln!(
"zero_tol = {:>5.0e}: monotone(+) = {}\n +: {:?}\n -: {:?}\n 0: {:?}",
tol, monotone, positives, negatives, zeros
);
}
}
#[test]
fn issue_5_mss1_pivot_threshold_sweep_diagnostic() {
let us = [0.0, 1e-10, 1e-8, 1e-6, 1e-4, 1e-2, 1e-1, 0.5];
for &u in &us {
let Some(results) = issue5_mss1_inertia_sweep_with(1e-10, u) else {
return;
};
let positives: Vec<usize> = results.iter().map(|(_, i)| i.positive).collect();
let negatives: Vec<usize> = results.iter().map(|(_, i)| i.negative).collect();
let zeros: Vec<usize> = results.iter().map(|(_, i)| i.zero).collect();
let monotone = positives.windows(2).all(|w| w[1] >= w[0]);
eprintln!(
"pivot_threshold = {:>5.0e}: monotone(+) = {}\n +: {:?}\n -: {:?}\n 0: {:?}",
u, monotone, positives, negatives, zeros
);
}
}
}