pub mod column_counts;
pub mod ldlt_compress;
pub mod profiler;
pub mod small_leaf;
pub mod supernode;
use crate::error::FeralError;
use crate::ordering::amd::permute_pattern;
use crate::ordering::elimination_tree::EliminationTree;
use crate::ordering::postorder::{biased_postorder, postorder};
use crate::sparse::csc::{CscMatrix, CscPattern};
pub use column_counts::{column_counts, column_counts_gnp, total_factor_nnz};
pub use ldlt_compress::{build_supermap, compress_pattern, expand_permutation, SuperMap};
pub use profiler::{record_stage, StagePct, StageTiming, SymbolicProfileReport, SymbolicProfiler};
pub use small_leaf::{find_small_leaf_groups, SmallLeafGroup, SmallLeafParams};
pub use supernode::{
find_supernodes, pick_amalgamation_strategy, AmalgamationStrategy, OrderingPreprocess,
Supernode, SupernodeParams, AUTO_MULTI_CHILD_FRAC_THRESHOLD,
};
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum OrderingMethod {
#[default]
Amd,
Amf,
MetisND,
ScotchND,
KahipND,
Auto,
AutoRace,
}
fn choose_adaptive(pattern: &CscPattern, method: OrderingMethod) -> OrderingMethod {
if method != OrderingMethod::Auto {
return method;
}
let n = pattern.n;
let full_nnz = pattern.row_idx.len();
if n == 0 {
return OrderingMethod::Amd;
}
let avg_deg = full_nnz as f64 / n as f64;
if n > 100_000 && avg_deg < 5.0 {
return OrderingMethod::Amd;
}
let stored_nnz = (full_nnz + n) / 2;
let base = pick_default_method(n, stored_nnz);
if base == OrderingMethod::MetisND && is_arrow_bordered(pattern) {
return OrderingMethod::Amf;
}
if base == OrderingMethod::MetisND {
return OrderingMethod::Amf;
}
base
}
fn is_arrow_bordered(pattern: &CscPattern) -> bool {
const HEAVY_DEG_FLOOR: usize = 64;
const HEAVY_AVG_MULT: f64 = 8.0;
const ARROW_COUNT_FRAC: f64 = 0.05;
const ARROW_NNZ_SHARE: f64 = 0.20;
let n = pattern.n;
if n == 0 {
return false;
}
let full_nnz = pattern.row_idx.len();
if full_nnz == 0 {
return false;
}
let avg_deg = full_nnz as f64 / n as f64;
let heavy_thr = (HEAVY_AVG_MULT * avg_deg).ceil() as usize;
let heavy_thr = heavy_thr.max(HEAVY_DEG_FLOOR);
let mut heavy_count = 0usize;
let mut heavy_nnz = 0usize;
for j in 0..n {
let deg = pattern.col_ptr[j + 1] - pattern.col_ptr[j];
if deg > heavy_thr {
heavy_count += 1;
heavy_nnz += deg;
}
}
if heavy_count == 0 {
return false;
}
let count_ok = (heavy_count as f64) < ARROW_COUNT_FRAC * n as f64;
let share_ok = (heavy_nnz as f64) >= ARROW_NNZ_SHARE * full_nnz as f64;
count_ok && share_ok
}
#[derive(Debug)]
pub struct SymbolicFactorization {
pub n: usize,
pub perm: Vec<usize>,
pub perm_inv: Vec<usize>,
pub supernodes: Vec<Supernode>,
pub factor_nnz_estimate: usize,
pub factor_slack: f64,
pub contrib_sizes: Vec<usize>,
pub peak_contrib_bytes: usize,
pub etree: EliminationTree,
pub permuted_pattern: CscPattern,
pub col_counts: Vec<usize>,
pub small_leaf_groups: Vec<SmallLeafGroup>,
pub snode_group: Vec<Option<usize>>,
pub(crate) cached_mc64: Option<crate::scaling::Mc64Cache>,
pub resolved_method: OrderingMethod,
pub resolved_amalgamation: supernode::AmalgamationStrategy,
pub resolved_preprocess: supernode::OrderingPreprocess,
pub is_schur_tail: Option<usize>,
}
fn pick_default_method(n: usize, _stored_nnz: usize) -> OrderingMethod {
if n == 0 {
return OrderingMethod::Amd;
}
if n <= 10_000 {
OrderingMethod::Amf
} else {
OrderingMethod::MetisND
}
}
pub fn pick_ordering_preprocess(matrix: &CscMatrix) -> OrderingPreprocess {
const MIN_N_FOR_COMPRESSION: usize = 128;
const LOW_DEGREE_THRESHOLD: f64 = 0.30;
let n = matrix.n;
if n < MIN_N_FOR_COMPRESSION {
return OrderingPreprocess::None;
}
let mut low_degree = 0usize;
for j in 0..n {
let nnz_col = matrix.col_ptr[j + 1] - matrix.col_ptr[j];
if nnz_col <= 2 {
low_degree += 1;
}
}
if low_degree as f64 / n as f64 >= LOW_DEGREE_THRESHOLD {
OrderingPreprocess::LdltCompress
} else {
OrderingPreprocess::None
}
}
pub fn symbolic_factorize(
matrix: &CscMatrix,
snode_params: &SupernodeParams,
) -> Result<SymbolicFactorization, FeralError> {
symbolic_factorize_with_method(matrix, snode_params, OrderingMethod::Auto)
}
fn to_contract_pattern_bufs(pattern: &CscPattern) -> Result<(Vec<i32>, Vec<i32>), FeralError> {
let col_ptr: Result<Vec<i32>, _> = pattern.col_ptr.iter().map(|&x| i32::try_from(x)).collect();
let col_ptr = col_ptr.map_err(|_| {
FeralError::InvalidInput("matrix too large for i32-indexed ordering crates".to_string())
})?;
let row_idx: Result<Vec<i32>, _> = pattern.row_idx.iter().map(|&x| i32::try_from(x)).collect();
let row_idx = row_idx.map_err(|_| {
FeralError::InvalidInput("matrix too large for i32-indexed ordering crates".to_string())
})?;
Ok((col_ptr, row_idx))
}
fn run_external_ordering(
pattern: &CscPattern,
method: OrderingMethod,
) -> Result<(Vec<usize>, OrderingMethod), FeralError> {
let (col_buf, row_buf) = to_contract_pattern_bufs(pattern)?;
let pat = feral_ordering_core::CscPattern::new(pattern.n, &col_buf, &row_buf)
.ok_or_else(|| FeralError::InvalidInput("malformed CSC pattern".to_string()))?;
debug_assert_ne!(method, OrderingMethod::Auto);
let mut actual = method;
let perm_i32 = match method {
OrderingMethod::Amd => feral_amd::amd_order(&pat),
OrderingMethod::Amf => feral_amf::amf_order(&pat),
OrderingMethod::MetisND => feral_metis::metis_order(&pat),
OrderingMethod::ScotchND => {
let opts = feral_scotch::ScotchOptions::default();
feral_scotch::scotch_order_full(&pat, &opts).map(|(perm, _, sstats)| {
if sstats.n_separator_vertices == 0 {
actual = OrderingMethod::Amd;
}
perm
})
}
OrderingMethod::KahipND => feral_kahip::kahip_order(&pat),
OrderingMethod::Auto => {
unreachable!("Auto is resolved by symbolic_factorize_with_method")
}
OrderingMethod::AutoRace => {
unreachable!("AutoRace is resolved by symbolic_factorize_with_method")
}
};
let perm_i32 = perm_i32
.map_err(|e| FeralError::InvalidInput(format!("external ordering failed: {}", e)))?;
if perm_i32.len() != pattern.n {
return Err(FeralError::InvalidInput(format!(
"external ordering returned {} entries for n={}",
perm_i32.len(),
pattern.n
)));
}
let mut out: Vec<usize> = Vec::with_capacity(perm_i32.len());
for x in perm_i32 {
let u = usize::try_from(x).map_err(|_| {
FeralError::InvalidInput("external ordering returned negative index".to_string())
})?;
if u >= pattern.n {
return Err(FeralError::InvalidInput(
"external ordering returned out-of-range index".to_string(),
));
}
out.push(u);
}
Ok((out, actual))
}
const RACE_CANDIDATES: &[OrderingMethod] = &[
OrderingMethod::Amd,
OrderingMethod::MetisND,
OrderingMethod::ScotchND,
OrderingMethod::KahipND,
];
fn symbolic_factorize_race(
matrix: &CscMatrix,
snode_params: &SupernodeParams,
) -> Result<SymbolicFactorization, FeralError> {
let mut best: Option<SymbolicFactorization> = None;
let mut last_err: Option<FeralError> = None;
for &cand in RACE_CANDIDATES {
match symbolic_factorize_with_method(matrix, snode_params, cand) {
Ok(sym) => {
let is_better = best
.as_ref()
.map(|b| sym.factor_nnz_estimate < b.factor_nnz_estimate)
.unwrap_or(true);
if is_better {
best = Some(sym);
}
}
Err(e) => {
last_err = Some(e);
}
}
}
best.ok_or_else(|| {
last_err.unwrap_or_else(|| {
FeralError::InvalidInput("AutoRace: no candidates available".to_string())
})
})
}
pub fn symbolic_factorize_with_method(
matrix: &CscMatrix,
snode_params: &SupernodeParams,
method: OrderingMethod,
) -> Result<SymbolicFactorization, FeralError> {
if method == OrderingMethod::AutoRace {
return symbolic_factorize_race(matrix, snode_params);
}
let n = matrix.n;
let prof = snode_params.symbolic_profiler.as_ref();
let t_total = prof.map(|_| std::time::Instant::now());
let t_sym = prof.map(|_| std::time::Instant::now());
let full_pattern = matrix.symmetric_pattern();
if let Some(t) = t_sym {
record_stage(prof, "symmetric_pattern", t);
}
let method = choose_adaptive(&full_pattern, method);
let mut cached_mc64: Option<crate::scaling::Mc64Cache> = None;
let t_pick = prof.map(|_| std::time::Instant::now());
let resolved_preprocess = match snode_params.preprocess {
OrderingPreprocess::Auto => pick_ordering_preprocess(matrix),
other => other,
};
if let Some(t) = t_pick {
record_stage(prof, "pick_preprocess", t);
}
let t_ord = prof.map(|_| std::time::Instant::now());
let (amd_perm, resolved_method): (Vec<usize>, OrderingMethod) = match resolved_preprocess {
OrderingPreprocess::None => run_external_ordering(&full_pattern, method)?,
OrderingPreprocess::Auto => unreachable!("resolved above"),
OrderingPreprocess::LdltCompress => {
let cache = crate::scaling::compute_mc64_cache(matrix)?;
let map = build_supermap(&cache.perm);
let pair = if map.ncmp() == n {
run_external_ordering(&full_pattern, method)?
} else {
let cpat = compress_pattern(&full_pattern, &map);
let (super_perm, resolved) = run_external_ordering(&cpat, method)?;
(expand_permutation(&super_perm, &map), resolved)
};
cached_mc64 = Some(cache);
pair
}
};
if let Some(t) = t_ord {
record_stage(prof, "ordering", t);
}
let t_perm1 = prof.map(|_| std::time::Instant::now());
let amd_pattern = permute_pattern(&full_pattern, &amd_perm);
if let Some(t) = t_perm1 {
record_stage(prof, "permute1", t);
}
let t_etree0 = prof.map(|_| std::time::Instant::now());
let amd_etree = EliminationTree::from_pattern(&amd_pattern);
if let Some(t) = t_etree0 {
record_stage(prof, "etree_initial", t);
}
let t_post = prof.map(|_| std::time::Instant::now());
let (post, post_inv) = postorder(&amd_etree);
if let Some(t) = t_post {
record_stage(prof, "postorder", t);
}
let t_compose = prof.map(|_| std::time::Instant::now());
let perm: Vec<usize> = post.iter().map(|&p| amd_perm[p]).collect();
let mut perm_inv = vec![0usize; n];
for (new, &old) in perm.iter().enumerate() {
perm_inv[old] = new;
}
if let Some(t) = t_compose {
record_stage(prof, "perm_compose", t);
}
let t_perm2 = prof.map(|_| std::time::Instant::now());
let permuted_pattern = permute_pattern(&full_pattern, &perm);
if let Some(t) = t_perm2 {
record_stage(prof, "permute2", t);
}
let t_relabel = prof.map(|_| std::time::Instant::now());
let final_parent: Vec<Option<usize>> = (0..n)
.map(|new| {
let old_amd = post[new];
amd_etree.parent[old_amd].map(|old_par| post_inv[old_par])
})
.collect();
let etree = EliminationTree {
parent: final_parent,
n,
};
if let Some(t) = t_relabel {
record_stage(prof, "etree_relabel", t);
}
let t_cc = prof.map(|_| std::time::Instant::now());
let mut col_counts = column_counts_gnp(&permuted_pattern, &etree);
if let Some(t) = t_cc {
record_stage(prof, "col_counts", t);
}
let mut permuted_pattern = permuted_pattern;
let mut perm = perm;
let mut etree = etree;
let mut effective_params = snode_params.clone();
if matches!(
effective_params.amalgamation_strategy,
supernode::AmalgamationStrategy::Auto
) {
effective_params.amalgamation_strategy = supernode::pick_amalgamation_strategy(&etree);
}
let snode_params: &SupernodeParams = &effective_params;
let t_renumber = prof.map(|_| std::time::Instant::now());
if matches!(
snode_params.amalgamation_strategy,
supernode::AmalgamationStrategy::Renumber
) {
let bias = supernode::predict_merges(&etree, &col_counts, snode_params);
if bias.iter().any(|&b| b) {
let (post2, _post2_inv) = biased_postorder(&etree, &bias);
let new_perm: Vec<usize> = post2.iter().map(|&p| perm[p]).collect();
let mut new_perm_inv = vec![0usize; n];
for (new, &old) in new_perm.iter().enumerate() {
new_perm_inv[old] = new;
}
let new_permuted_pattern = permute_pattern(&full_pattern, &new_perm);
let new_etree = EliminationTree::from_pattern(&new_permuted_pattern);
let new_col_counts = column_counts_gnp(&new_permuted_pattern, &new_etree);
perm = new_perm;
perm_inv = new_perm_inv;
permuted_pattern = new_permuted_pattern;
etree = new_etree;
col_counts = new_col_counts;
}
}
if let Some(t) = t_renumber {
record_stage(prof, "renumber", t);
}
let factor_nnz = total_factor_nnz(&col_counts);
let t_find = prof.map(|_| std::time::Instant::now());
let mut supernodes = find_supernodes(&etree, &col_counts, snode_params);
supernode::assign_delayed_capacities(&mut supernodes);
if let Some(t) = t_find {
record_stage(prof, "find_supernodes", t);
}
let t_slg = prof.map(|_| std::time::Instant::now());
let (small_leaf_groups, snode_group) =
find_small_leaf_groups(&supernodes, &permuted_pattern, &snode_params.small_leaf);
if let Some(t) = t_slg {
record_stage(prof, "small_leaf_groups", t);
}
let t_pk = prof.map(|_| std::time::Instant::now());
let contrib_sizes: Vec<usize> = supernodes.iter().map(|s| s.contrib_size()).collect();
let peak_contrib_bytes = compute_peak_contrib(&supernodes, &contrib_sizes);
if let Some(t) = t_pk {
record_stage(prof, "peak_contrib", t);
}
let factor_slack = 1.2;
if let (Some(arc), Some(t)) = (prof, t_total) {
if let Ok(mut p) = arc.lock() {
p.set_total(t.elapsed().as_micros() as u64);
}
}
Ok(SymbolicFactorization {
n,
perm,
perm_inv,
supernodes,
factor_nnz_estimate: (factor_nnz as f64 * factor_slack) as usize,
factor_slack,
contrib_sizes,
peak_contrib_bytes,
etree,
permuted_pattern,
col_counts,
small_leaf_groups,
snode_group,
cached_mc64,
resolved_method,
resolved_amalgamation: snode_params.amalgamation_strategy,
resolved_preprocess,
is_schur_tail: None,
})
}
pub fn symbolic_factorize_with_schur(
matrix: &CscMatrix,
snode_params: &SupernodeParams,
schur_indices: &[usize],
) -> Result<SymbolicFactorization, FeralError> {
let n = matrix.n;
let n_schur = schur_indices.len();
if n_schur == 0 {
return symbolic_factorize_with_method(matrix, snode_params, OrderingMethod::Amd);
}
let mut effective_params = snode_params.clone();
effective_params.preprocess = OrderingPreprocess::None;
effective_params.amalgamation_strategy = supernode::AmalgamationStrategy::Adjacency;
let initial_perm = crate::ordering::schur::compute_schur_aware_perm(matrix, schur_indices)?;
let full_pattern = matrix.symmetric_pattern();
let initial_permuted = permute_pattern(&full_pattern, &initial_perm);
let initial_etree = EliminationTree::from_pattern(&initial_permuted);
let mut is_schur = vec![false; n];
for slot in is_schur.iter_mut().skip(n - n_schur) {
*slot = true;
}
let (post, post_inv) =
crate::ordering::postorder::schur_constrained_postorder(&initial_etree, &is_schur);
for (k, &p) in post.iter().enumerate().skip(n - n_schur) {
debug_assert_eq!(
p, k,
"schur_constrained_postorder violated tail identity at k={}",
k
);
}
let perm: Vec<usize> = post.iter().map(|&p| initial_perm[p]).collect();
let mut perm_inv = vec![0usize; n];
for (new, &old) in perm.iter().enumerate() {
perm_inv[old] = new;
}
debug_assert_eq!(
&perm[n - n_schur..],
schur_indices,
"Schur tail invariant violated"
);
let permuted_pattern = permute_pattern(&full_pattern, &perm);
let final_parent: Vec<Option<usize>> = (0..n)
.map(|new| {
let old_initial = post[new];
initial_etree.parent[old_initial].map(|old_par| post_inv[old_par])
})
.collect();
let etree = EliminationTree {
parent: final_parent,
n,
};
let col_counts = column_counts::column_counts_gnp(&permuted_pattern, &etree);
let factor_nnz = column_counts::total_factor_nnz(&col_counts);
let mut supernodes = supernode::find_supernodes(&etree, &col_counts, &effective_params);
merge_schur_tail_supernodes(&mut supernodes, n, n_schur)?;
supernode::assign_delayed_capacities(&mut supernodes);
let (small_leaf_groups, snode_group) =
find_small_leaf_groups(&supernodes, &permuted_pattern, &effective_params.small_leaf);
let contrib_sizes: Vec<usize> = supernodes.iter().map(|s| s.contrib_size()).collect();
let peak_contrib_bytes = compute_peak_contrib(&supernodes, &contrib_sizes);
let factor_slack = 1.2;
Ok(SymbolicFactorization {
n,
perm,
perm_inv,
supernodes,
factor_nnz_estimate: (factor_nnz as f64 * factor_slack) as usize,
factor_slack,
contrib_sizes,
peak_contrib_bytes,
etree,
permuted_pattern,
col_counts,
small_leaf_groups,
snode_group,
cached_mc64: None,
resolved_method: OrderingMethod::Amd,
resolved_amalgamation: effective_params.amalgamation_strategy,
resolved_preprocess: OrderingPreprocess::None,
is_schur_tail: Some(n_schur),
})
}
fn merge_schur_tail_supernodes(
supernodes: &mut Vec<Supernode>,
n: usize,
n_schur: usize,
) -> Result<(), FeralError> {
let schur_lo = n - n_schur;
split_straddling_supernode(supernodes, schur_lo)?;
let mut tail_start: Option<usize> = None;
for (s, snode) in supernodes.iter().enumerate().rev() {
let col_lo = snode.first_col;
let col_hi = col_lo + snode.ncol();
let intersects = col_hi > schur_lo && col_lo < n;
if intersects {
tail_start = Some(s);
} else {
break;
}
}
if let Some(start) = tail_start {
for (s, snode) in supernodes.iter().enumerate().take(start) {
let col_lo = snode.first_col;
let col_hi = col_lo + snode.ncol();
let intersects = col_hi > schur_lo && col_lo < n;
if intersects {
return Err(FeralError::InvalidInput(format!(
"Schur-bearing supernodes are not contiguous at the \
tail (snode {} bears Schur cols but lies below \
non-Schur supernode(s) preceding the tail run \
starting at index {}). This requires a forest- \
structured Schur set; see \
dev/research/schur-complement.md F3.2b.",
s, start
)));
}
}
}
let Some(start) = tail_start else {
return Err(FeralError::InvalidInput(
"F3.2b merge: no Schur-bearing supernodes found despite \
n_schur > 0 (symbolic invariant broken)"
.to_string(),
));
};
if start == supernodes.len() - 1 {
let last = &supernodes[start];
let col_lo = last.first_col;
let col_hi = col_lo + last.ncol();
if col_lo > schur_lo || col_hi != n {
return Err(FeralError::InvalidInput(format!(
"F3.2b merge: single Schur supernode at index {} does not \
cover the full Schur tail [{}, {}) (covers [{}, {}))",
start, schur_lo, n, col_lo, col_hi
)));
}
return Ok(());
}
let merged_first_col = supernodes[start].first_col;
if merged_first_col != schur_lo {
return Err(FeralError::InvalidInput(format!(
"F3.2b merge: tail run starts at col {} but Schur tail \
begins at {}",
merged_first_col, schur_lo
)));
}
let mut expected = merged_first_col;
for (s, snode) in supernodes.iter().enumerate().skip(start) {
if snode.first_col != expected {
return Err(FeralError::InvalidInput(format!(
"F3.2b merge: supernode {} starts at col {} but expected {} \
(Schur supernode column ranges must be contiguous)",
s, snode.first_col, expected
)));
}
expected = snode.first_col + snode.ncol();
}
if expected != n {
return Err(FeralError::InvalidInput(format!(
"F3.2b merge: tail run ends at col {} but Schur tail ends at {}",
expected, n
)));
}
let mut merged_nrow = 0usize;
let merge_indices: std::collections::HashSet<usize> = (start..supernodes.len()).collect();
let mut merged_children: Vec<usize> = Vec::new();
for snode in supernodes.iter().skip(start) {
let extent = (snode.first_col + snode.nrow) - merged_first_col;
if extent > merged_nrow {
merged_nrow = extent;
}
for &c in &snode.children {
if !merge_indices.contains(&c) {
merged_children.push(c);
}
}
}
let merged_ncol = n_schur;
if merged_nrow < merged_ncol {
merged_nrow = merged_ncol;
}
let merged_row_indices: Vec<usize> =
(merged_first_col..merged_first_col + merged_nrow).collect();
supernodes[start] = Supernode {
first_col: merged_first_col,
ncol: merged_ncol,
nrow: merged_nrow,
row_indices: merged_row_indices,
children: merged_children,
delayed_capacity: usize::MAX,
};
supernodes.truncate(start + 1);
Ok(())
}
fn split_straddling_supernode(
supernodes: &mut Vec<Supernode>,
schur_lo: usize,
) -> Result<(), FeralError> {
let mut straddle_idx: Option<usize> = None;
for (s, snode) in supernodes.iter().enumerate() {
let lo = snode.first_col;
let hi = lo + snode.ncol;
if lo < schur_lo && hi > schur_lo {
if straddle_idx.is_some() {
return Err(FeralError::InvalidInput(format!(
"F3.2b split: multiple supernodes straddle schur_lo={} \
(impossible after find_supernodes — column ranges are disjoint)",
schur_lo
)));
}
straddle_idx = Some(s);
}
}
let Some(b) = straddle_idx else {
return Ok(());
};
let original = supernodes[b].clone();
let ncol_ns = schur_lo - original.first_col;
let ncol_sc = original.ncol - ncol_ns;
let nrow_total = original.nrow;
if original.row_indices.len() != nrow_total {
return Err(FeralError::InvalidInput(format!(
"F3.2b split: supernode {} has nrow={} but row_indices len={}",
b,
nrow_total,
original.row_indices.len()
)));
}
for snode in supernodes.iter_mut() {
for c in snode.children.iter_mut() {
if *c == b {
*c = b + 1;
} else if *c > b {
*c += 1;
}
}
}
let non_schur = Supernode {
first_col: original.first_col,
ncol: ncol_ns,
nrow: nrow_total,
row_indices: original.row_indices.clone(),
children: original.children,
delayed_capacity: usize::MAX,
};
let schur_half = Supernode {
first_col: schur_lo,
ncol: ncol_sc,
nrow: nrow_total - ncol_ns,
row_indices: original.row_indices[ncol_ns..].to_vec(),
children: vec![b],
delayed_capacity: usize::MAX,
};
supernodes[b] = non_schur;
supernodes.insert(b + 1, schur_half);
Ok(())
}
fn compute_peak_contrib(supernodes: &[Supernode], contrib_sizes: &[usize]) -> usize {
let n_snodes = supernodes.len();
if n_snodes == 0 {
return 0;
}
let mut live = vec![false; n_snodes];
let mut current_size = 0usize;
let mut peak = 0usize;
for k in 0..n_snodes {
current_size += contrib_sizes[k];
live[k] = true;
if current_size > peak {
peak = current_size;
}
for &child in &supernodes[k].children {
if live[child] {
current_size -= contrib_sizes[child];
live[child] = false;
}
}
}
peak * std::mem::size_of::<f64>()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_symbolic_factorize_basic() {
let m =
CscMatrix::from_triplets(4, &[0, 1, 1, 2, 2, 3, 3], &[0, 0, 1, 1, 2, 2, 3], &[1.0; 7])
.unwrap();
let params = SupernodeParams {
nemin: 32,
..Default::default()
};
let sym = symbolic_factorize(&m, ¶ms).unwrap();
assert_eq!(sym.n, 4);
assert_eq!(sym.perm.len(), 4);
assert_eq!(sym.perm_inv.len(), 4);
let mut sorted_perm = sym.perm.clone();
sorted_perm.sort();
assert_eq!(sorted_perm, vec![0, 1, 2, 3]);
assert!(sym.factor_nnz_estimate > 0);
let total_cols: usize = sym.supernodes.iter().map(|s| s.ncol()).sum();
assert_eq!(total_cols, 4);
}
#[test]
fn test_symbolic_factorize_dense() {
let m = CscMatrix::from_triplets(3, &[0, 1, 2, 1, 2, 2], &[0, 0, 0, 1, 1, 2], &[1.0; 6])
.unwrap();
let params = SupernodeParams {
nemin: 1,
..Default::default()
};
let sym = symbolic_factorize(&m, ¶ms).unwrap();
assert!(sym.factor_nnz_estimate >= 6);
}
#[test]
fn test_symbolic_factorize_kkt() {
let m = CscMatrix::from_triplets(
3,
&[0, 1, 2, 2, 2],
&[0, 1, 0, 1, 2],
&[2.0, 3.0, 1.0, 1.0, -1e-8],
)
.unwrap();
let params = SupernodeParams::default();
let sym = symbolic_factorize(&m, ¶ms).unwrap();
assert_eq!(sym.n, 3);
let total_cols: usize = sym.supernodes.iter().map(|s| s.ncol()).sum();
assert_eq!(total_cols, 3);
}
#[test]
fn test_perm_inverse_consistency() {
let m = CscMatrix::from_triplets(
5,
&[0, 1, 2, 3, 4, 1, 2, 3, 4],
&[0, 0, 0, 0, 0, 1, 2, 3, 4],
&[1.0; 9],
)
.unwrap();
let params = SupernodeParams::default();
let sym = symbolic_factorize(&m, ¶ms).unwrap();
for i in 0..5 {
assert_eq!(sym.perm[sym.perm_inv[i]], i);
assert_eq!(sym.perm_inv[sym.perm[i]], i);
}
}
#[test]
fn test_contrib_sizes_nonnegative() {
let m = CscMatrix::from_triplets(
5,
&[0, 1, 2, 3, 4, 1, 2, 3, 4],
&[0, 0, 0, 0, 0, 1, 2, 3, 4],
&[1.0; 9],
)
.unwrap();
let params = SupernodeParams {
nemin: 1,
..Default::default()
};
let sym = symbolic_factorize(&m, ¶ms).unwrap();
for &cs in &sym.contrib_sizes {
assert!(cs < 100000, "unreasonable contrib size: {}", cs);
}
if let Some(last) = sym.supernodes.last() {
assert_eq!(
last.contrib_size(),
0,
"root should have no contribution block"
);
}
}
fn small_grid_5x5() -> CscMatrix {
let m = 5;
let n = 5;
let idx = |r: usize, c: usize| r * n + c;
let mut rows: Vec<usize> = Vec::new();
let mut cols: Vec<usize> = Vec::new();
let mut vals: Vec<f64> = Vec::new();
for r in 0..m {
for c in 0..n {
let k = idx(r, c);
rows.push(k);
cols.push(k);
vals.push(4.0);
if r + 1 < m {
rows.push(idx(r + 1, c));
cols.push(k);
vals.push(-1.0);
}
if c + 1 < n {
rows.push(idx(r, c + 1));
cols.push(k);
vals.push(-1.0);
}
}
}
CscMatrix::from_triplets(m * n, &rows, &cols, &vals).unwrap()
}
#[test]
fn symbolic_factorize_amf_produces_valid_perm() {
let m = small_grid_5x5();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_method(&m, ¶ms, OrderingMethod::Amf).unwrap();
assert_eq!(sym.n, 25);
let mut sorted = sym.perm.clone();
sorted.sort();
assert_eq!(sorted, (0..25).collect::<Vec<_>>(), "perm is a bijection");
for i in 0..25 {
assert_eq!(sym.perm[sym.perm_inv[i]], i);
}
assert_eq!(sym.resolved_method, OrderingMethod::Amf);
}
#[test]
fn symbolic_factorize_metis_produces_valid_perm() {
let m = small_grid_5x5();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_method(&m, ¶ms, OrderingMethod::MetisND).unwrap();
assert_eq!(sym.n, 25);
let mut sorted = sym.perm.clone();
sorted.sort();
assert_eq!(sorted, (0..25).collect::<Vec<_>>(), "perm is a bijection");
for i in 0..25 {
assert_eq!(sym.perm[sym.perm_inv[i]], i);
}
}
#[test]
fn symbolic_factorize_scotch_produces_valid_perm() {
let m = small_grid_5x5();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_method(&m, ¶ms, OrderingMethod::ScotchND).unwrap();
assert_eq!(sym.n, 25);
let mut sorted = sym.perm.clone();
sorted.sort();
assert_eq!(sorted, (0..25).collect::<Vec<_>>(), "perm is a bijection");
for i in 0..25 {
assert_eq!(sym.perm[sym.perm_inv[i]], i);
}
}
#[test]
fn symbolic_factorize_kahip_produces_valid_perm() {
let m = small_grid_5x5();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_method(&m, ¶ms, OrderingMethod::KahipND).unwrap();
assert_eq!(sym.n, 25);
let mut sorted = sym.perm.clone();
sorted.sort();
assert_eq!(sorted, (0..25).collect::<Vec<_>>(), "perm is a bijection");
for i in 0..25 {
assert_eq!(sym.perm[sym.perm_inv[i]], i);
}
}
#[test]
fn symbolic_factorize_auto_produces_valid_perm() {
let m = small_grid_5x5();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_method(&m, ¶ms, OrderingMethod::Auto).unwrap();
assert_eq!(sym.n, 25);
let mut sorted = sym.perm.clone();
sorted.sort();
assert_eq!(sorted, (0..25).collect::<Vec<_>>(), "perm is a bijection");
for i in 0..25 {
assert_eq!(sym.perm[sym.perm_inv[i]], i);
}
}
#[test]
fn choose_adaptive_rules() {
fn pat_bufs(n: usize, avg_deg: usize) -> (Vec<usize>, Vec<usize>) {
let total = n * avg_deg.max(1);
let mut col_ptr = Vec::with_capacity(n + 1);
let mut row_idx = Vec::with_capacity(total);
let per = avg_deg.max(1);
for j in 0..n {
col_ptr.push(row_idx.len());
for t in 0..per {
row_idx.push((j + t) % n.max(1));
}
}
col_ptr.push(row_idx.len());
(col_ptr, row_idx)
}
let (cp, ri) = pat_bufs(200_000, 3);
let p = CscPattern {
n: 200_000,
col_ptr: cp,
row_idx: ri,
};
assert_eq!(
choose_adaptive(&p, OrderingMethod::Auto),
OrderingMethod::Amd
);
let (cp, ri) = pat_bufs(500, 6);
let p = CscPattern {
n: 500,
col_ptr: cp,
row_idx: ri,
};
assert_eq!(
choose_adaptive(&p, OrderingMethod::Auto),
OrderingMethod::Amf
);
let (cp, ri) = pat_bufs(50_000, 20);
let p = CscPattern {
n: 50_000,
col_ptr: cp,
row_idx: ri,
};
assert_eq!(
choose_adaptive(&p, OrderingMethod::Auto),
OrderingMethod::Amf
);
let (cp, ri) = pat_bufs(150_000, 10);
let p = CscPattern {
n: 150_000,
col_ptr: cp,
row_idx: ri,
};
assert_eq!(
choose_adaptive(&p, OrderingMethod::Auto),
OrderingMethod::Amf
);
let (cp, ri) = pat_bufs(500, 6);
let p = CscPattern {
n: 500,
col_ptr: cp,
row_idx: ri,
};
assert_eq!(
choose_adaptive(&p, OrderingMethod::MetisND),
OrderingMethod::MetisND
);
}
fn pattern_with_degrees(degrees: &[usize]) -> CscPattern {
let n = degrees.len();
let mut col_ptr = Vec::with_capacity(n + 1);
let mut row_idx = Vec::new();
for (j, &d) in degrees.iter().enumerate() {
col_ptr.push(row_idx.len());
for t in 0..d {
row_idx.push((j + t) % n.max(1));
}
}
col_ptr.push(row_idx.len());
CscPattern {
n,
col_ptr,
row_idx,
}
}
#[test]
fn is_arrow_bordered_fires_on_synthetic_arrow() {
let mut degrees = vec![6usize; 11_900];
degrees.extend(std::iter::repeat_n(600usize, 100));
let pat = pattern_with_degrees(°rees);
assert!(is_arrow_bordered(&pat), "r05-shaped arrow must be detected");
}
#[test]
fn is_arrow_bordered_rejects_uniform_sparse() {
let pat = pattern_with_degrees(&vec![8usize; 12_000]);
assert!(
!is_arrow_bordered(&pat),
"uniform-sparse pattern must not be flagged as arrow"
);
}
#[test]
fn is_arrow_bordered_rejects_many_hubs() {
let mut degrees = vec![1000usize; 1000];
degrees.extend(std::iter::repeat_n(1usize, 9000));
let pat = pattern_with_degrees(°rees);
assert!(
!is_arrow_bordered(&pat),
"a large heavy set (10% of n) must be rejected by the count guard"
);
}
#[test]
fn is_arrow_bordered_rejects_low_nnz_share_border() {
let mut degrees = vec![44usize; 8030];
degrees.extend([614usize, 614usize]);
let pat = pattern_with_degrees(°rees);
assert!(
!is_arrow_bordered(&pat),
"a heavy set carrying a tiny nnz share must be rejected by the share guard"
);
}
#[test]
fn choose_adaptive_routes_arrow_to_amf() {
let mut degrees = vec![6usize; 11_900];
degrees.extend(std::iter::repeat_n(600usize, 100));
let pat = pattern_with_degrees(°rees);
assert_eq!(
choose_adaptive(&pat, OrderingMethod::Auto),
OrderingMethod::Amf,
"arrow/bordered pattern (n>10_000) must route to Amf, not MetisND"
);
let uniform = pattern_with_degrees(&vec![16usize; 120_000]);
assert_eq!(
choose_adaptive(&uniform, OrderingMethod::Auto),
OrderingMethod::Amf,
"uniform large-dense non-arrow pattern routes to AMF via the #73 catch"
);
let mut small_arrow = vec![6usize; 4900];
small_arrow.extend(std::iter::repeat_n(600usize, 100));
let small = pattern_with_degrees(&small_arrow);
assert_eq!(
choose_adaptive(&small, OrderingMethod::Auto),
OrderingMethod::Amf
);
}
#[test]
fn symbolic_factorize_default_uses_amf_for_small_matrices() {
let m = small_grid_5x5();
let params = SupernodeParams::default();
let a = symbolic_factorize(&m, ¶ms).unwrap();
let b = symbolic_factorize_with_method(&m, ¶ms, OrderingMethod::Amf).unwrap();
assert_eq!(
a.perm, b.perm,
"symbolic_factorize on small dense matrices must equal \
symbolic_factorize_with_method(Amf)"
);
assert_eq!(a.factor_nnz_estimate, b.factor_nnz_estimate);
assert_eq!(a.resolved_method, OrderingMethod::Amf);
}
#[test]
fn pick_default_method_rules() {
assert_eq!(pick_default_method(0, 0), OrderingMethod::Amd);
assert_eq!(pick_default_method(715, 2839), OrderingMethod::Amf); assert_eq!(pick_default_method(3000, 8999), OrderingMethod::Amf); assert_eq!(pick_default_method(3000, 13_000), OrderingMethod::Amf);
assert_eq!(pick_default_method(3083, 9484), OrderingMethod::Amf); assert_eq!(pick_default_method(4000, 7999), OrderingMethod::Amf); assert_eq!(pick_default_method(5000, 20_000), OrderingMethod::Amf);
assert_eq!(pick_default_method(5314, 22566), OrderingMethod::Amf); assert_eq!(pick_default_method(10_000, 100_000), OrderingMethod::Amf);
assert_eq!(
pick_default_method(20_000, 200_000),
OrderingMethod::MetisND
);
assert_eq!(
pick_default_method(2_813_976, 6_622_463),
OrderingMethod::MetisND
);
}
#[test]
fn pick_default_method_never_returns_kahip() {
let shapes: &[(usize, usize)] = &[
(0, 0),
(10, 30),
(500, 1500),
(3083, 13333), (5314, 22566), (10_000, 50_000),
(100_000, 500_000),
(345_241, 1_343_126), ];
for &(n, nnz) in shapes {
let m = pick_default_method(n, nnz);
assert_ne!(
m,
OrderingMethod::KahipND,
"pick_default_method({}, {}) returned KahipND; \
see dev/research/ordering-kahip-driver-integration.md",
n,
nnz
);
}
}
fn small_kkt_6x6() -> CscMatrix {
let mut rows = Vec::new();
let mut cols = Vec::new();
let mut vals = Vec::new();
for i in 0..4 {
rows.push(i);
cols.push(i);
vals.push((i + 1) as f64);
}
rows.push(4);
cols.push(0);
vals.push(0.5);
rows.push(4);
cols.push(2);
vals.push(0.7);
rows.push(5);
cols.push(1);
vals.push(0.3);
rows.push(5);
cols.push(3);
vals.push(0.9);
rows.push(4);
cols.push(4);
vals.push(1.5);
rows.push(5);
cols.push(4);
vals.push(0.2);
rows.push(5);
cols.push(5);
vals.push(2.5);
CscMatrix::from_triplets(6, &rows, &cols, &vals).unwrap()
}
#[test]
fn schur_symbolic_tail_invariant_user_order() {
let m = small_kkt_6x6();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_schur(&m, ¶ms, &[4, 5]).unwrap();
assert_eq!(sym.n, 6);
assert_eq!(sym.is_schur_tail, Some(2));
assert_eq!(&sym.perm[4..], &[4, 5]);
}
#[test]
fn schur_symbolic_tail_invariant_reversed_user_order() {
let m = small_kkt_6x6();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_schur(&m, ¶ms, &[5, 4]).unwrap();
assert_eq!(sym.is_schur_tail, Some(2));
assert_eq!(&sym.perm[4..], &[5, 4]);
}
#[test]
fn schur_symbolic_perm_is_valid_permutation() {
let m = small_kkt_6x6();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_schur(&m, ¶ms, &[4, 5]).unwrap();
let mut sorted = sym.perm.clone();
sorted.sort();
assert_eq!(sorted, vec![0, 1, 2, 3, 4, 5]);
for (new, &old) in sym.perm.iter().enumerate() {
assert_eq!(sym.perm_inv[old], new);
}
}
#[test]
fn schur_symbolic_empty_falls_back_to_standard() {
let m = small_kkt_6x6();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_schur(&m, ¶ms, &[]).unwrap();
assert_eq!(sym.is_schur_tail, None);
}
#[test]
fn schur_symbolic_full_n_rejected() {
let m = small_kkt_6x6();
let params = SupernodeParams::default();
let result = symbolic_factorize_with_schur(&m, ¶ms, &[0, 1, 2, 3, 4, 5]);
assert!(matches!(result, Err(FeralError::InvalidInput(_))));
}
#[test]
fn schur_symbolic_duplicate_rejected() {
let m = small_kkt_6x6();
let params = SupernodeParams::default();
let result = symbolic_factorize_with_schur(&m, ¶ms, &[4, 4]);
assert!(matches!(result, Err(FeralError::InvalidInput(_))));
}
#[test]
fn schur_symbolic_supernodes_cover_n() {
let m = small_kkt_6x6();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_schur(&m, ¶ms, &[4, 5]).unwrap();
let total: usize = sym.supernodes.iter().map(|s| s.ncol()).sum();
assert_eq!(total, 6);
}
#[test]
fn schur_symbolic_single_schur_index() {
let m = small_kkt_6x6();
let params = SupernodeParams::default();
let sym = symbolic_factorize_with_schur(&m, ¶ms, &[5]).unwrap();
assert_eq!(sym.is_schur_tail, Some(1));
assert_eq!(sym.perm[5], 5);
let mut sorted = sym.perm.clone();
sorted.sort();
assert_eq!(sorted, vec![0, 1, 2, 3, 4, 5]);
}
fn poisson_kkt_csc(k: usize) -> CscMatrix {
let m = k * k;
let n_kkt = 3 * m;
let h = 1.0 / (k as f64 + 1.0);
let alpha = 0.01;
let inv_h2 = 1.0 / (h * h);
let mut rows: Vec<usize> = Vec::new();
let mut cols: Vec<usize> = Vec::new();
let mut vals: Vec<f64> = Vec::new();
for i in 0..m {
rows.push(i);
cols.push(i);
vals.push(h * h);
}
for i in 0..m {
rows.push(m + i);
cols.push(m + i);
vals.push(alpha * h * h);
}
for i in 0..k {
for j in 0..k {
let c = i * k + j;
let con_row = 2 * m + c;
rows.push(con_row);
cols.push(c);
vals.push(4.0 * inv_h2);
if i > 0 {
rows.push(con_row);
cols.push((i - 1) * k + j);
vals.push(-inv_h2);
}
if i + 1 < k {
rows.push(con_row);
cols.push((i + 1) * k + j);
vals.push(-inv_h2);
}
if j > 0 {
rows.push(con_row);
cols.push(i * k + (j - 1));
vals.push(-inv_h2);
}
if j + 1 < k {
rows.push(con_row);
cols.push(i * k + (j + 1));
vals.push(-inv_h2);
}
rows.push(con_row);
cols.push(m + c);
vals.push(-1.0);
}
}
CscMatrix::from_triplets(n_kkt, &rows, &cols, &vals).expect("kkt csc")
}
#[test]
fn issue_3_scotchnd_on_kkt_resolves_to_amd_when_bisection_degenerates() {
let m = poisson_kkt_csc(20);
let params = SupernodeParams {
preprocess: OrderingPreprocess::None,
..SupernodeParams::default()
};
let sym = symbolic_factorize_with_method(&m, ¶ms, OrderingMethod::ScotchND).unwrap();
assert_eq!(
sym.resolved_method,
OrderingMethod::Amd,
"ScotchND degenerated to AMD-leaf-only on this KKT pattern; \
resolved_method must report Amd, not ScotchND"
);
}
#[test]
fn issue_3_auto_on_kkt_routes_via_pick_default_method() {
let m = poisson_kkt_csc(58);
let params = SupernodeParams::default();
let auto = symbolic_factorize_with_method(&m, ¶ms, OrderingMethod::Auto).unwrap();
let default = symbolic_factorize(&m, ¶ms).unwrap();
assert_eq!(
auto.resolved_method, default.resolved_method,
"Auto must resolve to the same concrete method as \
`symbolic_factorize` (which also routes through choose_adaptive)"
);
assert_eq!(auto.resolved_method, OrderingMethod::Amf);
}
}