use crate::dense::rook::{rook_rescue, RookKind};
use crate::error::FeralError;
use crate::inertia::Inertia;
use crate::dense::schur_kernel;
pub static FORCE_SCALAR_FRONTAL: std::sync::atomic::AtomicBool =
std::sync::atomic::AtomicBool::new(false);
pub static DISABLE_PANEL_INLINE_2X2: std::sync::atomic::AtomicBool =
std::sync::atomic::AtomicBool::new(false);
pub static PANEL_DIAG_ENABLED: std::sync::atomic::AtomicBool =
std::sync::atomic::AtomicBool::new(false);
pub mod panel_diag {
use std::sync::atomic::AtomicU64;
pub static PANEL_FULL: AtomicU64 = AtomicU64::new(0);
pub static PANEL_PARTIAL: AtomicU64 = AtomicU64::new(0);
pub static PANEL_DELAYED: AtomicU64 = AtomicU64::new(0);
pub static FALLBACK_2X2_NEED_SWAP_OR_BOUND: AtomicU64 = AtomicU64::new(0);
pub static FALLBACK_2X2_SWAP_1X1_WINS: AtomicU64 = AtomicU64::new(0);
pub static FALLBACK_2X2_LAPACK_1X1_WINS: AtomicU64 = AtomicU64::new(0);
pub static FALLBACK_2X2_GROWTH_OR_DET: AtomicU64 = AtomicU64::new(0);
pub static SCALAR_TAIL_STEPS: AtomicU64 = AtomicU64::new(0);
pub static PIVOTS_INLINE: AtomicU64 = AtomicU64::new(0);
pub static PIVOTS_SCALAR: AtomicU64 = AtomicU64::new(0);
pub static INLINE_2X2_SWAP_OK: AtomicU64 = AtomicU64::new(0);
pub static SCALAR_2X2_DELAY_GROWTH: AtomicU64 = AtomicU64::new(0);
pub static SCALAR_2X2_DELAY_DET: AtomicU64 = AtomicU64::new(0);
pub static SCALAR_2X2_DELAY_BOTH: AtomicU64 = AtomicU64::new(0);
pub static SCALAR_1X1_DELAY: AtomicU64 = AtomicU64::new(0);
pub static SCALAR_2X2_DELAY_NEGDET: AtomicU64 = AtomicU64::new(0);
pub static SCALAR_1X1_DELAY_TINY: AtomicU64 = AtomicU64::new(0);
pub fn reset() {
for c in [
&PANEL_FULL,
&PANEL_PARTIAL,
&PANEL_DELAYED,
&FALLBACK_2X2_NEED_SWAP_OR_BOUND,
&FALLBACK_2X2_SWAP_1X1_WINS,
&FALLBACK_2X2_LAPACK_1X1_WINS,
&FALLBACK_2X2_GROWTH_OR_DET,
&SCALAR_TAIL_STEPS,
&PIVOTS_INLINE,
&PIVOTS_SCALAR,
&INLINE_2X2_SWAP_OK,
&SCALAR_2X2_DELAY_GROWTH,
&SCALAR_2X2_DELAY_DET,
&SCALAR_2X2_DELAY_BOTH,
&SCALAR_1X1_DELAY,
&SCALAR_2X2_DELAY_NEGDET,
&SCALAR_1X1_DELAY_TINY,
] {
c.store(0, std::sync::atomic::Ordering::Relaxed);
}
}
pub fn snapshot() -> [(&'static str, u64); 17] {
use std::sync::atomic::Ordering::Relaxed;
[
("panel_full", PANEL_FULL.load(Relaxed)),
("panel_partial", PANEL_PARTIAL.load(Relaxed)),
("panel_delayed", PANEL_DELAYED.load(Relaxed)),
(
"fallback_2x2_need_swap_or_bound",
FALLBACK_2X2_NEED_SWAP_OR_BOUND.load(Relaxed),
),
(
"fallback_2x2_swap_1x1_wins",
FALLBACK_2X2_SWAP_1X1_WINS.load(Relaxed),
),
(
"fallback_2x2_lapack_1x1_wins",
FALLBACK_2X2_LAPACK_1X1_WINS.load(Relaxed),
),
(
"fallback_2x2_growth_or_det",
FALLBACK_2X2_GROWTH_OR_DET.load(Relaxed),
),
("scalar_tail_steps", SCALAR_TAIL_STEPS.load(Relaxed)),
("pivots_inline", PIVOTS_INLINE.load(Relaxed)),
("pivots_scalar", PIVOTS_SCALAR.load(Relaxed)),
("inline_2x2_swap_ok", INLINE_2X2_SWAP_OK.load(Relaxed)),
(
"scalar_2x2_delay_growth",
SCALAR_2X2_DELAY_GROWTH.load(Relaxed),
),
("scalar_2x2_delay_det", SCALAR_2X2_DELAY_DET.load(Relaxed)),
("scalar_2x2_delay_both", SCALAR_2X2_DELAY_BOTH.load(Relaxed)),
("scalar_1x1_delay", SCALAR_1X1_DELAY.load(Relaxed)),
(
"scalar_2x2_delay_negdet",
SCALAR_2X2_DELAY_NEGDET.load(Relaxed),
),
("scalar_1x1_delay_tiny", SCALAR_1X1_DELAY_TINY.load(Relaxed)),
]
}
}
pub static PHASE_TIMING_ENABLED: std::sync::atomic::AtomicBool =
std::sync::atomic::AtomicBool::new(false);
pub mod phase_timing {
use std::sync::atomic::{AtomicU64, Ordering::Relaxed};
use std::time::Instant;
pub static ASSEMBLY_NS: AtomicU64 = AtomicU64::new(0);
pub static DENSEFACTOR_NS: AtomicU64 = AtomicU64::new(0);
pub static PANELFACTOR_NS: AtomicU64 = AtomicU64::new(0);
pub static SCHUR_NS: AtomicU64 = AtomicU64::new(0);
pub static SCALARTAIL_NS: AtomicU64 = AtomicU64::new(0);
pub static BUILDROW_NS: AtomicU64 = AtomicU64::new(0);
pub static SCATTER_NS: AtomicU64 = AtomicU64::new(0);
pub static EXTENDADD_NS: AtomicU64 = AtomicU64::new(0);
pub static LEXTRACT_NS: AtomicU64 = AtomicU64::new(0);
pub static CONTRIBEXTRACT_NS: AtomicU64 = AtomicU64::new(0);
pub static CONTRIBZEROFILL_NS: AtomicU64 = AtomicU64::new(0);
pub fn reset() {
for c in [
&ASSEMBLY_NS,
&DENSEFACTOR_NS,
&PANELFACTOR_NS,
&SCHUR_NS,
&SCALARTAIL_NS,
&BUILDROW_NS,
&SCATTER_NS,
&EXTENDADD_NS,
&LEXTRACT_NS,
&CONTRIBEXTRACT_NS,
&CONTRIBZEROFILL_NS,
] {
c.store(0, Relaxed);
}
}
pub fn snapshot() -> (u64, u64, u64, u64, u64) {
(
ASSEMBLY_NS.load(Relaxed),
DENSEFACTOR_NS.load(Relaxed),
PANELFACTOR_NS.load(Relaxed),
SCHUR_NS.load(Relaxed),
SCALARTAIL_NS.load(Relaxed),
)
}
pub fn snapshot_detail() -> (u64, u64, u64, u64, u64) {
(
BUILDROW_NS.load(Relaxed),
SCATTER_NS.load(Relaxed),
EXTENDADD_NS.load(Relaxed),
LEXTRACT_NS.load(Relaxed),
CONTRIBEXTRACT_NS.load(Relaxed),
)
}
pub fn snapshot_contrib_zerofill() -> u64 {
CONTRIBZEROFILL_NS.load(Relaxed)
}
#[inline]
pub fn start() -> Option<Instant> {
super::PHASE_TIMING_ENABLED.load(Relaxed).then(Instant::now)
}
#[inline]
pub fn stop(counter: &AtomicU64, started: Option<Instant>) {
if let Some(t) = started {
counter.fetch_add(t.elapsed().as_nanos() as u64, Relaxed);
}
}
}
#[derive(Default, Debug, Clone)]
pub struct FactorScratch {
pub subdiag: Vec<f64>,
pub d_panel: Vec<f64>,
pub contrib_pool: Option<Vec<f64>>,
}
impl FactorScratch {
pub fn new() -> Self {
Self::default()
}
}
#[inline(always)]
fn diag_inc(counter: &std::sync::atomic::AtomicU64) {
if PANEL_DIAG_ENABLED.load(std::sync::atomic::Ordering::Relaxed) {
counter.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}
}
#[inline(always)]
fn diag_add(counter: &std::sync::atomic::AtomicU64, n: u64) {
if PANEL_DIAG_ENABLED.load(std::sync::atomic::Ordering::Relaxed) {
counter.fetch_add(n, std::sync::atomic::Ordering::Relaxed);
}
}
const SSIDS_DET_SMALL: f64 = 1e-20;
const L_GROWTH_THRESHOLD: f64 = 1e6;
fn flag_growth_for_refinement(l: &[f64], needs_refinement: &mut bool) {
if *needs_refinement {
return;
}
for &v in l {
if v.abs() > L_GROWTH_THRESHOLD {
*needs_refinement = true;
return;
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TppMethod {
#[default]
Plain,
Maxfromm,
}
#[derive(Debug, Clone)]
pub struct BunchKaufmanParams {
pub alpha: f64,
pub zero_tol: f64,
pub zero_tol_2x2: f64,
pub null_pivot_tol: f64,
pub null_pivot_tol_2x2: f64,
pub on_zero_pivot: ZeroPivotAction,
pub pivot_threshold: f64,
pub block_size: usize,
pub fma: bool,
pub tpp_method: TppMethod,
pub static_pivot_floor: f64,
}
#[derive(Debug, Clone)]
pub enum ZeroPivotAction {
ForceAccept,
Fail,
PerturbToEps { abs_floor: f64 },
}
#[inline]
fn perturb_to_floor(d: f64, abs_floor: f64) -> f64 {
let mag = d.abs().max(abs_floor);
if d < 0.0 {
-mag
} else {
mag
}
}
#[inline]
fn perturb_2x2_to_floor(a00: f64, a10: f64, a11: f64, abs_floor: f64) -> Option<(f64, f64)> {
if abs_floor <= 0.0 {
return None;
}
let (lam1, lam2) = sym2_eigenvalues(a00, a10, a11);
let (lam_small, lam_other) = if lam1.abs() <= lam2.abs() {
(lam1, lam2)
} else {
(lam2, lam1)
};
if lam_small.abs() >= abs_floor {
return None;
}
let target = if lam_small > 0.0 {
abs_floor
} else if lam_small < 0.0 {
-abs_floor
} else if lam_other > 0.0 {
abs_floor
} else if lam_other < 0.0 {
-abs_floor
} else {
abs_floor
};
let tau = target - lam_small;
Some((a00 + tau, a11 + tau))
}
impl Default for BunchKaufmanParams {
fn default() -> Self {
let zero_tol = f64::EPSILON;
Self {
alpha: (1.0 + 17f64.sqrt()) / 8.0, zero_tol,
zero_tol_2x2: zero_tol * zero_tol,
null_pivot_tol: zero_tol,
null_pivot_tol_2x2: zero_tol * zero_tol,
on_zero_pivot: ZeroPivotAction::Fail,
pivot_threshold: 0.0,
block_size: 64,
fma: false,
tpp_method: TppMethod::Plain,
static_pivot_floor: 0.0,
}
}
}
#[derive(Debug)]
pub struct Factors {
pub n: usize,
pub l: Vec<f64>,
pub d_diag: Vec<f64>,
pub d_subdiag: Vec<f64>,
pub perm: Vec<usize>,
pub perm_inv: Vec<usize>,
pub d_eq: Vec<f64>,
pub needs_refinement: bool,
pub zero_tol: f64,
pub zero_tol_2x2: f64,
}
pub fn factor(
matrix: &crate::dense::matrix::SymmetricMatrix,
params: &BunchKaufmanParams,
) -> Result<(Factors, Inertia), FeralError> {
matrix.validate()?;
let n = matrix.n;
let d_eq = crate::dense::equilibrate::equilibrate_scaling(matrix);
let mut a = vec![0.0; n * n];
for j in 0..n {
for i in j..n {
a[j * n + i] = d_eq[i] * matrix.data[j * n + i] * d_eq[j];
}
}
let mut perm: Vec<usize> = (0..n).collect();
let mut subdiag = vec![0.0; n];
let mut pos = 0usize;
let mut neg = 0usize;
let mut zero = 0usize;
let mut needs_refinement = false;
let mut n_tiny = 0usize;
let alpha = params.alpha;
let mut k = 0;
let mut fused_gamma0 = 0.0f64;
let mut fused_r = 0usize;
let mut have_fused = false;
while k < n {
let remaining = n - k;
if remaining == 1 {
let mut d = a[k * n + k];
if params.static_pivot_floor > 0.0 && d.abs() < params.static_pivot_floor {
d = perturb_to_floor(d, params.static_pivot_floor);
a[k * n + k] = d;
needs_refinement = true;
n_tiny += 1;
if d > 0.0 {
pos += 1;
} else {
neg += 1;
}
k += 1;
continue;
}
if d.abs() <= params.zero_tol {
match params.on_zero_pivot {
ZeroPivotAction::ForceAccept => {
needs_refinement = true;
zero += 1;
}
ZeroPivotAction::Fail => {
return Err(FeralError::NumericallyRankDeficient);
}
ZeroPivotAction::PerturbToEps { abs_floor } => {
let d_new = perturb_to_floor(d, abs_floor);
a[k * n + k] = d_new;
needs_refinement = true;
n_tiny += 1;
if d_new > 0.0 {
pos += 1;
} else {
neg += 1;
}
}
}
} else if matches!(params.on_zero_pivot, ZeroPivotAction::ForceAccept)
&& d.abs() <= params.null_pivot_tol
{
needs_refinement = true;
if d > 0.0 {
pos += 1;
} else {
neg += 1;
}
} else if d > 0.0 {
pos += 1;
} else {
neg += 1;
}
k += 1;
continue;
}
let (gamma0, r) = if have_fused {
have_fused = false;
(fused_gamma0, fused_r)
} else {
column_offdiag_max(&a, n, k)
};
if gamma0 == 0.0 {
count_1x1_inertia(
&mut a,
n,
k,
params,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_tiny,
)?;
set_l_column_identity(&mut a, n, k);
k += 1;
continue;
}
let akk = a[k * n + k].abs();
if akk >= alpha * gamma0 {
let (ng, nr) = do_1x1_pivot(
&mut a,
n,
k,
gamma0,
params,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_tiny,
)?;
fused_gamma0 = ng;
fused_r = nr;
have_fused = k + 1 < n;
k += 1;
continue;
}
let gamma_r = symmetric_row_offdiag_max(&a, n, k, r);
let arr = a[r * n + r].abs();
if arr >= alpha * gamma_r {
swap_rows_cols(&mut a, n, k, r, &mut perm);
let (ng, nr) = do_1x1_pivot(
&mut a,
n,
k,
gamma_r,
params,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_tiny,
)?;
fused_gamma0 = ng;
fused_r = nr;
have_fused = k + 1 < n;
k += 1;
continue;
}
if akk * gamma_r >= alpha * gamma0 * gamma0 {
let (ng, nr) = do_1x1_pivot(
&mut a,
n,
k,
gamma0,
params,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_tiny,
)?;
fused_gamma0 = ng;
fused_r = nr;
have_fused = k + 1 < n;
k += 1;
continue;
}
if r != k + 1 {
swap_rows_cols(&mut a, n, k + 1, r, &mut perm);
}
let d11_v = a[k * n + k];
let d21_v = a[k * n + (k + 1)];
let d22_v = a[(k + 1) * n + (k + 1)];
let det_v = d11_v * d22_v - d21_v * d21_v;
let absdet = det_v.abs();
let mut rmax = 0.0f64;
let mut tmax = 0.0f64;
for i in (k + 2)..n {
let v0 = a[k * n + i].abs();
if v0 > rmax {
rmax = v0;
}
let v1 = a[(k + 1) * n + i].abs();
if v1 > tmax {
tmax = v1;
}
}
let amax = d21_v.abs();
let u = params.pivot_threshold;
let growth_fail = (d22_v.abs() * rmax + amax * tmax) * u > absdet
|| (d11_v.abs() * tmax + amax * rmax) * u > absdet;
if growth_fail {
let (ng, nr) = do_1x1_pivot(
&mut a,
n,
k,
gamma0,
params,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_tiny,
)?;
fused_gamma0 = ng;
fused_r = nr;
have_fused = k + 1 < n;
k += 1;
continue;
}
let (ng, nr) = do_2x2_pivot(
&mut a,
n,
k,
&mut subdiag,
params,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_tiny,
)?;
fused_gamma0 = ng;
fused_r = nr;
have_fused = k + 2 < n;
k += 2;
}
let mut l = vec![0.0; n * n];
let mut d_diag = vec![0.0; n];
let mut j = 0;
while j < n {
d_diag[j] = a[j * n + j];
l[j * n + j] = 1.0;
if j + 1 < n && subdiag[j] != 0.0 {
d_diag[j + 1] = a[(j + 1) * n + (j + 1)];
l[(j + 1) * n + (j + 1)] = 1.0;
for i in (j + 2)..n {
l[j * n + i] = a[j * n + i];
l[(j + 1) * n + i] = a[(j + 1) * n + i];
}
j += 2;
} else {
for i in (j + 1)..n {
l[j * n + i] = a[j * n + i];
}
j += 1;
}
}
let mut perm_inv = vec![0usize; n];
for (i, &p) in perm.iter().enumerate() {
perm_inv[p] = i;
}
let inertia = Inertia::new(pos, neg, zero);
flag_growth_for_refinement(&l, &mut needs_refinement);
Ok((
Factors {
n,
l,
d_diag,
d_subdiag: subdiag,
perm,
perm_inv,
d_eq,
needs_refinement,
zero_tol: params.zero_tol,
zero_tol_2x2: params.zero_tol_2x2,
},
inertia,
))
}
pub fn factor_single_front(
matrix: &crate::dense::matrix::SymmetricMatrix,
params: &BunchKaufmanParams,
) -> Result<(Factors, Inertia), FeralError> {
matrix.validate()?;
let n = matrix.n;
let d_eq = crate::dense::equilibrate::equilibrate_scaling(matrix);
let mut eq_data = vec![0.0; n * n];
for j in 0..n {
for i in j..n {
eq_data[j * n + i] = d_eq[i] * matrix.data[j * n + i] * d_eq[j];
}
}
let eq_matrix = crate::dense::matrix::SymmetricMatrix { n, data: eq_data };
let front = factor_frontal_blocked(&eq_matrix, n, false, params)?;
debug_assert_eq!(front.nelim, n);
debug_assert_eq!(front.n_delayed, 0);
debug_assert_eq!(front.contrib_dim, 0);
let inertia = front.inertia;
let factors = Factors {
n,
l: front.l,
d_diag: front.d_diag,
d_subdiag: front.d_subdiag,
perm: front.perm,
perm_inv: front.perm_inv,
d_eq,
needs_refinement: front.needs_refinement,
zero_tol: front.zero_tol,
zero_tol_2x2: front.zero_tol_2x2,
};
Ok((factors, inertia))
}
#[derive(Debug)]
pub struct FrontalFactors {
pub nrow: usize,
pub ncol: usize,
pub nelim: usize,
pub l: Vec<f64>,
pub d_diag: Vec<f64>,
pub d_subdiag: Vec<f64>,
pub perm: Vec<usize>,
pub perm_inv: Vec<usize>,
pub contrib: Vec<f64>,
pub contrib_dim: usize,
pub n_delayed: usize,
pub inertia: Inertia,
pub needs_refinement: bool,
pub n_rook_rescues: usize,
pub n_tiny: usize,
pub zero_tol: f64,
pub zero_tol_2x2: f64,
}
#[derive(Debug, Clone, Copy, PartialEq)]
enum PivotOutcome {
Accepted,
Rejected,
Delayed,
AcceptedRook2x2 { d11: f64, d21: f64, d22: f64 },
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PivotStepResult {
Advanced(usize),
Delayed,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PanelStatus {
Full,
ScalarFallback,
ScalarFallbackPeekedNext,
Delayed,
}
#[doc(hidden)]
#[derive(Default, Debug, Clone, Copy)]
pub struct FrontalProfile {
pub alloc_copy_ns: u128,
pub setup_ns: u128,
pub pivot_loop_ns: u128,
pub extract_ns: u128,
pub n_calls: u64,
}
pub fn factor_frontal(
matrix: &crate::dense::matrix::SymmetricMatrix,
ncol: usize,
may_delay: bool,
params: &BunchKaufmanParams,
) -> Result<FrontalFactors, FeralError> {
factor_frontal_with_profile(matrix, ncol, may_delay, params, None)
}
#[doc(hidden)]
pub fn factor_frontal_with_profile(
matrix: &crate::dense::matrix::SymmetricMatrix,
ncol: usize,
may_delay: bool,
params: &BunchKaufmanParams,
mut profile: Option<&mut FrontalProfile>,
) -> Result<FrontalFactors, FeralError> {
matrix.validate()?;
let nrow = matrix.n;
if ncol > nrow {
return Err(FeralError::InvalidInput(format!(
"ncol {} > nrow {}",
ncol, nrow
)));
}
if ncol == 0 {
return Ok(FrontalFactors {
nrow,
ncol: 0,
nelim: 0,
l: Vec::new(),
d_diag: Vec::new(),
d_subdiag: Vec::new(),
perm: (0..nrow).collect(),
perm_inv: (0..nrow).collect(),
contrib: matrix.data.clone(),
contrib_dim: nrow,
n_delayed: 0,
inertia: Inertia {
positive: 0,
negative: 0,
zero: 0,
},
needs_refinement: false,
n_rook_rescues: 0,
n_tiny: 0,
zero_tol: params.zero_tol,
zero_tol_2x2: params.zero_tol_2x2,
});
}
let t0 = profile.as_ref().map(|_| std::time::Instant::now());
let mut scratch_data = vec![0.0; nrow * nrow];
for j in 0..nrow {
for i in j..nrow {
scratch_data[j * nrow + i] = matrix.data[j * nrow + i];
}
}
let mut scratch_matrix = crate::dense::matrix::SymmetricMatrix {
n: nrow,
data: scratch_data,
};
if let (Some(p), Some(t)) = (profile.as_deref_mut(), t0) {
p.alloc_copy_ns += t.elapsed().as_nanos();
}
let mut tmp_scratch = FactorScratch::new();
factor_frontal_in_place_with_scratch_impl(
&mut scratch_matrix,
ncol,
may_delay,
params,
&mut tmp_scratch,
profile,
)
}
pub(crate) fn factor_frontal_in_place_with_scratch(
matrix: &mut crate::dense::matrix::SymmetricMatrix,
ncol: usize,
may_delay: bool,
params: &BunchKaufmanParams,
scratch: &mut FactorScratch,
) -> Result<FrontalFactors, FeralError> {
factor_frontal_in_place_with_scratch_impl(matrix, ncol, may_delay, params, scratch, None)
}
#[allow(clippy::too_many_arguments)]
fn factor_frontal_in_place_with_scratch_impl(
matrix: &mut crate::dense::matrix::SymmetricMatrix,
ncol: usize,
may_delay: bool,
params: &BunchKaufmanParams,
scratch: &mut FactorScratch,
mut profile: Option<&mut FrontalProfile>,
) -> Result<FrontalFactors, FeralError> {
let nrow = matrix.n;
if ncol > nrow {
return Err(FeralError::InvalidInput(format!(
"ncol {} > nrow {}",
ncol, nrow
)));
}
if ncol == 0 {
return Ok(FrontalFactors {
nrow,
ncol: 0,
nelim: 0,
l: Vec::new(),
d_diag: Vec::new(),
d_subdiag: Vec::new(),
perm: (0..nrow).collect(),
perm_inv: (0..nrow).collect(),
contrib: matrix.data.clone(),
contrib_dim: nrow,
n_delayed: 0,
inertia: Inertia {
positive: 0,
negative: 0,
zero: 0,
},
needs_refinement: false,
n_rook_rescues: 0,
n_tiny: 0,
zero_tol: params.zero_tol,
zero_tol_2x2: params.zero_tol_2x2,
});
}
debug_assert!(
matrix.data.iter().enumerate().all(|(idx, &v)| {
let j = idx / nrow;
let i = idx % nrow;
if i < j {
true
} else {
v.is_finite()
}
}),
"factor_frontal_in_place_with_scratch: lower triangle contains NaN/Inf"
);
let a: &mut [f64] = matrix.data.as_mut_slice();
let t0 = profile.as_ref().map(|_| std::time::Instant::now());
let mut perm: Vec<usize> = (0..nrow).collect();
scratch.subdiag.clear();
scratch.subdiag.resize(nrow, 0.0);
let subdiag: &mut [f64] = scratch.subdiag.as_mut_slice();
let mut pos = 0usize;
let mut neg = 0usize;
let mut zero = 0usize;
let mut needs_refinement = false;
let mut n_rook_rescues = 0usize;
let mut n_tiny = 0usize;
if let (Some(p), Some(t)) = (profile.as_deref_mut(), t0) {
p.setup_ns += t.elapsed().as_nanos();
}
let t_pivot = profile.as_ref().map(|_| std::time::Instant::now());
let mut k = 0;
let mut cached_maxfromm: Option<f64> = None;
let mut ncol_eff = ncol;
while k < ncol_eff {
match scalar_pivot_step(
a,
nrow,
ncol_eff,
k,
may_delay,
params,
&mut perm,
subdiag,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_rook_rescues,
&mut n_tiny,
&mut cached_maxfromm,
)? {
PivotStepResult::Advanced(n) => k += n,
PivotStepResult::Delayed => {
delay_swap_to_boundary(a, nrow, k, &mut ncol_eff, &mut perm);
cached_maxfromm = None;
}
}
}
if let (Some(p), Some(t)) = (profile.as_deref_mut(), t_pivot) {
p.pivot_loop_ns += t.elapsed().as_nanos();
}
let t_extract = profile.as_ref().map(|_| std::time::Instant::now());
let nelim = k;
let n_delayed = ncol - nelim;
let _pt_lx = phase_timing::start();
let mut l = vec![0.0; nrow * nelim];
let mut d_diag = vec![0.0; nelim];
let mut j = 0;
while j < nelim {
d_diag[j] = a[j * nrow + j];
l[j * nrow + j] = 1.0;
if j + 1 < nelim && subdiag[j] != 0.0 {
d_diag[j + 1] = a[(j + 1) * nrow + (j + 1)];
l[(j + 1) * nrow + (j + 1)] = 1.0;
for i in (j + 2)..nrow {
l[j * nrow + i] = a[j * nrow + i];
l[(j + 1) * nrow + i] = a[(j + 1) * nrow + i];
}
j += 2;
} else {
for i in (j + 1)..nrow {
l[j * nrow + i] = a[j * nrow + i];
}
j += 1;
}
}
phase_timing::stop(&phase_timing::LEXTRACT_NS, _pt_lx);
let _pt_cx = phase_timing::start();
let cdim = nrow - nelim;
let cdim2 = cdim * cdim;
let mut contrib = scratch.contrib_pool.take().unwrap_or_default();
contrib.clear();
contrib.reserve(cdim2);
let _pt_zf = phase_timing::start();
unsafe {
contrib.set_len(cdim2);
}
phase_timing::stop(&phase_timing::CONTRIBZEROFILL_NS, _pt_zf);
{
let slice: &mut [f64] = contrib.as_mut_slice();
for cj in 0..cdim {
let col_base = cj * cdim;
for ci in 0..cj {
slice[col_base + ci] = 0.0;
}
for ci in cj..cdim {
slice[col_base + ci] = a[(nelim + cj) * nrow + (nelim + ci)];
}
}
}
phase_timing::stop(&phase_timing::CONTRIBEXTRACT_NS, _pt_cx);
let mut perm_inv = vec![0usize; nrow];
for (i, &p) in perm.iter().enumerate() {
perm_inv[p] = i;
}
flag_growth_for_refinement(&l, &mut needs_refinement);
let result = FrontalFactors {
nrow,
ncol,
nelim,
l,
d_diag,
d_subdiag: subdiag[..nelim].to_vec(),
perm,
perm_inv,
contrib,
contrib_dim: cdim,
n_delayed,
inertia: Inertia::new(pos, neg, zero),
needs_refinement,
n_rook_rescues,
n_tiny,
zero_tol: params.zero_tol,
zero_tol_2x2: params.zero_tol_2x2,
};
if let (Some(p), Some(t)) = (profile, t_extract) {
p.extract_ns += t.elapsed().as_nanos();
p.n_calls += 1;
}
Ok(result)
}
pub fn factor_frontal_blocked(
matrix: &crate::dense::matrix::SymmetricMatrix,
ncol: usize,
may_delay: bool,
params: &BunchKaufmanParams,
) -> Result<FrontalFactors, FeralError> {
matrix.validate()?;
let nrow = matrix.n;
let mut scratch_data = vec![0.0; nrow * nrow];
for j in 0..nrow {
for i in j..nrow {
scratch_data[j * nrow + i] = matrix.data[j * nrow + i];
}
}
let mut scratch = crate::dense::matrix::SymmetricMatrix {
n: nrow,
data: scratch_data,
};
factor_frontal_blocked_in_place(&mut scratch, ncol, may_delay, params)
}
pub fn factor_frontal_blocked_in_place(
matrix: &mut crate::dense::matrix::SymmetricMatrix,
ncol: usize,
may_delay: bool,
params: &BunchKaufmanParams,
) -> Result<FrontalFactors, FeralError> {
let mut scratch = FactorScratch::new();
factor_frontal_blocked_in_place_with_scratch(matrix, ncol, may_delay, params, &mut scratch)
}
pub fn factor_frontal_blocked_in_place_with_scratch(
matrix: &mut crate::dense::matrix::SymmetricMatrix,
ncol: usize,
may_delay: bool,
params: &BunchKaufmanParams,
scratch: &mut FactorScratch,
) -> Result<FrontalFactors, FeralError> {
let nrow = matrix.n;
if ncol > nrow {
return Err(FeralError::InvalidInput(format!(
"ncol {} > nrow {}",
ncol, nrow
)));
}
if ncol == 0 {
return Ok(FrontalFactors {
nrow,
ncol: 0,
nelim: 0,
l: Vec::new(),
d_diag: Vec::new(),
d_subdiag: Vec::new(),
perm: (0..nrow).collect(),
perm_inv: (0..nrow).collect(),
contrib: matrix.data.clone(),
contrib_dim: nrow,
n_delayed: 0,
inertia: Inertia {
positive: 0,
negative: 0,
zero: 0,
},
needs_refinement: false,
n_rook_rescues: 0,
n_tiny: 0,
zero_tol: params.zero_tol,
zero_tol_2x2: params.zero_tol_2x2,
});
}
if FORCE_SCALAR_FRONTAL.load(std::sync::atomic::Ordering::Relaxed) {
return factor_frontal(matrix, ncol, may_delay, params);
}
if nrow == crate::dense::block_ldlt32::BLOCK_SIZE
&& ncol == crate::dense::block_ldlt32::BLOCK_SIZE
{
return crate::dense::block_ldlt32::factor_block32(matrix, ncol, may_delay, params);
}
const PANEL_MIN_NCOL: usize = 8;
let bs = params.block_size.min(ncol);
if bs < 2 || ncol < PANEL_MIN_NCOL {
return factor_frontal_in_place_with_scratch(matrix, ncol, may_delay, params, scratch);
}
let a: &mut [f64] = matrix.data.as_mut_slice();
let mut perm: Vec<usize> = (0..nrow).collect();
scratch.subdiag.clear();
scratch.subdiag.resize(nrow, 0.0);
scratch.d_panel.clear();
scratch.d_panel.resize(bs, 0.0);
let subdiag: &mut [f64] = scratch.subdiag.as_mut_slice();
let d_panel: &mut [f64] = scratch.d_panel.as_mut_slice();
let mut pos = 0usize;
let mut neg = 0usize;
let mut zero = 0usize;
let mut needs_refinement = false;
let mut n_rook_rescues = 0usize;
let mut n_tiny = 0usize;
let mut cached_maxfromm: Option<f64> = None;
let mut k = 0;
let mut ncol_eff = ncol;
while k < ncol_eff {
let remaining = ncol_eff - k;
if remaining < PANEL_MIN_NCOL {
diag_inc(&panel_diag::SCALAR_TAIL_STEPS);
let _pt = phase_timing::start();
let step = scalar_pivot_step(
&mut *a,
nrow,
ncol_eff,
k,
may_delay,
params,
&mut perm,
&mut *subdiag,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_rook_rescues,
&mut n_tiny,
&mut cached_maxfromm,
)?;
phase_timing::stop(&phase_timing::SCALARTAIL_NS, _pt);
match step {
PivotStepResult::Advanced(n) => {
diag_add(&panel_diag::PIVOTS_SCALAR, n as u64);
k += n;
}
PivotStepResult::Delayed => {
delay_swap_to_boundary(&mut *a, nrow, k, &mut ncol_eff, &mut perm);
cached_maxfromm = None;
}
}
continue;
}
let panel_cap = bs.min(remaining);
cached_maxfromm = None;
let _pt = phase_timing::start();
let (n_elim, status) = lblt_panel_frontal(
&mut *a,
nrow,
ncol_eff,
k,
panel_cap,
may_delay,
params,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_tiny,
&mut *d_panel,
&mut *subdiag,
&mut perm,
)?;
phase_timing::stop(&phase_timing::PANELFACTOR_NS, _pt);
let j_start = match status {
PanelStatus::Full => k + n_elim,
PanelStatus::ScalarFallback | PanelStatus::Delayed => k + n_elim + 1,
PanelStatus::ScalarFallbackPeekedNext => k + n_elim + 2,
};
let _pt = phase_timing::start();
apply_blocked_schur(
&mut *a, nrow, k, n_elim, j_start, &*d_panel, &*subdiag, params.fma,
);
phase_timing::stop(&phase_timing::SCHUR_NS, _pt);
k += n_elim;
diag_add(&panel_diag::PIVOTS_INLINE, n_elim as u64);
match status {
PanelStatus::Full => diag_inc(&panel_diag::PANEL_FULL),
PanelStatus::ScalarFallback | PanelStatus::ScalarFallbackPeekedNext => {
diag_inc(&panel_diag::PANEL_PARTIAL)
}
PanelStatus::Delayed => diag_inc(&panel_diag::PANEL_DELAYED),
}
match status {
PanelStatus::Full => {}
PanelStatus::ScalarFallback | PanelStatus::ScalarFallbackPeekedNext => {
if k >= ncol_eff {
break;
}
match scalar_pivot_step(
&mut *a,
nrow,
ncol_eff,
k,
may_delay,
params,
&mut perm,
&mut *subdiag,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_rook_rescues,
&mut n_tiny,
&mut cached_maxfromm,
)? {
PivotStepResult::Advanced(n) => {
diag_add(&panel_diag::PIVOTS_SCALAR, n as u64);
k += n;
}
PivotStepResult::Delayed => {
delay_swap_to_boundary(&mut *a, nrow, k, &mut ncol_eff, &mut perm);
cached_maxfromm = None;
}
}
}
PanelStatus::Delayed => {
delay_swap_to_boundary(&mut *a, nrow, k, &mut ncol_eff, &mut perm);
cached_maxfromm = None;
}
}
}
let nelim = k;
let n_delayed = ncol - nelim;
let _pt_lx = phase_timing::start();
let mut l = vec![0.0; nrow * nelim];
let mut d_diag = vec![0.0; nelim];
let mut j = 0;
while j < nelim {
d_diag[j] = a[j * nrow + j];
l[j * nrow + j] = 1.0;
if j + 1 < nelim && subdiag[j] != 0.0 {
d_diag[j + 1] = a[(j + 1) * nrow + (j + 1)];
l[(j + 1) * nrow + (j + 1)] = 1.0;
for i in (j + 2)..nrow {
l[j * nrow + i] = a[j * nrow + i];
l[(j + 1) * nrow + i] = a[(j + 1) * nrow + i];
}
j += 2;
} else {
for i in (j + 1)..nrow {
l[j * nrow + i] = a[j * nrow + i];
}
j += 1;
}
}
phase_timing::stop(&phase_timing::LEXTRACT_NS, _pt_lx);
let _pt_cx = phase_timing::start();
let cdim = nrow - nelim;
let cdim2 = cdim * cdim;
let mut contrib = scratch.contrib_pool.take().unwrap_or_default();
contrib.clear();
contrib.reserve(cdim2);
let _pt_zf = phase_timing::start();
unsafe {
contrib.set_len(cdim2);
}
phase_timing::stop(&phase_timing::CONTRIBZEROFILL_NS, _pt_zf);
{
let slice: &mut [f64] = contrib.as_mut_slice();
for cj in 0..cdim {
let col_base = cj * cdim;
for ci in 0..cj {
slice[col_base + ci] = 0.0;
}
for ci in cj..cdim {
slice[col_base + ci] = a[(nelim + cj) * nrow + (nelim + ci)];
}
}
}
phase_timing::stop(&phase_timing::CONTRIBEXTRACT_NS, _pt_cx);
let mut perm_inv = vec![0usize; nrow];
for (i, &p) in perm.iter().enumerate() {
perm_inv[p] = i;
}
flag_growth_for_refinement(&l, &mut needs_refinement);
Ok(FrontalFactors {
nrow,
ncol,
nelim,
l,
d_diag,
d_subdiag: subdiag[..nelim].to_vec(),
perm,
perm_inv,
contrib,
contrib_dim: cdim,
n_delayed,
inertia: Inertia::new(pos, neg, zero),
needs_refinement,
n_rook_rescues,
n_tiny,
zero_tol: params.zero_tol,
zero_tol_2x2: params.zero_tol_2x2,
})
}
pub fn factor_diagonal(
matrix: &crate::dense::matrix::SymmetricMatrix,
params: &BunchKaufmanParams,
) -> Result<(Factors, Inertia), FeralError> {
matrix.validate()?;
let n = matrix.n;
let d_eq = crate::dense::equilibrate::equilibrate_scaling(matrix);
let mut scratch_data = vec![0.0; n * n];
for j in 0..n {
for i in j..n {
scratch_data[j * n + i] = d_eq[i] * matrix.data[j * n + i] * d_eq[j];
}
}
let mut scratch = crate::dense::matrix::SymmetricMatrix {
n,
data: scratch_data,
};
let frontal = factor_frontal_diagonal_in_place(&mut scratch, n, params)?;
debug_assert_eq!(frontal.nelim, n, "SQD must eliminate every column");
debug_assert_eq!(frontal.contrib_dim, 0, "SQD leaves no contribution");
Ok((
Factors {
n,
l: frontal.l,
d_diag: frontal.d_diag,
d_subdiag: frontal.d_subdiag,
perm: frontal.perm,
perm_inv: frontal.perm_inv,
d_eq,
needs_refinement: frontal.needs_refinement,
zero_tol: params.zero_tol,
zero_tol_2x2: params.zero_tol_2x2,
},
frontal.inertia,
))
}
pub fn factor_frontal_diagonal_in_place(
matrix: &mut crate::dense::matrix::SymmetricMatrix,
ncol: usize,
params: &BunchKaufmanParams,
) -> Result<FrontalFactors, FeralError> {
let nrow = matrix.n;
if ncol > nrow {
return Err(FeralError::InvalidInput(format!(
"ncol {} > nrow {}",
ncol, nrow
)));
}
if ncol == 0 {
return Ok(FrontalFactors {
nrow,
ncol: 0,
nelim: 0,
l: Vec::new(),
d_diag: Vec::new(),
d_subdiag: Vec::new(),
perm: (0..nrow).collect(),
perm_inv: (0..nrow).collect(),
contrib: matrix.data.clone(),
contrib_dim: nrow,
n_delayed: 0,
inertia: Inertia {
positive: 0,
negative: 0,
zero: 0,
},
needs_refinement: false,
n_rook_rescues: 0,
n_tiny: 0,
zero_tol: params.zero_tol,
zero_tol_2x2: params.zero_tol_2x2,
});
}
let a = matrix.data.as_mut_slice();
let mut pos = 0usize;
let mut neg = 0usize;
let fma = params.fma;
let l_growth_bound = 1.0 / f64::EPSILON.sqrt();
for k in 0..ncol {
let d = a[k * nrow + k];
if d.abs() <= params.zero_tol {
return Err(FeralError::SqdContractViolated {
column: k,
pivot: d,
});
}
do_1x1_update(a, nrow, k, fma);
let col_base = k * nrow;
let mut max_l: f64 = 0.0;
for i in (k + 1)..nrow {
let v = a[col_base + i].abs();
if v > max_l {
max_l = v;
}
}
if max_l > l_growth_bound {
return Err(FeralError::SqdContractViolated {
column: k,
pivot: d,
});
}
if d > 0.0 {
pos += 1;
} else {
neg += 1;
}
}
let nelim = ncol;
let mut l = vec![0.0; nrow * nelim];
let mut d_diag = vec![0.0; nelim];
for j in 0..nelim {
d_diag[j] = a[j * nrow + j];
l[j * nrow + j] = 1.0;
for i in (j + 1)..nrow {
l[j * nrow + i] = a[j * nrow + i];
}
}
let cdim = nrow - nelim;
let mut contrib = vec![0.0; cdim * cdim];
for cj in 0..cdim {
for ci in cj..cdim {
contrib[cj * cdim + ci] = a[(nelim + cj) * nrow + (nelim + ci)];
}
}
let perm: Vec<usize> = (0..nrow).collect();
let perm_inv = perm.clone();
let mut needs_refinement = false;
flag_growth_for_refinement(&l, &mut needs_refinement);
Ok(FrontalFactors {
nrow,
ncol,
nelim,
l,
d_diag,
d_subdiag: vec![0.0; nelim],
perm,
perm_inv,
contrib,
contrib_dim: cdim,
n_delayed: 0,
inertia: Inertia::new(pos, neg, 0),
needs_refinement,
n_rook_rescues: 0,
n_tiny: 0,
zero_tol: params.zero_tol,
zero_tol_2x2: params.zero_tol_2x2,
})
}
#[allow(clippy::too_many_arguments)]
fn lblt_panel_frontal(
a: &mut [f64],
nrow: usize,
ncol: usize,
k: usize,
bs: usize,
may_delay: bool,
params: &BunchKaufmanParams,
pos: &mut usize,
neg: &mut usize,
zero: &mut usize,
needs_refinement: &mut bool,
n_tiny: &mut usize,
d_panel: &mut [f64],
subdiag: &mut [f64],
perm: &mut [usize],
) -> Result<(usize, PanelStatus), FeralError> {
let alpha_bk = params.alpha;
let cap = bs;
let mut c = 0usize;
while c < cap {
let col = k + c;
peek_ahead_column(a, nrow, k, c, d_panel, subdiag, params.fma);
let mut gamma0 = 0.0f64;
let mut r = col + 1;
for i in (col + 1)..nrow {
let v = a[col * nrow + i].abs();
if v > gamma0 {
gamma0 = v;
r = i;
}
}
if gamma0 == 0.0 {
count_1x1_inertia(
a,
nrow,
col,
params,
pos,
neg,
zero,
needs_refinement,
n_tiny,
)?;
set_l_column_identity(a, nrow, col);
d_panel[c] = a[col * nrow + col];
c += 1;
continue;
}
let akk = a[col * nrow + col].abs();
if akk < alpha_bk * gamma0 {
let need_swap = r > col + 1;
let bounds_ok = col + 1 < ncol
&& c + 1 < cap
&& (!need_swap || r < ncol)
&& !DISABLE_PANEL_INLINE_2X2.load(std::sync::atomic::Ordering::Relaxed);
let allowed = bounds_ok && (!need_swap || c == 0);
if !allowed {
diag_inc(&panel_diag::FALLBACK_2X2_NEED_SWAP_OR_BOUND);
return Ok((c, PanelStatus::ScalarFallback));
}
if need_swap {
let gamma_r_pre = symmetric_row_offdiag_max(a, nrow, col, r);
let arr_pre = a[r * nrow + r].abs();
if arr_pre >= alpha_bk * gamma_r_pre {
diag_inc(&panel_diag::FALLBACK_2X2_SWAP_1X1_WINS);
return Ok((c, PanelStatus::ScalarFallback));
}
if akk * gamma_r_pre >= alpha_bk * gamma0 * gamma0 {
diag_inc(&panel_diag::FALLBACK_2X2_LAPACK_1X1_WINS);
return Ok((c, PanelStatus::ScalarFallback));
}
swap_rows_cols(a, nrow, col + 1, r, perm);
} else {
let r_idx = col + 1;
peek_ahead_replay(a, nrow, k, c, r_idx, d_panel, subdiag, params.fma);
let gamma_r = symmetric_row_offdiag_max(a, nrow, col, r_idx);
let arr = a[r_idx * nrow + r_idx].abs();
if arr >= alpha_bk * gamma_r {
diag_inc(&panel_diag::FALLBACK_2X2_SWAP_1X1_WINS);
return Ok((c, PanelStatus::ScalarFallbackPeekedNext));
}
if akk * gamma_r >= alpha_bk * gamma0 * gamma0 {
diag_inc(&panel_diag::FALLBACK_2X2_LAPACK_1X1_WINS);
return Ok((c, PanelStatus::ScalarFallbackPeekedNext));
}
}
let d11 = a[col * nrow + col];
let d21 = a[col * nrow + (col + 1)];
let d22 = a[(col + 1) * nrow + (col + 1)];
let det = d11 * d22 - d21 * d21;
let mut rmax = 0.0f64;
let mut tmax = 0.0f64;
for i in (col + 2)..nrow {
let v0 = a[col * nrow + i].abs();
if v0 > rmax {
rmax = v0;
}
let v1 = a[(col + 1) * nrow + i].abs();
if v1 > tmax {
tmax = v1;
}
}
let amax = d21.abs();
let absdet = det.abs();
let u = params.pivot_threshold;
let growth_fail = (d22.abs() * rmax + amax * tmax) * u > absdet
|| (d11.abs() * tmax + amax * rmax) * u > absdet;
let max_piv = d11.abs().max(d21.abs()).max(d22.abs());
let det_floor_fail = if max_piv < SSIDS_DET_SMALL {
true
} else {
let det_scale = 1.0 / max_piv;
let detpiv0 = (d11 * det_scale) * d22;
let detpiv1 = (d21 * det_scale) * d21;
let detpiv = detpiv0 - detpiv1;
let cancel_floor = SSIDS_DET_SMALL
.max(detpiv0.abs() * 0.5)
.max(detpiv1.abs() * 0.5);
detpiv.abs() < cancel_floor
};
if growth_fail || det_floor_fail {
diag_inc(&panel_diag::FALLBACK_2X2_GROWTH_OR_DET);
if need_swap {
return Ok((c, PanelStatus::ScalarFallback));
}
return Ok((c, PanelStatus::ScalarFallbackPeekedNext));
}
if need_swap {
diag_inc(&panel_diag::INLINE_2X2_SWAP_OK);
}
let pivot_inertia = count_2x2_inertia_val(d11, d21, d22);
*pos += pivot_inertia.positive;
*neg += pivot_inertia.negative;
*zero += pivot_inertia.zero;
d_panel[c] = d11;
d_panel[c + 1] = d22;
subdiag[k + c] = d21;
if det.abs() != 0.0 {
let inv_det = 1.0 / det;
for i in (col + 2)..nrow {
let a_ik = a[col * nrow + i];
let a_ik1 = a[(col + 1) * nrow + i];
a[col * nrow + i] = (d22 * a_ik - d21 * a_ik1) * inv_det;
a[(col + 1) * nrow + i] = (d11 * a_ik1 - d21 * a_ik) * inv_det;
}
}
c += 2;
continue;
}
let outcome = try_reject_1x1_frontal(
a,
nrow,
col,
gamma0,
may_delay,
params,
pos,
neg,
zero,
needs_refinement,
n_tiny,
)?;
match outcome {
PivotOutcome::Accepted => {
let d = a[col * nrow + col];
if d.abs() != 0.0 {
let inv_d = 1.0 / d;
for i in (col + 1)..nrow {
a[col * nrow + i] *= inv_d;
}
}
d_panel[c] = d;
}
PivotOutcome::Rejected => {
d_panel[c] = 0.0;
}
PivotOutcome::Delayed => {
return Ok((c, PanelStatus::Delayed));
}
PivotOutcome::AcceptedRook2x2 { .. } => {
unreachable!("panel path never enables rook rescue")
}
}
c += 1;
}
Ok((c, PanelStatus::Full))
}
fn peek_ahead_column(
a: &mut [f64],
nrow: usize,
k: usize,
c: usize,
d_panel: &[f64],
subdiag: &[f64],
fma: bool,
) {
peek_ahead_replay(a, nrow, k, c, k + c, d_panel, subdiag, fma);
}
#[allow(clippy::too_many_arguments)]
fn peek_ahead_replay(
a: &mut [f64],
nrow: usize,
k: usize,
n_committed: usize,
target_col: usize,
d_panel: &[f64],
subdiag: &[f64],
fma: bool,
) {
let col = target_col;
let mut q = 0usize;
while q < n_committed {
if q + 1 < n_committed && subdiag[k + q] != 0.0 {
let d11 = d_panel[q];
let d22 = d_panel[q + 1];
let d21 = subdiag[k + q];
let q_col = k + q;
let q1_col = k + q + 1;
let l_jq = a[q_col * nrow + col];
let l_jq1 = a[q1_col * nrow + col];
let dl_jq = d11 * l_jq + d21 * l_jq1;
let dl_jq1 = d21 * l_jq + d22 * l_jq1;
let (before, rest) = a.split_at_mut(col * nrow);
let src0 = &before[q_col * nrow + col..q_col * nrow + nrow];
let src1 = &before[q1_col * nrow + col..q1_col * nrow + nrow];
let dst = &mut rest[col..nrow];
if fma {
schur_kernel::axpy2_minus_unroll4(dst, src0, dl_jq, src1, dl_jq1);
} else {
schur_kernel::axpy2_minus_unroll4_nofma(dst, src0, dl_jq, src1, dl_jq1);
}
q += 2;
continue;
}
let d_q = d_panel[q];
if d_q.abs() == 0.0 {
q += 1;
continue;
}
let q_col = k + q;
let l_jk = a[q_col * nrow + col];
let alpha = l_jk * d_q;
if alpha == 0.0 {
q += 1;
continue;
}
let (before, rest) = a.split_at_mut(col * nrow);
let src = &before[q_col * nrow + col..q_col * nrow + nrow];
let dst = &mut rest[col..nrow];
if fma {
schur_kernel::axpy_minus_unroll4(dst, src, alpha);
} else {
schur_kernel::axpy_minus_unroll4_nofma(dst, src, alpha);
}
q += 1;
}
}
#[allow(clippy::too_many_arguments)]
fn apply_blocked_schur(
a: &mut [f64],
nrow: usize,
k: usize,
n_elim: usize,
j_start: usize,
d_panel: &[f64],
subdiag: &[f64],
fma: bool,
) {
if n_elim == 0 || j_start >= nrow {
return;
}
const W2_RANK_BS_MIN: usize = 2;
let any_zero_d = d_panel.iter().take(n_elim).any(|&d| d.abs() == 0.0);
let has_2x2 = subdiag[k..k + n_elim].iter().any(|&s| s != 0.0);
if !any_zero_d && !has_2x2 && n_elim >= W2_RANK_BS_MIN {
apply_blocked_schur_panel(a, nrow, k, n_elim, j_start, d_panel, fma);
return;
}
let mut q = 0usize;
while q < n_elim {
if q + 1 < n_elim && subdiag[k + q] != 0.0 {
let d11 = d_panel[q];
let d22 = d_panel[q + 1];
let d21 = subdiag[k + q];
let q_col = k + q;
let q1_col = k + q + 1;
for j in j_start..nrow {
let l_jq = a[q_col * nrow + j];
let l_jq1 = a[q1_col * nrow + j];
let dl_jq = d11 * l_jq + d21 * l_jq1;
let dl_jq1 = d21 * l_jq + d22 * l_jq1;
let (before, rest) = a.split_at_mut(j * nrow);
let src0 = &before[q_col * nrow + j..q_col * nrow + nrow];
let src1 = &before[q1_col * nrow + j..q1_col * nrow + nrow];
let dst = &mut rest[j..nrow];
if fma {
schur_kernel::axpy2_minus_unroll4(dst, src0, dl_jq, src1, dl_jq1);
} else {
schur_kernel::axpy2_minus_unroll4_nofma(dst, src0, dl_jq, src1, dl_jq1);
}
}
q += 2;
} else {
let d_q = d_panel[q];
if d_q.abs() == 0.0 {
q += 1;
continue;
}
let q_col = k + q;
for j in j_start..nrow {
let l_jk = a[q_col * nrow + j];
let alpha = l_jk * d_q;
if alpha == 0.0 {
continue;
}
let (before, rest) = a.split_at_mut(j * nrow);
let src = &before[q_col * nrow + j..q_col * nrow + nrow];
let dst = &mut rest[j..nrow];
if fma {
schur_kernel::axpy_minus_unroll4(dst, src, alpha);
} else {
schur_kernel::axpy_minus_unroll4_nofma(dst, src, alpha);
}
}
q += 1;
}
}
}
fn apply_blocked_schur_panel(
a: &mut [f64],
nrow: usize,
k: usize,
n_elim: usize,
j_start: usize,
d_panel: &[f64],
fma: bool,
) {
const MAX_N_ELIM: usize = 128;
assert!(
n_elim <= MAX_N_ELIM,
"apply_blocked_schur_panel: n_elim {} exceeds MAX_N_ELIM {}",
n_elim,
MAX_N_ELIM
);
let mut alphas0_buf = [0.0f64; MAX_N_ELIM];
let mut alphas1_buf = [0.0f64; MAX_N_ELIM];
let mut alphas2_buf = [0.0f64; MAX_N_ELIM];
let mut alphas3_buf = [0.0f64; MAX_N_ELIM];
let mut j = j_start;
while j + 3 < nrow {
let alphas0 = &mut alphas0_buf[..n_elim];
let alphas1 = &mut alphas1_buf[..n_elim];
let alphas2 = &mut alphas2_buf[..n_elim];
let alphas3 = &mut alphas3_buf[..n_elim];
let mut all_zero = true;
for q in 0..n_elim {
let q_col = k + q;
let base = q_col * nrow;
let l_jk0 = a[base + j];
let l_jk1 = a[base + j + 1];
let l_jk2 = a[base + j + 2];
let l_jk3 = a[base + j + 3];
let d_q = d_panel[q];
let alpha0 = l_jk0 * d_q;
let alpha1 = l_jk1 * d_q;
let alpha2 = l_jk2 * d_q;
let alpha3 = l_jk3 * d_q;
alphas0[q] = alpha0;
alphas1[q] = alpha1;
alphas2[q] = alpha2;
alphas3[q] = alpha3;
if alpha0 != 0.0 || alpha1 != 0.0 || alpha2 != 0.0 || alpha3 != 0.0 {
all_zero = false;
}
}
if !all_zero {
let (before, rest) = a.split_at_mut(j * nrow);
let (col_j, rest1) = rest.split_at_mut(nrow);
let (col_j1, rest2) = rest1.split_at_mut(nrow);
let (col_j2, col_j3_and_after) = rest2.split_at_mut(nrow);
let dst0 = &mut col_j[j..];
let dst1 = &mut col_j1[(j + 1)..nrow];
let dst2 = &mut col_j2[(j + 2)..nrow];
let dst3 = &mut col_j3_and_after[(j + 3)..nrow];
if fma {
schur_kernel::schur_panel_minus_fma_strided_quad(
dst0, dst1, dst2, dst3, before, k, n_elim, nrow, j, alphas0, alphas1, alphas2,
alphas3,
);
} else {
schur_kernel::schur_panel_minus_nofma_strided_quad(
dst0, dst1, dst2, dst3, before, k, n_elim, nrow, j, alphas0, alphas1, alphas2,
alphas3,
);
}
}
j += 4;
}
if j + 1 < nrow {
let alphas0 = &mut alphas0_buf[..n_elim];
let alphas1 = &mut alphas1_buf[..n_elim];
let mut all_zero = true;
for q in 0..n_elim {
let q_col = k + q;
let l_jk0 = a[q_col * nrow + j];
let l_jk1 = a[q_col * nrow + j + 1];
let d_q = d_panel[q];
let alpha0 = l_jk0 * d_q;
let alpha1 = l_jk1 * d_q;
alphas0[q] = alpha0;
alphas1[q] = alpha1;
if alpha0 != 0.0 || alpha1 != 0.0 {
all_zero = false;
}
}
if !all_zero {
let (before, rest) = a.split_at_mut(j * nrow);
let (col_j, after_j) = rest.split_at_mut(nrow);
let dst0 = &mut col_j[j..];
let dst1 = &mut after_j[(j + 1)..nrow];
if fma {
schur_kernel::schur_panel_minus_fma_strided_dual(
dst0, dst1, before, k, n_elim, nrow, j, alphas0, alphas1,
);
} else {
schur_kernel::schur_panel_minus_nofma_strided_dual(
dst0, dst1, before, k, n_elim, nrow, j, alphas0, alphas1,
);
}
}
j += 2;
}
if j < nrow {
let alphas = &mut alphas0_buf[..n_elim];
let mut all_zero_alpha = true;
for q in 0..n_elim {
let q_col = k + q;
let l_jk = a[q_col * nrow + j];
let d_q = d_panel[q];
let alpha = l_jk * d_q;
alphas[q] = alpha;
if alpha != 0.0 {
all_zero_alpha = false;
}
}
if !all_zero_alpha {
let trailing_len = nrow - j;
let (before, rest) = a.split_at_mut(j * nrow);
let dst = &mut rest[j..nrow];
if fma {
schur_kernel::schur_panel_minus_fma_strided(
dst,
before,
k,
n_elim,
nrow,
j,
trailing_len,
alphas,
);
} else {
schur_kernel::schur_panel_minus_nofma_strided(
dst,
before,
k,
n_elim,
nrow,
j,
trailing_len,
alphas,
);
}
}
}
}
#[inline]
fn capture_maxfromm_col(a: &[f64], n: usize, col: usize) -> Option<f64> {
if col + 1 >= n {
return None;
}
let mut mf = 0.0f64;
for i in (col + 1)..n {
let v = a[col * n + i].abs();
if v > mf {
mf = v;
}
}
Some(mf)
}
#[inline]
#[allow(clippy::too_many_arguments)]
fn finish_1x1_outcome(
outcome: PivotOutcome,
a: &mut [f64],
nrow: usize,
k: usize,
subdiag: &mut [f64],
pos: &mut usize,
neg: &mut usize,
zero: &mut usize,
fma: bool,
maxfromm: Option<&mut Option<f64>>,
) -> PivotStepResult {
match outcome {
PivotOutcome::Accepted => {
do_1x1_update(a, nrow, k, fma);
if let Some(slot) = maxfromm {
*slot = capture_maxfromm_col(a, nrow, k + 1);
}
PivotStepResult::Advanced(1)
}
PivotOutcome::Rejected => {
if let Some(slot) = maxfromm {
*slot = None;
}
PivotStepResult::Advanced(1)
}
PivotOutcome::Delayed => {
if let Some(slot) = maxfromm {
*slot = None;
}
PivotStepResult::Delayed
}
PivotOutcome::AcceptedRook2x2 { d11, d21, d22 } => {
let inertia = count_2x2_inertia_val(d11, d21, d22);
*pos += inertia.positive;
*neg += inertia.negative;
*zero += inertia.zero;
subdiag[k] = d21;
do_2x2_update(a, nrow, k, d11, d21, d22, fma);
if let Some(slot) = maxfromm {
*slot = None;
}
PivotStepResult::Advanced(2)
}
}
}
#[allow(clippy::too_many_arguments)]
fn scalar_pivot_step(
a: &mut [f64],
nrow: usize,
ncol: usize,
k: usize,
may_delay: bool,
params: &BunchKaufmanParams,
perm: &mut [usize],
subdiag: &mut [f64],
pos: &mut usize,
neg: &mut usize,
zero: &mut usize,
needs_refinement: &mut bool,
n_rook_rescues: &mut usize,
n_tiny: &mut usize,
cached_maxfromm: &mut Option<f64>,
) -> Result<PivotStepResult, FeralError> {
let alpha = params.alpha;
let remaining = ncol - k;
let use_maxfromm = matches!(params.tpp_method, TppMethod::Maxfromm);
let cached = cached_maxfromm.take();
if remaining == 1 {
let col_max = if let Some(mf) = cached {
mf
} else {
let mut m = 0.0f64;
for i in (k + 1)..nrow {
let v = a[k * nrow + i].abs();
if v > m {
m = v;
}
}
m
};
let outcome = try_reject_1x1_frontal(
a,
nrow,
k,
col_max,
may_delay,
params,
pos,
neg,
zero,
needs_refinement,
n_tiny,
)?;
match outcome {
PivotOutcome::Accepted => do_1x1_update(a, nrow, k, params.fma),
PivotOutcome::Rejected => {}
PivotOutcome::Delayed => return Ok(PivotStepResult::Delayed),
PivotOutcome::AcceptedRook2x2 { .. } => {
unreachable!("remaining==1 never triggers rook rescue")
}
}
return Ok(PivotStepResult::Advanced(1));
}
if use_maxfromm {
if let Some(mf) = cached {
let akk = a[k * nrow + k].abs();
if akk >= alpha * mf {
let outcome = try_reject_1x1_with_rook_rescue(
a,
nrow,
ncol,
k,
mf,
may_delay,
params,
perm,
pos,
neg,
zero,
needs_refinement,
n_rook_rescues,
n_tiny,
)?;
return Ok(finish_1x1_outcome(
outcome,
a,
nrow,
k,
subdiag,
pos,
neg,
zero,
params.fma,
Some(cached_maxfromm),
));
}
}
}
let (gamma0, r) = {
let mut max_val = 0.0f64;
let mut max_row = k + 1;
for i in (k + 1)..ncol {
let v = a[k * nrow + i].abs();
if v > max_val {
max_val = v;
max_row = i;
}
}
for i in ncol..nrow {
let v = a[k * nrow + i].abs();
if v > max_val {
max_val = v;
max_row = i;
}
}
(max_val, max_row)
};
if gamma0 == 0.0 {
count_1x1_inertia(a, nrow, k, params, pos, neg, zero, needs_refinement, n_tiny)?;
set_l_column_identity(a, nrow, k);
return Ok(PivotStepResult::Advanced(1));
}
let akk = a[k * nrow + k].abs();
if akk >= alpha * gamma0 {
let outcome = try_reject_1x1_with_rook_rescue(
a,
nrow,
ncol,
k,
gamma0,
may_delay,
params,
perm,
pos,
neg,
zero,
needs_refinement,
n_rook_rescues,
n_tiny,
)?;
return Ok(finish_1x1_outcome(
outcome,
a,
nrow,
k,
subdiag,
pos,
neg,
zero,
params.fma,
if use_maxfromm {
Some(&mut *cached_maxfromm)
} else {
None
},
));
}
let gamma_r = symmetric_row_offdiag_max(a, nrow, k, r);
let arr = a[r * nrow + r].abs();
let r_is_fully_summed = r < ncol;
if r_is_fully_summed && arr >= alpha * gamma_r {
swap_rows_cols(a, nrow, k, r, perm);
let outcome = try_reject_1x1_with_rook_rescue(
a,
nrow,
ncol,
k,
gamma_r,
may_delay,
params,
perm,
pos,
neg,
zero,
needs_refinement,
n_rook_rescues,
n_tiny,
)?;
return Ok(finish_1x1_outcome(
outcome,
a,
nrow,
k,
subdiag,
pos,
neg,
zero,
params.fma,
if use_maxfromm {
Some(&mut *cached_maxfromm)
} else {
None
},
));
}
if akk * gamma_r >= alpha * gamma0 * gamma0 {
let outcome = try_reject_1x1_with_rook_rescue(
a,
nrow,
ncol,
k,
gamma0,
may_delay,
params,
perm,
pos,
neg,
zero,
needs_refinement,
n_rook_rescues,
n_tiny,
)?;
return Ok(finish_1x1_outcome(
outcome,
a,
nrow,
k,
subdiag,
pos,
neg,
zero,
params.fma,
if use_maxfromm {
Some(&mut *cached_maxfromm)
} else {
None
},
));
}
let partner = if r_is_fully_summed && k + 1 < ncol {
Some(r)
} else if k + 1 < ncol && a[k * nrow + (k + 1)] != 0.0 {
Some(k + 1)
} else {
None
};
if let Some(partner) = partner {
if partner != k + 1 {
swap_rows_cols(a, nrow, k + 1, partner, perm);
}
let mut d11 = a[k * nrow + k];
let d21 = a[k * nrow + (k + 1)];
let mut d22 = a[(k + 1) * nrow + (k + 1)];
if let Some((new_d11, new_d22)) =
perturb_2x2_to_floor(d11, d21, d22, params.static_pivot_floor)
{
d11 = new_d11;
d22 = new_d22;
a[k * nrow + k] = d11;
a[(k + 1) * nrow + (k + 1)] = d22;
*needs_refinement = true;
*n_tiny += 1;
}
let det = d11 * d22 - d21 * d21;
let mut rmax = 0.0f64;
let mut tmax = 0.0f64;
for i in (k + 2)..nrow {
let v0 = a[k * nrow + i].abs();
if v0 > rmax {
rmax = v0;
}
let v1 = a[(k + 1) * nrow + i].abs();
if v1 > tmax {
tmax = v1;
}
}
let amax = d21.abs();
let absdet = det.abs();
let u = params.pivot_threshold;
let growth_fail = (d22.abs() * rmax + amax * tmax) * u > absdet
|| (d11.abs() * tmax + amax * rmax) * u > absdet;
let max_piv = d11.abs().max(d21.abs()).max(d22.abs());
let det_floor_fail = if max_piv < SSIDS_DET_SMALL {
true
} else {
let det_scale = 1.0 / max_piv;
let detpiv0 = (d11 * det_scale) * d22;
let detpiv1 = (d21 * det_scale) * d21;
let detpiv = detpiv0 - detpiv1;
let cancel_floor = SSIDS_DET_SMALL
.max(detpiv0.abs() * 0.5)
.max(detpiv1.abs() * 0.5);
detpiv.abs() < cancel_floor
};
if growth_fail || det_floor_fail {
if may_delay {
if growth_fail && det_floor_fail {
diag_inc(&panel_diag::SCALAR_2X2_DELAY_BOTH);
} else if growth_fail {
diag_inc(&panel_diag::SCALAR_2X2_DELAY_GROWTH);
} else {
diag_inc(&panel_diag::SCALAR_2X2_DELAY_DET);
}
if det < 0.0 {
diag_inc(&panel_diag::SCALAR_2X2_DELAY_NEGDET);
}
return Ok(PivotStepResult::Delayed);
}
if det_floor_fail {
match params.on_zero_pivot {
ZeroPivotAction::Fail => {
return Err(FeralError::NumericallyRankDeficient);
}
ZeroPivotAction::ForceAccept | ZeroPivotAction::PerturbToEps { .. } => {
*needs_refinement = true;
}
}
}
let outcome = try_reject_1x1_with_rook_rescue(
a,
nrow,
ncol,
k,
gamma0,
may_delay,
params,
perm,
pos,
neg,
zero,
needs_refinement,
n_rook_rescues,
n_tiny,
)?;
return Ok(finish_1x1_outcome(
outcome,
a,
nrow,
k,
subdiag,
pos,
neg,
zero,
params.fma,
if use_maxfromm {
Some(&mut *cached_maxfromm)
} else {
None
},
));
}
let pivot_inertia = count_2x2_inertia_val(d11, d21, d22);
*pos += pivot_inertia.positive;
*neg += pivot_inertia.negative;
*zero += pivot_inertia.zero;
subdiag[k] = d21;
do_2x2_update(a, nrow, k, d11, d21, d22, params.fma);
Ok(PivotStepResult::Advanced(2))
} else {
let outcome = try_reject_1x1_with_rook_rescue(
a,
nrow,
ncol,
k,
gamma0,
may_delay,
params,
perm,
pos,
neg,
zero,
needs_refinement,
n_rook_rescues,
n_tiny,
)?;
Ok(finish_1x1_outcome(
outcome,
a,
nrow,
k,
subdiag,
pos,
neg,
zero,
params.fma,
if use_maxfromm {
Some(&mut *cached_maxfromm)
} else {
None
},
))
}
}
#[allow(clippy::too_many_arguments)]
fn try_reject_1x1_frontal(
a: &mut [f64],
nrow: usize,
k: usize,
col_max: f64,
may_delay: bool,
params: &BunchKaufmanParams,
pos: &mut usize,
neg: &mut usize,
zero: &mut usize,
needs_refinement: &mut bool,
n_tiny: &mut usize,
) -> Result<PivotOutcome, FeralError> {
let d = a[k * nrow + k];
if params.static_pivot_floor > 0.0 && d.abs() < params.static_pivot_floor {
let d_new = perturb_to_floor(d, params.static_pivot_floor);
a[k * nrow + k] = d_new;
*needs_refinement = true;
*n_tiny += 1;
if d_new > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
return Ok(PivotOutcome::Accepted);
}
let threshold = (params.pivot_threshold * col_max).max(params.null_pivot_tol);
if d.abs() <= threshold {
if may_delay {
diag_inc(&panel_diag::SCALAR_1X1_DELAY);
if d.abs() <= params.zero_tol {
diag_inc(&panel_diag::SCALAR_1X1_DELAY_TINY);
}
return Ok(PivotOutcome::Delayed);
}
if d.abs() <= params.zero_tol {
match params.on_zero_pivot {
ZeroPivotAction::ForceAccept => {
*needs_refinement = true;
*zero += 1;
for i in (k + 1)..nrow {
a[k * nrow + i] = 0.0;
}
a[k * nrow + k] = 0.0;
return Ok(PivotOutcome::Rejected);
}
ZeroPivotAction::Fail => return Err(FeralError::NumericallyRankDeficient),
ZeroPivotAction::PerturbToEps { abs_floor } => {
let d_new = perturb_to_floor(d, abs_floor);
a[k * nrow + k] = d_new;
*needs_refinement = true;
*n_tiny += 1;
if d_new > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
return Ok(PivotOutcome::Accepted);
}
}
}
if matches!(params.on_zero_pivot, ZeroPivotAction::ForceAccept)
&& d.abs() <= params.null_pivot_tol
{
*needs_refinement = true;
if d > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
return Ok(PivotOutcome::Accepted);
}
*needs_refinement = true;
if d > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
return Ok(PivotOutcome::Accepted);
}
if d > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
Ok(PivotOutcome::Accepted)
}
#[allow(clippy::too_many_arguments)]
fn try_reject_1x1_with_rook_rescue(
a: &mut [f64],
nrow: usize,
ncol: usize,
k: usize,
col_max: f64,
may_delay: bool,
params: &BunchKaufmanParams,
perm: &mut [usize],
pos: &mut usize,
neg: &mut usize,
zero: &mut usize,
needs_refinement: &mut bool,
n_rook_rescues: &mut usize,
n_tiny: &mut usize,
) -> Result<PivotOutcome, FeralError> {
let d = a[k * nrow + k];
let threshold = (params.pivot_threshold * col_max).max(params.null_pivot_tol);
if d.abs() > threshold {
return try_reject_1x1_frontal(
a,
nrow,
k,
col_max,
may_delay,
params,
pos,
neg,
zero,
needs_refinement,
n_tiny,
);
}
if let Some(pivot) = rook_rescue(a, nrow, ncol, k, params) {
*n_rook_rescues += 1;
for idx in 0..pivot.n_swaps {
let (p, q) = pivot.swaps[idx];
swap_rows_cols(a, nrow, p, q, perm);
}
match pivot.kind {
RookKind::Pivot1x1 => {
let d_new = a[k * nrow + k];
if d_new > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
return Ok(PivotOutcome::Accepted);
}
RookKind::Pivot2x2 => {
let d11 = a[k * nrow + k];
let d21 = a[k * nrow + (k + 1)];
let d22 = a[(k + 1) * nrow + (k + 1)];
return Ok(PivotOutcome::AcceptedRook2x2 { d11, d21, d22 });
}
}
}
try_reject_1x1_frontal(
a,
nrow,
k,
col_max,
may_delay,
params,
pos,
neg,
zero,
needs_refinement,
n_tiny,
)
}
pub(crate) fn do_1x1_update(a: &mut [f64], n: usize, k: usize, fma: bool) {
if n == crate::dense::block_ldlt32::BLOCK_SIZE {
crate::dense::block_ldlt32::update_1x1_block32(a, k, fma);
return;
}
let d = a[k * n + k];
if d.abs() == 0.0 {
return;
}
let inv_d = 1.0 / d;
for i in (k + 1)..n {
a[k * n + i] *= inv_d;
}
for j in (k + 1)..n {
let l_jk = a[k * n + j];
let alpha = l_jk * d;
let (before, rest) = a.split_at_mut(j * n);
let src = &before[k * n + j..k * n + n];
let dst = &mut rest[j..n];
if fma {
schur_kernel::axpy_minus_unroll4(dst, src, alpha);
} else {
schur_kernel::axpy_minus_unroll4_nofma(dst, src, alpha);
}
}
}
pub(crate) fn do_2x2_update(
a: &mut [f64],
n: usize,
k: usize,
d11: f64,
d21: f64,
d22: f64,
fma: bool,
) {
if n == crate::dense::block_ldlt32::BLOCK_SIZE {
crate::dense::block_ldlt32::update_2x2_block32(a, k, d11, d21, d22, fma);
return;
}
let det = d11 * d22 - d21 * d21;
if det.abs() == 0.0 {
return;
}
let inv_det = 1.0 / det;
for i in (k + 2)..n {
let a_ik = a[k * n + i];
let a_ik1 = a[(k + 1) * n + i];
a[k * n + i] = (d22 * a_ik - d21 * a_ik1) * inv_det;
a[(k + 1) * n + i] = (d11 * a_ik1 - d21 * a_ik) * inv_det;
}
for j in (k + 2)..n {
let l_j0 = a[k * n + j];
let l_j1 = a[(k + 1) * n + j];
let dl_j0 = d11 * l_j0 + d21 * l_j1;
let dl_j1 = d21 * l_j0 + d22 * l_j1;
let (before, rest) = a.split_at_mut(j * n);
let src0 = &before[k * n + j..k * n + n];
let src1 = &before[(k + 1) * n + j..(k + 1) * n + n];
let dst = &mut rest[j..n];
if fma {
schur_kernel::axpy2_minus_unroll4(dst, src0, dl_j0, src1, dl_j1);
} else {
schur_kernel::axpy2_minus_unroll4_nofma(dst, src0, dl_j0, src1, dl_j1);
}
}
}
fn count_2x2_inertia_val(d11: f64, d21: f64, d22: f64) -> Inertia {
classify_2x2_inertia(d11, d21, d22)
}
#[inline]
fn det_sym2x2(d11: f64, d21: f64, d22: f64) -> f64 {
let w = d21 * d21;
let e = d21.mul_add(d21, -w); let f = d11.mul_add(d22, -w); f + e
}
#[inline]
fn classify_2x2_inertia(d11: f64, d21: f64, d22: f64) -> Inertia {
let det = det_sym2x2(d11, d21, d22);
let tr = d11 + d22;
if det < 0.0 {
Inertia::new(1, 1, 0)
} else if det > 0.0 {
if tr > 0.0 {
Inertia::new(2, 0, 0)
} else {
Inertia::new(0, 2, 0)
}
} else {
if tr > 0.0 {
Inertia::new(1, 0, 1)
} else if tr < 0.0 {
Inertia::new(0, 1, 1)
} else {
Inertia::new(0, 0, 2)
}
}
}
#[inline]
fn sym2_eigenvalues(d11: f64, d21: f64, d22: f64) -> (f64, f64) {
let tr = d11 + d22;
let diff = d11 - d22;
let disc = (diff * diff + (2.0 * d21) * (2.0 * d21)).max(0.0);
let s = disc.sqrt();
let lam1 = 0.5 * (tr + s);
let lam2 = 0.5 * (tr - s);
(lam1, lam2)
}
fn column_offdiag_max(a: &[f64], n: usize, k: usize) -> (f64, usize) {
let mut max_val = 0.0;
let mut max_idx = k + 1;
for i in (k + 1)..n {
let val = a[k * n + i].abs();
if val > max_val {
max_val = val;
max_idx = i;
}
}
(max_val, max_idx)
}
fn symmetric_row_offdiag_max(a: &[f64], n: usize, k: usize, r: usize) -> f64 {
let mut max_val = 0.0;
for i in (r + 1)..n {
let val = a[r * n + i].abs();
if val > max_val {
max_val = val;
}
}
for j in k..r {
let val = a[j * n + r].abs();
if val > max_val {
max_val = val;
}
}
max_val
}
#[inline]
fn delay_swap_to_boundary(
a: &mut [f64],
nrow: usize,
k: usize,
ncol_eff: &mut usize,
perm: &mut [usize],
) {
debug_assert!(
k < *ncol_eff,
"stuck column {k} must lie within the eligible range [k, {})",
*ncol_eff,
);
*ncol_eff -= 1;
swap_rows_cols(a, nrow, k, *ncol_eff, perm);
}
fn swap_rows_cols(a: &mut [f64], n: usize, p: usize, q: usize, perm: &mut [usize]) {
if p == q {
return;
}
let (p, q) = if p < q { (p, q) } else { (q, p) };
perm.swap(p, q);
a.swap(p * n + p, q * n + q);
for i in (q + 1)..n {
a.swap(p * n + i, q * n + i);
}
for i in (p + 1)..q {
a.swap(p * n + i, i * n + q);
}
for j in 0..p {
a.swap(j * n + p, j * n + q);
}
}
#[allow(clippy::too_many_arguments)]
fn do_1x1_pivot(
a: &mut [f64],
n: usize,
k: usize,
col_max: f64,
params: &BunchKaufmanParams,
pos: &mut usize,
neg: &mut usize,
zero: &mut usize,
needs_refinement: &mut bool,
n_tiny: &mut usize,
) -> Result<(f64, usize), FeralError> {
let mut d = a[k * n + k];
if params.static_pivot_floor > 0.0 && d.abs() < params.static_pivot_floor {
d = perturb_to_floor(d, params.static_pivot_floor);
a[k * n + k] = d;
*needs_refinement = true;
*n_tiny += 1;
if d > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
let d_inv = 1.0 / d;
for i in (k + 1)..n {
a[k * n + i] *= d_inv;
}
let mut next_gamma0 = 0.0;
let mut next_r = k + 2;
if k + 1 < n {
let j = k + 1;
let l_jk = a[k * n + j];
let l_jk_d = l_jk * d;
a[j * n + j] -= a[k * n + j] * l_jk_d;
for i in (j + 1)..n {
a[j * n + i] -= a[k * n + i] * l_jk_d;
let val = a[j * n + i].abs();
if val > next_gamma0 {
next_gamma0 = val;
next_r = i;
}
}
}
for j in (k + 2)..n {
let l_jk = a[k * n + j];
let l_jk_d = l_jk * d;
for i in j..n {
a[j * n + i] -= a[k * n + i] * l_jk_d;
}
}
return Ok((next_gamma0, next_r));
}
let threshold = (params.pivot_threshold * col_max).max(params.null_pivot_tol);
if d.abs() <= threshold {
if d.abs() <= params.zero_tol {
match params.on_zero_pivot {
ZeroPivotAction::ForceAccept => {
*needs_refinement = true;
*zero += 1;
for i in (k + 1)..n {
a[k * n + i] = 0.0;
}
a[k * n + k] = 0.0;
return Ok((0.0, k + 2));
}
ZeroPivotAction::Fail => return Err(FeralError::NumericallyRankDeficient),
ZeroPivotAction::PerturbToEps { abs_floor } => {
d = perturb_to_floor(d, abs_floor);
a[k * n + k] = d;
*needs_refinement = true;
if d > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
}
}
} else if matches!(params.on_zero_pivot, ZeroPivotAction::ForceAccept)
&& d.abs() <= params.null_pivot_tol
{
*needs_refinement = true;
if d > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
} else {
*needs_refinement = true;
if d > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
}
} else {
if d > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
}
let d_inv = 1.0 / d;
for i in (k + 1)..n {
a[k * n + i] *= d_inv;
}
let mut next_gamma0 = 0.0;
let mut next_r = k + 2;
if k + 1 < n {
let j = k + 1;
let l_jk = a[k * n + j];
let l_jk_d = l_jk * d;
a[j * n + j] -= a[k * n + j] * l_jk_d;
for i in (j + 1)..n {
a[j * n + i] -= a[k * n + i] * l_jk_d;
let val = a[j * n + i].abs();
if val > next_gamma0 {
next_gamma0 = val;
next_r = i;
}
}
}
for j in (k + 2)..n {
let l_jk = a[k * n + j];
let l_jk_d = l_jk * d;
for i in j..n {
a[j * n + i] -= a[k * n + i] * l_jk_d;
}
}
Ok((next_gamma0, next_r))
}
#[allow(clippy::too_many_arguments)]
fn do_2x2_pivot(
a: &mut [f64],
n: usize,
k: usize,
subdiag: &mut [f64],
params: &BunchKaufmanParams,
pos: &mut usize,
neg: &mut usize,
zero: &mut usize,
needs_refinement: &mut bool,
n_tiny: &mut usize,
) -> Result<(f64, usize), FeralError> {
let mut a00 = a[k * n + k];
let a10 = a[k * n + (k + 1)];
let mut a11 = a[(k + 1) * n + (k + 1)];
if let Some((new_a00, new_a11)) = perturb_2x2_to_floor(a00, a10, a11, params.static_pivot_floor)
{
a00 = new_a00;
a11 = new_a11;
a[k * n + k] = a00;
a[(k + 1) * n + (k + 1)] = a11;
*needs_refinement = true;
*n_tiny += 1;
}
subdiag[k] = a10;
let det = a00 * a11 - a10 * a10;
count_2x2_inertia(
det,
a00,
a10,
a11,
params,
pos,
neg,
zero,
needs_refinement,
n_tiny,
)?;
if (k + 2) >= n {
return Ok((0.0, 0));
}
let d10_abs = a10.abs();
if d10_abs < f64::EPSILON * 1e-10 {
for i in (k + 2)..n {
a[k * n + i] = 0.0;
a[(k + 1) * n + i] = 0.0;
}
return Ok((0.0, k + 3));
}
let d00 = a00 / d10_abs;
let d11 = a11 / d10_abs;
let t = 1.0 / (d00 * d11 - 1.0);
let d10 = a10 / d10_abs; let d = t / d10_abs;
let mut next_gamma0 = 0.0;
let mut next_r = k + 3;
if k + 2 < n {
let j = k + 2;
let x0 = a[k * n + j];
let x1 = a[(k + 1) * n + j];
let w0 = (x0 * d11 - x1 * d10) * d;
let w1 = (x1 * d00 - x0 * d10) * d;
a[j * n + j] -= a[k * n + j] * w0 + a[(k + 1) * n + j] * w1;
for i in (j + 1)..n {
a[j * n + i] -= a[k * n + i] * w0 + a[(k + 1) * n + i] * w1;
let val = a[j * n + i].abs();
if val > next_gamma0 {
next_gamma0 = val;
next_r = i;
}
}
a[k * n + j] = w0;
a[(k + 1) * n + j] = w1;
}
for j in (k + 3)..n {
let x0 = a[k * n + j];
let x1 = a[(k + 1) * n + j];
let w0 = (x0 * d11 - x1 * d10) * d;
let w1 = (x1 * d00 - x0 * d10) * d;
for i in j..n {
a[j * n + i] -= a[k * n + i] * w0 + a[(k + 1) * n + i] * w1;
}
a[k * n + j] = w0;
a[(k + 1) * n + j] = w1;
}
Ok((next_gamma0, next_r))
}
#[allow(clippy::too_many_arguments)]
fn count_1x1_inertia(
a: &mut [f64],
stride: usize,
k: usize,
params: &BunchKaufmanParams,
pos: &mut usize,
neg: &mut usize,
zero: &mut usize,
needs_refinement: &mut bool,
n_tiny: &mut usize,
) -> Result<(), FeralError> {
let d = a[k * stride + k];
if params.static_pivot_floor > 0.0 && d.abs() < params.static_pivot_floor {
let d_new = perturb_to_floor(d, params.static_pivot_floor);
a[k * stride + k] = d_new;
*needs_refinement = true;
*n_tiny += 1;
if d_new > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
return Ok(());
}
if d.abs() <= params.zero_tol {
match params.on_zero_pivot {
ZeroPivotAction::ForceAccept => {
*needs_refinement = true;
*zero += 1;
Ok(())
}
ZeroPivotAction::Fail => Err(FeralError::NumericallyRankDeficient),
ZeroPivotAction::PerturbToEps { abs_floor } => {
let d_new = perturb_to_floor(d, abs_floor);
a[k * stride + k] = d_new;
*needs_refinement = true;
*n_tiny += 1;
if d_new > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
Ok(())
}
}
} else if matches!(params.on_zero_pivot, ZeroPivotAction::ForceAccept)
&& d.abs() <= params.null_pivot_tol
{
*needs_refinement = true;
if d > 0.0 {
*pos += 1;
} else {
*neg += 1;
}
Ok(())
} else if d > 0.0 {
*pos += 1;
Ok(())
} else {
*neg += 1;
Ok(())
}
}
#[allow(clippy::too_many_arguments)]
fn count_2x2_inertia(
det: f64,
a00: f64,
a10: f64,
a11: f64,
params: &BunchKaufmanParams,
pos: &mut usize,
neg: &mut usize,
zero: &mut usize,
needs_refinement: &mut bool,
_n_tiny: &mut usize,
) -> Result<(), FeralError> {
if det.abs() <= params.zero_tol_2x2 {
match params.on_zero_pivot {
ZeroPivotAction::ForceAccept | ZeroPivotAction::PerturbToEps { .. } => {
*needs_refinement = true;
let inertia = classify_2x2_inertia(a00, a10, a11);
*pos += inertia.positive;
*neg += inertia.negative + inertia.zero;
Ok(())
}
ZeroPivotAction::Fail => Err(FeralError::NumericallyRankDeficient),
}
} else if matches!(params.on_zero_pivot, ZeroPivotAction::ForceAccept)
&& det.abs() <= params.null_pivot_tol_2x2
{
*needs_refinement = true;
let inertia = classify_2x2_inertia(a00, a10, a11);
*pos += inertia.positive;
*neg += inertia.negative + inertia.zero;
Ok(())
} else {
let inertia = classify_2x2_inertia(a00, a10, a11);
let _ = det; *pos += inertia.positive;
*neg += inertia.negative;
*zero += inertia.zero;
Ok(())
}
}
fn set_l_column_identity(a: &mut [f64], n: usize, k: usize) {
for i in (k + 1)..n {
a[k * n + i] = 0.0;
}
}
#[cfg(test)]
mod growth_flag_tests {
use super::*;
#[test]
fn growth_below_threshold_does_not_flag() {
let l = vec![1.0, 2.78, -2.5, 0.0, 100.0, -999_999.0];
let mut flag = false;
flag_growth_for_refinement(&l, &mut flag);
assert!(!flag, "max|L| = 999_999 < 1e6 should not flag");
}
#[test]
fn growth_above_threshold_flags() {
let l = vec![1.0, 2.0, 1.5e6, -3.0];
let mut flag = false;
flag_growth_for_refinement(&l, &mut flag);
assert!(flag, "max|L| = 1.5e6 > 1e6 must flag");
}
#[test]
fn catastrophic_growth_flags() {
let l = vec![1.0, 1.0, 8.06e16, 1.0];
let mut flag = false;
flag_growth_for_refinement(&l, &mut flag);
assert!(flag, "max|L| = 8e16 (bratu3d-class) must flag");
}
#[test]
fn negative_large_entry_flags() {
let l = vec![-2e10, 1.0];
let mut flag = false;
flag_growth_for_refinement(&l, &mut flag);
assert!(flag, "negative large |L| must flag");
}
#[test]
fn already_set_flag_is_preserved() {
let l = vec![0.0, 0.0]; let mut flag = true; flag_growth_for_refinement(&l, &mut flag);
assert!(flag, "must not clobber pre-set flag");
}
#[test]
fn empty_l_does_not_flag() {
let l: Vec<f64> = vec![];
let mut flag = false;
flag_growth_for_refinement(&l, &mut flag);
assert!(!flag);
}
#[test]
fn nan_and_inf_in_l_flag() {
let l_inf = vec![1.0, f64::INFINITY];
let mut flag = false;
flag_growth_for_refinement(&l_inf, &mut flag);
assert!(flag, "Inf entry must trigger");
}
}
#[cfg(test)]
mod sym2_inertia_tests {
use super::*;
fn close(a: f64, b: f64, tol: f64) -> bool {
(a - b).abs() <= tol * (1.0 + a.abs().max(b.abs()))
}
#[test]
fn sym2_eigs_diagonal_block() {
let (l1, l2) = sym2_eigenvalues(3.0, 0.0, 5.0);
assert!(close(l1, 5.0, 1e-15));
assert!(close(l2, 3.0, 1e-15));
}
#[test]
fn sym2_eigs_pure_off_diagonal() {
let (l1, l2) = sym2_eigenvalues(0.0, 1.0, 0.0);
assert!(close(l1, 1.0, 1e-15));
assert!(close(l2, -1.0, 1e-15));
}
#[test]
fn sym2_eigs_known_block() {
let (l1, l2) = sym2_eigenvalues(2.0, 1.0, 2.0);
assert!(close(l1, 3.0, 1e-15));
assert!(close(l2, 1.0, 1e-15));
}
#[test]
fn sym2_eigs_borderline_positive_definite_block() {
let (l1, l2) = sym2_eigenvalues(1.0 + 1e-12, 1.0, 1.0 + 1e-12);
assert!(l1 > 0.0, "max eigenvalue must be positive; got {l1:e}");
assert!(
l2 > 0.0,
"min eigenvalue of a (+,+) borderline block must remain positive under round-off; got {l2:e}"
);
}
#[test]
fn sym2_eigs_negative_definite_block() {
let (l1, l2) = sym2_eigenvalues(-(1.0 + 1e-12), 1.0, -(1.0 + 1e-12));
assert!(l1 < 0.0, "max eigenvalue must be negative; got {l1:e}");
assert!(l2 < 0.0, "min eigenvalue must be negative; got {l2:e}");
}
#[test]
fn count_2x2_inertia_val_positive_definite() {
let inertia = count_2x2_inertia_val(2.0, 1.0, 2.0);
assert_eq!(inertia.positive, 2);
assert_eq!(inertia.negative, 0);
assert_eq!(inertia.zero, 0);
}
#[test]
fn count_2x2_inertia_val_negative_definite() {
let inertia = count_2x2_inertia_val(-2.0, 1.0, -2.0);
assert_eq!(inertia.positive, 0);
assert_eq!(inertia.negative, 2);
assert_eq!(inertia.zero, 0);
}
#[test]
fn count_2x2_inertia_val_indefinite() {
let inertia = count_2x2_inertia_val(1.0, 2.0, 1.0);
assert_eq!(inertia.positive, 1);
assert_eq!(inertia.negative, 1);
assert_eq!(inertia.zero, 0);
}
#[test]
fn count_2x2_inertia_val_borderline_pd_does_not_misclassify() {
let a = 1.0 + f64::EPSILON;
let inertia = count_2x2_inertia_val(a, 1.0, a);
assert_eq!(inertia.negative, 0, "must not over-report negatives");
}
#[test]
fn count_2x2_inertia_val_cancellation_does_not_flip_sign() {
let a = 1.0;
let c = 1.0 + 4.0 * f64::EPSILON;
let b = (1.0 - f64::EPSILON).sqrt(); let inertia = count_2x2_inertia_val(a, b, c);
assert_eq!(inertia.negative, 0, "must not over-report negatives");
}
#[test]
fn count_2x2_inertia_val_diagonal_tiny_huge_positive() {
let inertia = count_2x2_inertia_val(1e-30, 0.0, 1e30);
assert_eq!(inertia, Inertia::new(2, 0, 0), "got {inertia}");
}
#[test]
fn count_2x2_inertia_val_diagonal_tiny_neg_huge_pos() {
let inertia = count_2x2_inertia_val(-1e-30, 0.0, 1e30);
assert_eq!(inertia, Inertia::new(1, 1, 0), "got {inertia}");
}
#[test]
fn count_2x2_inertia_val_diagonal_both_negative() {
let inertia = count_2x2_inertia_val(-1e-30, 0.0, -1e30);
assert_eq!(inertia, Inertia::new(0, 2, 0), "got {inertia}");
}
#[test]
fn count_2x2_inertia_val_genuine_singular_positive() {
let inertia = count_2x2_inertia_val(0.0, 0.0, 5.0);
assert_eq!(inertia, Inertia::new(1, 0, 1), "got {inertia}");
}
#[test]
fn count_2x2_inertia_val_genuine_singular_negative() {
let inertia = count_2x2_inertia_val(0.0, 0.0, -5.0);
assert_eq!(inertia, Inertia::new(0, 1, 1), "got {inertia}");
}
#[test]
fn count_2x2_inertia_val_genuine_double_zero() {
let inertia = count_2x2_inertia_val(0.0, 0.0, 0.0);
assert_eq!(inertia, Inertia::new(0, 0, 2), "got {inertia}");
}
#[test]
fn count_2x2_inertia_val_off_diagonal_straddle() {
let inertia = count_2x2_inertia_val(0.0, 1.0, 0.0);
assert_eq!(inertia, Inertia::new(1, 1, 0), "got {inertia}");
}
#[test]
fn count_2x2_inertia_val_well_separated_pd() {
let inertia = count_2x2_inertia_val(2.0, 1.0, 2.0);
assert_eq!(inertia, Inertia::new(2, 0, 0), "got {inertia}");
}
#[test]
fn count_2x2_inertia_nonsingular_branch_no_spurious_zero() {
let params = BunchKaufmanParams::default();
let det = 1e-30 * 1e30 - 0.0; let mut pos = 0;
let mut neg = 0;
let mut zero = 0;
let mut needs_refinement = false;
let mut n_tiny = 0usize;
count_2x2_inertia(
det,
1e-30,
0.0,
1e30,
¶ms,
&mut pos,
&mut neg,
&mut zero,
&mut needs_refinement,
&mut n_tiny,
)
.expect("non-singular 2×2 must classify without error");
assert_eq!((pos, neg, zero), (2, 0, 0), "spurious zero in else branch");
}
}
#[cfg(test)]
mod static_pivot_tests {
use super::*;
#[test]
fn perturb_2x2_helper_pushes_small_eigenvalue() {
let (a00, a10, a11) = (1.0, 0.0, -1e-10);
let floor = 1e-6;
let (new_a00, new_a11) = perturb_2x2_to_floor(a00, a10, a11, floor).expect("must perturb");
let (l1, l2) = sym2_eigenvalues(new_a00, a10, new_a11);
let lmin = if l1.abs() < l2.abs() { l1 } else { l2 };
assert!(
(lmin.abs() - floor).abs() < 1e-12,
"smaller |eig| should equal floor; got {lmin:e}"
);
assert!(lmin < 0.0, "sign of λ_min must stay negative; got {lmin:e}");
}
#[test]
fn perturb_2x2_helper_preserves_positive_sign() {
let (a00, a10, a11) = (-1.0, 0.0, 1e-10);
let floor = 1e-6;
let (new_a00, new_a11) = perturb_2x2_to_floor(a00, a10, a11, floor).expect("must perturb");
let (l1, l2) = sym2_eigenvalues(new_a00, a10, new_a11);
let lmin = if l1.abs() < l2.abs() { l1 } else { l2 };
assert!(lmin > 0.0, "sign of λ_min must stay positive; got {lmin:e}");
assert!((lmin - floor).abs() < 1e-12);
}
#[test]
fn perturb_2x2_helper_no_op_above_floor() {
let (a00, a10, a11) = (2.0, 0.0, -1.0);
assert!(perturb_2x2_to_floor(a00, a10, a11, 0.1).is_none());
}
#[test]
fn perturb_2x2_helper_disabled_when_floor_zero() {
let (a00, a10, a11) = (1e-20, 0.0, 1e-20);
assert!(perturb_2x2_to_floor(a00, a10, a11, 0.0).is_none());
}
#[test]
fn perturb_2x2_helper_handles_zero_small_eigenvalue() {
let (a00, a10, a11) = (1.0, 0.0, 0.0);
let floor = 1e-6;
let (new_a00, new_a11) = perturb_2x2_to_floor(a00, a10, a11, floor).expect("must perturb");
let (l1, l2) = sym2_eigenvalues(new_a00, a10, new_a11);
let lmin = if l1.abs() < l2.abs() { l1 } else { l2 };
assert!(lmin > 0.0, "λ_small pushed toward sign(λ_other)");
assert!((lmin - floor).abs() < 1e-12);
}
}