use faer::Par;
use faer::linalg::matmul::triangular::{self as tri_matmul, BlockStructure};
use faer::linalg::triangular_solve;
use faer::perm::Perm;
use faer::prelude::*;
use faer::{Accum, Conj, Mat, MatMut, MatRef};
use super::diagonal::MixedDiagonal;
use super::pivot::{Block2x2, PivotType};
use crate::error::SparseError;
use crate::ordering::perm_from_forward;
const BUNCH_KAUFMAN_ALPHA: f64 = 0.5;
#[cfg(test)]
const COMPLETE_PIVOTING_GROWTH_BOUND: f64 = 4.0;
#[derive(Debug, Clone)]
pub struct AptpOptions {
pub threshold: f64,
pub small: f64,
pub fallback: AptpFallback,
pub outer_block_size: usize,
pub inner_block_size: usize,
pub failed_pivot_method: FailedPivotMethod,
pub par: Par,
pub nemin: usize,
pub small_leaf_threshold: usize,
}
impl Default for AptpOptions {
fn default() -> Self {
Self {
threshold: 0.01,
small: 1e-20,
fallback: AptpFallback::BunchKaufman,
outer_block_size: 256,
inner_block_size: 32,
failed_pivot_method: FailedPivotMethod::Tpp,
par: Par::Seq,
nemin: 32,
small_leaf_threshold: 256,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AptpFallback {
BunchKaufman,
Delay,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FailedPivotMethod {
Tpp,
Pass,
}
pub(crate) struct AptpKernelWorkspace {
backup: Mat<f64>,
l11_buf: Mat<f64>,
ld_buf: Mat<f64>,
copy_buf: Mat<f64>,
}
impl AptpKernelWorkspace {
pub(crate) fn new(max_front: usize, inner_block_size: usize) -> Self {
Self {
backup: Mat::zeros(max_front, inner_block_size),
l11_buf: Mat::zeros(inner_block_size, inner_block_size),
ld_buf: Mat::zeros(max_front, inner_block_size),
copy_buf: Mat::zeros(max_front, inner_block_size),
}
}
}
#[derive(Debug)]
pub struct AptpFactorResult {
pub d: MixedDiagonal,
pub perm: Vec<usize>,
pub num_eliminated: usize,
pub delayed_cols: Vec<usize>,
pub stats: AptpStatistics,
pub pivot_log: Vec<AptpPivotRecord>,
}
#[derive(Debug)]
pub struct AptpFactorization {
pub l: Mat<f64>,
pub d: MixedDiagonal,
pub perm: Perm<usize>,
pub delayed_cols: Vec<usize>,
pub stats: AptpStatistics,
pub pivot_log: Vec<AptpPivotRecord>,
}
#[derive(Debug, Clone, Default)]
pub struct AptpStatistics {
pub num_1x1: usize,
pub num_2x2: usize,
pub num_delayed: usize,
pub max_l_entry: f64,
}
#[derive(Debug, Clone)]
pub struct AptpPivotRecord {
pub col: usize,
pub pivot_type: PivotType,
pub max_l_entry: f64,
pub was_fallback: bool,
}
pub fn aptp_factor_in_place(
mut a: MatMut<'_, f64>,
num_fully_summed: usize,
options: &AptpOptions,
) -> Result<AptpFactorResult, SparseError> {
let m = a.nrows();
if a.ncols() != m {
return Err(SparseError::InvalidInput {
reason: format!("matrix must be square, got {}x{}", m, a.ncols()),
});
}
if num_fully_summed > m {
return Err(SparseError::InvalidInput {
reason: format!(
"num_fully_summed ({}) > matrix dimension ({})",
num_fully_summed, m
),
});
}
if options.threshold <= 0.0 || options.threshold > 1.0 {
return Err(SparseError::InvalidInput {
reason: format!("threshold must be in (0, 1], got {}", options.threshold),
});
}
if options.small < 0.0 {
return Err(SparseError::InvalidInput {
reason: format!("small must be >= 0.0, got {}", options.small),
});
}
if options.outer_block_size == 0 {
return Err(SparseError::InvalidInput {
reason: "outer_block_size must be > 0".to_string(),
});
}
if options.inner_block_size == 0 {
return Err(SparseError::InvalidInput {
reason: "inner_block_size must be > 0".to_string(),
});
}
if options.inner_block_size > options.outer_block_size {
return Err(SparseError::InvalidInput {
reason: format!(
"inner_block_size ({}) must be <= outer_block_size ({})",
options.inner_block_size, options.outer_block_size
),
});
}
let mut kernel_ws = AptpKernelWorkspace::new(m, options.inner_block_size);
let mut result = if num_fully_summed < options.inner_block_size {
tpp_factor_rect(a.rb_mut(), num_fully_summed, options)?
} else if num_fully_summed > options.outer_block_size {
two_level_factor(a.rb_mut(), num_fully_summed, options, &mut kernel_ws)?
} else {
factor_inner(
a.rb_mut(),
num_fully_summed,
num_fully_summed,
options,
&mut kernel_ws,
)?
};
if result.num_eliminated < num_fully_summed
&& options.failed_pivot_method == FailedPivotMethod::Tpp
{
let q = result.num_eliminated;
result.d.grow(num_fully_summed);
let additional = tpp_factor(
a.rb_mut(),
q,
num_fully_summed,
&mut result.perm,
&mut result.d,
&mut result.stats,
&mut result.pivot_log,
options,
);
result.num_eliminated = q + additional;
result.d.truncate(result.num_eliminated);
result.delayed_cols = (result.num_eliminated..num_fully_summed)
.map(|i| result.perm[i])
.collect();
result.stats.num_delayed = num_fully_summed - result.num_eliminated;
}
Ok(result)
}
pub fn aptp_factor(
a: MatRef<'_, f64>,
options: &AptpOptions,
) -> Result<AptpFactorization, SparseError> {
let n = a.nrows();
if a.ncols() != n {
return Err(SparseError::InvalidInput {
reason: format!("matrix must be square, got {}x{}", n, a.ncols()),
});
}
let mut a_copy = a.to_owned();
let result = aptp_factor_in_place(a_copy.as_mut(), n, options)?;
let l = extract_l(a_copy.as_ref(), &result.d, result.num_eliminated);
let perm = perm_from_forward(result.perm.clone())?;
Ok(AptpFactorization {
l,
d: result.d,
perm,
delayed_cols: result.delayed_cols,
stats: result.stats,
pivot_log: result.pivot_log,
})
}
fn swap_symmetric(mut a: MatMut<'_, f64>, i: usize, j: usize) {
let m = a.nrows();
swap_symmetric_block(a.rb_mut(), i, j, 0, m);
}
fn swap_symmetric_block(
mut a: MatMut<'_, f64>,
i: usize,
j: usize,
col_start: usize,
row_limit: usize,
) {
if i == j {
return;
}
let (i, j) = if i < j { (i, j) } else { (j, i) };
let tmp = a[(i, i)];
a[(i, i)] = a[(j, j)];
a[(j, j)] = tmp;
for k in col_start..i {
let tmp = a[(i, k)];
a[(i, k)] = a[(j, k)];
a[(j, k)] = tmp;
}
for k in (i + 1)..j {
let tmp = a[(k, i)];
a[(k, i)] = a[(j, k)];
a[(j, k)] = tmp;
}
for k in (j + 1)..row_limit {
let tmp = a[(k, i)];
a[(k, i)] = a[(k, j)];
a[(k, j)] = tmp;
}
}
#[cfg(test)]
#[allow(dead_code)] fn update_schur_1x1(mut a: MatMut<'_, f64>, col: usize, d_value: f64) {
let m = a.nrows();
let n_trail = m - col - 1;
if n_trail == 0 {
return;
}
let l_col: Vec<f64> = (0..n_trail).map(|i| a[(col + 1 + i, col)]).collect();
for j in 0..n_trail {
let ldlj = l_col[j] * d_value;
for i in j..n_trail {
let old = a[(col + 1 + i, col + 1 + j)];
a[(col + 1 + i, col + 1 + j)] = old - l_col[i] * ldlj;
}
}
}
#[cfg(test)]
#[allow(dead_code)] fn update_schur_2x2(mut a: MatMut<'_, f64>, col: usize, partner: usize, block: &Block2x2) {
let m = a.nrows();
let start = col.max(partner) + 1;
let n_trail = m - start;
if n_trail == 0 {
return;
}
let l1: Vec<f64> = (0..n_trail).map(|i| a[(start + i, col)]).collect();
let l2: Vec<f64> = (0..n_trail).map(|i| a[(start + i, partner)]).collect();
let d_a = block.a;
let d_b = block.b;
let d_c = block.c;
for j in 0..n_trail {
let w_j1 = l1[j] * d_a + l2[j] * d_b;
let w_j2 = l1[j] * d_b + l2[j] * d_c;
for i in j..n_trail {
let update = l1[i] * w_j1 + l2[i] * w_j2;
let old = a[(start + i, start + j)];
a[(start + i, start + j)] = old - update;
}
}
}
fn extract_l(a: MatRef<'_, f64>, d: &MixedDiagonal, num_eliminated: usize) -> Mat<f64> {
let n = a.nrows();
let mut l = Mat::zeros(n, n);
for i in 0..n {
l[(i, i)] = 1.0;
}
let mut col = 0;
while col < num_eliminated {
match d.pivot_type(col) {
PivotType::OneByOne => {
for i in (col + 1)..n {
l[(i, col)] = a[(i, col)];
}
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
for i in (col + 2)..n {
l[(i, col)] = a[(i, col)];
l[(i, col + 1)] = a[(i, col + 1)];
}
col += 2;
}
_ => {
col += 1;
}
}
}
l
}
#[cfg(test)]
fn complete_pivoting_factor(mut a: MatMut<'_, f64>, small: f64) -> AptpFactorResult {
let n = a.nrows();
debug_assert_eq!(
n,
a.ncols(),
"complete_pivoting_factor requires square matrix"
);
let mut col_order: Vec<usize> = (0..n).collect();
let mut d = MixedDiagonal::new(n);
let mut stats = AptpStatistics::default();
let mut pivot_log = Vec::with_capacity(n);
let mut k = 0;
while k < n {
let remaining = n - k;
let mut max_val = 0.0_f64;
let mut max_row = k;
let mut max_col = k;
for j in k..n {
for i in j..n {
let v = a[(i, j)].abs();
if v > max_val {
max_val = v;
max_row = i;
max_col = j;
}
}
}
if max_val < small {
for &orig_col in &col_order[k..n] {
stats.num_delayed += 1;
pivot_log.push(AptpPivotRecord {
col: orig_col,
pivot_type: PivotType::Delayed,
max_l_entry: 0.0,
was_fallback: false,
});
}
break;
}
if max_row == max_col {
if max_row != k {
swap_symmetric(a.rb_mut(), k, max_row);
col_order.swap(k, max_row);
}
let d_kk = a[(k, k)];
let inv_d = 1.0 / d_kk;
let mut max_l = 0.0_f64;
for i in (k + 1)..n {
let l_ik = a[(i, k)] * inv_d;
a[(i, k)] = l_ik;
let abs_l = l_ik.abs();
if abs_l > max_l {
max_l = abs_l;
}
}
update_schur_1x1(a.rb_mut(), k, d_kk);
d.set_1x1(k, d_kk);
stats.num_1x1 += 1;
if max_l > stats.max_l_entry {
stats.max_l_entry = max_l;
}
pivot_log.push(AptpPivotRecord {
col: col_order[k],
pivot_type: PivotType::OneByOne,
max_l_entry: max_l,
was_fallback: false,
});
k += 1;
} else {
let t = max_row;
let m = max_col;
let a_mm = a[(m, m)];
let a_tt = a[(t, t)];
let a_tm = a[(t, m)]; let delta = a_mm * a_tt - a_tm * a_tm;
if remaining < 2 {
let d_kk = a[(k, k)];
if d_kk.abs() < small {
stats.num_delayed += 1;
pivot_log.push(AptpPivotRecord {
col: col_order[k],
pivot_type: PivotType::Delayed,
max_l_entry: 0.0,
was_fallback: false,
});
break;
}
let inv_d = 1.0 / d_kk;
for i in (k + 1)..n {
a[(i, k)] *= inv_d;
}
d.set_1x1(k, d_kk);
stats.num_1x1 += 1;
k += 1;
continue;
}
if delta.abs() >= BUNCH_KAUFMAN_ALPHA * a_tm * a_tm {
if m != k {
swap_symmetric(a.rb_mut(), k, m);
col_order.swap(k, m);
}
let new_t = if t == k { m } else { t };
if new_t != k + 1 {
swap_symmetric(a.rb_mut(), k + 1, new_t);
col_order.swap(k + 1, new_t);
}
let a11 = a[(k, k)];
let a22 = a[(k + 1, k + 1)];
let a21 = a[(k + 1, k)];
let det = a11 * a22 - a21 * a21;
let inv_det = 1.0 / det;
let block = Block2x2 {
first_col: k,
a: a11,
b: a21,
c: a22,
};
let mut max_l = 0.0_f64;
let rows_start = k + 2;
for i in rows_start..n {
let ai1 = a[(i, k)];
let ai2 = a[(i, k + 1)];
let l_i1 = (ai1 * a22 - ai2 * a21) * inv_det;
let l_i2 = (ai2 * a11 - ai1 * a21) * inv_det;
a[(i, k)] = l_i1;
a[(i, k + 1)] = l_i2;
if l_i1.abs() > max_l {
max_l = l_i1.abs();
}
if l_i2.abs() > max_l {
max_l = l_i2.abs();
}
}
update_schur_2x2(a.rb_mut(), k, k + 1, &block);
d.set_2x2(block);
stats.num_2x2 += 1;
if max_l > stats.max_l_entry {
stats.max_l_entry = max_l;
}
pivot_log.push(AptpPivotRecord {
col: col_order[k],
pivot_type: PivotType::TwoByTwo {
partner: col_order[k + 1],
},
max_l_entry: max_l,
was_fallback: false,
});
pivot_log.push(AptpPivotRecord {
col: col_order[k + 1],
pivot_type: PivotType::TwoByTwo {
partner: col_order[k],
},
max_l_entry: max_l,
was_fallback: false,
});
k += 2;
} else {
let pivot_pos = if a_mm.abs() >= a_tt.abs() { m } else { t };
if pivot_pos != k {
swap_symmetric(a.rb_mut(), k, pivot_pos);
col_order.swap(k, pivot_pos);
}
let d_kk = a[(k, k)];
let inv_d = 1.0 / d_kk;
let mut max_l = 0.0_f64;
for i in (k + 1)..n {
let l_ik = a[(i, k)] * inv_d;
a[(i, k)] = l_ik;
let abs_l = l_ik.abs();
if abs_l > max_l {
max_l = abs_l;
}
}
update_schur_1x1(a.rb_mut(), k, d_kk);
d.set_1x1(k, d_kk);
stats.num_1x1 += 1;
if max_l > stats.max_l_entry {
stats.max_l_entry = max_l;
}
pivot_log.push(AptpPivotRecord {
col: col_order[k],
pivot_type: PivotType::OneByOne,
max_l_entry: max_l,
was_fallback: true, });
k += 1;
}
}
}
let num_eliminated = k;
d.truncate(num_eliminated);
let delayed_cols: Vec<usize> = (num_eliminated..n).map(|i| col_order[i]).collect();
AptpFactorResult {
d,
perm: col_order,
num_eliminated,
delayed_cols,
stats,
pivot_log,
}
}
fn factor_block_diagonal(
mut a: MatMut<'_, f64>,
col_start: usize,
block_size: usize,
small: f64,
row_limit: usize,
) -> (
MixedDiagonal,
Vec<usize>,
usize,
AptpStatistics,
Vec<AptpPivotRecord>,
) {
let block_end = col_start + block_size;
let mut local_perm: Vec<usize> = (0..block_size).collect();
let mut d = MixedDiagonal::new(block_size);
let mut stats = AptpStatistics::default();
let mut pivot_log = Vec::with_capacity(block_size);
let mut k = 0;
while k < block_size {
let cur = col_start + k; let search_end = block_end;
let remaining = block_size - k;
let mut max_val = 0.0_f64;
let mut max_row = cur;
let mut max_col = cur;
for j in cur..search_end {
for i in j..search_end {
let v = a[(i, j)].abs();
if v > max_val {
max_val = v;
max_row = i;
max_col = j;
}
}
}
if max_val < small {
stats.num_delayed += remaining;
for &perm_val in &local_perm[k..block_size] {
pivot_log.push(AptpPivotRecord {
col: perm_val,
pivot_type: PivotType::Delayed,
max_l_entry: 0.0,
was_fallback: false,
});
}
break;
}
if max_row == max_col {
if max_row != cur {
swap_symmetric_block(a.rb_mut(), cur, max_row, col_start, row_limit);
local_perm.swap(k, max_row - col_start);
}
let d_kk = a[(cur, cur)];
let inv_d = 1.0 / d_kk;
let mut max_l = 0.0_f64;
for i in (cur + 1)..block_end {
let l_ik = a[(i, cur)] * inv_d;
a[(i, cur)] = l_ik;
let abs_l = l_ik.abs();
if abs_l > max_l {
max_l = abs_l;
}
}
for j in (cur + 1)..block_end {
let l_j = a[(j, cur)];
let ldl_j = l_j * d_kk;
for i in j..block_end {
a[(i, j)] -= a[(i, cur)] * ldl_j;
}
}
d.set_1x1(k, d_kk);
stats.num_1x1 += 1;
if max_l > stats.max_l_entry {
stats.max_l_entry = max_l;
}
pivot_log.push(AptpPivotRecord {
col: local_perm[k],
pivot_type: PivotType::OneByOne,
max_l_entry: max_l,
was_fallback: false,
});
k += 1;
} else {
let t = max_row; let m_idx = max_col;
let a_mm = a[(m_idx, m_idx)];
let a_tt = a[(t, t)];
let a_tm = a[(t, m_idx)]; let delta = a_mm * a_tt - a_tm * a_tm;
if remaining < 2 {
let pivot_pos = if a_mm.abs() >= a_tt.abs() { m_idx } else { t };
if pivot_pos != cur {
swap_symmetric_block(a.rb_mut(), cur, pivot_pos, col_start, row_limit);
local_perm.swap(k, pivot_pos - col_start);
}
let d_kk = a[(cur, cur)];
if d_kk.abs() < small {
stats.num_delayed += 1;
pivot_log.push(AptpPivotRecord {
col: local_perm[k],
pivot_type: PivotType::Delayed,
max_l_entry: 0.0,
was_fallback: false,
});
break;
}
let inv_d = 1.0 / d_kk;
for i in (cur + 1)..block_end {
a[(i, cur)] *= inv_d;
}
d.set_1x1(k, d_kk);
stats.num_1x1 += 1;
k += 1;
continue;
}
if delta.abs() >= BUNCH_KAUFMAN_ALPHA * a_tm * a_tm {
if m_idx != cur {
swap_symmetric_block(a.rb_mut(), cur, m_idx, col_start, row_limit);
local_perm.swap(k, m_idx - col_start);
}
let new_t = if t == cur { m_idx } else { t };
if new_t != cur + 1 {
swap_symmetric_block(a.rb_mut(), cur + 1, new_t, col_start, row_limit);
local_perm.swap(k + 1, new_t - col_start);
}
let a11 = a[(cur, cur)];
let a22 = a[(cur + 1, cur + 1)];
let a21 = a[(cur + 1, cur)];
let det = a11 * a22 - a21 * a21;
let inv_det = 1.0 / det;
let block = Block2x2 {
first_col: k,
a: a11,
b: a21,
c: a22,
};
let mut max_l = 0.0_f64;
let rows_start = cur + 2;
for i in rows_start..block_end {
let ai1 = a[(i, cur)];
let ai2 = a[(i, cur + 1)];
let l_i1 = (ai1 * a22 - ai2 * a21) * inv_det;
let l_i2 = (ai2 * a11 - ai1 * a21) * inv_det;
a[(i, cur)] = l_i1;
a[(i, cur + 1)] = l_i2;
if l_i1.abs() > max_l {
max_l = l_i1.abs();
}
if l_i2.abs() > max_l {
max_l = l_i2.abs();
}
}
for j in rows_start..block_end {
let l1j = a[(j, cur)];
let l2j = a[(j, cur + 1)];
let w_j1 = l1j * a11 + l2j * a21;
let w_j2 = l1j * a21 + l2j * a22;
for i in j..block_end {
let l1i = a[(i, cur)];
let l2i = a[(i, cur + 1)];
a[(i, j)] -= l1i * w_j1 + l2i * w_j2;
}
}
d.set_2x2(block);
stats.num_2x2 += 1;
if max_l > stats.max_l_entry {
stats.max_l_entry = max_l;
}
pivot_log.push(AptpPivotRecord {
col: local_perm[k],
pivot_type: PivotType::TwoByTwo {
partner: local_perm[k + 1],
},
max_l_entry: max_l,
was_fallback: false,
});
pivot_log.push(AptpPivotRecord {
col: local_perm[k + 1],
pivot_type: PivotType::TwoByTwo {
partner: local_perm[k],
},
max_l_entry: max_l,
was_fallback: false,
});
k += 2;
} else {
let pivot_pos = if a_mm.abs() >= a_tt.abs() { m_idx } else { t };
if pivot_pos != cur {
swap_symmetric_block(a.rb_mut(), cur, pivot_pos, col_start, row_limit);
local_perm.swap(k, pivot_pos - col_start);
}
let d_kk = a[(cur, cur)];
let inv_d = 1.0 / d_kk;
let mut max_l = 0.0_f64;
for i in (cur + 1)..block_end {
let l_ik = a[(i, cur)] * inv_d;
a[(i, cur)] = l_ik;
let abs_l = l_ik.abs();
if abs_l > max_l {
max_l = abs_l;
}
}
for j in (cur + 1)..block_end {
let l_j = a[(j, cur)];
let ldl_j = l_j * d_kk;
for i in j..block_end {
a[(i, j)] -= a[(i, cur)] * ldl_j;
}
}
d.set_1x1(k, d_kk);
stats.num_1x1 += 1;
if max_l > stats.max_l_entry {
stats.max_l_entry = max_l;
}
pivot_log.push(AptpPivotRecord {
col: local_perm[k],
pivot_type: PivotType::OneByOne,
max_l_entry: max_l,
was_fallback: true,
});
k += 1;
}
}
}
let nelim = k;
(d, local_perm, nelim, stats, pivot_log)
}
fn adjust_for_2x2_boundary(effective_nelim: usize, d: &MixedDiagonal) -> usize {
if effective_nelim == 0 {
return 0;
}
let last = effective_nelim - 1;
match d.pivot_type(last) {
PivotType::TwoByTwo { partner } if partner > last => {
effective_nelim - 1
}
_ => effective_nelim,
}
}
struct BlockBackup<'a> {
data: MatMut<'a, f64>,
}
impl<'a> BlockBackup<'a> {
fn create(
a: MatRef<'_, f64>,
col_start: usize,
block_cols: usize,
m: usize,
buf: &'a mut Mat<f64>,
) -> Self {
let rows = m - col_start;
let mut data = buf.as_mut().submatrix_mut(0, 0, rows, block_cols);
for j in 0..block_cols {
for i in 0..rows {
data[(i, j)] = a[(col_start + i, col_start + j)];
}
}
BlockBackup { data }
}
fn restore_failed(
&self,
mut a: MatMut<'_, f64>,
col_start: usize,
nelim: usize,
block_cols: usize,
m: usize,
) {
let rows = m - col_start;
for j in nelim..block_cols {
for i in 0..rows {
a[(col_start + i, col_start + j)] = self.data[(i, j)];
}
}
}
fn restore_diagonal_with_perm(
&self,
mut a: MatMut<'_, f64>,
col_start: usize,
nelim: usize,
block_cols: usize,
m: usize,
block_perm: &[usize],
) {
for j in nelim..block_cols {
let c = block_perm[j]; for i in nelim..block_cols {
let r = block_perm[i]; let val = if r >= c {
self.data[(r, c)]
} else {
self.data[(c, r)]
};
a[(col_start + i, col_start + j)] = val;
}
}
for j in nelim..block_cols {
let c = block_perm[j]; for i in block_cols..(m - col_start) {
a[(col_start + i, col_start + j)] = self.data[(i, c)];
}
}
}
}
#[allow(clippy::too_many_arguments)]
fn apply_and_check(
mut a: MatMut<'_, f64>,
col_start: usize,
block_nelim: usize,
block_cols: usize,
m: usize,
d: &MixedDiagonal,
threshold: f64,
par: Par,
l11_buf: &mut Mat<f64>,
) -> usize {
if block_nelim == 0 {
return 0;
}
let panel_rows = m - col_start - block_cols;
if panel_rows == 0 {
return block_nelim;
}
let panel_start = col_start + block_cols;
{
let src = a
.rb()
.submatrix(col_start, col_start, block_nelim, block_nelim);
let mut dst = l11_buf
.as_mut()
.submatrix_mut(0, 0, block_nelim, block_nelim);
dst.copy_from(src);
}
let l11_ref = l11_buf.as_ref().submatrix(0, 0, block_nelim, block_nelim);
let panel = a
.rb_mut()
.submatrix_mut(panel_start, col_start, panel_rows, block_nelim);
triangular_solve::solve_unit_lower_triangular_in_place(l11_ref, panel.transpose_mut(), par);
let mut col = 0;
while col < block_nelim {
match d.pivot_type(col) {
PivotType::OneByOne => {
let d_val = d.diagonal_1x1(col);
let inv_d = 1.0 / d_val;
for i in 0..panel_rows {
a[(panel_start + i, col_start + col)] *= inv_d;
}
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
let block = d.diagonal_2x2(col);
let det = block.a * block.c - block.b * block.b;
let inv_det = 1.0 / det;
for i in 0..panel_rows {
let x1 = a[(panel_start + i, col_start + col)];
let x2 = a[(panel_start + i, col_start + col + 1)];
a[(panel_start + i, col_start + col)] = (x1 * block.c - x2 * block.b) * inv_det;
a[(panel_start + i, col_start + col + 1)] =
(x2 * block.a - x1 * block.b) * inv_det;
}
col += 2;
}
_ => {
col += 1;
}
}
}
let stability_bound = 1.0 / threshold;
let mut effective_nelim = block_nelim;
let mut scan_col = 0;
while scan_col < block_nelim {
let pivot_width = match d.pivot_type(scan_col) {
PivotType::TwoByTwo { partner } if partner > scan_col => 2,
_ => 1,
};
let mut col_ok = true;
for c in scan_col..scan_col + pivot_width {
if c >= block_nelim {
break;
}
for i in 0..panel_rows {
if a[(panel_start + i, col_start + c)].abs() >= stability_bound {
col_ok = false;
break;
}
}
if !col_ok {
break;
}
}
if !col_ok {
effective_nelim = scan_col;
break;
}
scan_col += pivot_width;
}
effective_nelim
}
#[allow(clippy::too_many_arguments)]
fn update_trailing(
mut a: MatMut<'_, f64>,
col_start: usize,
nelim: usize,
block_cols: usize,
m: usize,
num_fully_summed: usize,
d: &MixedDiagonal,
par: Par,
ld_buf: &mut Mat<f64>,
copy_buf: &mut Mat<f64>,
) {
if nelim == 0 {
return;
}
let trailing_start = col_start + block_cols;
let trailing_size = m - trailing_start;
if trailing_size == 0 {
return;
}
let p = num_fully_summed;
let mut w = ld_buf.as_mut().submatrix_mut(0, 0, trailing_size, nelim);
let mut col = 0;
while col < nelim {
match d.pivot_type(col) {
PivotType::OneByOne => {
let d_val = d.diagonal_1x1(col);
for i in 0..trailing_size {
w[(i, col)] = a[(trailing_start + i, col_start + col)] * d_val;
}
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
let block = d.diagonal_2x2(col);
for i in 0..trailing_size {
let l1 = a[(trailing_start + i, col_start + col)];
let l2 = a[(trailing_start + i, col_start + col + 1)];
w[(i, col)] = l1 * block.a + l2 * block.b;
w[(i, col + 1)] = l1 * block.b + l2 * block.c;
}
col += 2;
}
_ => {
col += 1;
}
}
}
{
let src = a
.rb()
.submatrix(trailing_start, col_start, trailing_size, nelim);
let mut dst = copy_buf.as_mut().submatrix_mut(0, 0, trailing_size, nelim);
dst.copy_from(src);
}
let fs_size = p.saturating_sub(trailing_start);
if fs_size > 0 {
let w_fs = ld_buf.as_ref().submatrix(0, 0, fs_size, nelim);
let l21_fs = copy_buf.as_ref().submatrix(0, 0, fs_size, nelim);
let mut a_fs = a
.rb_mut()
.submatrix_mut(trailing_start, trailing_start, fs_size, fs_size);
tri_matmul::matmul_with_conj(
a_fs.rb_mut(),
BlockStructure::TriangularLower,
Accum::Add,
w_fs,
BlockStructure::Rectangular,
Conj::No,
l21_fs.transpose(),
BlockStructure::Rectangular,
Conj::No,
-1.0,
par,
);
}
let nfs_size = m - p;
if nfs_size > 0 && fs_size > 0 {
let w_nfs = ld_buf.as_ref().submatrix(fs_size, 0, nfs_size, nelim);
let l21_fs = copy_buf.as_ref().submatrix(0, 0, fs_size, nelim);
let mut a_cross = a.submatrix_mut(p, trailing_start, nfs_size, fs_size);
faer::linalg::matmul::matmul_with_conj(
a_cross.rb_mut(),
Accum::Add,
w_nfs,
Conj::No,
l21_fs.transpose(),
Conj::No,
-1.0,
par,
);
}
}
fn compute_ld_into(l: MatRef<'_, f64>, d: &MixedDiagonal, nelim: usize, mut dst: MatMut<'_, f64>) {
let nrows = l.nrows();
let mut col = 0;
while col < nelim {
match d.pivot_type(col) {
PivotType::OneByOne => {
let d_val = d.diagonal_1x1(col);
for i in 0..nrows {
dst[(i, col)] = l[(i, col)] * d_val;
}
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
let block = d.diagonal_2x2(col);
for i in 0..nrows {
let l1 = l[(i, col)];
let l2 = l[(i, col + 1)];
dst[(i, col)] = l1 * block.a + l2 * block.b;
dst[(i, col + 1)] = l1 * block.b + l2 * block.c;
}
col += 2;
}
_ => {
col += 1;
}
}
}
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn compute_contribution_gemm(
frontal_data: &Mat<f64>,
num_fully_summed: usize,
num_eliminated: usize,
m: usize,
d: &MixedDiagonal,
contrib_buffer: &mut Mat<f64>,
ld_workspace: &mut Mat<f64>,
par: Par,
) {
let p = num_fully_summed;
let ne = num_eliminated;
let nfs = m - p;
if nfs == 0 {
return; }
for j in 0..nfs {
let col_len = nfs - j;
let src = &frontal_data.col_as_slice(p + j)[p + j..m];
contrib_buffer.col_as_slice_mut(j)[j..j + col_len].copy_from_slice(src);
}
if ne == 0 {
return; }
let l21_nfs = frontal_data.as_ref().submatrix(p, 0, nfs, ne);
if ld_workspace.nrows() < nfs || ld_workspace.ncols() < ne {
*ld_workspace = Mat::zeros(nfs.max(ld_workspace.nrows()), ne.max(ld_workspace.ncols()));
}
let mut w = ld_workspace.as_mut().submatrix_mut(0, 0, nfs, ne);
w.fill(0.0);
compute_ld_into(l21_nfs, d, ne, w.rb_mut());
tri_matmul::matmul_with_conj(
contrib_buffer.as_mut().submatrix_mut(0, 0, nfs, nfs),
BlockStructure::TriangularLower,
Accum::Add,
w.as_ref(),
BlockStructure::Rectangular,
Conj::No,
l21_nfs.transpose(),
BlockStructure::Rectangular,
Conj::No,
-1.0,
par,
);
}
#[allow(clippy::too_many_arguments)]
fn update_cross_terms(
mut a: MatMut<'_, f64>,
col_start: usize,
nelim: usize,
block_cols: usize,
m: usize,
d: &MixedDiagonal,
ld_buf: &mut Mat<f64>,
copy_buf: &mut Mat<f64>,
) {
if nelim == 0 || nelim >= block_cols {
return; }
let n_failed = block_cols - nelim;
let trailing_start = col_start + block_cols;
let trailing_size = m - trailing_start;
{
let src = a
.rb()
.submatrix(col_start + nelim, col_start, n_failed, nelim);
let mut dst = copy_buf.as_mut().submatrix_mut(0, 0, n_failed, nelim);
dst.copy_from(src);
}
{
let l_blk = copy_buf.as_ref().submatrix(0, 0, n_failed, nelim);
compute_ld_into(
l_blk,
d,
nelim,
ld_buf.as_mut().submatrix_mut(0, 0, n_failed, nelim),
);
}
{
let l_blk = copy_buf.as_ref().submatrix(0, 0, n_failed, nelim);
let w_blk = ld_buf.as_ref().submatrix(0, 0, n_failed, nelim);
for j in 0..n_failed {
for i in j..n_failed {
let mut sum = 0.0;
for c in 0..nelim {
sum += w_blk[(i, c)] * l_blk[(j, c)];
}
a[(col_start + nelim + i, col_start + nelim + j)] -= sum;
}
}
}
if trailing_size > 0 {
{
let src = a
.rb()
.submatrix(trailing_start, col_start, trailing_size, nelim);
let mut dst = copy_buf
.as_mut()
.submatrix_mut(n_failed, 0, trailing_size, nelim);
dst.copy_from(src);
}
let l_blk = copy_buf.as_ref().submatrix(0, 0, n_failed, nelim);
let l_panel = copy_buf
.as_ref()
.submatrix(n_failed, 0, trailing_size, nelim);
compute_ld_into(
l_panel,
d,
nelim,
ld_buf
.as_mut()
.submatrix_mut(n_failed, 0, trailing_size, nelim),
);
let w_panel = ld_buf.as_ref().submatrix(n_failed, 0, trailing_size, nelim);
for j in 0..n_failed {
for i in 0..trailing_size {
let mut sum = 0.0;
for c in 0..nelim {
sum += w_panel[(i, c)] * l_blk[(j, c)];
}
a[(trailing_start + i, col_start + nelim + j)] -= sum;
}
}
}
}
fn factor_inner(
mut a: MatMut<'_, f64>,
num_fully_summed: usize,
nfs_boundary: usize,
options: &AptpOptions,
kernel_ws: &mut AptpKernelWorkspace,
) -> Result<AptpFactorResult, SparseError> {
let m = a.nrows();
let ib = options.inner_block_size;
let small = options.small;
let threshold = options.threshold;
let p = num_fully_summed;
let mut col_order: Vec<usize> = (0..m).collect();
let mut d = MixedDiagonal::new(p);
let mut stats = AptpStatistics::default();
let mut pivot_log = Vec::with_capacity(p);
let mut k = 0;
let mut end_pos = p;
let mut panel_perm_buf = vec![0.0f64; ib];
let mut row_perm_buf = vec![0.0f64; ib];
let mut col_order_buf = vec![0usize; ib];
while k < end_pos {
let block_size = (end_pos - k).min(ib);
let block_end = k + block_size;
let backup = BlockBackup::create(a.as_ref(), k, block_size, m, &mut kernel_ws.backup);
let (block_d, block_perm, block_nelim, block_stats, block_log) =
factor_block_diagonal(a.rb_mut(), k, block_size, small, block_end);
if block_nelim == 0 {
backup.restore_failed(a.rb_mut(), k, 0, block_size, m);
for offset in 0..block_size {
let delayed_orig = col_order[k + block_perm[offset]];
end_pos -= 1;
if k + offset < end_pos {
swap_symmetric(a.rb_mut(), k + offset, end_pos);
col_order.swap(k + offset, end_pos);
}
stats.num_delayed += 1;
pivot_log.push(AptpPivotRecord {
col: delayed_orig,
pivot_type: PivotType::Delayed,
max_l_entry: 0.0,
was_fallback: false,
});
}
continue;
}
let panel_start = block_end;
for r in panel_start..m {
for i in 0..block_size {
panel_perm_buf[i] = a[(r, k + block_perm[i])];
}
for i in 0..block_size {
a[(r, k + i)] = panel_perm_buf[i];
}
}
{
let mut bc = 0;
while bc < block_nelim {
match block_d.pivot_type(bc) {
PivotType::TwoByTwo { partner } if partner > bc => {
a[(k + bc + 1, k + bc)] = 0.0;
bc += 2;
}
_ => {
bc += 1;
}
}
}
}
if k > 0 {
for c in 0..k {
for i in 0..block_size {
row_perm_buf[i] = a[(k + block_perm[i], c)];
}
for i in 0..block_size {
a[(k + i, c)] = row_perm_buf[i];
}
}
}
col_order_buf[..block_size].copy_from_slice(&col_order[k..k + block_size]);
for i in 0..block_size {
col_order[k + i] = col_order_buf[block_perm[i]];
}
let mut effective_nelim = apply_and_check(
a.rb_mut(),
k,
block_nelim,
block_size,
m,
&block_d,
threshold,
options.par,
&mut kernel_ws.l11_buf,
);
effective_nelim = adjust_for_2x2_boundary(effective_nelim, &block_d);
if effective_nelim < block_nelim {
backup.restore_diagonal_with_perm(
a.rb_mut(),
k,
effective_nelim,
block_size,
m,
&block_perm,
);
}
let nelim = effective_nelim;
if nelim > 0 {
update_trailing(
a.rb_mut(),
k,
nelim,
block_size,
m,
nfs_boundary,
&block_d,
options.par,
&mut kernel_ws.ld_buf,
&mut kernel_ws.copy_buf,
);
update_cross_terms(
a.rb_mut(),
k,
nelim,
block_size,
m,
&block_d,
&mut kernel_ws.ld_buf,
&mut kernel_ws.copy_buf,
);
}
d.copy_from_offset(&block_d, k, nelim);
let mut passed_1x1 = 0;
let mut passed_2x2 = 0;
let mut sc = 0;
while sc < nelim {
match block_d.pivot_type(sc) {
PivotType::OneByOne => {
passed_1x1 += 1;
sc += 1;
}
PivotType::TwoByTwo { partner } if partner > sc => {
passed_2x2 += 1;
sc += 2;
}
_ => {
sc += 1;
}
}
}
stats.num_1x1 += passed_1x1;
stats.num_2x2 += passed_2x2;
if block_stats.max_l_entry > stats.max_l_entry {
stats.max_l_entry = block_stats.max_l_entry;
}
for c in 0..nelim {
for i in panel_start..m {
let v = a[(i, k + c)].abs();
if v > stats.max_l_entry {
stats.max_l_entry = v;
}
}
}
for record in &block_log {
if !matches!(record.pivot_type, PivotType::Delayed) && record.col < nelim {
let global_col = col_order[k + record.col];
let global_pivot_type = match record.pivot_type {
PivotType::TwoByTwo { partner } => PivotType::TwoByTwo {
partner: col_order[k + partner],
},
other => other,
};
pivot_log.push(AptpPivotRecord {
col: global_col,
pivot_type: global_pivot_type,
max_l_entry: record.max_l_entry,
was_fallback: record.was_fallback,
});
}
}
if nelim < block_size {
let n_delayed = block_size - nelim;
for i in (0..n_delayed).rev() {
let delayed_pos = k + nelim + i;
end_pos -= 1;
if delayed_pos < end_pos {
swap_symmetric(a.rb_mut(), delayed_pos, end_pos);
col_order.swap(delayed_pos, end_pos);
}
stats.num_delayed += 1;
pivot_log.push(AptpPivotRecord {
col: col_order[end_pos],
pivot_type: PivotType::Delayed,
max_l_entry: 0.0,
was_fallback: false,
});
}
}
k += nelim;
}
let num_eliminated = k;
d.truncate(num_eliminated);
let delayed_cols: Vec<usize> = (num_eliminated..p).map(|i| col_order[i]).collect();
Ok(AptpFactorResult {
d,
perm: col_order,
num_eliminated,
delayed_cols,
stats,
pivot_log,
})
}
fn two_level_factor(
mut a: MatMut<'_, f64>,
num_fully_summed: usize,
options: &AptpOptions,
kernel_ws: &mut AptpKernelWorkspace,
) -> Result<AptpFactorResult, SparseError> {
let m = a.nrows();
let nb = options.outer_block_size;
let p = num_fully_summed;
let mut col_order: Vec<usize> = (0..m).collect();
let mut global_d = MixedDiagonal::new(p);
let mut stats = AptpStatistics::default();
let mut pivot_log = Vec::with_capacity(p);
let mut global_nelim = 0;
let mut remaining_fully_summed = p;
while remaining_fully_summed > 0 {
let col_start = global_nelim;
let block_cols = remaining_fully_summed.min(nb);
let block_m = m - col_start;
let block_result = {
let block_view = a
.rb_mut()
.submatrix_mut(col_start, col_start, block_m, block_m);
factor_inner(block_view, block_cols, p - col_start, options, kernel_ws)?
};
let block_nelim = block_result.num_eliminated;
if col_start > 0 {
let block_perm = &block_result.perm;
let mut temp = vec![0.0f64; block_cols];
for c in 0..col_start {
for i in 0..block_cols {
temp[i] = a[(col_start + block_perm[i], c)];
}
for i in 0..block_cols {
a[(col_start + i, c)] = temp[i];
}
}
}
{
let block_perm = &block_result.perm;
let orig_order: Vec<usize> = col_order[col_start..col_start + block_cols].to_vec();
for i in 0..block_cols {
if block_perm[i] < block_cols {
col_order[col_start + i] = orig_order[block_perm[i]];
}
}
}
if block_nelim < block_cols {
let n_failed = block_cols - block_nelim;
for i in 0..n_failed {
let failed_pos = col_start + block_nelim + i;
let end = col_start + remaining_fully_summed - 1 - i;
if failed_pos < end {
swap_symmetric(a.rb_mut(), failed_pos, end);
col_order.swap(failed_pos, end);
}
}
stats.num_delayed += n_failed;
}
global_d.copy_from_offset(&block_result.d, global_nelim, block_nelim);
stats.num_1x1 += block_result.stats.num_1x1;
stats.num_2x2 += block_result.stats.num_2x2;
if block_result.stats.max_l_entry > stats.max_l_entry {
stats.max_l_entry = block_result.stats.max_l_entry;
}
for record in &block_result.pivot_log {
if !matches!(record.pivot_type, PivotType::Delayed) {
pivot_log.push(record.clone());
}
}
let n_failed = block_cols - block_nelim;
for i in 0..n_failed {
let delayed_pos = col_start + remaining_fully_summed - 1 - i;
pivot_log.push(AptpPivotRecord {
col: col_order[delayed_pos],
pivot_type: PivotType::Delayed,
max_l_entry: 0.0,
was_fallback: false,
});
}
global_nelim += block_nelim;
remaining_fully_summed -= block_cols;
}
global_d.truncate(global_nelim);
let delayed_cols: Vec<usize> = (global_nelim..p).map(|i| col_order[i]).collect();
Ok(AptpFactorResult {
d: global_d,
perm: col_order,
num_eliminated: global_nelim,
delayed_cols,
stats,
pivot_log,
})
}
fn tpp_test_2x2(
a: MatRef<'_, f64>,
t: usize,
p: usize,
maxt: f64,
maxp: f64,
u: f64,
small: f64,
) -> Option<(f64, f64, f64)> {
debug_assert!(t < p, "tpp_test_2x2 requires t < p");
let a11 = a[(t, t)];
let a21 = a[(p, t)]; let a22 = a[(p, p)];
let maxpiv = a11.abs().max(a21.abs()).max(a22.abs());
if maxpiv < small {
return None;
}
let detscale = 1.0 / maxpiv;
let detpiv0 = (a11 * detscale) * a22;
let detpiv1 = (a21 * detscale) * a21;
let detpiv = detpiv0 - detpiv1;
if detpiv.abs() < small.max((detpiv0 / 2.0).abs()).max((detpiv1 / 2.0).abs()) {
return None;
}
let d_inv_11 = (a22 * detscale) / detpiv;
let d_inv_12 = (-a21 * detscale) / detpiv;
let d_inv_22 = (a11 * detscale) / detpiv;
if maxt.max(maxp) < small {
return Some((a11, a21, a22));
}
let x1 = d_inv_11.abs() * maxt + d_inv_12.abs() * maxp;
let x2 = d_inv_12.abs() * maxt + d_inv_22.abs() * maxp;
if u * x1.max(x2) < 1.0 {
Some((a11, a21, a22))
} else {
None
}
}
#[allow(clippy::too_many_arguments)]
fn tpp_factor(
mut a: MatMut<'_, f64>,
start_col: usize,
num_fully_summed: usize,
col_order: &mut [usize],
global_d: &mut MixedDiagonal,
stats: &mut AptpStatistics,
pivot_log: &mut Vec<AptpPivotRecord>,
options: &AptpOptions,
) -> usize {
let m = a.nrows();
let n = num_fully_summed;
let u = options.threshold;
let small = options.small;
let mut nelim = start_col;
while nelim < n {
if tpp_is_col_small(a.as_ref(), nelim, nelim, m, small) {
global_d.set_1x1(nelim, 0.0);
stats.num_1x1 += 1;
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::OneByOne,
max_l_entry: 0.0,
was_fallback: true,
});
for r in (nelim + 1)..m {
a[(r, nelim)] = 0.0;
}
nelim += 1;
continue;
}
let mut found = false;
for p in (nelim + 1)..n {
if tpp_is_col_small(a.as_ref(), p, nelim, m, small) {
if p != nelim {
swap_symmetric(a.rb_mut(), p, nelim);
col_order.swap(p, nelim);
}
global_d.set_1x1(nelim, 0.0);
stats.num_1x1 += 1;
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::OneByOne,
max_l_entry: 0.0,
was_fallback: true,
});
for r in (nelim + 1)..m {
a[(r, nelim)] = 0.0;
}
nelim += 1;
found = true;
break;
}
let t = tpp_find_row_abs_max(a.as_ref(), p, nelim, p);
let maxt = tpp_find_rc_abs_max_exclude(a.as_ref(), t, nelim, m, p);
let maxp = tpp_find_rc_abs_max_exclude(a.as_ref(), p, nelim, m, t);
if tpp_test_2x2(a.as_ref(), t, p, maxt, maxp, u, small).is_some() {
if t != nelim {
swap_symmetric(a.rb_mut(), t, nelim);
col_order.swap(t, nelim);
}
let new_p = if p == nelim { t } else { p };
if new_p != nelim + 1 {
swap_symmetric(a.rb_mut(), new_p, nelim + 1);
col_order.swap(new_p, nelim + 1);
}
let d11 = a[(nelim, nelim)];
let d21 = a[(nelim + 1, nelim)];
let d22 = a[(nelim + 1, nelim + 1)];
let max_l = tpp_apply_2x2(a.rb_mut(), nelim, m, n);
global_d.set_2x2(Block2x2 {
first_col: nelim,
a: d11,
b: d21,
c: d22,
});
stats.num_2x2 += 1;
stats.max_l_entry = stats.max_l_entry.max(max_l);
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::TwoByTwo {
partner: col_order[nelim + 1],
},
max_l_entry: max_l,
was_fallback: true,
});
pivot_log.push(AptpPivotRecord {
col: col_order[nelim + 1],
pivot_type: PivotType::TwoByTwo {
partner: col_order[nelim],
},
max_l_entry: max_l,
was_fallback: true,
});
nelim += 2;
found = true;
break;
}
let maxp_with_t = maxp.max(tpp_sym_entry(a.as_ref(), p, t).abs());
if a[(p, p)].abs() >= u * maxp_with_t {
if p != nelim {
swap_symmetric(a.rb_mut(), p, nelim);
col_order.swap(p, nelim);
}
let max_l = tpp_apply_1x1(a.rb_mut(), nelim, m, n);
global_d.set_1x1(nelim, a[(nelim, nelim)]);
stats.num_1x1 += 1;
stats.max_l_entry = stats.max_l_entry.max(max_l);
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::OneByOne,
max_l_entry: max_l,
was_fallback: true,
});
nelim += 1;
found = true;
break;
}
}
if !found {
let maxp = tpp_find_rc_abs_max_exclude(a.as_ref(), nelim, nelim, m, usize::MAX);
if a[(nelim, nelim)].abs() >= u * maxp {
let max_l = tpp_apply_1x1(a.rb_mut(), nelim, m, n);
global_d.set_1x1(nelim, a[(nelim, nelim)]);
stats.num_1x1 += 1;
stats.max_l_entry = stats.max_l_entry.max(max_l);
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::OneByOne,
max_l_entry: max_l,
was_fallback: true,
});
nelim += 1;
} else {
break;
}
}
}
nelim - start_col
}
fn swap_rect(mut a: MatMut<'_, f64>, i: usize, j: usize) {
if i == j {
return;
}
let (i, j) = if i < j { (i, j) } else { (j, i) };
let n = a.ncols();
if i < n && j < n {
let tmp = a[(i, i)];
a[(i, i)] = a[(j, j)];
a[(j, j)] = tmp;
}
for k in 0..i.min(n) {
let tmp = a[(i, k)];
a[(i, k)] = a[(j, k)];
a[(j, k)] = tmp;
}
for k in (i + 1)..j {
if k < n {
let tmp = a[(k, i)];
a[(k, i)] = a[(j, k)];
a[(j, k)] = tmp;
} else if i < n {
break;
}
}
if i < n && j < n {
for k in (j + 1)..a.nrows() {
let tmp = a[(k, i)];
a[(k, i)] = a[(k, j)];
a[(k, j)] = tmp;
}
}
}
#[inline]
fn tpp_sym_entry(a: MatRef<'_, f64>, row: usize, col: usize) -> f64 {
let n = a.ncols();
if col < n && row >= col {
a[(row, col)]
} else if row < n && col >= row {
a[(col, row)]
} else {
panic!("tpp_sym_entry: row={} col={} both >= ncols={}", row, col, n);
}
}
fn tpp_apply_1x1(mut a: MatMut<'_, f64>, nelim: usize, m: usize, num_fully_summed: usize) -> f64 {
let d = a[(nelim, nelim)];
let inv_d = 1.0 / d;
let p = num_fully_summed;
let mut max_l = 0.0_f64;
for i in (nelim + 1)..m {
let l_ik = a[(i, nelim)] * inv_d;
a[(i, nelim)] = l_ik;
max_l = max_l.max(l_ik.abs());
}
let schur_col_end = p.min(a.ncols());
for j in (nelim + 1)..schur_col_end {
let ldlj = a[(j, nelim)] * d;
for i in j..m {
a[(i, j)] -= a[(i, nelim)] * ldlj;
}
}
max_l
}
fn tpp_apply_2x2(mut a: MatMut<'_, f64>, nelim: usize, m: usize, num_fully_summed: usize) -> f64 {
let a11 = a[(nelim, nelim)];
let a21 = a[(nelim + 1, nelim)];
let a22 = a[(nelim + 1, nelim + 1)];
let det = a11 * a22 - a21 * a21;
let inv_det = 1.0 / det;
let p = num_fully_summed;
let mut max_l = 0.0_f64;
let start = nelim + 2;
for i in start..m {
let ai1 = a[(i, nelim)];
let ai2 = a[(i, nelim + 1)];
let l_i1 = (ai1 * a22 - ai2 * a21) * inv_det;
let l_i2 = (ai2 * a11 - ai1 * a21) * inv_det;
a[(i, nelim)] = l_i1;
a[(i, nelim + 1)] = l_i2;
max_l = max_l.max(l_i1.abs()).max(l_i2.abs());
}
let schur_col_end = p.min(a.ncols());
for j in start..schur_col_end {
let wj1 = a[(j, nelim)] * a11 + a[(j, nelim + 1)] * a21;
let wj2 = a[(j, nelim)] * a21 + a[(j, nelim + 1)] * a22;
for i in j..m {
a[(i, j)] -= a[(i, nelim)] * wj1 + a[(i, nelim + 1)] * wj2;
}
}
a[(nelim + 1, nelim)] = 0.0;
max_l
}
fn tpp_is_col_small(a: MatRef<'_, f64>, idx: usize, from: usize, to: usize, small: f64) -> bool {
let n = a.ncols();
for c in from..idx.min(n) {
if a[(idx, c)].abs() >= small {
return false;
}
}
if idx < n {
for r in idx..to {
if a[(r, idx)].abs() >= small {
return false;
}
}
}
true
}
fn tpp_find_row_abs_max(a: MatRef<'_, f64>, p: usize, from: usize, to: usize) -> usize {
if from >= to {
return from;
}
let mut best_idx = from;
let mut best_val = tpp_sym_entry(a, p, from).abs();
for c in (from + 1)..to {
let v = tpp_sym_entry(a, p, c).abs();
if v > best_val {
best_idx = c;
best_val = v;
}
}
best_idx
}
fn tpp_find_rc_abs_max_exclude(
a: MatRef<'_, f64>,
col: usize,
nelim: usize,
m: usize,
exclude: usize,
) -> f64 {
let n = a.ncols();
let mut best = 0.0_f64;
for c in nelim..col.min(n) {
if c == exclude {
continue;
}
best = best.max(a[(col, c)].abs());
}
if col < n {
for r in (col + 1)..m {
if r == exclude {
continue;
}
best = best.max(a[(r, col)].abs());
}
}
best
}
pub(super) fn tpp_factor_rect(
mut a: MatMut<'_, f64>,
num_fully_summed: usize,
options: &AptpOptions,
) -> Result<AptpFactorResult, SparseError> {
let m = a.nrows();
let n = num_fully_summed;
debug_assert!(
a.ncols() >= n,
"tpp_factor_rect: ncols {} < num_fully_summed {}",
a.ncols(),
n
);
debug_assert!(m >= n, "tpp_factor_rect: nrows {} < ncols {}", m, n);
let u = options.threshold;
let small = options.small;
let mut col_order: Vec<usize> = (0..m).collect();
let mut d = MixedDiagonal::new(n);
let mut stats = AptpStatistics::default();
let mut pivot_log = Vec::with_capacity(n);
let mut nelim = 0;
while nelim < n {
if tpp_is_col_small(a.as_ref(), nelim, nelim, m, small) {
d.set_1x1(nelim, 0.0);
stats.num_1x1 += 1;
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::OneByOne,
max_l_entry: 0.0,
was_fallback: false,
});
if nelim < a.ncols() {
for r in (nelim + 1)..m {
a[(r, nelim)] = 0.0;
}
}
nelim += 1;
continue;
}
let mut found = false;
for p in (nelim + 1)..n {
if tpp_is_col_small(a.as_ref(), p, nelim, m, small) {
if p != nelim {
swap_rect(a.rb_mut(), p, nelim);
col_order.swap(p, nelim);
}
d.set_1x1(nelim, 0.0);
stats.num_1x1 += 1;
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::OneByOne,
max_l_entry: 0.0,
was_fallback: false,
});
for r in (nelim + 1)..m {
a[(r, nelim)] = 0.0;
}
nelim += 1;
found = true;
break;
}
let t = tpp_find_row_abs_max(a.as_ref(), p, nelim, p);
let maxt = tpp_find_rc_abs_max_exclude(a.as_ref(), t, nelim, m, p);
let maxp = tpp_find_rc_abs_max_exclude(a.as_ref(), p, nelim, m, t);
if tpp_test_2x2(a.as_ref(), t, p, maxt, maxp, u, small).is_some() {
if t != nelim {
swap_rect(a.rb_mut(), t, nelim);
col_order.swap(t, nelim);
}
let new_p = if p == nelim { t } else { p };
if new_p != nelim + 1 {
swap_rect(a.rb_mut(), new_p, nelim + 1);
col_order.swap(new_p, nelim + 1);
}
let d11 = a[(nelim, nelim)];
let d21 = a[(nelim + 1, nelim)];
let d22 = a[(nelim + 1, nelim + 1)];
let max_l = tpp_apply_2x2(a.rb_mut(), nelim, m, n);
d.set_2x2(Block2x2 {
first_col: nelim,
a: d11,
b: d21,
c: d22,
});
stats.num_2x2 += 1;
stats.max_l_entry = stats.max_l_entry.max(max_l);
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::TwoByTwo {
partner: col_order[nelim + 1],
},
max_l_entry: max_l,
was_fallback: false,
});
pivot_log.push(AptpPivotRecord {
col: col_order[nelim + 1],
pivot_type: PivotType::TwoByTwo {
partner: col_order[nelim],
},
max_l_entry: max_l,
was_fallback: false,
});
nelim += 2;
found = true;
break;
}
let maxp_with_t = maxp.max(tpp_sym_entry(a.as_ref(), p, t).abs());
if a[(p, p)].abs() >= u * maxp_with_t {
if p != nelim {
swap_rect(a.rb_mut(), p, nelim);
col_order.swap(p, nelim);
}
let max_l = tpp_apply_1x1(a.rb_mut(), nelim, m, n);
d.set_1x1(nelim, a[(nelim, nelim)]);
stats.num_1x1 += 1;
stats.max_l_entry = stats.max_l_entry.max(max_l);
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::OneByOne,
max_l_entry: max_l,
was_fallback: false,
});
nelim += 1;
found = true;
break;
}
}
if !found {
let maxp = tpp_find_rc_abs_max_exclude(a.as_ref(), nelim, nelim, m, usize::MAX);
if a[(nelim, nelim)].abs() >= u * maxp {
let max_l = tpp_apply_1x1(a.rb_mut(), nelim, m, n);
d.set_1x1(nelim, a[(nelim, nelim)]);
stats.num_1x1 += 1;
stats.max_l_entry = stats.max_l_entry.max(max_l);
pivot_log.push(AptpPivotRecord {
col: col_order[nelim],
pivot_type: PivotType::OneByOne,
max_l_entry: max_l,
was_fallback: false,
});
nelim += 1;
} else {
break;
}
}
}
d.truncate(nelim);
stats.num_delayed = n - nelim;
let delayed_cols: Vec<usize> = (nelim..n).map(|i| col_order[i]).collect();
Ok(AptpFactorResult {
d,
perm: col_order,
num_eliminated: nelim,
delayed_cols,
stats,
pivot_log,
})
}
#[allow(clippy::too_many_arguments)]
pub(super) fn compute_contribution_gemm_rect(
l_storage: &Mat<f64>,
num_fully_summed: usize,
num_eliminated: usize,
m: usize,
d: &MixedDiagonal,
contrib_buffer: &mut Mat<f64>,
ld_workspace: &mut Mat<f64>,
par: Par,
) {
let p = num_fully_summed;
let ne = num_eliminated;
let nfs = m - p;
if nfs == 0 || ne == 0 {
return;
}
let l21_nfs = l_storage.as_ref().submatrix(p, 0, nfs, ne);
if ld_workspace.nrows() < nfs || ld_workspace.ncols() < ne {
*ld_workspace = Mat::zeros(nfs.max(ld_workspace.nrows()), ne.max(ld_workspace.ncols()));
}
let mut w = ld_workspace.as_mut().submatrix_mut(0, 0, nfs, ne);
w.fill(0.0);
compute_ld_into(l21_nfs, d, ne, w.rb_mut());
tri_matmul::matmul_with_conj(
contrib_buffer.as_mut().submatrix_mut(0, 0, nfs, nfs),
BlockStructure::TriangularLower,
Accum::Add,
w.as_ref(),
BlockStructure::Rectangular,
Conj::No,
l21_nfs.transpose(),
BlockStructure::Rectangular,
Conj::No,
-1.0,
par,
);
}
pub(super) fn extract_front_factors_rect(
l_storage: &Mat<f64>,
m: usize,
k: usize,
frontal_row_indices: &[usize],
result: &AptpFactorResult,
) -> super::numeric::FrontFactors {
let ne = result.num_eliminated;
let l11 = if ne > 0 {
let mut l = Mat::zeros(ne, ne);
let mut col = 0;
while col < ne {
l[(col, col)] = 1.0;
match result.d.pivot_type(col) {
PivotType::OneByOne => {
let n_entries = ne - (col + 1);
if n_entries > 0 {
let src = &l_storage.col_as_slice(col)[col + 1..ne];
l.col_as_slice_mut(col)[col + 1..ne].copy_from_slice(src);
}
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
l[(col + 1, col + 1)] = 1.0;
let n_entries = ne - (col + 2);
if n_entries > 0 {
let src0 = &l_storage.col_as_slice(col)[col + 2..ne];
l.col_as_slice_mut(col)[col + 2..ne].copy_from_slice(src0);
let src1 = &l_storage.col_as_slice(col + 1)[col + 2..ne];
l.col_as_slice_mut(col + 1)[col + 2..ne].copy_from_slice(src1);
}
col += 2;
}
PivotType::TwoByTwo { .. } => {
col += 1;
}
PivotType::Delayed => {
unreachable!("unexpected Delayed pivot at col {} in 0..ne", col);
}
}
}
l
} else {
Mat::zeros(0, 0)
};
let mut d11 = MixedDiagonal::new(ne);
let mut col = 0;
while col < ne {
match result.d.pivot_type(col) {
PivotType::OneByOne => {
d11.set_1x1(col, result.d.diagonal_1x1(col));
col += 1;
}
PivotType::TwoByTwo { partner: _ } => {
let block = result.d.diagonal_2x2(col);
d11.set_2x2(Block2x2 {
first_col: col,
a: block.a,
b: block.b,
c: block.c,
});
col += 2;
}
PivotType::Delayed => {
unreachable!("unexpected Delayed pivot at col {} in 0..ne", col);
}
}
}
let r = m - ne;
let l21 = if ne > 0 && r > 0 {
let mut l = Mat::zeros(r, ne);
for j in 0..ne {
let src = &l_storage.col_as_slice(j)[ne..m];
l.col_as_slice_mut(j)[..r].copy_from_slice(src);
}
l
} else {
Mat::zeros(r, ne)
};
let local_perm = result.perm[..k].to_vec();
let col_indices: Vec<usize> = local_perm[..ne]
.iter()
.map(|&lp| frontal_row_indices[lp])
.collect();
let mut row_indices = Vec::with_capacity(m - ne);
for &lp in &result.perm[ne..k] {
row_indices.push(frontal_row_indices[lp]);
}
row_indices.extend_from_slice(&frontal_row_indices[k..m]);
super::numeric::FrontFactors {
l11,
d11,
l21,
local_perm,
num_eliminated: ne,
col_indices,
row_indices,
}
}
pub(super) fn extract_contribution_rect(
l_storage: &Mat<f64>,
m: usize,
k: usize,
frontal_row_indices: &[usize],
result: &AptpFactorResult,
mut contrib_buffer: Mat<f64>,
) -> super::numeric::ContributionBlock {
let ne = result.num_eliminated;
let size = m - ne;
let num_delayed = k - ne;
let nfs = m - k;
if num_delayed > 0 {
let mut data = Mat::zeros(size, size);
for j in 0..num_delayed {
let col_len = num_delayed - j;
let src = &l_storage.col_as_slice(ne + j)[ne + j..ne + j + col_len];
data.col_as_slice_mut(j)[j..j + col_len].copy_from_slice(src);
}
for j in 0..num_delayed {
let src = &l_storage.col_as_slice(ne + j)[k..m];
data.col_as_slice_mut(j)[num_delayed..size].copy_from_slice(src);
}
for j in 0..nfs {
let col_len = nfs - j;
let src = &contrib_buffer.col_as_slice(j)[j..j + col_len];
data.col_as_slice_mut(num_delayed + j)[num_delayed + j..size].copy_from_slice(src);
}
contrib_buffer = data;
}
let mut row_indices = Vec::with_capacity(size);
for &lp in &result.perm[ne..k] {
row_indices.push(frontal_row_indices[lp]);
}
row_indices.extend_from_slice(&frontal_row_indices[k..m]);
super::numeric::ContributionBlock {
data: contrib_buffer,
row_indices,
num_delayed,
}
}
#[cfg(test)]
mod tests {
use super::*;
use faer::Mat;
fn reconstruct_dense_ldlt(l: &Mat<f64>, d: &MixedDiagonal, perm: &[usize]) -> Mat<f64> {
let n = l.nrows();
let mut d_mat = Mat::zeros(n, n);
let nd = d.dimension();
let mut col = 0;
while col < nd {
match d.pivot_type(col) {
PivotType::OneByOne => {
d_mat[(col, col)] = d.diagonal_1x1(col);
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
let block = d.diagonal_2x2(col);
d_mat[(col, col)] = block.a;
d_mat[(col, col + 1)] = block.b;
d_mat[(col + 1, col)] = block.b;
d_mat[(col + 1, col + 1)] = block.c;
col += 2;
}
_ => {
col += 1;
}
}
}
let mut w = Mat::zeros(n, n);
for i in 0..n {
for j in 0..n {
let mut sum = 0.0;
for k in 0..n {
sum += l[(i, k)] * d_mat[(k, j)];
}
w[(i, j)] = sum;
}
}
let mut ldlt = Mat::zeros(n, n);
for i in 0..n {
for j in 0..n {
let mut sum = 0.0;
for k in 0..n {
sum += w[(i, k)] * l[(j, k)];
}
ldlt[(i, j)] = sum;
}
}
let mut result = Mat::zeros(n, n);
for i in 0..n {
for j in 0..n {
result[(perm[i], perm[j])] = ldlt[(i, j)];
}
}
result
}
fn dense_reconstruction_error(
original: &Mat<f64>,
l: &Mat<f64>,
d: &MixedDiagonal,
perm: &[usize],
) -> f64 {
let reconstructed = reconstruct_dense_ldlt(l, d, perm);
let n = original.nrows();
let mut diff_norm_sq = 0.0_f64;
let mut orig_norm_sq = 0.0_f64;
for i in 0..n {
for j in 0..n {
let diff = original[(i, j)] - reconstructed[(i, j)];
diff_norm_sq += diff * diff;
orig_norm_sq += original[(i, j)] * original[(i, j)];
}
}
if orig_norm_sq == 0.0 {
return diff_norm_sq.sqrt();
}
(diff_norm_sq / orig_norm_sq).sqrt()
}
fn symmetric_matrix(n: usize, f: impl Fn(usize, usize) -> f64) -> Mat<f64> {
Mat::from_fn(n, n, |i, j| if i >= j { f(i, j) } else { f(j, i) })
}
#[test]
fn test_reconstruction_trivial_identity() {
let l = Mat::identity(2, 2);
let mut d = MixedDiagonal::new(2);
d.set_1x1(0, 1.0);
d.set_1x1(1, 1.0);
let perm = vec![0, 1];
let result = reconstruct_dense_ldlt(&l, &d, &perm);
for i in 0..2 {
for j in 0..2 {
let expected = if i == j { 1.0 } else { 0.0 };
assert!(
(result[(i, j)] - expected).abs() < 1e-14,
"({},{}) = {}, expected {}",
i,
j,
result[(i, j)],
expected
);
}
}
}
fn complete_pivoting_factor_and_extract(
a: &Mat<f64>,
) -> (Mat<f64>, MixedDiagonal, Vec<usize>, AptpStatistics) {
let mut a_copy = a.clone();
let result = complete_pivoting_factor(a_copy.as_mut(), 1e-20);
let l = extract_l(a_copy.as_ref(), &result.d, result.num_eliminated);
(l, result.d, result.perm, result.stats)
}
#[test]
fn test_cp_identity() {
let a = Mat::identity(3, 3);
let mut a_copy = a.clone();
let result = complete_pivoting_factor(a_copy.as_mut(), 1e-20);
assert_eq!(result.num_eliminated, 3);
assert_eq!(result.stats.num_1x1, 3);
assert_eq!(result.stats.num_2x2, 0);
assert_eq!(result.stats.num_delayed, 0);
for i in 0..3 {
assert!((result.d.diagonal_1x1(i) - 1.0).abs() < 1e-14);
}
assert!(result.stats.max_l_entry < 1e-14);
}
#[test]
fn test_cp_diagonal_pivot_ordering() {
let a = symmetric_matrix(3, |i, j| if i == j { [2.0, 5.0, 3.0][i] } else { 0.0 });
let mut a_copy = a.clone();
let result = complete_pivoting_factor(a_copy.as_mut(), 1e-20);
assert_eq!(result.num_eliminated, 3);
assert_eq!(result.stats.num_1x1, 3);
assert!(
(result.d.diagonal_1x1(0) - 5.0).abs() < 1e-14,
"first pivot should be 5.0, got {}",
result.d.diagonal_1x1(0)
);
assert!(
(result.d.diagonal_1x1(1) - 3.0).abs() < 1e-14,
"second pivot should be 3.0, got {}",
result.d.diagonal_1x1(1)
);
assert!(
(result.d.diagonal_1x1(2) - 2.0).abs() < 1e-14,
"third pivot should be 2.0, got {}",
result.d.diagonal_1x1(2)
);
}
#[test]
fn test_cp_2x2_pivot() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[0.1, 10.0, 0.0, 0.0],
[10.0, 0.1, 0.0, 0.0],
[0.0, 0.0, 5.0, 1.0],
[0.0, 0.0, 1.0, 3.0],
];
vals[i][j]
});
let (l, d, perm, stats) = complete_pivoting_factor_and_extract(&a);
assert!(
stats.num_2x2 >= 1,
"expected at least one 2×2 pivot, got {}",
stats.num_2x2
);
assert!(
stats.max_l_entry <= COMPLETE_PIVOTING_GROWTH_BOUND + 1e-10,
"L entries should be bounded by 4, got {}",
stats.max_l_entry
);
let error = dense_reconstruction_error(&a, &l, &d, &perm);
assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
}
#[test]
fn test_cp_failed_2x2_fallback() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[2.0, 2.0, 0.1, 0.1],
[2.0, 2.0, 0.1, 0.1],
[0.1, 0.1, 5.0, 0.0],
[0.1, 0.1, 0.0, 3.0],
];
vals[i][j]
});
let (l, d, perm, stats) = complete_pivoting_factor_and_extract(&a);
assert!(
stats.max_l_entry <= COMPLETE_PIVOTING_GROWTH_BOUND + 1e-10,
"L entries should be bounded by 4"
);
let error = dense_reconstruction_error(&a, &l, &d, &perm);
assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
}
#[test]
fn test_cp_singular_block() {
let mut a = Mat::zeros(3, 3);
a[(0, 0)] = 1e-25;
a[(1, 1)] = 1e-25;
a[(2, 2)] = 1e-25;
let result = complete_pivoting_factor(a.as_mut(), 1e-20);
assert_eq!(
result.num_eliminated, 0,
"near-singular block should have 0 eliminations"
);
assert_eq!(result.stats.num_delayed, 3);
}
#[test]
fn test_cp_reconstruction_random() {
let sizes = [8, 16, 32];
for &n in &sizes {
let a = symmetric_matrix(n, |i, j| {
let seed = (i * 1000 + j * 7 + 13) as f64;
let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
if i == j {
val * 10.0 } else {
val
}
});
let (l, d, perm, stats) = complete_pivoting_factor_and_extract(&a);
assert_eq!(
stats.num_1x1 + 2 * stats.num_2x2 + stats.num_delayed,
n,
"statistics invariant for {}x{}",
n,
n
);
if stats.num_delayed == 0 {
let error = dense_reconstruction_error(&a, &l, &d, &perm);
assert!(
error < 1e-12,
"complete pivoting {}x{}: reconstruction error {:.2e} >= 1e-12",
n,
n,
error
);
}
assert!(
stats.max_l_entry <= COMPLETE_PIVOTING_GROWTH_BOUND + 1e-10,
"complete pivoting {}x{}: max_l_entry {:.2e} > 4",
n,
n,
stats.max_l_entry
);
}
}
#[test]
fn test_1x1_trivial_diagonal() {
let a = symmetric_matrix(2, |i, j| if i == j { [4.0, 9.0][i] } else { 0.0 });
let opts = AptpOptions {
fallback: AptpFallback::Delay,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(result.stats.num_1x1 + 2 * result.stats.num_2x2, 2);
assert_eq!(result.stats.num_delayed, 0);
assert!(result.delayed_cols.is_empty());
let error =
dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
}
#[test]
fn test_1x1_positive_definite_3x3() {
let a = symmetric_matrix(3, |i, j| {
let vals = [[4.0, 2.0, 1.0], [2.0, 5.0, 3.0], [1.0, 3.0, 6.0]];
vals[i][j]
});
let opts = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(result.stats.num_1x1 + 2 * result.stats.num_2x2, 3);
assert_eq!(result.stats.num_delayed, 0);
let error =
dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
}
#[test]
fn test_all_delayed_zero_matrix() {
let n = 4;
let a = Mat::zeros(n, n);
let opts = AptpOptions {
failed_pivot_method: FailedPivotMethod::Pass,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let total = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(total, n, "total pivots + delays should equal n");
}
#[test]
fn test_1x1_singularity_detection() {
let a = symmetric_matrix(3, |i, j| if i == j { [4.0, 1e-25, 9.0][i] } else { 0.0 });
let opts = AptpOptions {
fallback: AptpFallback::Delay,
failed_pivot_method: FailedPivotMethod::Pass,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let eliminated = result.stats.num_1x1 + 2 * result.stats.num_2x2;
assert!(
eliminated >= 2,
"should eliminate at least 2 columns, got {}",
eliminated
);
let total = eliminated + result.stats.num_delayed;
assert_eq!(total, 3, "total pivots + delays should equal n");
}
#[test]
fn test_stability_bound_enforced() {
let a = symmetric_matrix(2, |i, j| {
let vals = [[1e-4, 1.0], [1.0, 1.0]];
vals[i][j]
});
let opts = AptpOptions {
fallback: AptpFallback::Delay,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, 2);
let error =
dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
}
#[test]
fn test_1x1_matrix() {
let a = Mat::from_fn(1, 1, |_, _| 5.0);
let opts = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(result.stats.num_1x1, 1);
assert_eq!(result.stats.num_delayed, 0);
let error =
dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
assert!(error < 1e-14, "reconstruction error {:.2e}", error);
}
#[test]
fn test_statistics_sum_invariant() {
let a = symmetric_matrix(5, |i, j| {
let vals = [
[10.0, 1.0, 0.0, 0.0, 0.0],
[1.0, 20.0, 2.0, 0.0, 0.0],
[0.0, 2.0, 30.0, 3.0, 0.0],
[0.0, 0.0, 3.0, 40.0, 4.0],
[0.0, 0.0, 0.0, 4.0, 50.0],
];
vals[i][j]
});
let opts = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, 5, "statistics sum {} != n=5", sum);
}
#[test]
fn test_2x2_pivot_known_indefinite() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[0.01, 10.0, 0.0, 0.0],
[10.0, 0.01, 0.0, 0.0],
[0.0, 0.0, 5.0, 1.0],
[0.0, 0.0, 1.0, 3.0],
];
vals[i][j]
});
let opts = AptpOptions {
fallback: AptpFallback::BunchKaufman,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert!(
result.stats.num_2x2 >= 1,
"expected 2x2 pivot, got num_2x2={}",
result.stats.num_2x2
);
let error =
dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
}
#[test]
fn test_2x2_stability_test() {
let a_good = symmetric_matrix(2, |i, j| {
let vals = [[1.0, 5.0], [5.0, 1.0]];
vals[i][j]
});
let opts = AptpOptions {
fallback: AptpFallback::BunchKaufman,
..AptpOptions::default()
};
let result = aptp_factor(a_good.as_ref(), &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, 2);
}
#[test]
fn test_bk_vs_delay_fallback() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[0.01, 10.0, 0.0, 0.0],
[10.0, 0.01, 0.0, 0.0],
[0.0, 0.0, 5.0, 1.0],
[0.0, 0.0, 1.0, 3.0],
];
vals[i][j]
});
let bk_opts = AptpOptions {
fallback: AptpFallback::BunchKaufman,
..AptpOptions::default()
};
let delay_opts = AptpOptions {
fallback: AptpFallback::Delay,
..AptpOptions::default()
};
let bk_result = aptp_factor(a.as_ref(), &bk_opts).unwrap();
let delay_result = aptp_factor(a.as_ref(), &delay_opts).unwrap();
assert!(
bk_result.stats.num_delayed <= delay_result.stats.num_delayed,
"BK delayed {} > Delay delayed {}",
bk_result.stats.num_delayed,
delay_result.stats.num_delayed
);
if bk_result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&bk_result.l,
&bk_result.d,
bk_result.perm.as_ref().arrays().0,
);
assert!(error < 1e-12, "BK reconstruction error {:.2e}", error);
}
}
#[test]
fn test_strict_threshold() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[4.0, 2.0, 1.0, 0.5],
[2.0, 5.0, 2.0, 1.0],
[1.0, 2.0, 6.0, 2.0],
[0.5, 1.0, 2.0, 7.0],
];
vals[i][j]
});
let loose = AptpOptions {
threshold: 0.01,
fallback: AptpFallback::Delay,
..AptpOptions::default()
};
let strict = AptpOptions {
threshold: 0.5,
fallback: AptpFallback::Delay,
..AptpOptions::default()
};
let loose_result = aptp_factor(a.as_ref(), &loose).unwrap();
let strict_result = aptp_factor(a.as_ref(), &strict).unwrap();
assert!(
strict_result.stats.num_delayed >= loose_result.stats.num_delayed,
"strict delayed {} < loose delayed {}",
strict_result.stats.num_delayed,
loose_result.stats.num_delayed,
);
}
#[test]
fn test_permutation_valid() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[0.01, 10.0, 0.0, 0.0],
[10.0, 0.01, 0.0, 0.0],
[0.0, 0.0, 5.0, 1.0],
[0.0, 0.0, 1.0, 3.0],
];
vals[i][j]
});
let opts = AptpOptions {
fallback: AptpFallback::BunchKaufman,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let (fwd, inv) = result.perm.as_ref().arrays();
let n = fwd.len();
assert_eq!(n, 4);
let mut seen = vec![false; n];
for &v in fwd {
assert!(v < n, "perm value {} >= n={}", v, n);
assert!(!seen[v], "duplicate perm value {}", v);
seen[v] = true;
}
for i in 0..n {
assert_eq!(inv[fwd[i]], i);
assert_eq!(fwd[inv[i]], i);
}
}
#[test]
fn test_pd_statistics() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[10.0, 1.0, 0.0, 0.0],
[1.0, 20.0, 2.0, 0.0],
[0.0, 2.0, 30.0, 3.0],
[0.0, 0.0, 3.0, 40.0],
];
vals[i][j]
});
let opts = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(result.stats.num_1x1 + 2 * result.stats.num_2x2, 4);
assert_eq!(result.stats.num_delayed, 0);
assert!(result.stats.max_l_entry < 1.0 / opts.threshold);
}
#[test]
fn test_max_l_entry_accuracy() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[4.0, 2.0, 1.0, 0.5],
[2.0, 5.0, 2.0, 1.0],
[1.0, 2.0, 6.0, 2.0],
[0.5, 1.0, 2.0, 7.0],
];
vals[i][j]
});
let opts = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let n = result.l.nrows();
let mut actual_max = 0.0_f64;
for j in 0..n {
for i in (j + 1)..n {
let val = result.l[(i, j)].abs();
if val > actual_max {
actual_max = val;
}
}
}
assert!(
(result.stats.max_l_entry - actual_max).abs() < 1e-14,
"stats.max_l_entry={:.6e}, actual={:.6e}",
result.stats.max_l_entry,
actual_max
);
}
#[test]
fn test_pivot_log_completeness() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[4.0, 2.0, 1.0, 0.5],
[2.0, 5.0, 2.0, 1.0],
[1.0, 2.0, 6.0, 2.0],
[0.5, 1.0, 2.0, 7.0],
];
vals[i][j]
});
let opts = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(result.pivot_log.len(), 4);
}
#[test]
fn test_inertia_from_d() {
let a = symmetric_matrix(5, |i, j| {
if i == j {
[3.0, -2.0, 1.0, -4.0, 5.0][i]
} else {
0.0
}
});
let opts = AptpOptions {
fallback: AptpFallback::Delay,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(result.stats.num_delayed, 0, "should have no delays");
let inertia = result.d.compute_inertia();
assert_eq!(inertia.positive, 3, "expected 3 positive");
assert_eq!(inertia.negative, 2, "expected 2 negative");
assert_eq!(inertia.zero, 0, "expected 0 zero");
}
#[test]
fn test_partial_factorization() {
let n = 8;
let p = 4;
let a = symmetric_matrix(n, |i, j| {
if i == j {
10.0 + i as f64
} else {
1.0 / (1.0 + (i as f64 - j as f64).abs())
}
});
let opts = AptpOptions::default();
let mut a_copy = a.clone();
let result = aptp_factor_in_place(a_copy.as_mut(), p, &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, p, "statistics should sum to num_fully_summed={}", p);
}
#[test]
fn test_edge_case_extreme_thresholds() {
let a = symmetric_matrix(3, |i, j| {
let vals = [[4.0, 2.0, 1.0], [2.0, 5.0, 2.0], [1.0, 2.0, 6.0]];
vals[i][j]
});
let loose = AptpOptions {
threshold: 0.001,
fallback: AptpFallback::Delay,
..AptpOptions::default()
};
let result_loose = aptp_factor(a.as_ref(), &loose).unwrap();
assert_eq!(result_loose.stats.num_delayed, 0);
let strict = AptpOptions {
threshold: 1.0,
fallback: AptpFallback::Delay,
..AptpOptions::default()
};
let result_strict = aptp_factor(a.as_ref(), &strict).unwrap();
let sum = result_strict.stats.num_1x1
+ 2 * result_strict.stats.num_2x2
+ result_strict.stats.num_delayed;
assert_eq!(sum, 3);
}
#[test]
fn test_both_fallback_strategies_valid() {
let matrices = [
symmetric_matrix(3, |i, j| {
let vals = [[1.0, 2.0, 0.0], [2.0, -1.0, 1.0], [0.0, 1.0, 3.0]];
vals[i][j]
}),
symmetric_matrix(4, |i, j| {
let vals = [
[0.01, 10.0, 0.0, 0.0],
[10.0, 0.01, 0.0, 0.0],
[0.0, 0.0, 5.0, 1.0],
[0.0, 0.0, 1.0, 3.0],
];
vals[i][j]
}),
];
for (idx, a) in matrices.iter().enumerate() {
for fallback in [AptpFallback::BunchKaufman, AptpFallback::Delay] {
let opts = AptpOptions {
fallback,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"matrix {} {:?}: error {:.2e}",
idx,
fallback,
error
);
}
let sum =
result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
let n = a.nrows();
assert_eq!(sum, n, "statistics invariant broken for matrix {}", idx);
}
}
}
#[test]
fn test_invalid_non_square() {
let a = Mat::zeros(3, 4);
let opts = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &opts);
assert!(matches!(result, Err(SparseError::InvalidInput { .. })));
}
#[test]
fn test_invalid_threshold() {
let a = Mat::zeros(2, 2);
let opts = AptpOptions {
threshold: 0.0,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts);
assert!(matches!(result, Err(SparseError::InvalidInput { .. })));
let opts2 = AptpOptions {
threshold: 1.5,
..AptpOptions::default()
};
let result2 = aptp_factor(a.as_ref(), &opts2);
assert!(matches!(result2, Err(SparseError::InvalidInput { .. })));
}
#[test]
fn test_invalid_num_fully_summed_too_large() {
let mut a = Mat::zeros(3, 3);
let opts = AptpOptions::default();
let result = aptp_factor_in_place(a.as_mut(), 5, &opts);
assert!(matches!(result, Err(SparseError::InvalidInput { .. })));
}
#[test]
fn test_zero_fully_summed() {
let a = symmetric_matrix(3, |i, j| {
let vals = [[4.0, 2.0, 1.0], [2.0, 5.0, 3.0], [1.0, 3.0, 6.0]];
vals[i][j]
});
let opts = AptpOptions::default();
let mut a_copy = a.clone();
let result = aptp_factor_in_place(a_copy.as_mut(), 0, &opts).unwrap();
assert_eq!(result.num_eliminated, 0);
assert_eq!(result.stats.num_1x1, 0);
assert_eq!(result.stats.num_2x2, 0);
assert_eq!(result.stats.num_delayed, 0);
assert!(result.pivot_log.is_empty());
}
#[test]
fn test_2x2_fallback_also_fails() {
let a = symmetric_matrix(3, |i, j| {
let vals = [[0.001, 0.11, 0.0], [0.11, 12.0, 0.1], [0.0, 0.1, 5.0]];
vals[i][j]
});
let opts = AptpOptions {
fallback: AptpFallback::BunchKaufman,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, 3, "statistics sum {} != n=3", sum);
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(error < 1e-12, "reconstruction error {:.2e}", error);
}
assert!(
result.stats.max_l_entry <= COMPLETE_PIVOTING_GROWTH_BOUND + 1e-10,
"max_l_entry {} > 4",
result.stats.max_l_entry
);
}
#[test]
fn test_invalid_small_negative() {
let a = Mat::zeros(2, 2);
let opts = AptpOptions {
small: -1.0,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts);
assert!(matches!(result, Err(SparseError::InvalidInput { .. })));
}
#[cfg(feature = "test-util")]
mod stress_tests {
use super::*;
use crate::testing::generators::{RandomMatrixConfig, generate_random_symmetric};
use rand::SeedableRng;
use rand::rngs::StdRng;
fn random_dense_pd(n: usize, rng: &mut impl rand::Rng) -> Mat<f64> {
let config = RandomMatrixConfig {
size: n,
target_nnz: n * n / 2,
positive_definite: true,
};
generate_random_symmetric(&config, rng).unwrap().to_dense()
}
fn random_dense_indefinite(n: usize, rng: &mut impl rand::Rng) -> Mat<f64> {
let config = RandomMatrixConfig {
size: n,
target_nnz: n * n / 2,
positive_definite: false,
};
generate_random_symmetric(&config, rng).unwrap().to_dense()
}
#[test]
fn test_random_pd_matrices() {
let mut rng = StdRng::seed_from_u64(42);
let opts = AptpOptions::default();
let sizes = [5, 10, 20, 50];
let mut total = 0;
for &n in &sizes {
for _ in 0..25 {
let a = random_dense_pd(n, &mut rng);
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(
result.stats.num_delayed, 0,
"PD matrix {}x{} should have zero delays",
n, n
);
let total_elim = result.stats.num_1x1 + 2 * result.stats.num_2x2;
assert_eq!(
total_elim, n,
"PD matrix {}x{} should eliminate all columns (1x1={}, 2x2={})",
n, n, result.stats.num_1x1, result.stats.num_2x2
);
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"PD {}x{}: reconstruction error {:.2e}",
n,
n,
error
);
total += 1;
}
}
assert!(total >= 100, "ran {} PD tests, need >= 100", total);
}
#[test]
fn test_random_indefinite_matrices() {
let mut rng = StdRng::seed_from_u64(123);
let opts = AptpOptions {
fallback: AptpFallback::BunchKaufman,
..AptpOptions::default()
};
let sizes = [5, 10, 20, 50];
let mut total = 0;
for &n in &sizes {
for _ in 0..25 {
let a = random_dense_indefinite(n, &mut rng);
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let sum =
result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, n, "stats invariant for {}x{}", n, n);
assert!(
result.stats.max_l_entry < 1.0 / opts.threshold,
"stability bound violated for {}x{}",
n,
n
);
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"indefinite {}x{}: reconstruction error {:.2e}",
n,
n,
error
);
}
total += 1;
}
}
assert!(total >= 100, "ran {} indefinite tests, need >= 100", total);
}
}
#[cfg(feature = "test-util")]
mod integration_tests {
use super::*;
use crate::testing::cases::{TestCaseFilter, load_test_cases};
#[test]
#[ignore] fn test_hand_constructed_matrices() {
let cases = load_test_cases(&TestCaseFilter::hand_constructed())
.expect("failed to load hand-constructed matrices");
assert_eq!(cases.len(), 15, "expected 15 hand-constructed matrices");
let opts = AptpOptions::default();
let mut passed = 0;
for case in &cases {
let dense = case.matrix.to_dense();
let n = dense.nrows();
if n > 500 {
continue;
}
let result = aptp_factor(dense.as_ref(), &opts).unwrap();
let sum =
result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, n, "{}: stats invariant failed", case.name);
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&dense,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"{}: reconstruction error {:.2e}",
case.name,
error
);
}
passed += 1;
}
assert!(passed >= 10, "only {} hand-constructed passed", passed);
}
#[test]
#[ignore] fn test_suitesparse_ci_dense() {
let cases = load_test_cases(&TestCaseFilter::ci_subset())
.expect("failed to load CI-subset matrices");
let opts = AptpOptions::default();
let mut tested = 0;
for case in &cases {
let n = case.matrix.nrows();
if n > 200 {
continue;
}
let dense = case.matrix.to_dense();
let result = aptp_factor(dense.as_ref(), &opts).unwrap();
let sum =
result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, n, "{}: stats invariant failed", case.name);
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&dense,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"{}: reconstruction error {:.2e}",
case.name,
error
);
}
tested += 1;
}
assert!(
tested > 0,
"no CI-subset matrices small enough for dense test"
);
}
}
#[test]
fn test_factor_inner_reconstruction_moderate() {
let sizes = [64, 128, 256];
let opts = AptpOptions::default();
for &n in &sizes {
let a = symmetric_matrix(n, |i, j| {
let seed = (i * 1000 + j * 7 + 13) as f64;
let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
if i == j { val * 10.0 } else { val }
});
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(
result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
n,
"factor_inner {}x{}: statistics invariant",
n,
n
);
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"factor_inner {}x{}: reconstruction error {:.2e} >= 1e-12",
n,
n,
error
);
}
}
}
#[test]
fn test_factor_inner_partial_factorization() {
let n = 64;
let p = 48; let a = symmetric_matrix(n, |i, j| {
let seed = (i * 1000 + j * 7 + 13) as f64;
let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
if i == j { val * 10.0 } else { val }
});
let opts = AptpOptions::default();
let mut a_copy = a.to_owned();
let result = aptp_factor_in_place(a_copy.as_mut(), p, &opts).unwrap();
assert!(
result.num_eliminated <= p,
"eliminated {} > p={}",
result.num_eliminated,
p
);
assert_eq!(
result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
p,
"statistics invariant for partial factorization"
);
}
#[test]
fn test_two_level_dispatch_small_block_size() {
let sizes = [33, 64, 100];
let opts = AptpOptions {
outer_block_size: 32,
inner_block_size: 8,
..AptpOptions::default()
};
for &n in &sizes {
let a = symmetric_matrix(n, |i, j| {
let seed = (i * 1000 + j * 7 + 13) as f64;
let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
if i == j { val * 10.0 } else { val }
});
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(
result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
n,
"two-level {}x{}: statistics invariant",
n,
n
);
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"two-level {}x{}: reconstruction error {:.2e} >= 1e-12",
n,
n,
error
);
}
}
}
#[test]
fn test_two_level_single_outer_block() {
let n = 32;
let opts = AptpOptions {
outer_block_size: 32,
inner_block_size: 8,
..AptpOptions::default()
};
let a = symmetric_matrix(n, |i, j| {
let seed = (i * 1000 + j * 7 + 13) as f64;
let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
if i == j { val * 10.0 } else { val }
});
let result = aptp_factor(a.as_ref(), &opts).unwrap();
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(error < 1e-12, "single block: error {:.2e}", error);
}
}
#[test]
fn test_two_level_boundary_nb_plus_1() {
let n = 33;
let opts = AptpOptions {
outer_block_size: 32,
inner_block_size: 8,
..AptpOptions::default()
};
let a = symmetric_matrix(n, |i, j| {
let seed = (i * 1000 + j * 7 + 13) as f64;
let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
if i == j { val * 10.0 } else { val }
});
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(
result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
n,
);
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(error < 1e-12, "nb+1 boundary: error {:.2e}", error);
}
}
#[test]
fn test_two_level_partial_factorization() {
let n = 80;
let p = 50; let opts = AptpOptions {
outer_block_size: 32,
inner_block_size: 8,
..AptpOptions::default()
};
let a = symmetric_matrix(n, |i, j| {
let seed = (i * 1000 + j * 7 + 13) as f64;
let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
if i == j { val * 10.0 } else { val }
});
let mut a_copy = a.to_owned();
let result = aptp_factor_in_place(a_copy.as_mut(), p, &opts).unwrap();
assert!(result.num_eliminated <= p);
assert_eq!(
result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
p,
);
}
#[test]
fn test_two_level_vs_single_level_equivalence() {
let n = 64;
let a = symmetric_matrix(n, |i, j| {
let seed = (i * 1000 + j * 7 + 13) as f64;
let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
if i == j { val * 10.0 } else { val }
});
let opts_single = AptpOptions {
outer_block_size: 256,
inner_block_size: 32,
..AptpOptions::default()
};
let result_single = aptp_factor(a.as_ref(), &opts_single).unwrap();
let opts_two = AptpOptions {
outer_block_size: 16,
inner_block_size: 8,
..AptpOptions::default()
};
let result_two = aptp_factor(a.as_ref(), &opts_two).unwrap();
if result_single.stats.num_delayed == 0 {
let err_s = dense_reconstruction_error(
&a,
&result_single.l,
&result_single.d,
result_single.perm.as_ref().arrays().0,
);
assert!(err_s < 1e-12, "single-level error {:.2e}", err_s);
}
if result_two.stats.num_delayed == 0 {
let err_t = dense_reconstruction_error(
&a,
&result_two.l,
&result_two.d,
result_two.perm.as_ref().arrays().0,
);
assert!(err_t < 1e-12, "two-level error {:.2e}", err_t);
}
}
#[test]
fn test_two_level_vs_unblocked_reconstruction() {
let n = 512;
let a = symmetric_matrix(n, |i, j| {
let seed = (i * 997 + j * 1013 + 42) as f64;
let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
if i == j {
val * 0.5
} else {
val
}
});
let opts_unblocked = AptpOptions {
outer_block_size: 100_000,
inner_block_size: 32,
..AptpOptions::default()
};
let result_unblocked = aptp_factor(a.as_ref(), &opts_unblocked).unwrap();
let err_unblocked = dense_reconstruction_error(
&a,
&result_unblocked.l,
&result_unblocked.d,
result_unblocked.perm.as_ref().arrays().0,
);
assert!(
err_unblocked < 1e-12,
"unblocked reconstruction error {:.2e} exceeds 1e-12",
err_unblocked,
);
let opts_two_level = AptpOptions {
outer_block_size: 128,
inner_block_size: 32,
..AptpOptions::default()
};
let result_two_level = aptp_factor(a.as_ref(), &opts_two_level).unwrap();
let err_two_level = dense_reconstruction_error(
&a,
&result_two_level.l,
&result_two_level.d,
result_two_level.perm.as_ref().arrays().0,
);
assert!(
err_two_level < 1e-12,
"two-level reconstruction error {:.2e} exceeds 1e-12 \
(unblocked was {:.2e})",
err_two_level,
err_unblocked,
);
let ratio = if err_unblocked > 0.0 {
err_two_level / err_unblocked
} else {
1.0
};
assert!(
ratio < 10.0,
"two-level error ({:.2e}) is {:.1}x worse than unblocked ({:.2e})",
err_two_level,
ratio,
err_unblocked,
);
}
#[test]
fn test_factor_block_diagonal_basic() {
let mut a = Mat::identity(8, 8);
let (d, perm, nelim, stats, _log) = factor_block_diagonal(a.as_mut(), 0, 4, 1e-20, 4);
assert_eq!(nelim, 4);
assert_eq!(stats.num_1x1, 4);
assert_eq!(stats.num_2x2, 0);
assert_eq!(stats.num_delayed, 0);
assert!(stats.max_l_entry < 1e-14);
for i in 0..4 {
assert!((d.diagonal_1x1(i) - 1.0).abs() < 1e-14);
}
assert_eq!(perm, vec![0, 1, 2, 3]);
for i in 4..8 {
for j in 0..4 {
assert!(
a[(i, j)].abs() < 1e-14,
"panel entry ({},{}) should be 0, got {}",
i,
j,
a[(i, j)]
);
}
}
}
#[test]
fn test_factor_block_diagonal_block_scoped_swap() {
let mut a = symmetric_matrix(8, |i, j| {
if i == j {
[1.0, 5.0, 2.0, 3.0, 0.1, 0.1, 0.1, 0.1][i]
} else if (i == 4 && j == 1) || (i == 1 && j == 4) {
0.99
} else {
0.0
}
});
let panel_row_before: Vec<f64> = (0..4).map(|j| a[(4, j)]).collect();
let (_d, perm, nelim, _stats, _log) = factor_block_diagonal(a.as_mut(), 0, 4, 1e-20, 4);
assert!(nelim > 0, "should eliminate at least 1 column");
assert_eq!(
perm[0], 1,
"first pivot should be original column 1 (max diag = 5.0)"
);
for j in 0..4 {
assert!(
(a[(4, j)] - panel_row_before[j]).abs() < 1e-14,
"panel row (4,{}) should be unchanged: got {}, expected {}",
j,
a[(4, j)],
panel_row_before[j]
);
}
}
#[test]
fn test_blas3_pipeline_reconstruction() {
let a = symmetric_matrix(8, |i, j| {
if i == j {
10.0 + i as f64
} else {
1.0 / (1.0 + (i as f64 - j as f64).abs())
}
});
let opts = AptpOptions {
inner_block_size: 4, ..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(result.stats.num_delayed, 0, "should have no delays");
let error =
dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
assert!(
error < 1e-12,
"BLAS-3 pipeline reconstruction error {:.2e} >= 1e-12",
error
);
}
#[test]
fn test_blas3_threshold_failure_and_retry() {
let a = symmetric_matrix(8, |i, j| {
let vals = [
[1e-3, 1.0, 0.5, 0.5, 0.1, 0.1, 0.1, 0.1],
[1.0, 1e-3, 0.5, 0.5, 0.1, 0.1, 0.1, 0.1],
[0.5, 0.5, 10.0, 1.0, 0.1, 0.1, 0.1, 0.1],
[0.5, 0.5, 1.0, 10.0, 0.1, 0.1, 0.1, 0.1],
[0.1, 0.1, 0.1, 0.1, 5.0, 0.5, 0.0, 0.0],
[0.1, 0.1, 0.1, 0.1, 0.5, 6.0, 0.0, 0.0],
[0.1, 0.1, 0.1, 0.1, 0.0, 0.0, 7.0, 0.5],
[0.1, 0.1, 0.1, 0.1, 0.0, 0.0, 0.5, 8.0],
];
vals[i][j]
});
let opts = AptpOptions {
inner_block_size: 4,
threshold: 0.01, ..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, 8, "statistics sum {} != 8", sum);
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"threshold failure retry: reconstruction error {:.2e}",
error
);
}
}
#[test]
fn test_adjust_for_2x2_boundary() {
let mut d = MixedDiagonal::new(4);
d.set_1x1(0, 1.0);
d.set_2x2(Block2x2 {
first_col: 1,
a: 1.0,
b: 0.5,
c: 2.0,
});
d.set_1x1(3, 3.0);
assert_eq!(adjust_for_2x2_boundary(2, &d), 1);
assert_eq!(adjust_for_2x2_boundary(3, &d), 3);
assert_eq!(adjust_for_2x2_boundary(1, &d), 1);
assert_eq!(adjust_for_2x2_boundary(4, &d), 4);
assert_eq!(adjust_for_2x2_boundary(0, &d), 0);
}
#[test]
fn test_blas3_full_block_singular() {
let mut a = Mat::zeros(8, 8);
for i in 0..8 {
a[(i, i)] = 1e-25;
}
let opts = AptpOptions {
inner_block_size: 4,
failed_pivot_method: FailedPivotMethod::Pass,
..AptpOptions::default()
};
let mut a_copy = a.clone();
let result = aptp_factor_in_place(a_copy.as_mut(), 8, &opts).unwrap();
assert_eq!(result.num_eliminated, 0);
assert_eq!(result.stats.num_delayed, 8);
assert_eq!(result.delayed_cols.len(), 8);
}
fn deterministic_indefinite_matrix(n: usize, seed: u64, diag_scale: f64) -> Mat<f64> {
let hash = |a: usize, b: usize| -> f64 {
let mut s = seed
.wrapping_add((a * 10007) as u64)
.wrapping_add((b * 7) as u64);
s = s
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
s = s
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
((s >> 33) as f64) / (u32::MAX as f64 / 2.0) - 1.0
};
symmetric_matrix(n, |i, j| {
if i == j {
hash(i, i) * diag_scale
} else {
hash(i.min(j), i.max(j) + n)
}
})
}
fn cause_delays(a: &mut Mat<f64>, seed: u64, scale: f64) {
let n = a.nrows();
let n_scaled = (n / 8).max(1);
let mut state = seed;
let mut next_idx = || -> usize {
state = state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
((state >> 33) as usize) % n
};
let mut scaled_rows = Vec::new();
while scaled_rows.len() < n_scaled {
let idx = next_idx();
if !scaled_rows.contains(&idx) {
scaled_rows.push(idx);
}
}
for &r in &scaled_rows {
for j in 0..n {
a[(r, j)] *= scale;
a[(j, r)] *= scale;
}
}
}
#[test]
fn test_factor_inner_with_delays() {
struct TestConfig {
n: usize,
ib: usize,
seed: u64,
scale: f64,
}
let configs = [
TestConfig {
n: 8,
ib: 2,
seed: 42,
scale: 1000.0,
},
TestConfig {
n: 8,
ib: 4,
seed: 42,
scale: 1000.0,
},
TestConfig {
n: 16,
ib: 4,
seed: 42,
scale: 1000.0,
},
TestConfig {
n: 16,
ib: 2,
seed: 42,
scale: 1000.0,
},
TestConfig {
n: 32,
ib: 4,
seed: 42,
scale: 1000.0,
},
TestConfig {
n: 32,
ib: 8,
seed: 42,
scale: 1000.0,
},
TestConfig {
n: 16,
ib: 4,
seed: 137,
scale: 1000.0,
},
TestConfig {
n: 16,
ib: 4,
seed: 42,
scale: 1e6,
},
TestConfig {
n: 64,
ib: 8,
seed: 42,
scale: 1000.0,
},
TestConfig {
n: 64,
ib: 16,
seed: 42,
scale: 1000.0,
},
];
let mut _any_delays_found = false;
let mut any_failures = false;
for (idx, config) in configs.iter().enumerate() {
let mut a = deterministic_indefinite_matrix(config.n, config.seed, 5.0);
cause_delays(&mut a, config.seed + 1000, config.scale);
let opts = AptpOptions {
inner_block_size: config.ib,
outer_block_size: config.n.max(config.ib),
threshold: 0.01,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let n = config.n;
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(
sum, n,
"config {}: statistics invariant broken: {} != {}",
idx, sum, n
);
if result.stats.num_delayed > 0 {
_any_delays_found = true;
}
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
if error >= 1e-12 {
any_failures = true;
}
assert!(
error < 1e-10,
"config {} (n={}, ib={}): reconstruction error {:.2e} >= 1e-10 \
(no delays but bad reconstruction indicates factor_inner bug)",
idx,
config.n,
config.ib,
error
);
}
}
assert!(
!any_failures,
"Some configs had reconstruction error >= 1e-12 without delays. \
This indicates a bug in factor_inner's backup/restore/update logic."
);
}
#[test]
fn test_factor_inner_with_delays_targeted() {
let n = 8;
for &ib in &[4, 2] {
let a = symmetric_matrix(n, |i, j| {
match (i, j) {
(0, 0) => 10.0,
(1, 1) => 10.0,
(2, 2) => 0.005,
(3, 3) => 0.005,
(i, j) if i < 4 && j < 4 && i != j => 0.001,
(_, j) if j < 4 => 2.0,
(i, _) if i == j => 20.0,
(_, _) => 0.5,
}
});
let opts_pass = AptpOptions {
inner_block_size: ib,
outer_block_size: 256,
threshold: 0.01,
failed_pivot_method: FailedPivotMethod::Pass,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts_pass).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, n, "ib={}: statistics invariant: {} != {}", ib, sum, n);
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"targeted (ib={}): reconstruction error {:.2e} >= 1e-12 (no delays)",
ib,
error
);
}
if ib == 4 || ib == 2 {
assert!(
result.stats.num_delayed > 0,
"ib={}: expected some delays for small-pivot columns",
ib
);
}
{
let p = 6; let mut a_copy = a.clone();
let opts_partial = AptpOptions {
inner_block_size: ib,
outer_block_size: 256,
threshold: 0.01,
failed_pivot_method: FailedPivotMethod::Pass,
..AptpOptions::default()
};
let result_p = aptp_factor_in_place(a_copy.as_mut(), p, &opts_partial).unwrap();
let error_p = check_partial_factorization_in_place(&a, &a_copy, p, &result_p);
assert!(
error_p < 1e-10,
"targeted partial (ib={}, p={}): error {:.2e} >= 1e-10",
ib,
p,
error_p
);
}
}
}
#[test]
fn test_factor_inner_with_delays_aggressive() {
let test_cases: Vec<(usize, usize, u64)> = vec![
(12, 4, 1),
(12, 4, 2),
(12, 4, 3),
(16, 4, 1),
(16, 4, 2),
(16, 8, 1),
(20, 4, 1),
(24, 4, 1),
(24, 8, 1),
(32, 4, 1),
(32, 8, 1),
(32, 16, 1),
(48, 8, 1),
(64, 8, 1),
(64, 16, 1),
];
let mut _total_delays = 0;
let mut max_error = 0.0_f64;
let mut _worst_config = String::new();
for &(n, ib, seed) in &test_cases {
let mut a = deterministic_indefinite_matrix(n, seed * 31337, 5.0);
cause_delays(&mut a, seed * 31337 + 7919, 1000.0);
let opts = AptpOptions {
inner_block_size: ib,
outer_block_size: n.max(ib),
threshold: 0.01,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(
sum, n,
"(n={}, ib={}, seed={}): stats invariant {} != {}",
n, ib, seed, sum, n
);
_total_delays += result.stats.num_delayed;
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
if error > max_error {
max_error = error;
_worst_config =
format!("n={}, ib={}, seed={}: error={:.2e}", n, ib, seed, error);
}
assert!(
error < 1e-10,
"(n={}, ib={}, seed={}): reconstruction error {:.2e} >= 1e-10",
n,
ib,
seed,
error
);
}
}
}
#[test]
fn test_factor_inner_cause_delays_then_compare_single_vs_blocked() {
let seeds = [42u64, 137, 271, 314, 997];
let n = 16;
for &seed in &seeds {
let mut a = deterministic_indefinite_matrix(n, seed, 5.0);
cause_delays(&mut a, seed + 5000, 500.0);
let opts_single = AptpOptions {
inner_block_size: n,
outer_block_size: n,
threshold: 0.01,
..AptpOptions::default()
};
let result_single = aptp_factor(a.as_ref(), &opts_single).unwrap();
let opts_blocked = AptpOptions {
inner_block_size: 4,
outer_block_size: n,
threshold: 0.01,
..AptpOptions::default()
};
let result_blocked = aptp_factor(a.as_ref(), &opts_blocked).unwrap();
if result_single.stats.num_delayed == 0 {
let error_s = dense_reconstruction_error(
&a,
&result_single.l,
&result_single.d,
result_single.perm.as_ref().arrays().0,
);
assert!(
error_s < 1e-12,
"seed={}: single-block error {:.2e}",
seed,
error_s
);
}
if result_blocked.stats.num_delayed == 0 {
let error_b = dense_reconstruction_error(
&a,
&result_blocked.l,
&result_blocked.d,
result_blocked.perm.as_ref().arrays().0,
);
assert!(
error_b < 1e-12,
"seed={}: blocked error {:.2e} >= 1e-12",
seed,
error_b
);
}
}
}
fn check_partial_factorization_in_place(
original: &Mat<f64>,
factored: &Mat<f64>,
num_fully_summed: usize,
result: &AptpFactorResult,
) -> f64 {
let m = original.nrows();
let q = result.num_eliminated;
let p = num_fully_summed;
let perm = &result.perm;
let mut factored = factored.clone();
let nfs = m - p;
if nfs > 0 && q > 0 {
let mut contrib_buffer = Mat::zeros(nfs, nfs);
let mut ld_ws = Mat::new();
compute_contribution_gemm(
&factored,
p,
q,
m,
&result.d,
&mut contrib_buffer,
&mut ld_ws,
Par::Seq,
);
for i in 0..nfs {
for j in 0..=i {
factored[(p + i, p + j)] = contrib_buffer[(i, j)];
}
}
}
let d = &result.d;
let mut papt = Mat::zeros(m, m);
for i in 0..m {
for j in 0..m {
papt[(i, j)] = original[(perm[i], perm[j])];
}
}
let mut l_full = Mat::zeros(m, q);
for i in 0..q {
l_full[(i, i)] = 1.0;
}
let mut col = 0;
while col < q {
match d.pivot_type(col) {
PivotType::OneByOne => {
for i in (col + 1)..m {
l_full[(i, col)] = factored[(i, col)];
}
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
for i in (col + 2)..m {
l_full[(i, col)] = factored[(i, col)];
l_full[(i, col + 1)] = factored[(i, col + 1)];
}
col += 2;
}
_ => {
col += 1;
}
}
}
let mut d_mat = Mat::zeros(q, q);
col = 0;
while col < q {
match d.pivot_type(col) {
PivotType::OneByOne => {
d_mat[(col, col)] = d.diagonal_1x1(col);
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
let block = d.diagonal_2x2(col);
d_mat[(col, col)] = block.a;
d_mat[(col, col + 1)] = block.b;
d_mat[(col + 1, col)] = block.b;
d_mat[(col + 1, col + 1)] = block.c;
col += 2;
}
_ => {
col += 1;
}
}
}
let mut w = Mat::zeros(m, q);
for i in 0..m {
for j in 0..q {
let mut sum = 0.0;
for k in 0..q {
sum += l_full[(i, k)] * d_mat[(k, j)];
}
w[(i, j)] = sum;
}
}
let mut ldlt = Mat::zeros(m, m);
for i in 0..m {
for j in 0..m {
let mut sum = 0.0;
for k in 0..q {
sum += w[(i, k)] * l_full[(j, k)];
}
ldlt[(i, j)] = sum;
}
}
let mut max_error = 0.0_f64;
let mut norm_sq = 0.0_f64;
let mut diff_sq = 0.0_f64;
for i in 0..m {
for j in 0..=i {
let weight = if i == j { 1.0 } else { 2.0 };
norm_sq += weight * papt[(i, j)] * papt[(i, j)];
let residual = papt[(i, j)] - ldlt[(i, j)];
if i >= q && j >= q {
let schur_entry = factored[(i, j)];
let err = (residual - schur_entry).abs();
if err > max_error {
max_error = err;
}
diff_sq += weight * (residual - schur_entry) * (residual - schur_entry);
} else {
diff_sq += weight * residual * residual;
if residual.abs() > max_error {
max_error = residual.abs();
}
}
}
}
if norm_sq == 0.0 {
diff_sq.sqrt()
} else {
(diff_sq / norm_sq).sqrt()
}
}
#[test]
fn test_factor_inner_partial_with_delays_schur_check() {
struct PartialConfig {
m: usize, p: usize, ib: usize, seed: u64,
scale: f64,
}
let configs = [
PartialConfig {
m: 12,
p: 8,
ib: 4,
seed: 42,
scale: 1000.0,
},
PartialConfig {
m: 12,
p: 8,
ib: 4,
seed: 137,
scale: 1000.0,
},
PartialConfig {
m: 12,
p: 8,
ib: 2,
seed: 42,
scale: 1000.0,
},
PartialConfig {
m: 16,
p: 12,
ib: 4,
seed: 42,
scale: 1000.0,
},
PartialConfig {
m: 16,
p: 12,
ib: 4,
seed: 271,
scale: 1000.0,
},
PartialConfig {
m: 20,
p: 16,
ib: 4,
seed: 42,
scale: 1000.0,
},
PartialConfig {
m: 20,
p: 16,
ib: 8,
seed: 42,
scale: 1000.0,
},
PartialConfig {
m: 32,
p: 24,
ib: 8,
seed: 42,
scale: 1000.0,
},
PartialConfig {
m: 32,
p: 24,
ib: 4,
seed: 42,
scale: 1000.0,
},
PartialConfig {
m: 48,
p: 32,
ib: 8,
seed: 42,
scale: 1000.0,
},
PartialConfig {
m: 16,
p: 12,
ib: 4,
seed: 42,
scale: 1e6,
},
PartialConfig {
m: 16,
p: 12,
ib: 4,
seed: 314,
scale: 1000.0,
},
PartialConfig {
m: 16,
p: 12,
ib: 4,
seed: 997,
scale: 1000.0,
},
];
let mut any_delays = false;
let mut worst_error = 0.0_f64;
let mut worst_config = String::new();
for (idx, config) in configs.iter().enumerate() {
let mut a = deterministic_indefinite_matrix(config.m, config.seed, 5.0);
cause_delays(&mut a, config.seed + 1000, config.scale);
let opts = AptpOptions {
inner_block_size: config.ib,
outer_block_size: config.m.max(config.ib),
threshold: 0.01,
failed_pivot_method: FailedPivotMethod::Pass,
..AptpOptions::default()
};
let original = a.clone();
let result = aptp_factor_in_place(a.as_mut(), config.p, &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(
sum, config.p,
"config {} (m={}, p={}, ib={}): stats invariant {} != {}",
idx, config.m, config.p, config.ib, sum, config.p
);
if result.stats.num_delayed > 0 {
any_delays = true;
}
let error = check_partial_factorization_in_place(&original, &a, config.p, &result);
if error > worst_error {
worst_error = error;
worst_config = format!(
"config {} (m={}, p={}, ib={}, seed={})",
idx, config.m, config.p, config.ib, config.seed
);
}
}
assert!(
any_delays,
"No configurations triggered threshold delays. \
Adjust scale or matrix construction to create delays."
);
assert!(
worst_error < 1e-10,
"Worst partial factorization error: {:.2e} at {}\n\
This indicates corrupted Schur complement — bug in \
backup/restore/update_cross_terms logic in factor_inner.",
worst_error,
worst_config
);
}
#[test]
fn test_two_level_nfs_boundary_mid_block() {
struct Config {
m: usize,
p: usize,
nb: usize,
ib: usize,
seed: u64,
}
let configs = [
Config {
m: 48,
p: 20,
nb: 16,
ib: 8,
seed: 42,
},
Config {
m: 64,
p: 40,
nb: 32,
ib: 8,
seed: 42,
},
Config {
m: 24,
p: 10,
nb: 8,
ib: 4,
seed: 137,
},
Config {
m: 48,
p: 25,
nb: 16,
ib: 8,
seed: 271,
},
Config {
m: 32,
p: 13,
nb: 8,
ib: 4,
seed: 42,
},
];
let mut worst_error = 0.0_f64;
let mut worst_label = String::new();
for (idx, c) in configs.iter().enumerate() {
let a = deterministic_indefinite_matrix(c.m, c.seed, 5.0);
let opts = AptpOptions {
outer_block_size: c.nb,
inner_block_size: c.ib,
threshold: 0.01,
failed_pivot_method: FailedPivotMethod::Pass,
..AptpOptions::default()
};
let original = a.clone();
let mut a_copy = a;
let result = aptp_factor_in_place(a_copy.as_mut(), c.p, &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(
sum, c.p,
"config {}: statistics invariant {} != {}",
idx, sum, c.p
);
let error = check_partial_factorization_in_place(&original, &a_copy, c.p, &result);
if error > worst_error {
worst_error = error;
worst_label = format!(
"config {} (m={}, p={}, nb={}, ib={}, seed={})",
idx, c.m, c.p, c.nb, c.ib, c.seed
);
}
}
assert!(
worst_error < 1e-10,
"Worst reconstruction error: {:.2e} at {}\n\
This indicates the local nfs_boundary computation in \
two_level_factor is producing incorrect Schur complements.",
worst_error,
worst_label
);
}
#[test]
fn test_tpp_helpers() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[4.0, 0.5, 0.1, 0.3],
[0.5, -3.0, 0.2, -0.4],
[0.1, 0.2, 5.0, 0.6],
[0.3, -0.4, 0.6, 2.0],
];
vals[i][j]
});
assert!(tpp_is_col_small(a.as_ref(), 0, 0, 4, 5.0));
assert!(!tpp_is_col_small(a.as_ref(), 0, 0, 4, 0.01));
let z = Mat::zeros(4, 4);
assert!(tpp_is_col_small(z.as_ref(), 0, 0, 4, 1e-20));
let t = tpp_find_row_abs_max(a.as_ref(), 3, 0, 3);
assert_eq!(t, 2, "expected col 2, got {}", t);
let max_exc = tpp_find_rc_abs_max_exclude(a.as_ref(), 1, 0, 4, 3);
assert!(
(max_exc - 0.5).abs() < 1e-15,
"expected 0.5, got {}",
max_exc
);
let result = tpp_test_2x2(a.as_ref(), 0, 1, 0.6, 0.6, 0.01, 1e-20);
assert!(result.is_some(), "2x2 pivot (0,1) should pass");
let (d11, d12, d22) = result.unwrap();
assert!((d11 - 4.0).abs() < 1e-15);
assert!((d12 - 0.5).abs() < 1e-15);
assert!((d22 - (-3.0)).abs() < 1e-15);
let tiny = symmetric_matrix(2, |_, _| 1e-25);
assert!(tpp_test_2x2(tiny.as_ref(), 0, 1, 1e-25, 1e-25, 0.01, 1e-20).is_none());
}
#[test]
fn test_tpp_basic_1x1() {
let a = symmetric_matrix(4, |i, j| {
if i == j {
[10.0, -8.0, 5.0, 7.0][i]
} else {
0.1
}
});
let opts = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(result.stats.num_delayed, 0);
assert_eq!(result.stats.num_1x1 + 2 * result.stats.num_2x2, 4);
let error =
dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
assert!(error < 1e-12, "reconstruction error {:.2e}", error);
}
#[test]
fn test_tpp_basic_2x2() {
let a = symmetric_matrix(4, |i, j| {
let vals = [
[0.001, 5.0, 0.01, 0.01],
[5.0, 0.001, 0.01, 0.01],
[0.01, 0.01, 0.001, 5.0],
[0.01, 0.01, 5.0, 0.001],
];
vals[i][j]
});
let opts = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &opts).unwrap();
assert_eq!(result.stats.num_delayed, 0);
let error =
dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
assert!(error < 1e-12, "reconstruction error {:.2e}", error);
}
#[test]
fn test_tpp_fallback_after_aptp() {
let n = 16;
let a = symmetric_matrix(n, |i, j| {
match (i, j) {
(i, j) if i < 8 && j < 8 => {
if i == j {
10.0
} else {
0.01
}
}
(i, j) if (8..12).contains(&i) && (8..12).contains(&j) => {
if i == j {
0.005
} else {
2.0
}
}
(i, j) if i >= 12 && j >= 12 => {
if i == j {
20.0
} else {
0.1
}
}
(i, j) if (8..12).contains(&i) && j < 8 => 3.0,
(i, j) if i < 8 && (8..12).contains(&j) => 3.0,
_ => 0.01,
}
});
let opts_pass = AptpOptions {
inner_block_size: 4,
failed_pivot_method: FailedPivotMethod::Pass,
..AptpOptions::default()
};
let result_pass = aptp_factor(a.as_ref(), &opts_pass).unwrap();
let opts_tpp = AptpOptions {
inner_block_size: 4,
failed_pivot_method: FailedPivotMethod::Tpp,
..AptpOptions::default()
};
let result_tpp = aptp_factor(a.as_ref(), &opts_tpp).unwrap();
assert!(
result_tpp.stats.num_delayed <= result_pass.stats.num_delayed,
"TPP should not increase delays"
);
if result_tpp.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result_tpp.l,
&result_tpp.d,
result_tpp.perm.as_ref().arrays().0,
);
assert!(
error < 1e-12,
"TPP reconstruction error {:.2e} >= 1e-12",
error
);
}
}
#[test]
fn test_tpp_reconstruction_stress() {
use rand::SeedableRng;
use rand::rngs::StdRng;
use rand_distr::{Distribution, Uniform};
let n = 256;
let mut rng = StdRng::seed_from_u64(42);
let dist = Uniform::new(-1.0f64, 1.0).unwrap();
let mut a = Mat::zeros(n, n);
for i in 0..n {
for j in 0..=i {
let v: f64 = dist.sample(&mut rng);
a[(i, j)] = v;
a[(j, i)] = v;
}
let sign = if i % 3 == 0 { -1.0 } else { 1.0 };
a[(i, i)] = sign * (5.0 + dist.sample(&mut rng).abs());
}
let opts = AptpOptions {
failed_pivot_method: FailedPivotMethod::Tpp,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
if result.stats.num_delayed == 0 {
let error = dense_reconstruction_error(
&a,
&result.l,
&result.d,
result.perm.as_ref().arrays().0,
);
assert!(
error < 1e-10,
"stress reconstruction error {:.2e} >= 1e-10",
error
);
}
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, n, "statistics invariant: {} != {}", sum, n);
}
#[test]
fn test_failed_pivot_method_pass() {
let n = 4;
let a = symmetric_matrix(n, |i, j| {
let vals = [
[0.001, 5.0, 0.01, 0.01],
[5.0, 0.001, 0.01, 0.01],
[0.01, 0.01, 0.001, 5.0],
[0.01, 0.01, 5.0, 0.001],
];
vals[i][j]
});
let opts_pass = AptpOptions {
inner_block_size: 2,
failed_pivot_method: FailedPivotMethod::Pass,
..AptpOptions::default()
};
let result_pass = aptp_factor(a.as_ref(), &opts_pass).unwrap();
let opts_tpp = AptpOptions {
inner_block_size: 2,
failed_pivot_method: FailedPivotMethod::Tpp,
..AptpOptions::default()
};
let result_tpp = aptp_factor(a.as_ref(), &opts_tpp).unwrap();
assert!(
result_tpp.stats.num_delayed <= result_pass.stats.num_delayed,
"TPP delays {} > Pass delays {}",
result_tpp.stats.num_delayed,
result_pass.stats.num_delayed
);
let sum_pass = result_pass.stats.num_1x1
+ 2 * result_pass.stats.num_2x2
+ result_pass.stats.num_delayed;
let sum_tpp =
result_tpp.stats.num_1x1 + 2 * result_tpp.stats.num_2x2 + result_tpp.stats.num_delayed;
assert_eq!(sum_pass, n);
assert_eq!(sum_tpp, n);
}
#[test]
fn test_tpp_zero_pivot_handling() {
let n = 4;
let a = symmetric_matrix(n, |i, j| {
if i == j {
[5.0, 0.0, 0.0, 3.0][i]
} else if (i == 0 && j == 3) || (i == 3 && j == 0) {
0.1
} else {
0.0
}
});
let opts = AptpOptions {
failed_pivot_method: FailedPivotMethod::Tpp,
..AptpOptions::default()
};
let result = aptp_factor(a.as_ref(), &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, n, "invariant: {} != {}", sum, n);
}
#[test]
fn test_tpp_helpers_rect() {
let a = Mat::from_fn(5, 3, |i, j| {
let vals: [[f64; 3]; 5] = [
[4.0, 0.0, 0.0],
[0.5, -3.0, 0.0],
[0.1, 0.2, 5.0],
[0.3, -0.4, 0.6],
[0.7, 0.8, -0.9],
];
vals[i][j]
});
assert!((tpp_sym_entry(a.as_ref(), 3, 1) - (-0.4)).abs() < 1e-15);
assert!((tpp_sym_entry(a.as_ref(), 0, 2) - 0.1).abs() < 1e-15);
assert!((tpp_sym_entry(a.as_ref(), 1, 0) - 0.5).abs() < 1e-15);
assert!(tpp_is_col_small(a.as_ref(), 0, 0, 5, 5.0));
assert!(!tpp_is_col_small(a.as_ref(), 0, 0, 5, 0.01));
assert!(!tpp_is_col_small(a.as_ref(), 3, 0, 5, 0.5));
assert!(tpp_is_col_small(a.as_ref(), 3, 0, 5, 1.0));
let t = tpp_find_row_abs_max(a.as_ref(), 4, 0, 3);
assert_eq!(t, 2, "expected col 2, got {t}");
let max_exc = tpp_find_rc_abs_max_exclude(a.as_ref(), 1, 0, 5, 3);
assert!((max_exc - 0.8).abs() < 1e-15, "expected 0.8, got {max_exc}");
let max_exc2 = tpp_find_rc_abs_max_exclude(a.as_ref(), 3, 0, 5, usize::MAX);
assert!(
(max_exc2 - 0.6).abs() < 1e-15,
"expected 0.6, got {max_exc2}"
);
}
fn rect_reconstruction_error(
original_rect: &Mat<f64>,
factored_rect: &Mat<f64>,
k: usize,
result: &AptpFactorResult,
) -> f64 {
let ne = result.num_eliminated;
if ne == 0 {
return 0.0;
}
let mut l = Mat::zeros(ne, ne);
let mut col = 0;
while col < ne {
l[(col, col)] = 1.0;
match result.d.pivot_type(col) {
PivotType::OneByOne => {
for r in (col + 1)..ne {
l[(r, col)] = factored_rect[(r, col)];
}
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
l[(col + 1, col + 1)] = 1.0;
for r in (col + 2)..ne {
l[(r, col)] = factored_rect[(r, col)];
l[(r, col + 1)] = factored_rect[(r, col + 1)];
}
col += 2;
}
_ => {
col += 1;
}
}
}
let mut d_mat = Mat::zeros(ne, ne);
col = 0;
while col < ne {
match result.d.pivot_type(col) {
PivotType::OneByOne => {
d_mat[(col, col)] = result.d.diagonal_1x1(col);
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
let block = result.d.diagonal_2x2(col);
d_mat[(col, col)] = block.a;
d_mat[(col, col + 1)] = block.b;
d_mat[(col + 1, col)] = block.b;
d_mat[(col + 1, col + 1)] = block.c;
col += 2;
}
_ => {
col += 1;
}
}
}
let mut w = Mat::zeros(ne, ne);
for i in 0..ne {
for j in 0..ne {
let mut sum = 0.0;
for kk in 0..ne {
sum += l[(i, kk)] * d_mat[(kk, j)];
}
w[(i, j)] = sum;
}
}
let mut ldlt = Mat::zeros(ne, ne);
for i in 0..ne {
for j in 0..ne {
let mut sum = 0.0;
for kk in 0..ne {
sum += w[(i, kk)] * l[(j, kk)];
}
ldlt[(i, j)] = sum;
}
}
let perm = &result.perm;
let mut pap = Mat::zeros(ne, ne);
for i in 0..ne {
for j in 0..ne {
let pi = perm[i];
let pj = perm[j];
let val = if pi >= pj && pj < k {
original_rect[(pi, pj)]
} else if pj > pi && pi < k {
original_rect[(pj, pi)]
} else {
0.0
};
pap[(i, j)] = val;
}
}
let mut diff_sq = 0.0_f64;
let mut orig_sq = 0.0_f64;
for i in 0..ne {
for j in 0..ne {
let d = pap[(i, j)] - ldlt[(i, j)];
diff_sq += d * d;
orig_sq += pap[(i, j)] * pap[(i, j)];
}
}
if orig_sq == 0.0 {
return diff_sq.sqrt();
}
(diff_sq / orig_sq).sqrt()
}
#[test]
fn test_tpp_factor_rect_basic_1x1() {
let m = 6;
let k = 4;
let mut a = Mat::zeros(m, k);
let vals: [[f64; 4]; 6] = [
[10.0, 0.0, 0.0, 0.0],
[0.1, -8.0, 0.0, 0.0],
[0.2, 0.1, 12.0, 0.0],
[0.1, -0.2, 0.3, 7.0],
[0.3, 0.1, -0.1, 0.2], [0.1, -0.3, 0.2, 0.1], ];
for i in 0..m {
for j in 0..k {
a[(i, j)] = vals[i][j];
}
}
let original = a.clone();
let opts = AptpOptions::default();
let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
assert_eq!(
result.num_eliminated, 4,
"expected all 4 columns eliminated"
);
assert_eq!(result.stats.num_delayed, 0);
let error = rect_reconstruction_error(&original, &a, k, &result);
assert!(
error < 1e-12,
"rect 1x1 reconstruction error {error:.2e} >= 1e-12"
);
}
#[test]
fn test_tpp_factor_rect_basic_2x2() {
let m = 6;
let k = 4;
let mut a = Mat::zeros(m, k);
let vals: [[f64; 4]; 6] = [
[0.001, 0.0, 0.0, 0.0],
[5.0, 0.001, 0.0, 0.0],
[0.01, 0.01, 0.001, 0.0],
[0.01, 0.01, 5.0, 0.001],
[0.1, 0.2, 0.1, 0.2], [0.3, 0.1, 0.3, 0.1], ];
for i in 0..m {
for j in 0..k {
a[(i, j)] = vals[i][j];
}
}
let original = a.clone();
let opts = AptpOptions::default();
let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
assert_eq!(result.stats.num_delayed, 0, "expected no delays");
assert!(result.stats.num_2x2 > 0, "expected at least one 2x2 pivot");
let error = rect_reconstruction_error(&original, &a, k, &result);
assert!(
error < 1e-12,
"rect 2x2 reconstruction error {error:.2e} >= 1e-12"
);
}
#[test]
fn test_tpp_factor_rect_with_delays() {
let m = 8;
let k = 4;
let mut a = Mat::zeros(m, k);
let vals: [[f64; 4]; 8] = [
[1e-25, 0.0, 0.0, 0.0],
[1e-25, 1e-25, 0.0, 0.0],
[1e-25, 1e-25, 1e-25, 0.0],
[1e-25, 1e-25, 1e-25, 1e-25],
[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.2, 0.3, 0.4, 0.5],
[0.1, 0.1, 0.1, 0.1],
];
for i in 0..m {
for j in 0..k {
a[(i, j)] = vals[i][j];
}
}
let opts = AptpOptions::default();
let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, k, "invariant: {sum} != {k}");
}
#[test]
fn test_tpp_factor_rect_square_matches() {
let n = 4;
let a = symmetric_matrix(n, |i, j| {
let vals = [
[10.0, 0.5, 0.1, 0.3],
[0.5, -8.0, 0.2, -0.4],
[0.1, 0.2, 12.0, 0.6],
[0.3, -0.4, 0.6, -5.0],
];
vals[i][j]
});
let opts = AptpOptions::default();
let mut a_rect = a.clone();
let result_rect = tpp_factor_rect(a_rect.as_mut(), n, &opts).unwrap();
let mut a_sq = a.clone();
let mut col_order: Vec<usize> = (0..n).collect();
let mut d_sq = MixedDiagonal::new(n);
let mut stats_sq = AptpStatistics::default();
let mut pivot_log_sq = Vec::with_capacity(n);
let ne_sq = tpp_factor(
a_sq.as_mut(),
0,
n,
&mut col_order,
&mut d_sq,
&mut stats_sq,
&mut pivot_log_sq,
&opts,
);
d_sq.truncate(ne_sq);
assert_eq!(
result_rect.num_eliminated, ne_sq,
"num_eliminated: rect={} sq={}",
result_rect.num_eliminated, ne_sq
);
assert_eq!(&result_rect.perm[..n], &col_order[..n], "perm mismatch");
let ne = result_rect.num_eliminated;
let mut col = 0;
while col < ne {
match result_rect.d.pivot_type(col) {
PivotType::OneByOne => {
let d_r = result_rect.d.diagonal_1x1(col);
let d_s = d_sq.diagonal_1x1(col);
assert!(
(d_r - d_s).abs() < 1e-15,
"D mismatch at col {col}: rect={d_r} sq={d_s}"
);
col += 1;
}
PivotType::TwoByTwo { partner } if partner > col => {
let br = result_rect.d.diagonal_2x2(col);
let bs = d_sq.diagonal_2x2(col);
assert!((br.a - bs.a).abs() < 1e-15, "D.a mismatch at col {col}");
assert!((br.b - bs.b).abs() < 1e-15, "D.b mismatch at col {col}");
assert!((br.c - bs.c).abs() < 1e-15, "D.c mismatch at col {col}");
col += 2;
}
_ => {
col += 1;
}
}
}
for j in 0..n {
for i in j..n {
let vr = a_rect[(i, j)];
let vs = a_sq[(i, j)];
assert!(
(vr - vs).abs() == 0.0,
"matrix mismatch at ({i},{j}): rect={vr} sq={vs}"
);
}
}
}
#[test]
fn test_tpp_factor_rect_reconstruction_stress() {
use rand::SeedableRng;
use rand::rngs::StdRng;
use rand_distr::{Distribution, Uniform};
let m = 64;
let k = 32;
let mut rng = StdRng::seed_from_u64(99);
let dist = Uniform::new(-1.0f64, 1.0).unwrap();
let mut a = Mat::zeros(m, k);
for i in 0..k {
for j in 0..=i {
let v: f64 = dist.sample(&mut rng);
a[(i, j)] = v;
}
let sign = if i % 3 == 0 { -1.0 } else { 1.0 };
a[(i, i)] = sign * (5.0 + dist.sample(&mut rng).abs());
}
for i in k..m {
for j in 0..k {
a[(i, j)] = dist.sample(&mut rng);
}
}
let original = a.clone();
let opts = AptpOptions::default();
let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
let error = rect_reconstruction_error(&original, &a, k, &result);
assert!(
error < 1e-10,
"rect stress reconstruction error {error:.2e} >= 1e-10"
);
let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
assert_eq!(sum, k, "statistics invariant: {sum} != {k}");
}
#[test]
fn test_compute_contribution_gemm_rect_matches_square() {
let m = 8;
let k = 4;
let nfs = m - k;
let a_full = symmetric_matrix(m, |i, j| {
if i == j {
[10.0, -8.0, 12.0, 7.0, 3.0, -2.0, 5.0, 4.0][i]
} else {
0.1 * (i as f64 + j as f64 + 1.0) * if (i + j) % 2 == 0 { 1.0 } else { -1.0 }
}
});
let mut a_rect = Mat::zeros(m, k);
for i in 0..m {
for j in 0..k {
a_rect[(i, j)] = a_full[(i, j)];
}
}
let opts = AptpOptions::default();
let result = tpp_factor_rect(a_rect.as_mut(), k, &opts).unwrap();
let ne = result.num_eliminated;
if ne == 0 {
return; }
let mut a_sq_factor = a_full.clone();
let result_sq = tpp_factor_rect(a_sq_factor.as_mut(), k, &opts).unwrap();
let mut contrib_rect = Mat::zeros(nfs, nfs);
let perm = &result.perm;
for i in 0..nfs {
for j in 0..=i {
let pi = perm[k + i];
let pj = perm[k + j];
let val = if pi >= pj {
a_full[(pi, pj)]
} else {
a_full[(pj, pi)]
};
contrib_rect[(i, j)] = val;
}
}
let mut contrib_rect2 = contrib_rect.clone();
let mut ld_ws = Mat::zeros(nfs, ne);
compute_contribution_gemm_rect(
&a_rect,
k,
ne,
m,
&result.d,
&mut contrib_rect2,
&mut ld_ws,
Par::Seq,
);
let mut contrib_sq = Mat::zeros(nfs, nfs);
for i in 0..nfs {
for j in 0..=i {
let pi = perm[k + i];
let pj = perm[k + j];
let val = if pi >= pj {
a_full[(pi, pj)]
} else {
a_full[(pj, pi)]
};
contrib_sq[(i, j)] = val;
}
}
let mut frontal_sq = Mat::zeros(m, m);
for j in 0..k {
for i in 0..m {
frontal_sq[(i, j)] = a_sq_factor[(i, j)];
}
}
for i in 0..nfs {
for j in 0..=i {
frontal_sq[(k + i, k + j)] = contrib_sq[(i, j)];
}
}
let mut ld_ws2 = Mat::zeros(nfs, ne);
compute_contribution_gemm(
&frontal_sq,
k,
ne,
m,
&result_sq.d,
&mut contrib_sq,
&mut ld_ws2,
Par::Seq,
);
for i in 0..nfs {
for j in 0..=i {
let vr = contrib_rect2[(i, j)];
let vs = contrib_sq[(i, j)];
assert!(
(vr - vs).abs() < 1e-12,
"contrib mismatch at ({i},{j}): rect={vr} sq={vs}"
);
}
}
}
#[test]
fn test_extract_front_factors_rect_round_trip() {
let m = 6;
let k = 4;
let mut a = Mat::zeros(m, k);
let vals: [[f64; 4]; 6] = [
[10.0, 0.0, 0.0, 0.0],
[0.5, -8.0, 0.0, 0.0],
[0.2, 0.3, 12.0, 0.0],
[0.1, -0.1, 0.4, 7.0],
[0.3, 0.1, -0.2, 0.5],
[0.1, -0.3, 0.2, 0.1],
];
for i in 0..m {
for j in 0..k {
a[(i, j)] = vals[i][j];
}
}
let opts = AptpOptions::default();
let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
let ne = result.num_eliminated;
assert!(ne > 0, "need at least one eliminated column");
let row_indices: Vec<usize> = (0..m).collect();
let ff = extract_front_factors_rect(&a, m, k, &row_indices, &result);
assert_eq!(ff.num_eliminated, ne);
assert_eq!(ff.l11.nrows(), ne);
assert_eq!(ff.l11.ncols(), ne);
for i in 0..ne {
assert!(
(ff.l11[(i, i)] - 1.0).abs() < 1e-15,
"L11 diagonal at {i} is not 1.0: {}",
ff.l11[(i, i)]
);
for j in (i + 1)..ne {
assert!(
ff.l11[(i, j)].abs() < 1e-15,
"L11 upper triangle at ({i},{j}) is not 0: {}",
ff.l11[(i, j)]
);
}
}
assert_eq!(ff.l21.nrows(), m - ne);
assert_eq!(ff.l21.ncols(), ne);
assert_eq!(ff.d11.dimension(), ne);
}
#[test]
fn test_extract_contribution_rect_with_delays() {
let m = 6;
let k = 4;
let mut a = Mat::zeros(m, k);
let vals: [[f64; 4]; 6] = [
[10.0, 0.0, 0.0, 0.0],
[0.5, -8.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.3, 0.1, 0.0, 0.0],
[0.1, -0.3, 0.0, 0.0],
];
for i in 0..m {
for j in 0..k {
a[(i, j)] = vals[i][j];
}
}
let opts = AptpOptions::default();
let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
let ne = result.num_eliminated;
assert_eq!(ne, 4, "expected all 4 eliminated (2 real + 2 zero)");
let nfs = m - k;
let mut contrib_buffer = Mat::zeros(nfs, nfs);
let mut ld_ws = Mat::zeros(nfs, ne);
compute_contribution_gemm_rect(
&a,
k,
ne,
m,
&result.d,
&mut contrib_buffer,
&mut ld_ws,
Par::Seq,
);
let row_indices: Vec<usize> = (0..m).collect();
let contrib = extract_contribution_rect(&a, m, k, &row_indices, &result, contrib_buffer);
assert_eq!(contrib.num_delayed, 0);
assert_eq!(contrib.data.nrows(), nfs);
assert_eq!(contrib.row_indices.len(), nfs);
}
#[test]
fn test_swap_rect_boundary() {
let mut a = Mat::from_fn(5, 3, |i, j| (i * 10 + j) as f64);
swap_rect(a.as_mut(), 1, 4);
assert!((a[(1, 0)] - 40.0).abs() < 1e-15, "a[(1,0)]={}", a[(1, 0)]);
assert!((a[(4, 0)] - 10.0).abs() < 1e-15, "a[(4,0)]={}", a[(4, 0)]);
assert!((a[(2, 1)] - 42.0).abs() < 1e-15, "a[(2,1)]={}", a[(2, 1)]);
assert!((a[(4, 2)] - 21.0).abs() < 1e-15, "a[(4,2)]={}", a[(4, 2)]);
let mut b = Mat::from_fn(5, 3, |i, j| (i * 10 + j) as f64);
swap_rect(b.as_mut(), 0, 2);
assert!((b[(0, 0)] - 22.0).abs() < 1e-15, "b[(0,0)]={}", b[(0, 0)]);
assert!((b[(2, 2)] - 0.0).abs() < 1e-15, "b[(2,2)]={}", b[(2, 2)]);
assert!((b[(3, 0)] - 32.0).abs() < 1e-15, "b[(3,0)]={}", b[(3, 0)]);
assert!((b[(3, 2)] - 30.0).abs() < 1e-15, "b[(3,2)]={}", b[(3, 2)]);
}
use crate::testing::perturbations::TortureTestConfig;
use rand::RngExt;
use rand::SeedableRng;
use rand::rngs::StdRng;
fn ldlt_app_torture_test(m: usize, n: usize, config: &TortureTestConfig) {
use crate::testing::perturbations;
let options = AptpOptions {
inner_block_size: 32.min(n),
..AptpOptions::default()
};
let num_fully_summed = n;
let mut rng = StdRng::seed_from_u64(config.seed);
let mut failures = Vec::new();
for instance in 0..config.num_instances {
let mut a = perturbations::generate_dense_symmetric_indefinite(m, &mut rng);
let roll: f64 = rng.random::<f64>();
let is_singular;
if roll < config.delay_probability {
perturbations::cause_delays(&mut a, options.inner_block_size, &mut rng);
is_singular = false;
} else if roll < config.delay_probability + config.singular_probability {
if m >= 2 {
let col1 = RngExt::random_range(&mut rng, 0..m);
let mut col2 = RngExt::random_range(&mut rng, 0..m);
while col2 == col1 {
col2 = RngExt::random_range(&mut rng, 0..m);
}
perturbations::make_singular(&mut a, col1, col2);
}
is_singular = true;
} else {
if m >= options.inner_block_size && options.inner_block_size >= 2 {
let max_start = m - options.inner_block_size;
let block_row = RngExt::random_range(&mut rng, 0..=max_start);
perturbations::make_dblk_singular(&mut a, block_row, options.inner_block_size);
}
is_singular = true;
}
let original = a.clone();
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
aptp_factor_in_place(a.as_mut(), num_fully_summed, &options)
}));
match result {
Err(_) => {
failures.push(format!("instance {}: PANIC", instance));
}
Ok(Err(e)) => {
if !is_singular {
failures.push(format!("instance {}: unexpected error: {}", instance, e));
}
}
Ok(Ok(ref fr)) => {
let total = fr.num_eliminated + fr.delayed_cols.len();
if total != num_fully_summed {
failures.push(format!(
"instance {}: elim({}) + delayed({}) = {} != nfs({})",
instance,
fr.num_eliminated,
fr.delayed_cols.len(),
total,
num_fully_summed
));
continue;
}
if m == n && !is_singular && fr.num_eliminated == num_fully_summed {
let l = extract_l(a.as_ref(), &fr.d, fr.num_eliminated);
let err = dense_reconstruction_error(
&original,
&l,
&fr.d,
&fr.perm[..fr.num_eliminated],
);
if err > config.backward_error_threshold {
failures.push(format!(
"instance {}: recon error {:.2e} > {:.2e}",
instance, err, config.backward_error_threshold
));
}
}
}
}
}
assert!(
failures.is_empty(),
"APP torture (m={}, n={}) had {} failures:\n{}",
m,
n,
failures.len(),
failures.join("\n")
);
}
fn ldlt_tpp_torture_test(m: usize, n: usize, config: &TortureTestConfig) {
use crate::testing::perturbations;
let options = AptpOptions {
inner_block_size: n + 1, ..AptpOptions::default()
};
let num_fully_summed = n;
let mut rng = StdRng::seed_from_u64(config.seed);
let mut failures = Vec::new();
for instance in 0..config.num_instances {
let mut a = perturbations::generate_dense_symmetric_indefinite(m, &mut rng);
let roll: f64 = rng.random::<f64>();
let is_singular;
if roll < config.delay_probability {
perturbations::cause_delays(&mut a, options.inner_block_size, &mut rng);
is_singular = false;
} else {
if m >= 2 {
let col1 = RngExt::random_range(&mut rng, 0..m);
let mut col2 = RngExt::random_range(&mut rng, 0..m);
while col2 == col1 {
col2 = RngExt::random_range(&mut rng, 0..m);
}
perturbations::make_singular(&mut a, col1, col2);
}
is_singular = true;
}
let original = a.clone();
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
aptp_factor_in_place(a.as_mut(), num_fully_summed, &options)
}));
match result {
Err(_) => {
failures.push(format!("instance {}: PANIC", instance));
}
Ok(Err(e)) => {
if !is_singular {
failures.push(format!("instance {}: unexpected error: {}", instance, e));
}
}
Ok(Ok(ref fr)) => {
let total = fr.num_eliminated + fr.delayed_cols.len();
if total != num_fully_summed {
failures.push(format!(
"instance {}: elim({}) + delayed({}) = {} != nfs({})",
instance,
fr.num_eliminated,
fr.delayed_cols.len(),
total,
num_fully_summed
));
continue;
}
if m == n && !is_singular && fr.num_eliminated == num_fully_summed {
let l = extract_l(a.as_ref(), &fr.d, fr.num_eliminated);
let err = dense_reconstruction_error(
&original,
&l,
&fr.d,
&fr.perm[..fr.num_eliminated],
);
if err > config.backward_error_threshold {
failures.push(format!(
"instance {}: recon error {:.2e} > {:.2e}",
instance, err, config.backward_error_threshold
));
}
let l_bound = 1.0 / options.threshold;
let mut max_l = 0.0_f64;
for i in 0..l.nrows() {
for j in 0..fr.num_eliminated {
if i != j {
max_l = max_l.max(l[(i, j)].abs());
}
}
}
if max_l > l_bound * (1.0 + 1e-10) {
failures.push(format!(
"instance {}: L growth |L|={:.4} > 1/u={:.4}",
instance, max_l, l_bound
));
}
}
}
}
}
assert!(
failures.is_empty(),
"TPP torture (m={}, n={}) had {} failures:\n{}",
m,
n,
failures.len(),
failures.join("\n")
);
}
#[test]
#[ignore] fn torture_app_32x32() {
let config = TortureTestConfig {
num_instances: 500,
seed: 42_000,
..TortureTestConfig::default()
};
ldlt_app_torture_test(32, 32, &config);
}
#[test]
#[ignore]
fn torture_app_64x64() {
let config = TortureTestConfig {
num_instances: 500,
seed: 42_001,
..TortureTestConfig::default()
};
ldlt_app_torture_test(64, 64, &config);
}
#[test]
#[ignore]
fn torture_app_128x128() {
let config = TortureTestConfig {
num_instances: 500,
seed: 42_002,
..TortureTestConfig::default()
};
ldlt_app_torture_test(128, 128, &config);
}
#[test]
#[ignore]
fn torture_app_128x48() {
let config = TortureTestConfig {
num_instances: 500,
seed: 42_003,
..TortureTestConfig::default()
};
ldlt_app_torture_test(128, 48, &config);
}
#[test]
#[ignore]
fn torture_app_256x256() {
let config = TortureTestConfig {
num_instances: 500,
seed: 42_004,
..TortureTestConfig::default()
};
ldlt_app_torture_test(256, 256, &config);
}
#[test]
#[ignore]
fn torture_tpp_8x4() {
let config = TortureTestConfig {
num_instances: 500,
seed: 43_000,
..TortureTestConfig::default()
};
ldlt_tpp_torture_test(8, 4, &config);
}
#[test]
#[ignore]
fn torture_tpp_33x21() {
let config = TortureTestConfig {
num_instances: 500,
seed: 43_001,
..TortureTestConfig::default()
};
ldlt_tpp_torture_test(33, 21, &config);
}
#[test]
#[ignore]
fn torture_tpp_100x100() {
let config = TortureTestConfig {
num_instances: 500,
seed: 43_002,
..TortureTestConfig::default()
};
ldlt_tpp_torture_test(100, 100, &config);
}
#[test]
#[ignore]
fn torture_tpp_100x50() {
let config = TortureTestConfig {
num_instances: 500,
seed: 43_003,
..TortureTestConfig::default()
};
ldlt_tpp_torture_test(100, 50, &config);
}
use crate::testing::strategies;
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(256))]
#[test]
fn property_pd_reconstruction(
a in strategies::arb_symmetric_pd(5..=100)
) {
let n = a.nrows();
let options = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &options);
if let Ok(ref f) = result {
let perm_fwd = f.perm.as_ref().arrays().0;
let err = dense_reconstruction_error(&a, &f.l, &f.d, perm_fwd);
prop_assert!(
err < 1e-12,
"PD reconstruction error {:.2e} for n={}", err, n
);
}
}
#[test]
fn property_inertia_sum(
a in strategies::arb_symmetric_indefinite(5..=100)
) {
let n = a.nrows();
let options = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &options);
if let Ok(ref f) = result {
let inertia = f.d.compute_inertia();
let num_eliminated = n - f.delayed_cols.len();
prop_assert_eq!(
inertia.dimension(), num_eliminated,
"inertia sum {} != num_eliminated={} (n={}, delayed={})",
inertia.dimension(), num_eliminated, n, f.delayed_cols.len()
);
}
}
#[test]
fn property_permutation_valid(
a in strategies::arb_symmetric_indefinite(5..=100)
) {
let n = a.nrows();
let options = AptpOptions::default();
let result = aptp_factor(a.as_ref(), &options);
if let Ok(ref f) = result {
let (fwd, inv) = f.perm.as_ref().arrays();
let mut seen = vec![false; n];
for &idx in fwd {
prop_assert!(idx < n, "perm fwd index {} out of bounds for n={}", idx, n);
prop_assert!(!seen[idx], "duplicate in perm fwd: {}", idx);
seen[idx] = true;
}
for i in 0..n {
prop_assert_eq!(
inv[fwd[i]], i,
"perm fwd/inv not inverses at i={}", i
);
}
}
}
#[test]
fn property_no_panics_perturbed(
a in strategies::arb_symmetric_indefinite(5..=50),
seed in any::<u64>()
) {
use crate::testing::perturbations as perturb;
let mut a_mut = a.clone();
let mut rng = StdRng::seed_from_u64(seed);
perturb::cause_delays(&mut a_mut, 32, &mut rng);
let options = AptpOptions::default();
let _ = aptp_factor(a_mut.as_ref(), &options);
}
}
}