use super::*;
use approx::assert_abs_diff_eq;
use ndarray::array;
#[test]
pub(crate) fn sparse_block_kronecker_matches_dense_kronecker() {
let p = 2usize;
let dim_a = 5usize;
let k = dim_a * p;
let g_dense = array![
[3.0_f64, 0.5, 0.2, -0.1, 0.0],
[0.5, 4.0, 0.0, 0.3, 0.1],
[0.2, 0.0, 2.0, 0.4, -0.2],
[-0.1, 0.3, 0.4, 5.0, 0.6],
[0.0, 0.1, -0.2, 0.6, 1.5],
];
let dense = KroneckerPenaltyOp {
factor_a: g_dense.clone(),
factor_b: Array2::<f64>::eye(p),
global_offset: 0,
k,
};
let block_00 = g_dense.slice(ndarray::s![0..2, 0..2]).to_owned();
let block_01 = g_dense.slice(ndarray::s![0..2, 2..5]).to_owned();
let block_10 = g_dense.slice(ndarray::s![2..5, 0..2]).to_owned();
let block_11 = g_dense.slice(ndarray::s![2..5, 2..5]).to_owned();
let sparse = SparseBlockKroneckerPenaltyOp {
p,
dim_a,
k,
blocks: vec![
SparseGBlock {
row_off: 0,
col_off: 0,
data: block_00,
},
SparseGBlock {
row_off: 0,
col_off: 2,
data: block_01,
},
SparseGBlock {
row_off: 2,
col_off: 0,
data: block_10,
},
SparseGBlock {
row_off: 2,
col_off: 2,
data: block_11,
},
],
};
let d_dense = dense.to_dense();
let d_sparse = sparse.to_dense();
for i in 0..k {
for j in 0..k {
assert!(
(d_dense[[i, j]] - d_sparse[[i, j]]).abs() < 1e-12,
"to_dense mismatch at ({i},{j}): {} vs {}",
d_dense[[i, j]],
d_sparse[[i, j]]
);
}
}
let x: Vec<f64> = (0..k).map(|i| 0.1 * (i as f64) - 0.3).collect();
let mut y_dense = vec![0.0_f64; k];
let mut y_sparse = vec![0.0_f64; k];
dense.matvec(&x, &mut y_dense);
sparse.matvec(&x, &mut y_sparse);
for i in 0..k {
assert!(
(y_dense[i] - y_sparse[i]).abs() < 1e-12,
"matvec mismatch at {i}: {} vs {}",
y_dense[i],
y_sparse[i]
);
}
let mut diag_dense = vec![0.0_f64; k];
let mut diag_sparse = vec![0.0_f64; k];
dense.diagonal(&mut diag_dense);
sparse.diagonal(&mut diag_sparse);
for i in 0..k {
assert!(
(diag_dense[i] - diag_sparse[i]).abs() < 1e-12,
"diagonal mismatch at {i}: {} vs {}",
diag_dense[i],
diag_sparse[i]
);
}
let offsets = [0..(2 * p), (2 * p)..k];
for id in 0..offsets.len() {
let b = offsets[id].end - offsets[id].start;
let mut blk_dense = Array2::<f64>::zeros((b, b));
let mut blk_sparse = Array2::<f64>::zeros((b, b));
dense.block(BetaBlockId(id), &offsets, &mut blk_dense);
sparse.block(BetaBlockId(id), &offsets, &mut blk_sparse);
for i in 0..b {
for j in 0..b {
assert!(
(blk_dense[[i, j]] - blk_sparse[[i, j]]).abs() < 1e-12,
"block {id} mismatch at ({i},{j})"
);
}
}
}
}
pub(crate) fn factored_reference_dense(
ranks: &[usize],
basis_sizes: &[usize],
blocks: &[FactoredFrameGBlock],
) -> Array2<f64> {
let n_atoms = ranks.len();
let mut offsets = vec![0usize; n_atoms + 1];
for k in 0..n_atoms {
offsets[k + 1] = offsets[k] + basis_sizes[k] * ranks[k];
}
let dim = offsets[n_atoms];
let mut h = Array2::<f64>::zeros((dim, dim));
for blk in blocks {
let (r_i, r_j) = (ranks[blk.atom_i], ranks[blk.atom_j]);
let (off_i, off_j) = (offsets[blk.atom_i], offsets[blk.atom_j]);
let (m_i, m_j) = blk.g.dim();
for li in 0..m_i {
for lj in 0..m_j {
for a in 0..r_i {
for b in 0..r_j {
h[[off_i + li * r_i + a, off_j + lj * r_j + b]] +=
blk.g[[li, lj]] * blk.w[[a, b]];
}
}
}
}
}
h
}
#[test]
pub(crate) fn factored_frame_kronecker_matches_dense_reference() {
let ranks = vec![2usize, 3];
let basis_sizes = vec![2usize, 3];
let g00 = array![[3.0_f64, 0.5], [0.5, 4.0]];
let g11 = array![[2.0_f64, 0.4, -0.2], [0.4, 5.0, 0.6], [-0.2, 0.6, 1.5]];
let g01 = array![[0.2_f64, -0.1, 0.0], [0.3, 0.1, -0.2]];
let g10 = g01.t().to_owned();
let w00 = Array2::<f64>::eye(2);
let w11 = Array2::<f64>::eye(3);
let w01 = array![[0.8_f64, 0.1, -0.05], [0.0, 0.7, 0.2]];
let w10 = w01.t().to_owned();
let blocks = vec![
FactoredFrameGBlock {
atom_i: 0,
atom_j: 0,
g: g00.clone(),
w: w00.clone(),
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 1,
g: g11.clone(),
w: w11.clone(),
},
FactoredFrameGBlock {
atom_i: 0,
atom_j: 1,
g: g01.clone(),
w: w01.clone(),
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 0,
g: g10.clone(),
w: w10.clone(),
},
];
let op = FactoredFrameKroneckerOp::new(ranks.clone(), basis_sizes.clone(), blocks.clone())
.expect("op");
assert_eq!(op.dim(), 13);
let reference = factored_reference_dense(&ranks, &basis_sizes, &blocks);
let dense = op.to_dense();
for i in 0..13 {
for j in 0..13 {
assert!(
(dense[[i, j]] - reference[[i, j]]).abs() < 1e-12,
"to_dense mismatch at ({i},{j}): {} vs {}",
dense[[i, j]],
reference[[i, j]]
);
}
}
let x: Vec<f64> = (0..13).map(|i| 0.13 * (i as f64) - 0.4).collect();
let mut y = vec![0.0_f64; 13];
op.matvec(&x, &mut y);
for i in 0..13 {
let mut expect = 0.0;
for j in 0..13 {
expect += reference[[i, j]] * x[j];
}
assert!(
(y[i] - expect).abs() < 1e-10,
"matvec mismatch at {i}: {} vs {expect}",
y[i]
);
}
let mut diag = vec![0.0_f64; 13];
op.diagonal(&mut diag);
for i in 0..13 {
assert!(
(diag[i] - reference[[i, i]]).abs() < 1e-12,
"diagonal mismatch at {i}"
);
}
let offsets_ranges = [0..4usize, 4..13usize];
for id in 0..2 {
let b = offsets_ranges[id].end - offsets_ranges[id].start;
let mut blk = Array2::<f64>::zeros((b, b));
op.block(BetaBlockId(id), &offsets_ranges, &mut blk);
for bi in 0..b {
for bj in 0..b {
let gi = offsets_ranges[id].start + bi;
let gj = offsets_ranges[id].start + bj;
assert!(
(blk[[bi, bj]] - reference[[gi, gj]]).abs() < 1e-12,
"block {id} mismatch at ({bi},{bj})"
);
}
}
}
}
#[test]
pub(crate) fn factored_frame_kronecker_reduces_to_sparse_block_at_full_rank() {
let p = 2usize;
let g00 = array![[3.0_f64, 0.5], [0.5, 4.0]];
let g11 = array![[2.0_f64, 0.4], [0.4, 5.0]];
let g01 = array![[0.2_f64, -0.1], [0.3, 0.1]];
let g10 = g01.t().to_owned();
let ident = Array2::<f64>::eye(p);
let factored = FactoredFrameKroneckerOp::new(
vec![p, p],
vec![2, 2],
vec![
FactoredFrameGBlock {
atom_i: 0,
atom_j: 0,
g: g00.clone(),
w: ident.clone(),
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 1,
g: g11.clone(),
w: ident.clone(),
},
FactoredFrameGBlock {
atom_i: 0,
atom_j: 1,
g: g01.clone(),
w: ident.clone(),
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 0,
g: g10.clone(),
w: ident.clone(),
},
],
)
.expect("factored op");
let sparse = SparseBlockKroneckerPenaltyOp {
p,
dim_a: 4,
k: 8,
blocks: vec![
SparseGBlock {
row_off: 0,
col_off: 0,
data: g00,
},
SparseGBlock {
row_off: 2,
col_off: 2,
data: g11,
},
SparseGBlock {
row_off: 0,
col_off: 2,
data: g01,
},
SparseGBlock {
row_off: 2,
col_off: 0,
data: g10,
},
],
};
assert_eq!(factored.dim(), sparse.dim());
let x: Vec<f64> = (0..8).map(|i| 0.2 * (i as f64) - 0.5).collect();
let mut yf = vec![0.0_f64; 8];
let mut ys = vec![0.0_f64; 8];
factored.matvec(&x, &mut yf);
sparse.matvec(&x, &mut ys);
for i in 0..8 {
assert!(
(yf[i] - ys[i]).abs() < 1e-12,
"full-rank factored op must equal SparseBlockKronecker at {i}: {} vs {}",
yf[i],
ys[i]
);
}
}
pub(crate) fn mgs_orthonormalize(a: &Array2<f64>) -> Array2<f64> {
let (p, r) = a.dim();
let mut q = a.clone();
for j in 0..r {
for i in 0..j {
let mut dot = 0.0;
for c in 0..p {
dot += q[[c, i]] * q[[c, j]];
}
for c in 0..p {
q[[c, j]] -= dot * q[[c, i]];
}
}
let mut nrm = 0.0;
for c in 0..p {
nrm += q[[c, j]] * q[[c, j]];
}
let nrm = nrm.sqrt();
assert!(nrm > 1e-9, "mgs column {j} degenerate");
for c in 0..p {
q[[c, j]] /= nrm;
}
}
q
}
#[test]
pub(crate) fn frame_output_gram_orthonormal_is_identity() {
let p = 5usize;
let r = 3usize;
let mut seed = Array2::<f64>::zeros((p, r));
for c in 0..p {
for a in 0..r {
seed[[c, a]] = ((c as f64) * 0.37 + (a as f64) * 1.31).sin() + 0.1 * (a as f64);
}
}
let u = mgs_orthonormalize(&seed);
let g = frame_output_gram(u.view(), u.view());
assert_eq!(g.dim(), (r, r));
for a in 0..r {
for b in 0..r {
let expect = if a == b { 1.0 } else { 0.0 };
assert!(
(g[[a, b]] - expect).abs() < 1e-12,
"UᵀU not identity at ({a},{b}): {}",
g[[a, b]]
);
}
}
}
#[test]
pub(crate) fn from_frames_and_blocks_matches_dense_reference() {
let p = 4usize;
let basis_sizes = vec![2usize, 3];
let mut seed0 = Array2::<f64>::zeros((p, 2));
let mut seed1 = Array2::<f64>::zeros((p, 3));
for c in 0..p {
for a in 0..2 {
seed0[[c, a]] = ((c as f64) * 0.91 - (a as f64) * 0.5).cos() + 0.2 * (c as f64);
}
for a in 0..3 {
seed1[[c, a]] = ((c as f64) * 0.23 + (a as f64) * 1.7).sin() - 0.3 * (a as f64);
}
}
let u0 = mgs_orthonormalize(&seed0);
let u1 = mgs_orthonormalize(&seed1);
let g00 = array![[3.0_f64, 0.5], [0.5, 4.0]];
let g11 = array![[2.0_f64, 0.4, -0.2], [0.4, 5.0, 0.6], [-0.2, 0.6, 1.5]];
let g01 = array![[0.2_f64, -0.1, 0.0], [0.3, 0.1, -0.2]];
let g10 = g01.t().to_owned();
let mut g_blocks: std::collections::BTreeMap<(usize, usize), Array2<f64>> =
std::collections::BTreeMap::new();
g_blocks.insert((0, 0), g00.clone());
g_blocks.insert((1, 1), g11.clone());
g_blocks.insert((0, 1), g01.clone());
g_blocks.insert((1, 0), g10.clone());
let frames = vec![Some(u0.clone()), Some(u1.clone())];
let op = FactoredFrameKroneckerOp::from_frames_and_blocks(&frames, &basis_sizes, p, &g_blocks)
.expect("from_frames_and_blocks");
assert_eq!(op.dim(), 13);
let ranks = vec![2usize, 3];
let w00 = frame_output_gram(u0.view(), u0.view());
let w11 = frame_output_gram(u1.view(), u1.view());
let w01 = frame_output_gram(u0.view(), u1.view());
let w10 = frame_output_gram(u1.view(), u0.view());
let ref_blocks = vec![
FactoredFrameGBlock {
atom_i: 0,
atom_j: 0,
g: g00,
w: w00,
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 1,
g: g11,
w: w11,
},
FactoredFrameGBlock {
atom_i: 0,
atom_j: 1,
g: g01,
w: w01,
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 0,
g: g10,
w: w10,
},
];
let reference = factored_reference_dense(&ranks, &basis_sizes, &ref_blocks);
let dense = op.to_dense();
for i in 0..13 {
for j in 0..13 {
assert!(
(dense[[i, j]] - reference[[i, j]]).abs() < 1e-12,
"to_dense mismatch at ({i},{j}): {} vs {}",
dense[[i, j]],
reference[[i, j]]
);
}
}
let x: Vec<f64> = (0..13).map(|i| 0.17 * (i as f64) - 0.6).collect();
let mut y = vec![0.0_f64; 13];
op.matvec(&x, &mut y);
for i in 0..13 {
let mut expect = 0.0;
for j in 0..13 {
expect += reference[[i, j]] * x[j];
}
assert!(
(y[i] - expect).abs() < 1e-10,
"matvec mismatch at {i}: {} vs {expect}",
y[i]
);
}
}
#[test]
pub(crate) fn from_frames_and_blocks_mixed_framed_unframed() {
let p = 4usize;
let basis_sizes = vec![2usize, 2]; let mut seed0 = Array2::<f64>::zeros((p, 2));
for c in 0..p {
for a in 0..2 {
seed0[[c, a]] = ((c as f64) * 0.61 + (a as f64) * 0.9).cos() - 0.15 * (c as f64);
}
}
let u0 = mgs_orthonormalize(&seed0);
let g00 = array![[3.0_f64, 0.5], [0.5, 4.0]];
let g11 = array![[2.0_f64, 0.4], [0.4, 5.0]];
let g01 = array![[0.2_f64, -0.1], [0.3, 0.1]];
let g10 = g01.t().to_owned();
let mut g_blocks: std::collections::BTreeMap<(usize, usize), Array2<f64>> =
std::collections::BTreeMap::new();
g_blocks.insert((0, 0), g00.clone());
g_blocks.insert((1, 1), g11.clone());
g_blocks.insert((0, 1), g01.clone());
g_blocks.insert((1, 0), g10.clone());
let frames = vec![Some(u0.clone()), None];
let op = FactoredFrameKroneckerOp::from_frames_and_blocks(&frames, &basis_sizes, p, &g_blocks)
.expect("from_frames_and_blocks mixed");
assert_eq!(op.ranks, vec![2usize, 4]);
assert_eq!(op.dim(), 12);
let dense = op.to_dense();
let off1 = 4usize;
for li in 0..2 {
for lj in 0..2 {
for a in 0..4 {
for b in 0..4 {
let gi = off1 + li * 4 + a;
let gj = off1 + lj * 4 + b;
let expect = if a == b { g11[[li, lj]] } else { 0.0 };
assert!(
(dense[[gi, gj]] - expect).abs() < 1e-12,
"g_11 ⊗ I_4 mismatch at ({gi},{gj}): {} vs {expect}",
dense[[gi, gj]]
);
}
}
}
}
let ranks = vec![2usize, 4];
let ident_p = Array2::<f64>::eye(p);
let w00 = frame_output_gram(u0.view(), u0.view());
let w11 = frame_output_gram(ident_p.view(), ident_p.view());
let w01 = frame_output_gram(u0.view(), ident_p.view());
let w10 = frame_output_gram(ident_p.view(), u0.view());
let ref_blocks = vec![
FactoredFrameGBlock {
atom_i: 0,
atom_j: 0,
g: g00,
w: w00,
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 1,
g: g11.clone(),
w: w11,
},
FactoredFrameGBlock {
atom_i: 0,
atom_j: 1,
g: g01,
w: w01,
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 0,
g: g10,
w: w10,
},
];
let reference = factored_reference_dense(&ranks, &basis_sizes, &ref_blocks);
let x: Vec<f64> = (0..12).map(|i| 0.11 * (i as f64) - 0.4).collect();
let mut y = vec![0.0_f64; 12];
op.matvec(&x, &mut y);
for i in 0..12 {
let mut expect = 0.0;
for j in 0..12 {
expect += reference[[i, j]] * x[j];
}
assert!(
(y[i] - expect).abs() < 1e-10,
"mixed matvec mismatch at {i}: {} vs {expect}",
y[i]
);
}
}
#[test]
pub(crate) fn arrow_schur_matches_dense_reference_2x2() {
let n = 2;
let d = 2;
let k = 3;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[2.0_f64, 0.1], [0.1, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.0, 0.5], [0.2, 1.0, 0.0]];
sys.rows[0].gt = array![0.3_f64, -0.2];
sys.rows[1].htt = array![[1.5_f64, -0.1], [-0.1, 2.0]];
sys.rows[1].htbeta = array![[0.1_f64, 0.5, 0.0], [0.0, 0.3, 1.0]];
sys.rows[1].gt = array![-0.1_f64, 0.4];
sys.hbb = array![[4.0_f64, 0.2, 0.0], [0.2, 5.0, 0.1], [0.0, 0.1, 6.0],];
sys.gb = array![0.5_f64, -0.3, 0.2];
let (delta_t, delta_beta, _diag) = sys.solve(0.0, 0.0).expect("arrow-schur solve");
let streaming_options = ArrowSolveOptions::direct().with_streaming_chunk_size(Some(1));
let (delta_t_stream, delta_beta_stream, _diag_stream) = sys
.solve_with_options(0.0, 0.0, &streaming_options)
.expect("streaming arrow-schur solve");
assert_eq!(delta_beta, delta_beta_stream);
assert_eq!(delta_t, delta_t_stream);
let total = k + n * d;
let mut hjoint = Array2::<f64>::zeros((total, total));
let mut gjoint = Array1::<f64>::zeros(total);
for a in 0..k {
for b in 0..k {
hjoint[[a, b]] = sys.hbb[[a, b]];
}
gjoint[a] = sys.gb[a];
}
for i in 0..n {
let toff = k + i * d;
for a in 0..d {
for b in 0..d {
hjoint[[toff + a, toff + b]] = sys.rows[i].htt[[a, b]];
}
gjoint[toff + a] = sys.rows[i].gt[a];
for a2 in 0..k {
hjoint[[toff + a, a2]] = sys.rows[i].htbeta[[a, a2]];
hjoint[[a2, toff + a]] = sys.rows[i].htbeta[[a, a2]];
}
}
}
let lj = cholesky_lower(&hjoint).expect("dense ref PD");
let neg_g = gjoint.mapv(|v| -v);
let xref = cholesky_solve_vector(&lj, &neg_g);
for a in 0..k {
assert!(
(xref[a] - delta_beta[a]).abs() < 1e-10,
"β[{a}] mismatch: dense {} vs arrow {}",
xref[a],
delta_beta[a]
);
}
for i in 0..n {
for a in 0..d {
let dense = xref[k + i * d + a];
let arrow = delta_t[i * d + a];
assert!(
(dense - arrow).abs() < 1e-10,
"t[{i},{a}] mismatch: dense {dense} vs arrow {arrow}"
);
}
}
}
pub(crate) fn diagonal_arrow_fixture(row_min: f64, schur_min: f64) -> ArrowSchurSystem {
let mut sys = ArrowSchurSystem::new(2, 2, 2);
sys.rows[0].htt = array![[row_min, 0.0], [0.0, row_min + 1.0]];
sys.rows[1].htt = array![[row_min + 2.0, 0.0], [0.0, row_min + 3.0]];
for row in sys.rows.iter_mut() {
row.htbeta.fill(0.0);
row.gt.fill(0.0);
}
sys.hbb = array![[schur_min, 0.0], [0.0, schur_min + 1.0]];
sys.gb.fill(0.0);
sys
}
pub(crate) fn diagonal_fixture_dense_lambda_min(sys: &ArrowSchurSystem) -> f64 {
let mut out = f64::INFINITY;
for row in &sys.rows {
for axis in 0..row.htt.nrows() {
out = out.min(row.htt[[axis, axis]]);
}
}
for axis in 0..sys.hbb.nrows() {
out = out.min(sys.hbb[[axis, axis]]);
}
out
}
#[test]
pub(crate) fn arrow_factor_min_pivot_matches_dense_lambda_min_ordering() {
let weak = diagonal_arrow_fixture(0.2, 0.8);
let strong = diagonal_arrow_fixture(0.7, 1.2);
let options = ArrowSolveOptions::direct();
let (_dt_w, _db_w, weak_cache) =
solve_arrow_newton_step_with_options(&weak, 0.0, 0.0, &options)
.expect("weak diagonal fixture should factor");
let (_dt_s, _db_s, strong_cache) =
solve_arrow_newton_step_with_options(&strong, 0.0, 0.0, &options)
.expect("strong diagonal fixture should factor");
let weak_lambda = diagonal_fixture_dense_lambda_min(&weak);
let strong_lambda = diagonal_fixture_dense_lambda_min(&strong);
assert!(weak_lambda < strong_lambda);
let weak_pivot = arrow_factor_min_pivot(&weak_cache)
.min_pivot
.expect("weak pivot");
let strong_pivot = arrow_factor_min_pivot(&strong_cache)
.min_pivot
.expect("strong pivot");
assert_abs_diff_eq!(weak_pivot, weak_lambda, epsilon = 1.0e-14);
assert_abs_diff_eq!(strong_pivot, strong_lambda, epsilon = 1.0e-14);
assert!(weak_pivot < strong_pivot);
}
pub(crate) fn quartic_counterexample_value(t: f64) -> f64 {
0.25 * t.powi(4) - t * t + 2.0 * t
}
pub(crate) fn quartic_counterexample_system(t: f64) -> ArrowSchurSystem {
let mut sys = ArrowSchurSystem::new(1, 1, 0);
sys.rows[0].gt = array![t.powi(3) - 2.0 * t + 2.0];
sys.rows[0].htt = array![[3.0 * t * t - 2.0]];
sys
}
#[test]
pub(crate) fn proximal_correction_breaks_scalar_newton_cycle() {
let options = ArrowSolveOptions::direct();
let correction = ArrowProximalCorrectionOptions {
initial_ridge: 1e-8,
ridge_growth: 10.0,
max_attempts: 16,
armijo_c1: 1e-4,
gradient_tolerance: 1e-12,
convergence_objective_rel_tol: DEFAULT_PROXIMAL_CONVERGENCE_REL_TOL,
};
let mut t = 0.0_f64;
let mut previous_value = quartic_counterexample_value(t);
for _ in 0..32 {
let sys = quartic_counterexample_system(t);
let accepted = solve_arrow_newton_step_with_proximal_correction(
&sys,
0.0,
0.0,
previous_value,
&options,
&correction,
|delta_t, _delta_beta| quartic_counterexample_value(t + delta_t[0]),
)
.expect("proximal correction should accept a descent step");
assert!(
accepted.trial_objective_value <= previous_value,
"accepted step must not increase the objective"
);
t += accepted.delta_t[0];
previous_value = accepted.trial_objective_value;
}
let final_grad = t.powi(3) - 2.0 * t + 2.0;
assert!(
final_grad.abs() < 1e-7,
"corrected iteration should reach the scalar critical point; t={t}, g={final_grad}"
);
}
#[test]
pub(crate) fn factor_one_row_conditions_barely_pd_block_via_ridge() {
let d = 2;
let k = 2;
let mut row = ArrowRowBlock::new(d, k);
row.htt = array![[1.0_f64, 1.0], [1.0, 1.0 + 1e-14]];
row.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
row.gt = array![0.0_f64, 0.0];
let factor = factor_one_row(&row, 0.0, d, 0, false).expect(
"barely-PD H_tt must be CONDITIONED by per-row ridge escalation, not rejected (gam#578)",
);
let kappa = cholesky_factor_kappa_estimate(&factor);
assert!(
kappa.is_finite() && kappa <= safe_spd_kappa_max(d),
"conditioned factor must be within the safe-inversion κ ceiling; got κ={kappa:e}"
);
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += factor[[i, kk]] * factor[[j, kk]];
}
if i == j {
assert!(
acc >= row.htt[[i, j]] - 1e-12,
"diagonal of L Lᵀ must be H_tt + (nonneg ridge) at ({i},{j}): \
{acc} vs {}",
row.htt[[i, j]]
);
} else {
assert!(
(acc - row.htt[[i, j]]).abs() < 1e-9,
"off-diagonal of L Lᵀ must equal H_tt at ({i},{j}): {acc} vs {}",
row.htt[[i, j]]
);
}
}
}
let factor = factor_one_row(&row, 0.0, d, 0, true)
.expect("tolerate_ill_conditioning must accept a barely-PD-but-PD block");
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += factor[[i, kk]] * factor[[j, kk]];
}
assert!(
(acc - row.htt[[i, j]]).abs() < 1e-12,
"tolerated factor must satisfy L Lᵀ = H_tt at ({i},{j})"
);
}
}
let mut row_npd = ArrowRowBlock::new(d, k);
row_npd.htt = array![[1.0_f64, 2.0], [2.0, 1.0]]; row_npd.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
row_npd.gt = array![0.0_f64, 0.0];
let npd = factor_one_row(&row_npd, 0.0, d, 0, true);
assert!(
matches!(npd, Err(ArrowSchurError::PerRowFactorFailed { .. })),
"non-PD block must error even with tolerate_ill_conditioning; got {npd:?}"
);
let mut row_ok = ArrowRowBlock::new(d, k);
row_ok.htt = array![[2.0_f64, 0.1], [0.1, 3.0]];
row_ok.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
row_ok.gt = array![0.0_f64, 0.0];
factor_one_row(&row_ok, 0.0, d, 0, false)
.expect("well-conditioned block must still factor at ridge_t=0");
let mut row_nan = ArrowRowBlock::new(d, k);
row_nan.htt = array![[f64::NAN, 0.0], [0.0, 1.0]];
row_nan.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
row_nan.gt = array![0.0_f64, 0.0];
let nan = factor_one_row(&row_nan, 1.0e-6, d, 0, false);
assert!(
matches!(nan, Err(ArrowSchurError::PerRowFactorFailed { .. })),
"non-finite block must surface PerRowFactorFailed, not loop or condition; got {nan:?}"
);
}
#[test]
pub(crate) fn factor_one_row_conditions_scalar_tiny_pivot_via_ridge() {
let d = 1;
let k = 1;
let mut row = ArrowRowBlock::new(d, k);
row.htt = array![[1.0e-20_f64]];
row.htbeta = array![[1.0_f64]];
row.gt = array![0.0_f64];
let factor = factor_one_row(&row, 0.0, d, 0, false)
.expect("tiny positive scalar pivot must be ridge-conditioned");
let pivot = factor[[0, 0]] * factor[[0, 0]];
assert!(
pivot >= safe_spd_pivot_min(1.0),
"scalar pivot must be lifted above the absolute safe floor; got {pivot:e}"
);
assert!(
pivot > row.htt[[0, 0]],
"scalar block must not be accepted at the raw tiny pivot"
);
let tolerated = factor_one_row(&row, 0.0, d, 0, true)
.expect("tolerated log-det path must accept a positive scalar block");
let raw_pivot = tolerated[[0, 0]] * tolerated[[0, 0]];
assert!(
(raw_pivot - row.htt[[0, 0]]).abs() < 1.0e-30,
"tolerated factor must remain the raw scalar Cholesky"
);
}
#[test]
pub(crate) fn evidence_row_spectral_deflates_indefinite_non_gauge_block_at_unit_stiffness() {
let d = 3usize;
let k = 2usize;
let mut indef = ArrowRowBlock::new(d, k);
indef.htt = array![[4.0_f64, 0.0, 0.0], [0.0, 1.0e-10, 0.0], [0.0, 0.0, -1.0],];
indef.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0], [0.5, 0.5]];
indef.gt = array![0.0_f64, 0.0, 0.0];
let gauge_e1 = array![0.0_f64, 1.0, 0.0];
assert!(
factor_gauge_deflated_evidence_row(&indef, d, std::slice::from_ref(&gauge_e1)).is_none(),
"gauge deflation must NOT rescue a genuinely-indefinite non-gauge direction"
);
let spectral = factor_spectral_deflated_evidence_row(&indef, d)
.expect("spectral deflation must condition the indefinite non-gauge block");
assert_eq!(
spectral.gauge_deflated_directions, 2,
"the two sub-floor eigen-directions (−1.0 and 1e-10) must be unit-deflated"
);
let ls = &spectral.factor;
let mut recon = Array2::<f64>::zeros((d, d));
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += ls[[i, kk]] * ls[[j, kk]];
}
recon[[i, j]] = acc;
}
}
assert!(
(recon[[0, 0]] - 4.0).abs() < 1.0e-9,
"genuine direction e_0 must be preserved exactly; got {}",
recon[[0, 0]]
);
assert!(
(recon[[2, 2]] - 1.0).abs() < 1.0e-9,
"the genuinely-indefinite direction e_2 must be deflated to unit \
stiffness +1 (log 1 = 0, ρ-independent), NOT ridge-damped; got {}",
recon[[2, 2]]
);
let factored = factor_one_row_result(&indef, 0.0, d, 0, true, std::slice::from_ref(&gauge_e1))
.expect("undamped evidence factor must condition the indefinite block by deflation");
for a in 0..d {
assert!(
factored.factor[[a, a]].is_finite() && factored.factor[[a, a]] > 0.0,
"deflated evidence factor must have a finite positive pivot at {a}; got {}",
factored.factor[[a, a]]
);
}
let mut pd = ArrowRowBlock::new(d, k);
pd.htt = array![[4.0_f64, 0.0, 0.0], [0.0, 1.0e-10, 0.0], [0.0, 0.0, 2.0],];
pd.htbeta = indef.htbeta.clone();
pd.gt = array![0.0_f64, 0.0, 0.0];
let result = factor_one_row_result(&pd, 0.0, d, 0, true, std::slice::from_ref(&gauge_e1))
.expect("undamped evidence factor must succeed on the genuinely-PD stationary block");
assert_eq!(
result.gauge_deflated_directions, 1,
"exactly the single near-null gauge direction must be deflated"
);
let l = &result.factor;
let mut reconstructed = Array2::<f64>::zeros((d, d));
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += l[[i, kk]] * l[[j, kk]];
}
reconstructed[[i, j]] = acc;
}
}
assert!(
(reconstructed[[0, 0]] - 4.0).abs() < 1.0e-12,
"stationary factor must be the EXACT Cholesky on the genuine direction e_0; got {}",
reconstructed[[0, 0]]
);
assert!(
(reconstructed[[2, 2]] - 2.0).abs() < 1.0e-12,
"stationary factor must be the EXACT Cholesky on the genuine direction e_2; got {}",
reconstructed[[2, 2]]
);
assert!(
(reconstructed[[1, 1]] - (1.0 + 1.0e-10)).abs() < 1.0e-9,
"deflated gauge direction must carry exactly the +1 unit stiffness; got {}",
reconstructed[[1, 1]]
);
}
#[test]
pub(crate) fn evidence_row_spectral_deflation_count_is_stable_across_the_cutoff() {
let d = 3usize;
let k = 1usize;
let floor = SPECTRAL_DEFLATION_REL_FLOOR * 4.0;
let near_floor_lo = floor * 0.995; let near_floor_hi = floor * 1.05;
let mut block_lo = ArrowRowBlock::new(d, k);
block_lo.htt = array![
[4.0_f64, 0.0, 0.0],
[0.0, near_floor_lo, 0.0],
[0.0, 0.0, -1.0],
];
block_lo.htbeta = array![[1.0_f64], [0.0], [0.5]];
block_lo.gt = array![0.0_f64, 0.0, 0.0];
let mut block_hi = block_lo.clone();
block_hi.htt[[1, 1]] = near_floor_hi;
let lo = factor_spectral_deflated_evidence_row(&block_lo, d)
.expect("indefinite block must spectrally deflate (lo iterate)");
let hi = factor_spectral_deflated_evidence_row(&block_hi, d)
.expect("indefinite block must spectrally deflate (hi iterate)");
assert_eq!(
lo.gauge_deflated_directions, 1,
"lo iterate: only the genuine indefinite direction is deflated"
);
assert_eq!(
hi.gauge_deflated_directions, lo.gauge_deflated_directions,
"deflation count must be STABLE across an eigenvalue straddling the \
bare cutoff — the quotient-dimension guard must not trip mid-walk"
);
let bare_count = |lambda: f64| -> usize {
let mut c = 0usize;
for &l in &[4.0_f64, lambda, -1.0] {
if !(l.is_finite() && l > floor) {
c += 1;
}
}
c
};
assert_ne!(
bare_count(near_floor_lo),
bare_count(near_floor_hi),
"test must straddle the bare cutoff (else it proves nothing): the \
un-banded decision flips the count, the banded one does not"
);
}
#[test]
pub(crate) fn evidence_dense_schur_deflates_indefinite_complement_at_unit_stiffness() {
let schur = array![[4.0_f64, 0.0, 0.0], [0.0, -0.5, 0.0], [0.0, 0.0, 2.0],];
assert!(
cholesky_lower(&schur).is_err(),
"an indefinite Schur complement must be refused by the plain Cholesky"
);
let factor = factor_spectral_deflated_evidence_dense(&schur)
.expect("indefinite Schur complement must spectrally deflate to a PD factor");
let d = 3usize;
let mut reconstructed = Array2::<f64>::zeros((d, d));
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += factor[[i, kk]] * factor[[j, kk]];
}
reconstructed[[i, j]] = acc;
}
}
assert!(
(reconstructed[[0, 0]] - 4.0).abs() < 1.0e-9,
"genuine positive direction e_0 must be exact; got {}",
reconstructed[[0, 0]]
);
assert!(
(reconstructed[[2, 2]] - 2.0).abs() < 1.0e-9,
"genuine positive direction e_2 must be exact; got {}",
reconstructed[[2, 2]]
);
assert!(
(reconstructed[[1, 1]] - 1.0).abs() < 1.0e-9,
"deflated negative direction must carry exactly the +1 unit stiffness; got {}",
reconstructed[[1, 1]]
);
}
#[test]
pub(crate) fn sys_htbeta_materialize_row_sums_operator_and_dense_slab() {
let mut sys = ArrowSchurSystem::new(1, 1, 3);
sys.rows[0].htbeta = array![[0.25_f64, 0.5, 0.75]];
sys.activate_dense_htbeta_supplement();
sys.set_row_htbeta_operator(
|row_idx, x, out| {
assert_eq!(row_idx, 0);
out[0] += 2.0 * x[0] - x[1] + 0.5 * x[2];
},
|row_idx, v, out| {
assert_eq!(row_idx, 0);
out[0] += 2.0 * v[0];
out[1] -= v[0];
out[2] += 0.5 * v[0];
},
);
let htbeta = sys_htbeta_materialize_row(&sys, 0, &sys.rows[0]).unwrap();
assert_eq!(htbeta, array![[2.25_f64, -0.5, 1.25]]);
}
#[test]
pub(crate) fn lm_escalation_recovers_from_ill_conditioned_row() {
let n = 1;
let d = 2;
let k = 2;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[1.0_f64, 1.0], [1.0, 1.0 + 1e-14]];
sys.rows[0].htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
sys.rows[0].gt = array![0.1_f64, -0.2];
sys.hbb = array![[4.0_f64, 0.2], [0.2, 5.0]];
sys.gb = array![0.3_f64, -0.1];
let factor = factor_one_row(&sys.rows[0], 0.0, d, 0, false)
.expect("barely-PD row must be conditioned, not rejected (gam#578)");
let kappa = cholesky_factor_kappa_estimate(&factor);
assert!(
kappa.is_finite() && kappa <= safe_spd_kappa_max(d),
"conditioned per-row factor must satisfy the κ ceiling; got κ={kappa:e}"
);
let options = ArrowSolveOptions::direct();
let (delta_t, delta_beta, diag) = solve_with_lm_escalation_inner(&sys, 0.0, 0.0, &options)
.expect("LM escalation must recover from a barely-PD per-row block");
for v in delta_t.iter().chain(delta_beta.iter()) {
assert!(v.is_finite(), "recovered step must be finite: {v}");
}
assert!(
diag.ridge_escalations <= DEFAULT_PROXIMAL_MAX_ATTEMPTS,
"recovery must use a bounded number of outer ridge escalations; got {}",
diag.ridge_escalations
);
}
#[test]
pub(crate) fn latent_block_inverse_diagonal_matches_dense() {
let n = 3usize;
let d = 2usize;
let k = 2usize;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[4.0_f64, 0.5], [0.5, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.2], [-0.3, 0.7]];
sys.rows[1].htt = array![[5.0_f64, -0.4], [-0.4, 2.5]];
sys.rows[1].htbeta = array![[0.6_f64, -0.1], [0.4, 0.9]];
sys.rows[2].htt = array![[3.5_f64, 0.2], [0.2, 4.5]];
sys.rows[2].htbeta = array![[-0.2_f64, 0.5], [0.8, -0.6]];
for row in sys.rows.iter_mut() {
row.gt = array![0.0_f64, 0.0];
}
sys.hbb = array![[12.0_f64, 0.7], [0.7, 10.0]];
sys.gb = array![0.0_f64, 0.0];
let options = ArrowSolveOptions::direct();
let (_delta_t, _delta_beta, cache) =
solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("direct arrow solve should factor this SPD system");
let dim = n * d + k;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for r in 0..d {
for c in 0..d {
h[[base + r, base + c]] = sys.rows[i].htt[[r, c]];
}
}
for r in 0..d {
for c in 0..k {
let v = sys.rows[i].htbeta[[r, c]];
h[[base + r, n * d + c]] = v;
h[[n * d + c, base + r]] = v;
}
}
}
for r in 0..k {
for c in 0..k {
h[[n * d + r, n * d + c]] = sys.hbb[[r, c]];
}
}
let l = cholesky_lower(&h).expect("assembled bordered H must be SPD");
let h_inv = cholesky_solve_matrix(&l, &Array2::<f64>::eye(dim));
let diag = cache
.latent_block_inverse_diagonal()
.expect("dense Schur cache must support the selected-inverse diagonal");
assert_eq!(diag.len(), n * d);
for i in 0..n {
for j in 0..d {
let idx = i * d + j; let expected = h_inv[[idx, idx]];
let got = diag[idx];
assert!(
(got - expected).abs() < 1e-9,
"row {i} axis {j}: selected-inverse diag {got} vs dense {expected}"
);
}
}
let trace_selected: f64 = diag.iter().sum();
let trace_dense: f64 = (0..n * d).map(|idx| h_inv[[idx, idx]]).sum();
assert!(
(trace_selected - trace_dense).abs() < 1e-9,
"full latent trace {trace_selected} vs dense {trace_dense}"
);
}
#[test]
pub(crate) fn full_inverse_apply_matches_dense_inverse_and_newton_step() {
let n = 3usize;
let d = 2usize;
let k = 2usize;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[4.0_f64, 0.5], [0.5, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.2], [-0.3, 0.7]];
sys.rows[0].gt = array![0.4_f64, -0.7];
sys.rows[1].htt = array![[5.0_f64, -0.4], [-0.4, 2.5]];
sys.rows[1].htbeta = array![[0.6_f64, -0.1], [0.4, 0.9]];
sys.rows[1].gt = array![-0.2_f64, 0.9];
sys.rows[2].htt = array![[3.5_f64, 0.2], [0.2, 4.5]];
sys.rows[2].htbeta = array![[-0.2_f64, 0.5], [0.8, -0.6]];
sys.rows[2].gt = array![1.1_f64, 0.3];
sys.hbb = array![[12.0_f64, 0.7], [0.7, 10.0]];
sys.gb = array![0.5_f64, -0.8];
let options = ArrowSolveOptions::direct();
let (delta_t, delta_beta, cache) =
solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("direct arrow solve should factor this SPD system");
let mut g_t = Array1::<f64>::zeros(n * d);
for i in 0..n {
for j in 0..d {
g_t[i * d + j] = sys.rows[i].gt[j];
}
}
let (u_t, u_beta) = cache
.full_inverse_apply(g_t.view(), sys.gb.view())
.expect("full_inverse_apply on the ridge-0 Direct cache");
for idx in 0..n * d {
assert!(
(u_t[idx] + delta_t[idx]).abs() < 1e-10,
"t[{idx}]: full_inverse_apply {} vs −(Newton step) {}",
u_t[idx],
-delta_t[idx]
);
}
for c in 0..k {
assert!(
(u_beta[c] + delta_beta[c]).abs() < 1e-10,
"beta[{c}]: full_inverse_apply {} vs −(Newton step) {}",
u_beta[c],
-delta_beta[c]
);
}
let dim = n * d + k;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for r in 0..d {
for c in 0..d {
h[[base + r, base + c]] = sys.rows[i].htt[[r, c]];
}
for c in 0..k {
let v = sys.rows[i].htbeta[[r, c]];
h[[base + r, n * d + c]] = v;
h[[n * d + c, base + r]] = v;
}
}
}
for r in 0..k {
for c in 0..k {
h[[n * d + r, n * d + c]] = sys.hbb[[r, c]];
}
}
let l = cholesky_lower(&h).expect("assembled bordered H must be SPD");
let mut w_full = Array1::<f64>::zeros(dim);
for (idx, v) in w_full.iter_mut().enumerate() {
*v = 0.3 + 0.17 * (idx as f64) * (if idx % 2 == 0 { 1.0 } else { -1.0 });
}
let dense_u = cholesky_solve_vector(&l, &w_full);
let (u_t2, u_beta2) = cache
.full_inverse_apply(
w_full.slice(ndarray::s![..n * d]),
w_full.slice(ndarray::s![n * d..]),
)
.expect("full_inverse_apply on arbitrary RHS");
for idx in 0..n * d {
assert!(
(u_t2[idx] - dense_u[idx]).abs() < 1e-10,
"t[{idx}]: full_inverse_apply {} vs dense {}",
u_t2[idx],
dense_u[idx]
);
}
for c in 0..k {
assert!(
(u_beta2[c] - dense_u[n * d + c]).abs() < 1e-10,
"beta[{c}]: full_inverse_apply {} vs dense {}",
u_beta2[c],
dense_u[n * d + c]
);
}
}
#[test]
pub(crate) fn schur_inverse_beta_block_matches_dense() {
let n = 3usize;
let d = 2usize;
let k = 2usize;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[4.0_f64, 0.5], [0.5, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.2], [-0.3, 0.7]];
sys.rows[1].htt = array![[5.0_f64, -0.4], [-0.4, 2.5]];
sys.rows[1].htbeta = array![[0.6_f64, -0.1], [0.4, 0.9]];
sys.rows[2].htt = array![[3.5_f64, 0.2], [0.2, 4.5]];
sys.rows[2].htbeta = array![[-0.2_f64, 0.5], [0.8, -0.6]];
for row in sys.rows.iter_mut() {
row.gt = array![0.0_f64, 0.0];
}
sys.hbb = array![[12.0_f64, 0.7], [0.7, 10.0]];
sys.gb = array![0.0_f64, 0.0];
let options = ArrowSolveOptions::direct();
let (_dt, _db, cache) = solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("direct arrow solve should factor this SPD system");
let dim = n * d + k;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for r in 0..d {
for c in 0..d {
h[[base + r, base + c]] = sys.rows[i].htt[[r, c]];
}
}
for r in 0..d {
for c in 0..k {
let v = sys.rows[i].htbeta[[r, c]];
h[[base + r, n * d + c]] = v;
h[[n * d + c, base + r]] = v;
}
}
}
for r in 0..k {
for c in 0..k {
h[[n * d + r, n * d + c]] = sys.hbb[[r, c]];
}
}
let l = cholesky_lower(&h).expect("assembled bordered H must be SPD");
let h_inv = cholesky_solve_matrix(&l, &Array2::<f64>::eye(dim));
let beta_off = n * d;
for col in 0..k {
let mut e = Array1::<f64>::zeros(k);
e[col] = 1.0;
let x = cache
.schur_inverse_apply(e.view())
.expect("dense Schur cache must support schur_inverse_apply");
for r in 0..k {
let expected = h_inv[[beta_off + r, beta_off + col]];
assert!(
(x[r] - expected).abs() < 1e-9,
"S_β⁻¹[{r},{col}] {} vs dense {expected}",
x[r]
);
}
}
let a_scalar = 0.75_f64;
let mut trace = 0.0_f64;
for col in 0..k {
let mut m_col = Array1::<f64>::zeros(k);
m_col[col] = a_scalar;
let z = cache
.schur_inverse_apply(m_col.view())
.expect("schur_inverse_apply");
trace += z[col];
}
let trace_dense: f64 = a_scalar
* (0..k)
.map(|j| h_inv[[beta_off + j, beta_off + j]])
.sum::<f64>();
assert!(
(trace - trace_dense).abs() < 1e-9,
"Kron-block trace {trace} vs dense {trace_dense}"
);
let full = cache
.schur_inverse_block(0..k)
.expect("dense Schur cache must support schur_inverse_block");
assert_eq!(full.dim(), (k, k));
for r in 0..k {
for c in 0..k {
let expected = h_inv[[beta_off + r, beta_off + c]];
assert!(
(full[[r, c]] - expected).abs() < 1e-9,
"block[{r},{c}] {} vs dense {expected}",
full[[r, c]]
);
assert!(
(full[[r, c]] - full[[c, r]]).abs() < 1e-12,
"schur_inverse_block must be symmetric at [{r},{c}]"
);
}
}
let sub = cache
.schur_inverse_block(1..k)
.expect("interior block must be supported");
assert_eq!(sub.dim(), (k - 1, k - 1));
assert!(
(sub[[0, 0]] - h_inv[[beta_off + 1, beta_off + 1]]).abs() < 1e-9,
"interior block [1,1] {} vs dense {}",
sub[[0, 0]],
h_inv[[beta_off + 1, beta_off + 1]]
);
assert!(cache.schur_inverse_block(0..(k + 1)).is_err());
}
#[test]
pub(crate) fn ill_conditioning_tolerated_returns_cache_with_exact_logdet() {
let n = 2usize;
let d = 2usize;
let k = 2usize;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[1.0_f64, 0.0], [0.0, 1e-9]];
sys.rows[0].htbeta = array![[0.3_f64, 0.1], [0.05, 0.2]];
sys.rows[1].htt = array![[2.0_f64, 0.0], [0.0, 2e-9]];
sys.rows[1].htbeta = array![[0.2_f64, -0.1], [0.1, 0.15]];
for row in sys.rows.iter_mut() {
row.gt = array![0.0_f64, 0.0];
}
sys.hbb = array![[5.0_f64, 0.3], [0.3, 4.0]];
sys.gb = array![0.0_f64, 0.0];
for i in 0..n {
let factor = factor_one_row(&sys.rows[i], 0.0, d, i, false)
.expect("barely-PD row must be conditioned, not rejected (gam#578)");
let kappa = cholesky_factor_kappa_estimate(&factor);
assert!(
kappa.is_finite() && kappa <= safe_spd_kappa_max(d),
"conditioned per-row factor {i} must satisfy the safe-Schur κ ceiling; got κ={kappa:e}"
);
}
let single_shot =
solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &ArrowSolveOptions::direct());
assert!(
matches!(
single_shot,
Err(ArrowSchurError::SchurFactorFailed { .. })
| Err(ArrowSchurError::PerRowFactorIllConditioned { .. })
| Err(ArrowSchurError::PcgFailed { .. })
),
"single-shot strict direct() cannot keep the dense Schur PD with per-row \
conditioning alone; expected a recoverable factorization error, got {single_shot:?}"
);
let (strict_dt, strict_db, strict_diag) =
solve_with_lm_escalation_inner(&sys, 0.0, 0.0, &ArrowSolveOptions::direct())
.expect("LM escalation must recover the ill-conditioned strict solve (gam#845)");
for v in strict_dt.iter().chain(strict_db.iter()) {
assert!(v.is_finite(), "recovered strict step must be finite: {v}");
}
assert!(
strict_diag.ridge_escalations <= DEFAULT_PROXIMAL_MAX_ATTEMPTS,
"recovery must use a bounded number of outer ridge escalations; got {}",
strict_diag.ridge_escalations
);
let opts = ArrowSolveOptions::direct().with_ill_conditioning_tolerated();
let tolerate_indefinite = solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &opts);
assert!(
matches!(
tolerate_indefinite,
Err(ArrowSchurError::SchurFactorFailed { .. })
),
"tolerate mode must refuse the indefinite assembled H rather than fabricate \
a log-determinant; got {tolerate_indefinite:?}"
);
let mut pd_sys = ArrowSchurSystem::new(n, d, k);
pd_sys.rows[0].htt = array![[1.0_f64, 0.0], [0.0, 1e-9]];
pd_sys.rows[0].htbeta = array![[0.3_f64, 0.1], [3e-6, 1e-6]];
pd_sys.rows[1].htt = array![[2.0_f64, 0.0], [0.0, 2e-9]];
pd_sys.rows[1].htbeta = array![[0.2_f64, -0.1], [2e-6, 4e-6]];
for row in pd_sys.rows.iter_mut() {
row.gt = array![0.0_f64, 0.0];
}
pd_sys.hbb = array![[5.0_f64, 0.3], [0.3, 4.0]];
pd_sys.gb = array![0.0_f64, 0.0];
let (_dt, _db, cache) = solve_arrow_newton_step_with_options(&pd_sys, 0.0, 0.0, &opts)
.expect("tolerate mode must factor the ill-conditioned-but-PD system");
let (log_det_tt, log_det_schur) = cache.arrow_log_det();
let log_det_cache = log_det_tt + log_det_schur.expect("dense Schur factor present");
let dim = n * d + k;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for r in 0..d {
for c in 0..d {
h[[base + r, base + c]] = pd_sys.rows[i].htt[[r, c]];
}
}
for r in 0..d {
for c in 0..k {
let v = pd_sys.rows[i].htbeta[[r, c]];
h[[base + r, n * d + c]] = v;
h[[n * d + c, base + r]] = v;
}
}
}
for r in 0..k {
for c in 0..k {
h[[n * d + r, n * d + c]] = pd_sys.hbb[[r, c]];
}
}
let lh = cholesky_lower(&h).expect("assembled bordered H must be SPD");
let log_det_dense: f64 = 2.0 * (0..dim).map(|i| lh[[i, i]].ln()).sum::<f64>();
assert!(
(log_det_cache - log_det_dense).abs() < 1e-6,
"tolerated-cache log|H| {log_det_cache} vs dense {log_det_dense}"
);
let tdiag = cache
.latent_block_inverse_diagonal()
.expect("tolerated cache must support latent_block_inverse_diagonal");
assert_eq!(tdiag.len(), n * d);
assert!(tdiag.iter().all(|v| v.is_finite()));
}
#[test]
pub(crate) fn arrow_factor_slab_accessor_matches_array_blocks_bitwise() {
let blocks = vec![
array![[1.0_f64]],
array![[2.0_f64, 0.0], [0.25, 3.0]],
array![[4.0_f64, 0.0, 0.0], [0.5, 5.0, 0.0], [-0.25, 0.75, 6.0]],
];
let slab = ArrowFactorSlab::from_blocks(blocks.clone());
assert_eq!(slab.len(), blocks.len());
for row in 0..blocks.len() {
let view = slab.factor(row);
assert_eq!(view.dim(), blocks[row].dim());
for r in 0..blocks[row].nrows() {
for c in 0..blocks[row].ncols() {
assert_eq!(view[[r, c]].to_bits(), blocks[row][[r, c]].to_bits());
}
}
}
}
pub(crate) fn fixed_row_kernel_fixture<const D: usize>() -> (ArrowRowBlock, Array1<f64>) {
let mut row = ArrowRowBlock::new(D, 0);
for r in 0..D {
for c in 0..D {
row.htt[[r, c]] = if r == c {
4.0 + r as f64
} else {
0.03125 * ((r + c + 1) as f64)
};
}
}
let rhs = Array1::from_iter((0..D).map(|i| 0.5 + i as f64 * 0.25));
(row, rhs)
}
pub(crate) fn assert_fixed_row_kernels_match_dynamic<const D: usize>() -> usize {
let (row, rhs) = fixed_row_kernel_fixture::<D>();
let ridge = 0.125_f64;
let fixed = factor_row_block_cholesky_fixed::<D>(&row, ridge).expect("fixed factor");
let dynamic = factor_row_block_cholesky_dynamic(&row, ridge, D).expect("dynamic factor");
for r in 0..D {
for c in 0..D {
assert_eq!(
fixed[[r, c]].to_bits(),
dynamic[[r, c]].to_bits(),
"factor mismatch at D={D} ({r},{c})"
);
}
}
let fixed_solve = cholesky_solve_vector_fixed::<D>(fixed.view(), rhs.view());
let dynamic_solve = cholesky_solve_vector(dynamic.view(), rhs.view());
for i in 0..D {
assert_eq!(
fixed_solve[i].to_bits(),
dynamic_solve[i].to_bits(),
"solve mismatch at D={D} index {i}"
);
}
D
}
#[test]
pub(crate) fn fixed_row_kernels_match_dynamic_path_bitwise() {
let checked = assert_fixed_row_kernels_match_dynamic::<1>()
+ assert_fixed_row_kernels_match_dynamic::<2>()
+ assert_fixed_row_kernels_match_dynamic::<3>()
+ assert_fixed_row_kernels_match_dynamic::<4>();
assert_eq!(checked, 10);
}
pub(crate) fn dense_direct_system(n: usize, d: usize, k: usize) -> ArrowSchurSystem {
let mut sys = ArrowSchurSystem::new(n, d, k);
for (i, row) in sys.rows.iter_mut().enumerate() {
for r in 0..d {
for c in 0..d {
row.htt[[r, c]] = if r == c { 4.0 + (i % 3) as f64 } else { 0.1 };
}
row.gt[r] = 0.05 * ((i + r + 1) as f64).sin();
for c in 0..k {
row.htbeta[[r, c]] = 0.01 * (((i + 1) * (c + 1)) as f64).cos();
}
}
}
for r in 0..k {
sys.gb[r] = 0.02 * ((r + 1) as f64).cos();
for c in 0..k {
sys.hbb[[r, c]] = if r == c { 6.0 } else { 0.0 };
}
}
sys.refresh_row_hessian_fingerprint();
sys
}
#[test]
pub(crate) fn device_dispatch_predicate_gates_on_work_not_rows() {
let policy = crate::gpu::policy::GpuDispatchPolicy::default();
assert!(!policy.dense_hessian_work_target_is_gpu(300, 8));
assert!(policy.dense_hessian_work_target_is_gpu(2_000, 4_096));
}
#[test]
pub(crate) fn matvec_gate_engages_for_llm_shape_off_for_tiny() {
let policy = crate::gpu::policy::GpuDispatchPolicy::default();
let options = ArrowSolveOptions::inexact_pcg();
let cg_iters = options
.pcg
.max_iterations
.min(options.trust_region.max_iterations);
assert!(cg_iters > 0);
let (n_llm, k_llm, d_llm) = (2_000_usize, 2_048_usize, 8_usize);
assert!(policy.reduced_schur_matvec_should_offload(n_llm, k_llm, d_llm, cg_iters));
assert!(!policy.reduced_schur_matvec_should_offload(30, 8, 2, cg_iters));
assert!(!policy.reduced_schur_matvec_should_offload(300, 8, 4, cg_iters));
}
#[test]
pub(crate) fn device_seam_declines_without_gpu_and_matches_cpu() {
if crate::gpu::runtime::GpuRuntime::global().is_some() {
return;
}
let sys = dense_direct_system(6, 2, 4);
let options = ArrowSolveOptions::direct();
assert!(try_device_arrow_direct(&sys, 0.0, 0.0, &options).is_none());
assert!(maybe_inject_gpu_schur_matvec(&sys, 0.0, 0.0, &options).is_none());
let (dt_core, db_core, diag) =
solve_arrow_newton_step_core(&sys, 0.0, 0.0, &options).expect("core solve");
assert!(
!diag.used_device_arrow,
"no device present, so the solve must not be flagged device-served"
);
let artifacts =
solve_arrow_newton_step_artifacts(&sys, 0.0, 0.0, &options).expect("artifacts solve");
for (a, b) in dt_core.iter().zip(artifacts.delta_t.iter()) {
assert_eq!(a.to_bits(), b.to_bits(), "Δt must be bit-identical to CPU");
}
for (a, b) in db_core.iter().zip(artifacts.delta_beta.iter()) {
assert_eq!(a.to_bits(), b.to_bits(), "Δβ must be bit-identical to CPU");
}
}
#[test]
pub(crate) fn streaming_mixed_precision_matches_f64_and_keeps_logdet_f64() {
let sys = dense_direct_system(40, 3, 6);
let f64_options = ArrowSolveOptions::direct().with_streaming_chunk_size(Some(8));
let mp_options = f64_options
.clone()
.with_mixed_precision_policy(MixedPrecisionPolicy::certified());
assert!(matches!(
f64_options.mixed_precision,
MixedPrecisionPolicy::Off
));
let mut s_f64 = StreamingArrowSchur::from_system(&sys, 8);
let (_, db_f64, _) = s_f64
.solve(0.0, 0.0, &f64_options)
.expect("f64 streaming solve");
let mut s_mp = StreamingArrowSchur::from_system(&sys, 8);
let (_, db_mp, _) = s_mp
.solve(0.0, 0.0, &mp_options)
.expect("mp streaming solve");
let mut max_abs = 0.0_f64;
for (a, b) in db_f64.iter().zip(db_mp.iter()) {
max_abs = max_abs.max((a - b).abs());
}
assert!(
max_abs < 1e-7,
"mixed-precision Δβ deviates from f64 by {max_abs:e}, above the certified tolerance"
);
let mut ld_f64 = StreamingArrowSchur::from_system(&sys, 8);
let logdet_f64 = ld_f64
.exact_arrow_log_det(0.0, 0.0, &f64_options)
.expect("f64 logdet");
let mut ld_mp = StreamingArrowSchur::from_system(&sys, 8);
let logdet_mp = ld_mp
.exact_arrow_log_det(0.0, 0.0, &mp_options)
.expect("mp logdet");
assert_eq!(
logdet_f64.to_bits(),
logdet_mp.to_bits(),
"evidence log|H| must stay bit-for-bit f64 under the mixed-precision policy"
);
}
#[test]
pub(crate) fn streaming_mixed_precision_default_upgrades_only_off() {
let off = ArrowSolveOptions::direct();
assert!(matches!(
off.with_streaming_mixed_precision_default().mixed_precision,
MixedPrecisionPolicy::Certified { .. }
));
let pinned = ArrowSolveOptions::direct().with_mixed_precision_policy(MixedPrecisionPolicy::Off);
let custom =
ArrowSolveOptions::direct().with_mixed_precision_policy(MixedPrecisionPolicy::Certified {
max_refinement_steps: 1,
residual_relative_tolerance: 1e-6,
kappa_unit_roundoff_margin: 0.25,
});
match custom
.with_streaming_mixed_precision_default()
.mixed_precision
{
MixedPrecisionPolicy::Certified {
max_refinement_steps,
..
} => assert_eq!(max_refinement_steps, 1, "explicit policy preserved"),
MixedPrecisionPolicy::Off => panic!("explicit Certified must not be downgraded"),
}
assert!(matches!(pinned.mixed_precision, MixedPrecisionPolicy::Off));
}
pub(crate) fn build_ibp_woodbury_fixture() -> (ArrowSchurSystem, IbpCrossRowSource, Vec<Vec<f64>>) {
let n = 3usize;
let d = 2usize;
let k_beta = 2usize;
let r = 2usize; let mut sys = ArrowSchurSystem::new(n, d, k_beta);
sys.rows[0].htt = array![[4.0_f64, 0.5], [0.5, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.2], [-0.3, 0.7]];
sys.rows[0].gt = array![0.4_f64, -0.7];
sys.rows[1].htt = array![[5.0_f64, -0.4], [-0.4, 2.5]];
sys.rows[1].htbeta = array![[0.6_f64, -0.1], [0.4, 0.9]];
sys.rows[1].gt = array![-0.2_f64, 0.9];
sys.rows[2].htt = array![[3.5_f64, 0.2], [0.2, 4.5]];
sys.rows[2].htbeta = array![[-0.2_f64, 0.5], [0.8, -0.6]];
sys.rows[2].gt = array![1.1_f64, 0.3];
sys.hbb = array![[12.0_f64, 0.7], [0.7, 10.0]];
sys.gb = array![0.5_f64, -0.8];
let d_coef = array![0.6_f64, -0.35];
let zprime = vec![
vec![0.9_f64, 0.4], vec![-0.5, 0.8], vec![0.7, -0.6], ];
let mut entries = Vec::new();
for i in 0..n {
for k in 0..r {
let g = sys.row_offsets[i] + k;
entries.push((g, k, zprime[i][k]));
}
}
for i in 0..n {
for k in 0..r {
sys.rows[i].htt[[k, k]] += d_coef[k] * zprime[i][k] * zprime[i][k];
}
}
let source = IbpCrossRowSource {
r,
d: d_coef,
entries,
};
(sys, source, zprime)
}
pub(crate) fn dense_h_full(
sys: &ArrowSchurSystem,
source: &IbpCrossRowSource,
zprime: &[Vec<f64>],
) -> Array2<f64> {
let n = sys.rows.len();
let d = sys.d;
let k_beta = sys.k;
let dim = n * d + k_beta;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for rr in 0..d {
for cc in 0..d {
h[[base + rr, base + cc]] = sys.rows[i].htt[[rr, cc]];
}
for cc in 0..k_beta {
let v = sys.rows[i].htbeta[[rr, cc]];
h[[base + rr, n * d + cc]] = v;
h[[n * d + cc, base + rr]] = v;
}
}
}
for rr in 0..k_beta {
for cc in 0..k_beta {
h[[n * d + rr, n * d + cc]] = sys.hbb[[rr, cc]];
}
}
for k in 0..source.r {
let dk = source.d[k];
for i in 0..n {
for j in 0..n {
if i == j {
continue;
}
let gi = i * d + k;
let gj = j * d + k;
h[[gi, gj]] += dk * zprime[i][k] * zprime[j][k];
}
}
}
h
}
#[test]
pub(crate) fn ibp_cross_row_woodbury_logdet_matches_dense() {
let (mut sys, source, zprime) = build_ibp_woodbury_fixture();
sys.set_ibp_cross_row_source(source.clone());
let options = ArrowSolveOptions::direct();
let (_dt, _db, cache) = solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("IBP Woodbury cache should factor");
assert!(
cache.cross_row_woodbury.is_some(),
"the cache must carry the cross-row Woodbury"
);
let h_full = dense_h_full(&sys, &source, &zprime);
let l = cholesky_lower(&h_full).expect("H_full must be SPD for this fixture");
let mut dense_logdet = 0.0_f64;
for i in 0..l.nrows() {
dense_logdet += 2.0 * l[[i, i]].ln();
}
let (tt, schur) = cache.arrow_log_det();
let cache_logdet = tt + schur.expect("direct mode has a Schur factor");
assert!(
(cache_logdet - dense_logdet).abs() < 1e-9,
"cache log det H_full {cache_logdet} vs dense {dense_logdet}"
);
let mut h0prime = h_full.clone();
for k in 0..source.r {
for i in 0..sys.rows.len() {
let gi = i * sys.d + k;
for j in 0..sys.rows.len() {
let gj = j * sys.d + k;
h0prime[[gi, gj]] -= source.d[k] * zprime[i][k] * zprime[j][k];
}
}
}
let l0 = cholesky_lower(&h0prime).expect("H₀' SPD");
let mut logdet_h0prime = 0.0_f64;
for i in 0..l0.nrows() {
logdet_h0prime += 2.0 * l0[[i, i]].ln();
}
let correction = cache.cross_row_woodbury_log_det();
assert!(
(correction - (dense_logdet - logdet_h0prime)).abs() < 1e-9,
"Woodbury log det correction {correction} vs (logdet H_full − logdet H₀') {}",
dense_logdet - logdet_h0prime
);
}
#[test]
pub(crate) fn ibp_cross_row_woodbury_full_inverse_and_newton_match_dense() {
let (mut sys, source, zprime) = build_ibp_woodbury_fixture();
sys.set_ibp_cross_row_source(source.clone());
let options = ArrowSolveOptions::direct();
let (delta_t, delta_beta, cache) =
solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("IBP Woodbury cache should factor");
let n = sys.rows.len();
let d = sys.d;
let k_beta = sys.k;
let dim = n * d + k_beta;
let h_full = dense_h_full(&sys, &source, &zprime);
let l = cholesky_lower(&h_full).expect("H_full SPD");
let mut g = Array1::<f64>::zeros(dim);
for i in 0..n {
for j in 0..d {
g[i * d + j] = sys.rows[i].gt[j];
}
}
for c in 0..k_beta {
g[n * d + c] = sys.gb[c];
}
let dense_step = cholesky_solve_vector(&l, &g); for idx in 0..n * d {
assert!(
(delta_t[idx] + dense_step[idx]).abs() < 1e-9,
"Δt[{idx}] {} vs −H_full⁻¹g {}",
delta_t[idx],
-dense_step[idx]
);
}
for c in 0..k_beta {
assert!(
(delta_beta[c] + dense_step[n * d + c]).abs() < 1e-9,
"Δβ[{c}] {} vs −H_full⁻¹g {}",
delta_beta[c],
-dense_step[n * d + c]
);
}
let mut w_full = Array1::<f64>::zeros(dim);
for (idx, v) in w_full.iter_mut().enumerate() {
*v = 0.25 + 0.13 * (idx as f64) * (if idx % 2 == 0 { 1.0 } else { -1.0 });
}
let dense_u = cholesky_solve_vector(&l, &w_full);
let (u_t, u_beta) = cache
.full_inverse_apply(
w_full.slice(ndarray::s![..n * d]),
w_full.slice(ndarray::s![n * d..]),
)
.expect("full_inverse_apply on the Woodbury cache");
for idx in 0..n * d {
assert!(
(u_t[idx] - dense_u[idx]).abs() < 1e-9,
"H_full⁻¹w t[{idx}] {} vs dense {}",
u_t[idx],
dense_u[idx]
);
}
for c in 0..k_beta {
assert!(
(u_beta[c] - dense_u[n * d + c]).abs() < 1e-9,
"H_full⁻¹w beta[{c}] {} vs dense {}",
u_beta[c],
dense_u[n * d + c]
);
}
let mut h_full_inv = Array2::<f64>::zeros((dim, dim));
let mut e = Array1::<f64>::zeros(dim);
for col in 0..dim {
e.fill(0.0);
e[col] = 1.0;
let sol = cholesky_solve_vector(&l, &e);
for rrow in 0..dim {
h_full_inv[[rrow, col]] = sol[rrow];
}
}
let diag = cache
.latent_block_inverse_diagonal()
.expect("latent_block_inverse_diagonal on the Woodbury cache");
for idx in 0..n * d {
assert!(
(diag[idx] - h_full_inv[[idx, idx]]).abs() < 1e-9,
"diag (H_full⁻¹)_tt[{idx}] {} vs dense {}",
diag[idx],
h_full_inv[[idx, idx]]
);
}
}
#[test]
pub(crate) fn ibp_cross_row_woodbury_absent_is_strict_noop() {
let (sys, _source, zprime) = build_ibp_woodbury_fixture();
let options = ArrowSolveOptions::direct();
let (_dt, _db, cache) = solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("bare cache should factor");
assert!(
cache.cross_row_woodbury.is_none(),
"no source ⇒ no Woodbury carrier"
);
assert_eq!(
cache.cross_row_woodbury_log_det(),
0.0,
"absent Woodbury contributes exactly zero to the log-determinant"
);
let dim = sys.rows.len() * sys.d + sys.k;
let mut h0 = Array2::<f64>::zeros((dim, dim));
let n = sys.rows.len();
let d = sys.d;
for i in 0..n {
let base = i * d;
for rr in 0..d {
for cc in 0..d {
h0[[base + rr, base + cc]] = sys.rows[i].htt[[rr, cc]];
}
for cc in 0..sys.k {
let v = sys.rows[i].htbeta[[rr, cc]];
h0[[base + rr, n * d + cc]] = v;
h0[[n * d + cc, base + rr]] = v;
}
}
}
for rr in 0..sys.k {
for cc in 0..sys.k {
h0[[n * d + rr, n * d + cc]] = sys.hbb[[rr, cc]];
}
}
let l = cholesky_lower(&h0).expect("H₀ SPD");
let mut dense_logdet = 0.0_f64;
for i in 0..l.nrows() {
dense_logdet += 2.0 * l[[i, i]].ln();
}
let (tt, schur) = cache.arrow_log_det();
let cache_logdet = tt + schur.expect("direct Schur");
assert!(
(cache_logdet - dense_logdet).abs() < 1e-9,
"bare cache log det {cache_logdet} vs dense H₀ {dense_logdet}"
);
assert_eq!(zprime.len(), n);
}
#[test]
pub(crate) fn ibp_cross_row_streaming_logdet_refuses() {
let (mut sys, source, _zprime) = build_ibp_woodbury_fixture();
sys.set_ibp_cross_row_source(source);
let mut streaming = StreamingArrowSchur::from_system(&sys, 2);
let options = ArrowSolveOptions::direct();
let err = streaming.reduced_schur_and_log_det_tt(0.0, 0.0, &options);
assert!(
err.is_err(),
"streaming arrow log-det must refuse an IBP-active system"
);
}
pub(crate) fn dense_arrow_system(n: usize, d: usize, k: usize) -> ArrowSchurSystem {
let mut sys = ArrowSchurSystem::new(n, d, k);
for i in 0..n {
let mut htt = Array2::<f64>::zeros((d, d));
for r in 0..d {
for c in 0..d {
let s = ((i + 1) * (r + 2) * (c + 3)) as f64;
htt[[r, c]] = if r == c {
4.0 + (s % 7.0)
} else {
0.1 * ((s % 5.0) - 2.0)
};
}
}
let mut sym = &htt + &htt.t();
for r in 0..d {
sym[[r, r]] = sym[[r, r]].abs() + (d as f64) + 2.0;
}
sys.rows[i].htt = sym;
let mut htb = Array2::<f64>::zeros((d, k));
for r in 0..d {
for c in 0..k {
let s = ((i + 1) * (r + 1) + 3 * (c + 1)) as f64;
htb[[r, c]] = 0.05 * ((s % 11.0) - 5.0);
}
}
sys.rows[i].htbeta = htb;
sys.rows[i].gt = Array1::<f64>::zeros(d);
}
let mut hbb = Array2::<f64>::zeros((k, k));
for r in 0..k {
for c in 0..k {
let s = ((r + 1) * (c + 1)) as f64;
hbb[[r, c]] = if r == c {
(k as f64) + 6.0 + (s % 3.0)
} else {
0.02 * ((s % 7.0) - 3.0)
};
}
}
sys.hbb = hbb;
sys.gb = Array1::<f64>::zeros(k);
sys
}
pub(crate) fn schur_matvec_sequential_ref<B: BatchedBlockSolver>(
sys: &ArrowSchurSystem,
htt_factors: &ArrowFactorSlab,
ridge_beta: f64,
x: &Array1<f64>,
backend: &B,
) -> Array1<f64> {
let k = sys.k;
let mut out = Array1::<f64>::zeros(k);
{
let xs = x.as_slice().unwrap();
let os = out.as_slice_mut().unwrap();
sys.penalty_matvec_add(xs, os);
for a in 0..k {
os[a] += ridge_beta * xs[a];
}
}
let mut local = Array1::<f64>::zeros(sys.d);
let mut neg = Array1::<f64>::zeros(k);
for i in 0..sys.rows.len() {
neg.fill(0.0);
schur_matvec_row_into(sys, htt_factors, x, backend, i, &mut local, &mut neg);
for a in 0..k {
out[a] -= neg[a];
}
}
out
}
#[test]
pub(crate) fn parallel_schur_matvec_deterministic_and_matches_sequential() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64; let d = 6usize;
let k = 96usize;
let sys = dense_arrow_system(n, d, k);
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, d, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let x = Array1::from_iter((0..k).map(|a| 0.3 * (a as f64).sin() - 0.1));
let mut out_a = Array1::<f64>::zeros(k);
let mut out_b = Array1::<f64>::zeros(k);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_a,
&backend,
None,
);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_b,
&backend,
None,
);
for a in 0..k {
assert_eq!(
out_a[a].to_bits(),
out_b[a].to_bits(),
"parallel Schur matvec must be deterministic run-to-run at index {a}"
);
}
let out_seq = schur_matvec_sequential_ref(&sys, &htt_factors, ridge_beta, &x, &backend);
let scale = out_seq
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for a in 0..k {
let rel = (out_a[a] - out_seq[a]).abs() / scale;
assert!(
rel < 1e-12,
"parallel vs sequential Schur matvec must agree to reassociation error \
at index {a}: {} vs {} (rel {rel:e})",
out_a[a],
out_seq[a]
);
}
}
#[test]
pub(crate) fn parallel_penalty_prologue_bit_identical_to_serial() {
let k = 576usize; assert!(
k >= SCHUR_PROLOGUE_PARALLEL_K_MIN,
"test border must exceed the prologue parallel threshold"
);
let d = 4usize;
let n = 8usize;
assert!(n < SCHUR_MATVEC_PARALLEL_ROW_MIN);
let sys = dense_arrow_system(n, d, k);
let ridge = 7.5e-3;
let x = Array1::from_iter((0..k).map(|a| 0.4 * (a as f64 * 0.31).cos() - 0.17));
let xs = x.as_slice().unwrap();
let mut serial = vec![0.0_f64; k];
sys.penalty_matvec_add(xs, &mut serial);
for a in 0..k {
serial[a] += ridge * xs[a];
}
let mut par = vec![0.0_f64; k];
sys.penalty_ridge_prologue_into(xs, ridge, &mut par, true);
let mut ser_branch = vec![0.0_f64; k];
sys.penalty_ridge_prologue_into(xs, ridge, &mut ser_branch, false);
for a in 0..k {
assert_eq!(
par[a].to_bits(),
serial[a].to_bits(),
"parallel penalty prologue must be bit-identical to serial at index {a}"
);
assert_eq!(
ser_branch[a].to_bits(),
serial[a].to_bits(),
"serial prologue branch must match the reference at index {a}"
);
}
}
#[test]
pub(crate) fn bench_reduced_schur_matvec_parallel_speedup() {
let n = 1500usize;
let d = 6usize;
let k = 1024usize;
let sys = dense_arrow_system(n, d, k);
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, d, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let x = Array1::from_iter((0..k).map(|a| 0.3 * ((a as f64) * 0.017).sin() - 0.1));
let calls = 30usize;
let mut sink = 0.0_f64;
let warm = schur_matvec_sequential_ref(&sys, &htt_factors, ridge_beta, &x, &backend);
sink += warm[0];
let t_seq = std::time::Instant::now();
for _ in 0..calls {
let out = schur_matvec_sequential_ref(&sys, &htt_factors, ridge_beta, &x, &backend);
sink += out[0];
}
let seq_elapsed = t_seq.elapsed();
let mut out_par = Array1::<f64>::zeros(k);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_par,
&backend,
None,
); sink += out_par[0];
let t_par = std::time::Instant::now();
for _ in 0..calls {
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_par,
&backend,
None,
);
sink += out_par[0];
}
let par_elapsed = t_par.elapsed();
let seq_per = seq_elapsed.as_secs_f64() / calls as f64;
let par_per = par_elapsed.as_secs_f64() / calls as f64;
let speedup = seq_per / par_per;
println!(
"[#1017 reduced-Schur matvec, n={n} d={d} k={k}, {calls} calls, \
{} rayon threads]\n sequential: {:.3} ms/call\n parallel: {:.3} ms/call\n \
speedup: {:.2}x (sink {:.3e})",
rayon::current_num_threads(),
seq_per * 1e3,
par_per * 1e3,
speedup,
sink,
);
assert!(par_per > 0.0 && seq_per > 0.0, "timings must be positive");
}
pub(crate) fn sae_structured_system(
n: usize,
q: usize,
p: usize,
n_atoms: usize,
m_active: usize,
) -> (ArrowSchurSystem, Vec<Vec<(usize, f64)>>, Vec<Vec<f64>>) {
let k = n_atoms * p;
let mut sys = ArrowSchurSystem::new(n, q, k);
let mut a_phi: Vec<Vec<(usize, f64)>> = Vec::with_capacity(n);
let mut local_jac: Vec<Vec<f64>> = Vec::with_capacity(n);
for i in 0..n {
let mut htt = Array2::<f64>::zeros((q, q));
for r in 0..q {
for c in 0..q {
let s = ((i + 1) * (r + 2) * (c + 3)) as f64;
htt[[r, c]] = 0.1 * ((s % 5.0) - 2.0);
}
}
let mut sym = &htt + &htt.t();
for r in 0..q {
sym[[r, r]] = sym[[r, r]].abs() + (q as f64) + 3.0;
}
sys.rows[i].htt = sym;
sys.rows[i].gt = Array1::<f64>::zeros(q);
let mut jac = vec![0.0_f64; q * p];
for c in 0..q {
for j in 0..p {
let s = ((i + 1) + 2 * (c + 1) + 3 * (j + 1)) as f64;
jac[c * p + j] = 0.1 * ((s % 7.0) - 3.0);
}
}
local_jac.push(jac);
let mut support = Vec::with_capacity(m_active);
for s in 0..m_active {
let atom = ((i * 3 + s * 5) % n_atoms).min(n_atoms - 1);
let phi = 0.5 + 0.25 * (((i + s) % 4) as f64);
support.push((atom * p, phi));
}
a_phi.push(support);
}
let mut hbb = Array2::<f64>::zeros((k, k));
for r in 0..k {
hbb[[r, r]] = (k as f64) + 4.0;
}
sys.hbb = hbb;
sys.gb = Array1::<f64>::zeros(k);
let a_phi_f = a_phi.clone();
let jac_f = local_jac.clone();
let a_phi_t = a_phi.clone();
let jac_t = local_jac.clone();
let p_f = p;
sys.set_row_htbeta_operator(
move |row, x, out| {
let mut u_p = vec![0.0_f64; p_f];
for &(base, phi) in &a_phi_f[row] {
for j in 0..p_f {
u_p[j] += phi * x[base + j];
}
}
let jac = &jac_f[row];
let qi = jac.len() / p_f;
for c in 0..qi {
let mut acc = 0.0;
for j in 0..p_f {
acc += jac[c * p_f + j] * u_p[j];
}
out[c] = acc;
}
},
move |row, v, out| {
let jac = &jac_t[row];
let qi = jac.len() / p_f;
let mut u_p = vec![0.0_f64; p_f];
for c in 0..qi {
let vc = v[c];
for j in 0..p_f {
u_p[j] += jac[c * p_f + j] * vc;
}
}
for &(base, phi) in &a_phi_t[row] {
for j in 0..p_f {
out[base + j] += phi * u_p[j];
}
}
},
);
sys.set_device_sae_pcg_data(DeviceSaePcgData {
p,
beta_dim: k,
a_phi: a_phi.clone(),
local_jac: local_jac.clone(),
smooth_blocks: Vec::new(),
sparse_g_blocks: Vec::new(),
});
(sys, a_phi, local_jac)
}
#[test]
pub(crate) fn resident_sae_matvec_matches_generic() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 96; let q = 4usize;
let p = 6usize;
let n_atoms = 32usize;
let m_active = 5usize;
let (sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let k = sys.k;
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let x = Array1::from_iter((0..k).map(|a| 0.2 * ((a as f64) * 0.013).cos() - 0.05));
let mut out_generic = Array1::<f64>::zeros(k);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_generic,
&backend,
None,
);
let resident = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend)
.expect("SAE structure must yield a resident operator");
let mut out_resident = Array1::<f64>::zeros(k);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_resident,
&backend,
Some(&resident),
);
let scale = out_generic
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for a in 0..k {
let rel = (out_resident[a] - out_generic[a]).abs() / scale;
assert!(
rel < 1e-10,
"resident vs generic SAE Schur matvec must agree at index {a}: \
{} vs {} (rel {rel:e})",
out_resident[a],
out_generic[a]
);
}
let resident2 = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend).unwrap();
let mut out_resident2 = Array1::<f64>::zeros(k);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_resident2,
&backend,
Some(&resident2),
);
for a in 0..k {
assert_eq!(
out_resident[a].to_bits(),
out_resident2[a].to_bits(),
"resident SAE matvec must be deterministic run-to-run at index {a}"
);
}
}
#[test]
pub(crate) fn resident_scalar_jacobi_matches_generic() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64;
let q = 4usize;
let p = 5usize;
let n_atoms = 20usize;
let m_active = 4usize;
let (sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let generic =
JacobiPreconditioner::build_scalar_jacobi(&sys, &htt_factors, ridge_beta, &backend)
.expect("generic scalar Jacobi must build");
let resident = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend)
.expect("SAE structure must yield a resident operator");
let resident_jac =
JacobiPreconditioner::build_scalar_jacobi_resident(&sys, ridge_beta, &resident)
.expect("resident scalar Jacobi must build");
let k = sys.k;
let r = Array1::from_iter((0..k).map(|a| 0.3 * ((a as f64) * 0.021).sin() + 0.07));
let out_generic = generic.apply(&r);
let out_resident = resident_jac.apply(&r);
let scale = out_generic
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for a in 0..k {
let rel = (out_resident[a] - out_generic[a]).abs() / scale;
assert!(
rel < 1e-9,
"resident vs generic SAE scalar Jacobi must agree at index {a}: \
{} vs {} (rel {rel:e})",
out_resident[a],
out_generic[a]
);
}
}
#[test]
pub(crate) fn factored_residency_matches_dense_g_block() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 40;
let q = 3usize;
let p = 7usize;
let n_atoms = 24usize;
let m_active = 4usize;
let (sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let resident = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend)
.expect("SAE structure must yield a resident operator");
for row in 0..n {
let rf = &resident.rows[row];
if rf.di == 0 {
continue;
}
let di = rf.di;
let l = ArrayView2::from_shape((di, p), &rf.l).unwrap();
let y = ArrayView2::from_shape((di, p), &rf.y).unwrap();
let g_dense = l.t().dot(&y);
let g_vec: Vec<f64> = (0..p)
.map(|j| 0.4 * ((row + j) as f64 * 0.11).sin() - 0.07)
.collect();
let mut prod_ref = vec![0.0_f64; p];
for r in 0..p {
let mut s = 0.0;
for c in 0..p {
s += g_dense[(r, c)] * g_vec[c];
}
prod_ref[r] = s;
}
let mut w = vec![0.0_f64; di];
for r in 0..di {
let yrow = &rf.y[r * p..r * p + p];
w[r] = (0..p).map(|c| yrow[c] * g_vec[c]).sum();
}
let mut prod = vec![0.0_f64; p];
for r in 0..di {
let lrow = &rf.l[r * p..r * p + p];
for j in 0..p {
prod[j] += lrow[j] * w[r];
}
}
let scale = prod_ref
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for j in 0..p {
let rel = (prod[j] - prod_ref[j]).abs() / scale;
assert!(
rel < 1e-10,
"factored G_i apply must match dense G_i at row {row} idx {j}: \
{} vs {} (rel {rel:e})",
prod[j],
prod_ref[j]
);
}
}
let factored_entries: usize = resident.rows.iter().map(|r| r.l.len() + r.y.len()).sum();
let dense_entries: usize = resident.rows.iter().filter(|r| r.di > 0).count() * p * p;
assert!(
factored_entries < dense_entries,
"factored residency must store fewer entries than the dense p×p form \
({factored_entries} vs {dense_entries})"
);
}
#[test]
pub(crate) fn bench_resident_sae_matvec_speedup() {
let n = 1500usize;
let q = 2usize;
let p = 64usize;
let n_atoms = 32usize; let m_active = 6usize;
let (sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let k = sys.k;
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let x = Array1::from_iter((0..k).map(|a| 0.3 * ((a as f64) * 0.017).sin() - 0.1));
let cg_iters = 30usize;
let mut sink = 0.0_f64;
let mut out = Array1::<f64>::zeros(k);
schur_matvec(&sys, &htt_factors, ridge_beta, &x, &mut out, &backend, None); sink += out[0];
let t_gen = std::time::Instant::now();
for _ in 0..cg_iters {
schur_matvec(&sys, &htt_factors, ridge_beta, &x, &mut out, &backend, None);
sink += out[0];
}
let gen_elapsed = t_gen.elapsed();
let t_res = std::time::Instant::now();
let resident =
SaeResidentReducedSchur::build(&sys, &htt_factors, &backend).expect("resident operator");
let mut outr = Array1::<f64>::zeros(k);
for _ in 0..cg_iters {
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut outr,
&backend,
Some(&resident),
);
sink += outr[0];
}
let res_elapsed = t_res.elapsed();
let gen_total = gen_elapsed.as_secs_f64();
let res_total = res_elapsed.as_secs_f64();
let factored_f64: usize = resident.rows.iter().map(|r| r.l.len() + r.y.len()).sum();
let dense_f64: usize = resident.rows.iter().filter(|r| r.di > 0).count() * p * p;
println!(
"[#1017 SAE resident matvec, n={n} q={q} p={p} k={k} m={m_active}, \
{cg_iters} CG matvecs incl. 1 residency build, {} rayon threads]\n \
generic: {:.3} ms total ({:.3} ms/matvec)\n resident: {:.3} ms total \
(build + {cg_iters} matvecs)\n speedup: {:.2}x (sink {:.3e})\n \
residency mem: factored {:.2} MiB vs dense p×p {:.2} MiB ({:.1}× smaller)",
rayon::current_num_threads(),
gen_total * 1e3,
gen_total / cg_iters as f64 * 1e3,
res_total * 1e3,
gen_total / res_total,
sink,
factored_f64 as f64 * 8.0 / (1024.0 * 1024.0),
dense_f64 as f64 * 8.0 / (1024.0 * 1024.0),
dense_f64 as f64 / factored_f64.max(1) as f64,
);
assert!(
gen_total > 0.0 && res_total > 0.0,
"timings must be positive"
);
}