use super::*;
use approx::assert_abs_diff_eq;
use ndarray::array;
#[test]
pub(crate) fn sparse_block_kronecker_matches_dense_kronecker() {
let p = 2usize;
let dim_a = 5usize;
let k = dim_a * p;
let g_dense = array![
[3.0_f64, 0.5, 0.2, -0.1, 0.0],
[0.5, 4.0, 0.0, 0.3, 0.1],
[0.2, 0.0, 2.0, 0.4, -0.2],
[-0.1, 0.3, 0.4, 5.0, 0.6],
[0.0, 0.1, -0.2, 0.6, 1.5],
];
let dense = KroneckerPenaltyOp {
factor_a: g_dense.clone(),
factor_b: Array2::<f64>::eye(p),
global_offset: 0,
k,
};
let block_00 = g_dense.slice(ndarray::s![0..2, 0..2]).to_owned();
let block_01 = g_dense.slice(ndarray::s![0..2, 2..5]).to_owned();
let block_10 = g_dense.slice(ndarray::s![2..5, 0..2]).to_owned();
let block_11 = g_dense.slice(ndarray::s![2..5, 2..5]).to_owned();
let sparse = SparseBlockKroneckerPenaltyOp {
p,
dim_a,
k,
blocks: vec![
SparseGBlock {
row_off: 0,
col_off: 0,
data: block_00,
},
SparseGBlock {
row_off: 0,
col_off: 2,
data: block_01,
},
SparseGBlock {
row_off: 2,
col_off: 0,
data: block_10,
},
SparseGBlock {
row_off: 2,
col_off: 2,
data: block_11,
},
],
};
let d_dense = dense.to_dense();
let d_sparse = sparse.to_dense();
for i in 0..k {
for j in 0..k {
assert!(
(d_dense[[i, j]] - d_sparse[[i, j]]).abs() < 1e-12,
"to_dense mismatch at ({i},{j}): {} vs {}",
d_dense[[i, j]],
d_sparse[[i, j]]
);
}
}
let x: Vec<f64> = (0..k).map(|i| 0.1 * (i as f64) - 0.3).collect();
let mut y_dense = vec![0.0_f64; k];
let mut y_sparse = vec![0.0_f64; k];
dense.matvec(&x, &mut y_dense);
sparse.matvec(&x, &mut y_sparse);
for i in 0..k {
assert!(
(y_dense[i] - y_sparse[i]).abs() < 1e-12,
"matvec mismatch at {i}: {} vs {}",
y_dense[i],
y_sparse[i]
);
}
let mut diag_dense = vec![0.0_f64; k];
let mut diag_sparse = vec![0.0_f64; k];
dense.diagonal(&mut diag_dense);
sparse.diagonal(&mut diag_sparse);
for i in 0..k {
assert!(
(diag_dense[i] - diag_sparse[i]).abs() < 1e-12,
"diagonal mismatch at {i}: {} vs {}",
diag_dense[i],
diag_sparse[i]
);
}
let offsets = [0..(2 * p), (2 * p)..k];
for id in 0..offsets.len() {
let b = offsets[id].end - offsets[id].start;
let mut blk_dense = Array2::<f64>::zeros((b, b));
let mut blk_sparse = Array2::<f64>::zeros((b, b));
dense.block(BetaBlockId(id), &offsets, &mut blk_dense);
sparse.block(BetaBlockId(id), &offsets, &mut blk_sparse);
for i in 0..b {
for j in 0..b {
assert!(
(blk_dense[[i, j]] - blk_sparse[[i, j]]).abs() < 1e-12,
"block {id} mismatch at ({i},{j})"
);
}
}
}
}
pub(crate) fn factored_reference_dense(
ranks: &[usize],
basis_sizes: &[usize],
blocks: &[FactoredFrameGBlock],
) -> Array2<f64> {
let n_atoms = ranks.len();
let mut offsets = vec![0usize; n_atoms + 1];
for k in 0..n_atoms {
offsets[k + 1] = offsets[k] + basis_sizes[k] * ranks[k];
}
let dim = offsets[n_atoms];
let mut h = Array2::<f64>::zeros((dim, dim));
for blk in blocks {
let (r_i, r_j) = (ranks[blk.atom_i], ranks[blk.atom_j]);
let (off_i, off_j) = (offsets[blk.atom_i], offsets[blk.atom_j]);
let (m_i, m_j) = blk.g.dim();
for li in 0..m_i {
for lj in 0..m_j {
for a in 0..r_i {
for b in 0..r_j {
h[[off_i + li * r_i + a, off_j + lj * r_j + b]] +=
blk.g[[li, lj]] * blk.w[[a, b]];
}
}
}
}
}
h
}
#[test]
pub(crate) fn factored_frame_kronecker_matches_dense_reference() {
let ranks = vec![2usize, 3];
let basis_sizes = vec![2usize, 3];
let g00 = array![[3.0_f64, 0.5], [0.5, 4.0]];
let g11 = array![[2.0_f64, 0.4, -0.2], [0.4, 5.0, 0.6], [-0.2, 0.6, 1.5]];
let g01 = array![[0.2_f64, -0.1, 0.0], [0.3, 0.1, -0.2]];
let g10 = g01.t().to_owned();
let w00 = Array2::<f64>::eye(2);
let w11 = Array2::<f64>::eye(3);
let w01 = array![[0.8_f64, 0.1, -0.05], [0.0, 0.7, 0.2]];
let w10 = w01.t().to_owned();
let blocks = vec![
FactoredFrameGBlock {
atom_i: 0,
atom_j: 0,
g: g00.clone(),
w: w00.clone(),
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 1,
g: g11.clone(),
w: w11.clone(),
},
FactoredFrameGBlock {
atom_i: 0,
atom_j: 1,
g: g01.clone(),
w: w01.clone(),
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 0,
g: g10.clone(),
w: w10.clone(),
},
];
let op = FactoredFrameKroneckerOp::new(ranks.clone(), basis_sizes.clone(), blocks.clone())
.expect("op");
assert_eq!(op.dim(), 13);
let reference = factored_reference_dense(&ranks, &basis_sizes, &blocks);
let dense = op.to_dense();
for i in 0..13 {
for j in 0..13 {
assert!(
(dense[[i, j]] - reference[[i, j]]).abs() < 1e-12,
"to_dense mismatch at ({i},{j}): {} vs {}",
dense[[i, j]],
reference[[i, j]]
);
}
}
let x: Vec<f64> = (0..13).map(|i| 0.13 * (i as f64) - 0.4).collect();
let mut y = vec![0.0_f64; 13];
op.matvec(&x, &mut y);
for i in 0..13 {
let mut expect = 0.0;
for j in 0..13 {
expect += reference[[i, j]] * x[j];
}
assert!(
(y[i] - expect).abs() < 1e-10,
"matvec mismatch at {i}: {} vs {expect}",
y[i]
);
}
let mut diag = vec![0.0_f64; 13];
op.diagonal(&mut diag);
for i in 0..13 {
assert!(
(diag[i] - reference[[i, i]]).abs() < 1e-12,
"diagonal mismatch at {i}"
);
}
let offsets_ranges = [0..4usize, 4..13usize];
for id in 0..2 {
let b = offsets_ranges[id].end - offsets_ranges[id].start;
let mut blk = Array2::<f64>::zeros((b, b));
op.block(BetaBlockId(id), &offsets_ranges, &mut blk);
for bi in 0..b {
for bj in 0..b {
let gi = offsets_ranges[id].start + bi;
let gj = offsets_ranges[id].start + bj;
assert!(
(blk[[bi, bj]] - reference[[gi, gj]]).abs() < 1e-12,
"block {id} mismatch at ({bi},{bj})"
);
}
}
}
}
#[test]
pub(crate) fn factored_frame_kronecker_reduces_to_sparse_block_at_full_rank() {
let p = 2usize;
let g00 = array![[3.0_f64, 0.5], [0.5, 4.0]];
let g11 = array![[2.0_f64, 0.4], [0.4, 5.0]];
let g01 = array![[0.2_f64, -0.1], [0.3, 0.1]];
let g10 = g01.t().to_owned();
let ident = Array2::<f64>::eye(p);
let factored = FactoredFrameKroneckerOp::new(
vec![p, p],
vec![2, 2],
vec![
FactoredFrameGBlock {
atom_i: 0,
atom_j: 0,
g: g00.clone(),
w: ident.clone(),
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 1,
g: g11.clone(),
w: ident.clone(),
},
FactoredFrameGBlock {
atom_i: 0,
atom_j: 1,
g: g01.clone(),
w: ident.clone(),
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 0,
g: g10.clone(),
w: ident.clone(),
},
],
)
.expect("factored op");
let sparse = SparseBlockKroneckerPenaltyOp {
p,
dim_a: 4,
k: 8,
blocks: vec![
SparseGBlock {
row_off: 0,
col_off: 0,
data: g00,
},
SparseGBlock {
row_off: 2,
col_off: 2,
data: g11,
},
SparseGBlock {
row_off: 0,
col_off: 2,
data: g01,
},
SparseGBlock {
row_off: 2,
col_off: 0,
data: g10,
},
],
};
assert_eq!(factored.dim(), sparse.dim());
let x: Vec<f64> = (0..8).map(|i| 0.2 * (i as f64) - 0.5).collect();
let mut yf = vec![0.0_f64; 8];
let mut ys = vec![0.0_f64; 8];
factored.matvec(&x, &mut yf);
sparse.matvec(&x, &mut ys);
for i in 0..8 {
assert!(
(yf[i] - ys[i]).abs() < 1e-12,
"full-rank factored op must equal SparseBlockKronecker at {i}: {} vs {}",
yf[i],
ys[i]
);
}
}
pub(crate) fn mgs_orthonormalize(a: &Array2<f64>) -> Array2<f64> {
let (p, r) = a.dim();
let mut q = a.clone();
for j in 0..r {
for i in 0..j {
let mut dot = 0.0;
for c in 0..p {
dot += q[[c, i]] * q[[c, j]];
}
for c in 0..p {
q[[c, j]] -= dot * q[[c, i]];
}
}
let mut nrm = 0.0;
for c in 0..p {
nrm += q[[c, j]] * q[[c, j]];
}
let nrm = nrm.sqrt();
assert!(nrm > 1e-9, "mgs column {j} degenerate");
for c in 0..p {
q[[c, j]] /= nrm;
}
}
q
}
#[test]
pub(crate) fn frame_output_gram_orthonormal_is_identity() {
let p = 5usize;
let r = 3usize;
let mut seed = Array2::<f64>::zeros((p, r));
for c in 0..p {
for a in 0..r {
seed[[c, a]] = ((c as f64) * 0.37 + (a as f64) * 1.31).sin() + 0.1 * (a as f64);
}
}
let u = mgs_orthonormalize(&seed);
let g = frame_output_gram(u.view(), u.view());
assert_eq!(g.dim(), (r, r));
for a in 0..r {
for b in 0..r {
let expect = if a == b { 1.0 } else { 0.0 };
assert!(
(g[[a, b]] - expect).abs() < 1e-12,
"UᵀU not identity at ({a},{b}): {}",
g[[a, b]]
);
}
}
}
#[test]
pub(crate) fn from_frames_and_blocks_matches_dense_reference() {
let p = 4usize;
let basis_sizes = vec![2usize, 3];
let mut seed0 = Array2::<f64>::zeros((p, 2));
let mut seed1 = Array2::<f64>::zeros((p, 3));
for c in 0..p {
for a in 0..2 {
seed0[[c, a]] = ((c as f64) * 0.91 - (a as f64) * 0.5).cos() + 0.2 * (c as f64);
}
for a in 0..3 {
seed1[[c, a]] = ((c as f64) * 0.23 + (a as f64) * 1.7).sin() - 0.3 * (a as f64);
}
}
let u0 = mgs_orthonormalize(&seed0);
let u1 = mgs_orthonormalize(&seed1);
let g00 = array![[3.0_f64, 0.5], [0.5, 4.0]];
let g11 = array![[2.0_f64, 0.4, -0.2], [0.4, 5.0, 0.6], [-0.2, 0.6, 1.5]];
let g01 = array![[0.2_f64, -0.1, 0.0], [0.3, 0.1, -0.2]];
let g10 = g01.t().to_owned();
let mut g_blocks: std::collections::BTreeMap<(usize, usize), Array2<f64>> =
std::collections::BTreeMap::new();
g_blocks.insert((0, 0), g00.clone());
g_blocks.insert((1, 1), g11.clone());
g_blocks.insert((0, 1), g01.clone());
g_blocks.insert((1, 0), g10.clone());
let frames = vec![Some(u0.clone()), Some(u1.clone())];
let op = FactoredFrameKroneckerOp::from_frames_and_blocks(&frames, &basis_sizes, p, &g_blocks)
.expect("from_frames_and_blocks");
assert_eq!(op.dim(), 13);
let ranks = vec![2usize, 3];
let w00 = frame_output_gram(u0.view(), u0.view());
let w11 = frame_output_gram(u1.view(), u1.view());
let w01 = frame_output_gram(u0.view(), u1.view());
let w10 = frame_output_gram(u1.view(), u0.view());
let ref_blocks = vec![
FactoredFrameGBlock {
atom_i: 0,
atom_j: 0,
g: g00,
w: w00,
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 1,
g: g11,
w: w11,
},
FactoredFrameGBlock {
atom_i: 0,
atom_j: 1,
g: g01,
w: w01,
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 0,
g: g10,
w: w10,
},
];
let reference = factored_reference_dense(&ranks, &basis_sizes, &ref_blocks);
let dense = op.to_dense();
for i in 0..13 {
for j in 0..13 {
assert!(
(dense[[i, j]] - reference[[i, j]]).abs() < 1e-12,
"to_dense mismatch at ({i},{j}): {} vs {}",
dense[[i, j]],
reference[[i, j]]
);
}
}
let x: Vec<f64> = (0..13).map(|i| 0.17 * (i as f64) - 0.6).collect();
let mut y = vec![0.0_f64; 13];
op.matvec(&x, &mut y);
for i in 0..13 {
let mut expect = 0.0;
for j in 0..13 {
expect += reference[[i, j]] * x[j];
}
assert!(
(y[i] - expect).abs() < 1e-10,
"matvec mismatch at {i}: {} vs {expect}",
y[i]
);
}
}
#[test]
pub(crate) fn from_frames_and_blocks_mixed_framed_unframed() {
let p = 4usize;
let basis_sizes = vec![2usize, 2]; let mut seed0 = Array2::<f64>::zeros((p, 2));
for c in 0..p {
for a in 0..2 {
seed0[[c, a]] = ((c as f64) * 0.61 + (a as f64) * 0.9).cos() - 0.15 * (c as f64);
}
}
let u0 = mgs_orthonormalize(&seed0);
let g00 = array![[3.0_f64, 0.5], [0.5, 4.0]];
let g11 = array![[2.0_f64, 0.4], [0.4, 5.0]];
let g01 = array![[0.2_f64, -0.1], [0.3, 0.1]];
let g10 = g01.t().to_owned();
let mut g_blocks: std::collections::BTreeMap<(usize, usize), Array2<f64>> =
std::collections::BTreeMap::new();
g_blocks.insert((0, 0), g00.clone());
g_blocks.insert((1, 1), g11.clone());
g_blocks.insert((0, 1), g01.clone());
g_blocks.insert((1, 0), g10.clone());
let frames = vec![Some(u0.clone()), None];
let op = FactoredFrameKroneckerOp::from_frames_and_blocks(&frames, &basis_sizes, p, &g_blocks)
.expect("from_frames_and_blocks mixed");
assert_eq!(op.ranks, vec![2usize, 4]);
assert_eq!(op.dim(), 12);
let dense = op.to_dense();
let off1 = 4usize;
for li in 0..2 {
for lj in 0..2 {
for a in 0..4 {
for b in 0..4 {
let gi = off1 + li * 4 + a;
let gj = off1 + lj * 4 + b;
let expect = if a == b { g11[[li, lj]] } else { 0.0 };
assert!(
(dense[[gi, gj]] - expect).abs() < 1e-12,
"g_11 ⊗ I_4 mismatch at ({gi},{gj}): {} vs {expect}",
dense[[gi, gj]]
);
}
}
}
}
let ranks = vec![2usize, 4];
let ident_p = Array2::<f64>::eye(p);
let w00 = frame_output_gram(u0.view(), u0.view());
let w11 = frame_output_gram(ident_p.view(), ident_p.view());
let w01 = frame_output_gram(u0.view(), ident_p.view());
let w10 = frame_output_gram(ident_p.view(), u0.view());
let ref_blocks = vec![
FactoredFrameGBlock {
atom_i: 0,
atom_j: 0,
g: g00,
w: w00,
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 1,
g: g11.clone(),
w: w11,
},
FactoredFrameGBlock {
atom_i: 0,
atom_j: 1,
g: g01,
w: w01,
},
FactoredFrameGBlock {
atom_i: 1,
atom_j: 0,
g: g10,
w: w10,
},
];
let reference = factored_reference_dense(&ranks, &basis_sizes, &ref_blocks);
let x: Vec<f64> = (0..12).map(|i| 0.11 * (i as f64) - 0.4).collect();
let mut y = vec![0.0_f64; 12];
op.matvec(&x, &mut y);
for i in 0..12 {
let mut expect = 0.0;
for j in 0..12 {
expect += reference[[i, j]] * x[j];
}
assert!(
(y[i] - expect).abs() < 1e-10,
"mixed matvec mismatch at {i}: {} vs {expect}",
y[i]
);
}
}
#[test]
pub(crate) fn arrow_schur_matches_dense_reference_2x2() {
let n = 2;
let d = 2;
let k = 3;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[2.0_f64, 0.1], [0.1, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.0, 0.5], [0.2, 1.0, 0.0]];
sys.rows[0].gt = array![0.3_f64, -0.2];
sys.rows[1].htt = array![[1.5_f64, -0.1], [-0.1, 2.0]];
sys.rows[1].htbeta = array![[0.1_f64, 0.5, 0.0], [0.0, 0.3, 1.0]];
sys.rows[1].gt = array![-0.1_f64, 0.4];
sys.hbb = array![[4.0_f64, 0.2, 0.0], [0.2, 5.0, 0.1], [0.0, 0.1, 6.0],];
sys.gb = array![0.5_f64, -0.3, 0.2];
let (delta_t, delta_beta, _diag) = sys.solve(0.0, 0.0).expect("arrow-schur solve");
let streaming_options = ArrowSolveOptions::direct().with_streaming_chunk_size(Some(1));
let (delta_t_stream, delta_beta_stream, _diag_stream) = sys
.solve_with_options(0.0, 0.0, &streaming_options)
.expect("streaming arrow-schur solve");
assert_eq!(delta_beta, delta_beta_stream);
assert_eq!(delta_t, delta_t_stream);
let total = k + n * d;
let mut hjoint = Array2::<f64>::zeros((total, total));
let mut gjoint = Array1::<f64>::zeros(total);
for a in 0..k {
for b in 0..k {
hjoint[[a, b]] = sys.hbb[[a, b]];
}
gjoint[a] = sys.gb[a];
}
for i in 0..n {
let toff = k + i * d;
for a in 0..d {
for b in 0..d {
hjoint[[toff + a, toff + b]] = sys.rows[i].htt[[a, b]];
}
gjoint[toff + a] = sys.rows[i].gt[a];
for a2 in 0..k {
hjoint[[toff + a, a2]] = sys.rows[i].htbeta[[a, a2]];
hjoint[[a2, toff + a]] = sys.rows[i].htbeta[[a, a2]];
}
}
}
let lj = cholesky_lower(&hjoint).expect("dense ref PD");
let neg_g = gjoint.mapv(|v| -v);
let xref = cholesky_solve_vector(&lj, &neg_g);
for a in 0..k {
assert!(
(xref[a] - delta_beta[a]).abs() < 1e-10,
"β[{a}] mismatch: dense {} vs arrow {}",
xref[a],
delta_beta[a]
);
}
for i in 0..n {
for a in 0..d {
let dense = xref[k + i * d + a];
let arrow = delta_t[i * d + a];
assert!(
(dense - arrow).abs() < 1e-10,
"t[{i},{a}] mismatch: dense {dense} vs arrow {arrow}"
);
}
}
}
pub(crate) fn diagonal_arrow_fixture(row_min: f64, schur_min: f64) -> ArrowSchurSystem {
let mut sys = ArrowSchurSystem::new(2, 2, 2);
sys.rows[0].htt = array![[row_min, 0.0], [0.0, row_min + 1.0]];
sys.rows[1].htt = array![[row_min + 2.0, 0.0], [0.0, row_min + 3.0]];
for row in sys.rows.iter_mut() {
row.htbeta.fill(0.0);
row.gt.fill(0.0);
}
sys.hbb = array![[schur_min, 0.0], [0.0, schur_min + 1.0]];
sys.gb.fill(0.0);
sys
}
pub(crate) fn diagonal_fixture_dense_lambda_min(sys: &ArrowSchurSystem) -> f64 {
let mut out = f64::INFINITY;
for row in &sys.rows {
for axis in 0..row.htt.nrows() {
out = out.min(row.htt[[axis, axis]]);
}
}
for axis in 0..sys.hbb.nrows() {
out = out.min(sys.hbb[[axis, axis]]);
}
out
}
#[test]
pub(crate) fn arrow_factor_min_pivot_matches_dense_lambda_min_ordering() {
let weak = diagonal_arrow_fixture(0.2, 0.8);
let strong = diagonal_arrow_fixture(0.7, 1.2);
let options = ArrowSolveOptions::direct();
let (_dt_w, _db_w, weak_cache) =
solve_arrow_newton_step_with_options(&weak, 0.0, 0.0, &options)
.expect("weak diagonal fixture should factor");
let (_dt_s, _db_s, strong_cache) =
solve_arrow_newton_step_with_options(&strong, 0.0, 0.0, &options)
.expect("strong diagonal fixture should factor");
let weak_lambda = diagonal_fixture_dense_lambda_min(&weak);
let strong_lambda = diagonal_fixture_dense_lambda_min(&strong);
assert!(weak_lambda < strong_lambda);
let weak_pivot = arrow_factor_min_pivot(&weak_cache)
.min_pivot
.expect("weak pivot");
let strong_pivot = arrow_factor_min_pivot(&strong_cache)
.min_pivot
.expect("strong pivot");
assert_abs_diff_eq!(weak_pivot, weak_lambda, epsilon = 1.0e-14);
assert_abs_diff_eq!(strong_pivot, strong_lambda, epsilon = 1.0e-14);
assert!(weak_pivot < strong_pivot);
}
pub(crate) fn quartic_counterexample_value(t: f64) -> f64 {
0.25 * t.powi(4) - t * t + 2.0 * t
}
pub(crate) fn quartic_counterexample_system(t: f64) -> ArrowSchurSystem {
let mut sys = ArrowSchurSystem::new(1, 1, 0);
sys.rows[0].gt = array![t.powi(3) - 2.0 * t + 2.0];
sys.rows[0].htt = array![[3.0 * t * t - 2.0]];
sys
}
#[test]
pub(crate) fn proximal_correction_breaks_scalar_newton_cycle() {
let options = ArrowSolveOptions::direct();
let correction = ArrowProximalCorrectionOptions {
initial_ridge: 1e-8,
ridge_growth: 10.0,
max_attempts: 16,
armijo_c1: 1e-4,
gradient_tolerance: 1e-12,
convergence_objective_rel_tol: DEFAULT_PROXIMAL_CONVERGENCE_REL_TOL,
};
let mut t = 0.0_f64;
let mut previous_value = quartic_counterexample_value(t);
for _ in 0..32 {
let sys = quartic_counterexample_system(t);
let accepted = solve_arrow_newton_step_with_proximal_correction(
&sys,
0.0,
0.0,
previous_value,
&options,
&correction,
|delta_t, _delta_beta| quartic_counterexample_value(t + delta_t[0]),
)
.expect("proximal correction should accept a descent step");
assert!(
accepted.trial_objective_value <= previous_value,
"accepted step must not increase the objective"
);
t += accepted.delta_t[0];
previous_value = accepted.trial_objective_value;
}
let final_grad = t.powi(3) - 2.0 * t + 2.0;
assert!(
final_grad.abs() < 1e-7,
"corrected iteration should reach the scalar critical point; t={t}, g={final_grad}"
);
}
#[test]
pub(crate) fn factor_one_row_conditions_barely_pd_block_via_ridge() {
let d = 2;
let k = 2;
let mut row = ArrowRowBlock::new(d, k);
row.htt = array![[1.0_f64, 1.0], [1.0, 1.0 + 1e-14]];
row.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
row.gt = array![0.0_f64, 0.0];
let factor = factor_one_row(&row, 0.0, d, 0, false).expect(
"barely-PD H_tt must be CONDITIONED by per-row ridge escalation, not rejected (gam#578)",
);
let kappa = cholesky_factor_kappa_estimate(&factor);
assert!(
kappa.is_finite() && kappa <= safe_spd_kappa_max(d),
"conditioned factor must be within the safe-inversion κ ceiling; got κ={kappa:e}"
);
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += factor[[i, kk]] * factor[[j, kk]];
}
if i == j {
assert!(
acc >= row.htt[[i, j]] - 1e-12,
"diagonal of L Lᵀ must be H_tt + (nonneg ridge) at ({i},{j}): \
{acc} vs {}",
row.htt[[i, j]]
);
} else {
assert!(
(acc - row.htt[[i, j]]).abs() < 1e-9,
"off-diagonal of L Lᵀ must equal H_tt at ({i},{j}): {acc} vs {}",
row.htt[[i, j]]
);
}
}
}
let factor = factor_one_row(&row, 0.0, d, 0, true)
.expect("tolerate_ill_conditioning must accept a barely-PD-but-PD block");
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += factor[[i, kk]] * factor[[j, kk]];
}
assert!(
(acc - row.htt[[i, j]]).abs() < 1e-12,
"tolerated factor must satisfy L Lᵀ = H_tt at ({i},{j})"
);
}
}
let mut row_npd = ArrowRowBlock::new(d, k);
row_npd.htt = array![[1.0_f64, 2.0], [2.0, 1.0]]; row_npd.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
row_npd.gt = array![0.0_f64, 0.0];
let npd = factor_one_row(&row_npd, 0.0, d, 0, true);
assert!(
matches!(npd, Err(ArrowSchurError::PerRowFactorFailed { .. })),
"non-PD block must error even with tolerate_ill_conditioning; got {npd:?}"
);
let mut row_ok = ArrowRowBlock::new(d, k);
row_ok.htt = array![[2.0_f64, 0.1], [0.1, 3.0]];
row_ok.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
row_ok.gt = array![0.0_f64, 0.0];
factor_one_row(&row_ok, 0.0, d, 0, false)
.expect("well-conditioned block must still factor at ridge_t=0");
let mut row_nan = ArrowRowBlock::new(d, k);
row_nan.htt = array![[f64::NAN, 0.0], [0.0, 1.0]];
row_nan.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
row_nan.gt = array![0.0_f64, 0.0];
let nan = factor_one_row(&row_nan, 1.0e-6, d, 0, false);
assert!(
matches!(nan, Err(ArrowSchurError::PerRowFactorFailed { .. })),
"non-finite block must surface PerRowFactorFailed, not loop or condition; got {nan:?}"
);
}
#[test]
pub(crate) fn factor_one_row_conditions_scalar_tiny_pivot_via_ridge() {
let d = 1;
let k = 1;
let mut row = ArrowRowBlock::new(d, k);
row.htt = array![[1.0e-20_f64]];
row.htbeta = array![[1.0_f64]];
row.gt = array![0.0_f64];
let factor = factor_one_row(&row, 0.0, d, 0, false)
.expect("tiny positive scalar pivot must be ridge-conditioned");
let pivot = factor[[0, 0]] * factor[[0, 0]];
assert!(
pivot >= safe_spd_pivot_min(1.0),
"scalar pivot must be lifted above the absolute safe floor; got {pivot:e}"
);
assert!(
pivot > row.htt[[0, 0]],
"scalar block must not be accepted at the raw tiny pivot"
);
let tolerated = factor_one_row(&row, 0.0, d, 0, true)
.expect("tolerated log-det path must accept a positive scalar block");
let raw_pivot = tolerated[[0, 0]] * tolerated[[0, 0]];
assert!(
(raw_pivot - row.htt[[0, 0]]).abs() < 1.0e-30,
"tolerated factor must remain the raw scalar Cholesky"
);
}
#[test]
pub(crate) fn evidence_row_spectral_deflates_indefinite_non_gauge_block_at_unit_stiffness() {
let d = 3usize;
let k = 2usize;
let mut indef = ArrowRowBlock::new(d, k);
indef.htt = array![[4.0_f64, 0.0, 0.0], [0.0, 1.0e-10, 0.0], [0.0, 0.0, -1.0],];
indef.htbeta = array![[1.0_f64, 0.0], [0.0, 1.0], [0.5, 0.5]];
indef.gt = array![0.0_f64, 0.0, 0.0];
let gauge_e1 = array![0.0_f64, 1.0, 0.0];
assert!(
factor_gauge_deflated_evidence_row(&indef, d, std::slice::from_ref(&gauge_e1)).is_none(),
"gauge deflation must NOT rescue a genuinely-indefinite non-gauge direction"
);
let spectral = factor_spectral_deflated_evidence_row(&indef, d)
.expect("spectral deflation must condition the indefinite non-gauge block");
assert_eq!(
spectral.gauge_deflated_directions, 2,
"the two sub-floor eigen-directions (−1.0 and 1e-10) must be unit-deflated"
);
let ls = &spectral.factor;
let mut recon = Array2::<f64>::zeros((d, d));
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += ls[[i, kk]] * ls[[j, kk]];
}
recon[[i, j]] = acc;
}
}
assert!(
(recon[[0, 0]] - 4.0).abs() < 1.0e-9,
"genuine direction e_0 must be preserved exactly; got {}",
recon[[0, 0]]
);
assert!(
(recon[[2, 2]] - 1.0).abs() < 1.0e-9,
"the genuinely-indefinite direction e_2 must be deflated to unit \
stiffness +1 (log 1 = 0, ρ-independent), NOT ridge-damped; got {}",
recon[[2, 2]]
);
let factored = factor_one_row_result(
&indef,
0.0,
d,
0,
true,
std::slice::from_ref(&gauge_e1),
true,
)
.expect("undamped evidence factor must condition the indefinite block by deflation");
for a in 0..d {
assert!(
factored.factor[[a, a]].is_finite() && factored.factor[[a, a]] > 0.0,
"deflated evidence factor must have a finite positive pivot at {a}; got {}",
factored.factor[[a, a]]
);
}
let mut pd = ArrowRowBlock::new(d, k);
pd.htt = array![[4.0_f64, 0.0, 0.0], [0.0, 1.0e-10, 0.0], [0.0, 0.0, 2.0],];
pd.htbeta = indef.htbeta.clone();
pd.gt = array![0.0_f64, 0.0, 0.0];
let result = factor_one_row_result(&pd, 0.0, d, 0, true, std::slice::from_ref(&gauge_e1), true)
.expect("undamped evidence factor must succeed on the genuinely-PD stationary block");
assert_eq!(
result.gauge_deflated_directions, 1,
"exactly the single near-null gauge direction must be deflated"
);
let l = &result.factor;
let mut reconstructed = Array2::<f64>::zeros((d, d));
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += l[[i, kk]] * l[[j, kk]];
}
reconstructed[[i, j]] = acc;
}
}
assert!(
(reconstructed[[0, 0]] - 4.0).abs() < 1.0e-12,
"stationary factor must be the EXACT Cholesky on the genuine direction e_0; got {}",
reconstructed[[0, 0]]
);
assert!(
(reconstructed[[2, 2]] - 2.0).abs() < 1.0e-12,
"stationary factor must be the EXACT Cholesky on the genuine direction e_2; got {}",
reconstructed[[2, 2]]
);
assert!(
(reconstructed[[1, 1]] - (1.0 + 1.0e-10)).abs() < 1.0e-9,
"deflated gauge direction must carry exactly the +1 unit stiffness; got {}",
reconstructed[[1, 1]]
);
}
#[test]
pub(crate) fn evidence_row_recovers_intrinsic_dimension_flat_block_without_gauge_1273() {
let d = 2usize; let k = 1usize;
let mut flat = ArrowRowBlock::new(d, k);
flat.htt = array![[3.0_f64, 0.0], [0.0, 0.0]];
flat.htbeta = array![[1.0_f64], [0.5]];
flat.gt = array![0.0_f64, 0.0];
let refused = factor_one_row_result(&flat, 0.0, d, 0, true, &[], false);
assert!(
refused.is_err(),
"fixture precondition: the rank-deficient flat H_tt must be refused by \
the undamped evidence factor when spectral deflation is withheld and no \
gauge is supplied — the exact pre-#1273 abort"
);
let recovered = factor_one_row_result(&flat, 0.0, d, 0, true, &[], true).expect(
"spectral deflation must recover the intrinsic-dimension flat H_tt block on \
the SAE evidence path even with no supplied gauge (#1273)",
);
assert_eq!(
recovered.gauge_deflated_directions, 1,
"exactly the single intrinsic-dimension flat direction must be deflated"
);
for a in 0..d {
assert!(
recovered.factor[[a, a]].is_finite() && recovered.factor[[a, a]] > 0.0,
"recovered evidence factor must have a finite positive pivot at {a}; got {}",
recovered.factor[[a, a]]
);
}
let l = &recovered.factor;
let mut recon = Array2::<f64>::zeros((d, d));
for i in 0..d {
for j in 0..d {
let mut acc = 0.0_f64;
for kk in 0..d {
acc += l[[i, kk]] * l[[j, kk]];
}
recon[[i, j]] = acc;
}
}
assert!(
(recon[[0, 0]] - 3.0).abs() < 1.0e-12,
"genuine ring direction e_0 must be preserved exactly; got {}",
recon[[0, 0]]
);
assert!(
(recon[[1, 1]] - 1.0).abs() < 1.0e-9,
"the intrinsic-dimension flat direction e_1 must be unit-stiffness \
deflated to exactly +1 (log 1 = 0, ρ-independent), NOT ridge-damped; got {}",
recon[[1, 1]]
);
}
#[test]
pub(crate) fn evidence_row_spectral_deflation_count_is_stable_across_the_cutoff() {
let d = 3usize;
let k = 1usize;
let floor = SPECTRAL_DEFLATION_REL_FLOOR * 4.0;
let near_floor_lo = floor * 0.995; let near_floor_hi = floor * 1.05;
let mut block_lo = ArrowRowBlock::new(d, k);
block_lo.htt = array![
[4.0_f64, 0.0, 0.0],
[0.0, near_floor_lo, 0.0],
[0.0, 0.0, -1.0],
];
block_lo.htbeta = array![[1.0_f64], [0.0], [0.5]];
block_lo.gt = array![0.0_f64, 0.0, 0.0];
let mut block_hi = block_lo.clone();
block_hi.htt[[1, 1]] = near_floor_hi;
let lo = factor_spectral_deflated_evidence_row(&block_lo, d)
.expect("indefinite block must spectrally deflate (lo iterate)");
let hi = factor_spectral_deflated_evidence_row(&block_hi, d)
.expect("indefinite block must spectrally deflate (hi iterate)");
assert_eq!(
lo.gauge_deflated_directions, 1,
"lo iterate: only the genuine indefinite direction is deflated"
);
assert_eq!(
hi.gauge_deflated_directions, lo.gauge_deflated_directions,
"deflation count must be STABLE across an eigenvalue straddling the \
bare cutoff — the quotient-dimension guard must not trip mid-walk"
);
let bare_count = |lambda: f64| -> usize {
let mut c = 0usize;
for &l in &[4.0_f64, lambda, -1.0] {
if !(l.is_finite() && l > floor) {
c += 1;
}
}
c
};
assert_ne!(
bare_count(near_floor_lo),
bare_count(near_floor_hi),
"test must straddle the bare cutoff (else it proves nothing): the \
un-banded decision flips the count, the banded one does not"
);
}
#[test]
pub(crate) fn sys_htbeta_materialize_row_sums_operator_and_dense_slab() {
let mut sys = ArrowSchurSystem::new(1, 1, 3);
sys.rows[0].htbeta = array![[0.25_f64, 0.5, 0.75]];
sys.activate_dense_htbeta_supplement();
sys.set_row_htbeta_operator(
|row_idx, x, out| {
assert_eq!(row_idx, 0);
out[0] += 2.0 * x[0] - x[1] + 0.5 * x[2];
},
|row_idx, v, out| {
assert_eq!(row_idx, 0);
out[0] += 2.0 * v[0];
out[1] -= v[0];
out[2] += 0.5 * v[0];
},
);
let htbeta = sys_htbeta_materialize_row(&sys, 0, &sys.rows[0]).unwrap();
assert_eq!(htbeta, array![[2.25_f64, -0.5, 1.25]]);
}
#[test]
pub(crate) fn lm_escalation_recovers_from_ill_conditioned_row() {
let n = 1;
let d = 2;
let k = 2;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[1.0_f64, 1.0], [1.0, 1.0 + 1e-14]];
sys.rows[0].htbeta = array![[1.0_f64, 0.0], [0.0, 1.0]];
sys.rows[0].gt = array![0.1_f64, -0.2];
sys.hbb = array![[4.0_f64, 0.2], [0.2, 5.0]];
sys.gb = array![0.3_f64, -0.1];
let factor = factor_one_row(&sys.rows[0], 0.0, d, 0, false)
.expect("barely-PD row must be conditioned, not rejected (gam#578)");
let kappa = cholesky_factor_kappa_estimate(&factor);
assert!(
kappa.is_finite() && kappa <= safe_spd_kappa_max(d),
"conditioned per-row factor must satisfy the κ ceiling; got κ={kappa:e}"
);
let options = ArrowSolveOptions::direct();
let (delta_t, delta_beta, diag) = solve_with_lm_escalation_inner(&sys, 0.0, 0.0, &options)
.expect("LM escalation must recover from a barely-PD per-row block");
for v in delta_t.iter().chain(delta_beta.iter()) {
assert!(v.is_finite(), "recovered step must be finite: {v}");
}
assert!(
diag.ridge_escalations <= DEFAULT_PROXIMAL_MAX_ATTEMPTS,
"recovery must use a bounded number of outer ridge escalations; got {}",
diag.ridge_escalations
);
}
#[test]
pub(crate) fn latent_block_inverse_diagonal_matches_dense() {
let n = 3usize;
let d = 2usize;
let k = 2usize;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[4.0_f64, 0.5], [0.5, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.2], [-0.3, 0.7]];
sys.rows[1].htt = array![[5.0_f64, -0.4], [-0.4, 2.5]];
sys.rows[1].htbeta = array![[0.6_f64, -0.1], [0.4, 0.9]];
sys.rows[2].htt = array![[3.5_f64, 0.2], [0.2, 4.5]];
sys.rows[2].htbeta = array![[-0.2_f64, 0.5], [0.8, -0.6]];
for row in sys.rows.iter_mut() {
row.gt = array![0.0_f64, 0.0];
}
sys.hbb = array![[12.0_f64, 0.7], [0.7, 10.0]];
sys.gb = array![0.0_f64, 0.0];
let options = ArrowSolveOptions::direct();
let (_delta_t, _delta_beta, cache) =
solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("direct arrow solve should factor this SPD system");
let dim = n * d + k;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for r in 0..d {
for c in 0..d {
h[[base + r, base + c]] = sys.rows[i].htt[[r, c]];
}
}
for r in 0..d {
for c in 0..k {
let v = sys.rows[i].htbeta[[r, c]];
h[[base + r, n * d + c]] = v;
h[[n * d + c, base + r]] = v;
}
}
}
for r in 0..k {
for c in 0..k {
h[[n * d + r, n * d + c]] = sys.hbb[[r, c]];
}
}
let l = cholesky_lower(&h).expect("assembled bordered H must be SPD");
let h_inv = cholesky_solve_matrix(&l, &Array2::<f64>::eye(dim));
let diag = cache
.latent_block_inverse_diagonal()
.expect("dense Schur cache must support the selected-inverse diagonal");
assert_eq!(diag.len(), n * d);
for i in 0..n {
for j in 0..d {
let idx = i * d + j; let expected = h_inv[[idx, idx]];
let got = diag[idx];
assert!(
(got - expected).abs() < 1e-9,
"row {i} axis {j}: selected-inverse diag {got} vs dense {expected}"
);
}
}
let trace_selected: f64 = diag.iter().sum();
let trace_dense: f64 = (0..n * d).map(|idx| h_inv[[idx, idx]]).sum();
assert!(
(trace_selected - trace_dense).abs() < 1e-9,
"full latent trace {trace_selected} vs dense {trace_dense}"
);
}
#[test]
pub(crate) fn full_inverse_apply_matches_dense_inverse_and_newton_step() {
let n = 3usize;
let d = 2usize;
let k = 2usize;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[4.0_f64, 0.5], [0.5, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.2], [-0.3, 0.7]];
sys.rows[0].gt = array![0.4_f64, -0.7];
sys.rows[1].htt = array![[5.0_f64, -0.4], [-0.4, 2.5]];
sys.rows[1].htbeta = array![[0.6_f64, -0.1], [0.4, 0.9]];
sys.rows[1].gt = array![-0.2_f64, 0.9];
sys.rows[2].htt = array![[3.5_f64, 0.2], [0.2, 4.5]];
sys.rows[2].htbeta = array![[-0.2_f64, 0.5], [0.8, -0.6]];
sys.rows[2].gt = array![1.1_f64, 0.3];
sys.hbb = array![[12.0_f64, 0.7], [0.7, 10.0]];
sys.gb = array![0.5_f64, -0.8];
let options = ArrowSolveOptions::direct();
let (delta_t, delta_beta, cache) =
solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("direct arrow solve should factor this SPD system");
let mut g_t = Array1::<f64>::zeros(n * d);
for i in 0..n {
for j in 0..d {
g_t[i * d + j] = sys.rows[i].gt[j];
}
}
let (u_t, u_beta) = cache
.full_inverse_apply(g_t.view(), sys.gb.view())
.expect("full_inverse_apply on the ridge-0 Direct cache");
for idx in 0..n * d {
assert!(
(u_t[idx] + delta_t[idx]).abs() < 1e-10,
"t[{idx}]: full_inverse_apply {} vs −(Newton step) {}",
u_t[idx],
-delta_t[idx]
);
}
for c in 0..k {
assert!(
(u_beta[c] + delta_beta[c]).abs() < 1e-10,
"beta[{c}]: full_inverse_apply {} vs −(Newton step) {}",
u_beta[c],
-delta_beta[c]
);
}
let dim = n * d + k;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for r in 0..d {
for c in 0..d {
h[[base + r, base + c]] = sys.rows[i].htt[[r, c]];
}
for c in 0..k {
let v = sys.rows[i].htbeta[[r, c]];
h[[base + r, n * d + c]] = v;
h[[n * d + c, base + r]] = v;
}
}
}
for r in 0..k {
for c in 0..k {
h[[n * d + r, n * d + c]] = sys.hbb[[r, c]];
}
}
let l = cholesky_lower(&h).expect("assembled bordered H must be SPD");
let mut w_full = Array1::<f64>::zeros(dim);
for (idx, v) in w_full.iter_mut().enumerate() {
*v = 0.3 + 0.17 * (idx as f64) * (if idx % 2 == 0 { 1.0 } else { -1.0 });
}
let dense_u = cholesky_solve_vector(&l, &w_full);
let (u_t2, u_beta2) = cache
.full_inverse_apply(
w_full.slice(ndarray::s![..n * d]),
w_full.slice(ndarray::s![n * d..]),
)
.expect("full_inverse_apply on arbitrary RHS");
for idx in 0..n * d {
assert!(
(u_t2[idx] - dense_u[idx]).abs() < 1e-10,
"t[{idx}]: full_inverse_apply {} vs dense {}",
u_t2[idx],
dense_u[idx]
);
}
for c in 0..k {
assert!(
(u_beta2[c] - dense_u[n * d + c]).abs() < 1e-10,
"beta[{c}]: full_inverse_apply {} vs dense {}",
u_beta2[c],
dense_u[n * d + c]
);
}
}
#[test]
pub(crate) fn schur_inverse_beta_block_matches_dense() {
let n = 3usize;
let d = 2usize;
let k = 2usize;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[4.0_f64, 0.5], [0.5, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.2], [-0.3, 0.7]];
sys.rows[1].htt = array![[5.0_f64, -0.4], [-0.4, 2.5]];
sys.rows[1].htbeta = array![[0.6_f64, -0.1], [0.4, 0.9]];
sys.rows[2].htt = array![[3.5_f64, 0.2], [0.2, 4.5]];
sys.rows[2].htbeta = array![[-0.2_f64, 0.5], [0.8, -0.6]];
for row in sys.rows.iter_mut() {
row.gt = array![0.0_f64, 0.0];
}
sys.hbb = array![[12.0_f64, 0.7], [0.7, 10.0]];
sys.gb = array![0.0_f64, 0.0];
let options = ArrowSolveOptions::direct();
let (_dt, _db, cache) = solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("direct arrow solve should factor this SPD system");
let dim = n * d + k;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for r in 0..d {
for c in 0..d {
h[[base + r, base + c]] = sys.rows[i].htt[[r, c]];
}
}
for r in 0..d {
for c in 0..k {
let v = sys.rows[i].htbeta[[r, c]];
h[[base + r, n * d + c]] = v;
h[[n * d + c, base + r]] = v;
}
}
}
for r in 0..k {
for c in 0..k {
h[[n * d + r, n * d + c]] = sys.hbb[[r, c]];
}
}
let l = cholesky_lower(&h).expect("assembled bordered H must be SPD");
let dense_log_det: f64 = (0..l.nrows()).map(|i| 2.0 * l[[i, i]].ln()).sum();
let cached_log_det = cache
.joint_hessian_log_det
.expect("direct undamped solve must cache the joint Hessian log-det");
assert!(
(cached_log_det - dense_log_det).abs() < 1.0e-9,
"cached joint Hessian log-det {cached_log_det} vs dense {dense_log_det}"
);
let h_inv = cholesky_solve_matrix(&l, &Array2::<f64>::eye(dim));
let beta_off = n * d;
for col in 0..k {
let mut e = Array1::<f64>::zeros(k);
e[col] = 1.0;
let x = cache
.schur_inverse_apply(e.view())
.expect("dense Schur cache must support schur_inverse_apply");
for r in 0..k {
let expected = h_inv[[beta_off + r, beta_off + col]];
assert!(
(x[r] - expected).abs() < 1e-9,
"S_β⁻¹[{r},{col}] {} vs dense {expected}",
x[r]
);
}
}
let a_scalar = 0.75_f64;
let mut trace = 0.0_f64;
for col in 0..k {
let mut m_col = Array1::<f64>::zeros(k);
m_col[col] = a_scalar;
let z = cache
.schur_inverse_apply(m_col.view())
.expect("schur_inverse_apply");
trace += z[col];
}
let trace_dense: f64 = a_scalar
* (0..k)
.map(|j| h_inv[[beta_off + j, beta_off + j]])
.sum::<f64>();
assert!(
(trace - trace_dense).abs() < 1e-9,
"Kron-block trace {trace} vs dense {trace_dense}"
);
let full = cache
.schur_inverse_block(0..k)
.expect("dense Schur cache must support schur_inverse_block");
assert_eq!(full.dim(), (k, k));
for r in 0..k {
for c in 0..k {
let expected = h_inv[[beta_off + r, beta_off + c]];
assert!(
(full[[r, c]] - expected).abs() < 1e-9,
"block[{r},{c}] {} vs dense {expected}",
full[[r, c]]
);
assert!(
(full[[r, c]] - full[[c, r]]).abs() < 1e-12,
"schur_inverse_block must be symmetric at [{r},{c}]"
);
}
}
let sub = cache
.schur_inverse_block(1..k)
.expect("interior block must be supported");
assert_eq!(sub.dim(), (k - 1, k - 1));
assert!(
(sub[[0, 0]] - h_inv[[beta_off + 1, beta_off + 1]]).abs() < 1e-9,
"interior block [1,1] {} vs dense {}",
sub[[0, 0]],
h_inv[[beta_off + 1, beta_off + 1]]
);
assert!(cache.schur_inverse_block(0..(k + 1)).is_err());
}
#[test]
pub(crate) fn ill_conditioning_tolerated_returns_cache_with_exact_logdet() {
let n = 2usize;
let d = 2usize;
let k = 2usize;
let mut sys = ArrowSchurSystem::new(n, d, k);
sys.rows[0].htt = array![[1.0_f64, 0.0], [0.0, 1e-9]];
sys.rows[0].htbeta = array![[0.3_f64, 0.1], [0.05, 0.2]];
sys.rows[1].htt = array![[2.0_f64, 0.0], [0.0, 2e-9]];
sys.rows[1].htbeta = array![[0.2_f64, -0.1], [0.1, 0.15]];
for row in sys.rows.iter_mut() {
row.gt = array![0.0_f64, 0.0];
}
sys.hbb = array![[5.0_f64, 0.3], [0.3, 4.0]];
sys.gb = array![0.0_f64, 0.0];
for i in 0..n {
let factor = factor_one_row(&sys.rows[i], 0.0, d, i, false)
.expect("barely-PD row must be conditioned, not rejected (gam#578)");
let kappa = cholesky_factor_kappa_estimate(&factor);
assert!(
kappa.is_finite() && kappa <= safe_spd_kappa_max(d),
"conditioned per-row factor {i} must satisfy the safe-Schur κ ceiling; got κ={kappa:e}"
);
}
let single_shot =
solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &ArrowSolveOptions::direct());
assert!(
matches!(
single_shot,
Err(ArrowSchurError::SchurFactorFailed { .. })
| Err(ArrowSchurError::PerRowFactorIllConditioned { .. })
| Err(ArrowSchurError::PcgFailed { .. })
),
"single-shot strict direct() cannot keep the dense Schur PD with per-row \
conditioning alone; expected a recoverable factorization error, got {single_shot:?}"
);
let (strict_dt, strict_db, strict_diag) =
solve_with_lm_escalation_inner(&sys, 0.0, 0.0, &ArrowSolveOptions::direct())
.expect("LM escalation must recover the ill-conditioned strict solve (gam#845)");
for v in strict_dt.iter().chain(strict_db.iter()) {
assert!(v.is_finite(), "recovered strict step must be finite: {v}");
}
assert!(
strict_diag.ridge_escalations <= DEFAULT_PROXIMAL_MAX_ATTEMPTS,
"recovery must use a bounded number of outer ridge escalations; got {}",
strict_diag.ridge_escalations
);
let opts = ArrowSolveOptions::direct().with_ill_conditioning_tolerated();
let tolerate_indefinite = solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &opts);
assert!(
matches!(
tolerate_indefinite,
Err(ArrowSchurError::SchurFactorFailed { .. })
),
"tolerate mode must refuse the indefinite assembled H rather than fabricate \
a log-determinant; got {tolerate_indefinite:?}"
);
let mut pd_sys = ArrowSchurSystem::new(n, d, k);
pd_sys.rows[0].htt = array![[1.0_f64, 0.0], [0.0, 1e-9]];
pd_sys.rows[0].htbeta = array![[0.3_f64, 0.1], [3e-6, 1e-6]];
pd_sys.rows[1].htt = array![[2.0_f64, 0.0], [0.0, 2e-9]];
pd_sys.rows[1].htbeta = array![[0.2_f64, -0.1], [2e-6, 4e-6]];
for row in pd_sys.rows.iter_mut() {
row.gt = array![0.0_f64, 0.0];
}
pd_sys.hbb = array![[5.0_f64, 0.3], [0.3, 4.0]];
pd_sys.gb = array![0.0_f64, 0.0];
let (_dt, _db, cache) = solve_arrow_newton_step_with_options(&pd_sys, 0.0, 0.0, &opts)
.expect("tolerate mode must factor the ill-conditioned-but-PD system");
let (log_det_tt, log_det_schur) = cache.arrow_log_det();
let log_det_cache = log_det_tt + log_det_schur.expect("dense Schur factor present");
let dim = n * d + k;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for r in 0..d {
for c in 0..d {
h[[base + r, base + c]] = pd_sys.rows[i].htt[[r, c]];
}
}
for r in 0..d {
for c in 0..k {
let v = pd_sys.rows[i].htbeta[[r, c]];
h[[base + r, n * d + c]] = v;
h[[n * d + c, base + r]] = v;
}
}
}
for r in 0..k {
for c in 0..k {
h[[n * d + r, n * d + c]] = pd_sys.hbb[[r, c]];
}
}
let lh = cholesky_lower(&h).expect("assembled bordered H must be SPD");
let log_det_dense: f64 = 2.0 * (0..dim).map(|i| lh[[i, i]].ln()).sum::<f64>();
assert!(
(log_det_cache - log_det_dense).abs() < 1e-6,
"tolerated-cache log|H| {log_det_cache} vs dense {log_det_dense}"
);
let tdiag = cache
.latent_block_inverse_diagonal()
.expect("tolerated cache must support latent_block_inverse_diagonal");
assert_eq!(tdiag.len(), n * d);
assert!(tdiag.iter().all(|v| v.is_finite()));
}
#[test]
pub(crate) fn arrow_factor_slab_accessor_matches_array_blocks_bitwise() {
let blocks = vec![
array![[1.0_f64]],
array![[2.0_f64, 0.0], [0.25, 3.0]],
array![[4.0_f64, 0.0, 0.0], [0.5, 5.0, 0.0], [-0.25, 0.75, 6.0]],
];
let slab = ArrowFactorSlab::from_blocks(blocks.clone());
assert_eq!(slab.len(), blocks.len());
for row in 0..blocks.len() {
let view = slab.factor(row);
assert_eq!(view.dim(), blocks[row].dim());
for r in 0..blocks[row].nrows() {
for c in 0..blocks[row].ncols() {
assert_eq!(view[[r, c]].to_bits(), blocks[row][[r, c]].to_bits());
}
}
}
}
pub(crate) fn fixed_row_kernel_fixture<const D: usize>() -> (ArrowRowBlock, Array1<f64>) {
let mut row = ArrowRowBlock::new(D, 0);
for r in 0..D {
for c in 0..D {
row.htt[[r, c]] = if r == c {
4.0 + r as f64
} else {
0.03125 * ((r + c + 1) as f64)
};
}
}
let rhs = Array1::from_iter((0..D).map(|i| 0.5 + i as f64 * 0.25));
(row, rhs)
}
pub(crate) fn assert_fixed_row_kernels_match_dynamic<const D: usize>() -> usize {
let (row, rhs) = fixed_row_kernel_fixture::<D>();
let ridge = 0.125_f64;
let fixed = factor_row_block_cholesky_fixed::<D>(&row, ridge).expect("fixed factor");
let dynamic = factor_row_block_cholesky_dynamic(&row, ridge, D).expect("dynamic factor");
for r in 0..D {
for c in 0..D {
assert_eq!(
fixed[[r, c]].to_bits(),
dynamic[[r, c]].to_bits(),
"factor mismatch at D={D} ({r},{c})"
);
}
}
let fixed_solve = cholesky_solve_vector_fixed::<D>(fixed.view(), rhs.view());
let dynamic_solve = cholesky_solve_vector(dynamic.view(), rhs.view());
for i in 0..D {
assert_eq!(
fixed_solve[i].to_bits(),
dynamic_solve[i].to_bits(),
"solve mismatch at D={D} index {i}"
);
}
D
}
#[test]
pub(crate) fn fixed_row_kernels_match_dynamic_path_bitwise() {
let checked = assert_fixed_row_kernels_match_dynamic::<1>()
+ assert_fixed_row_kernels_match_dynamic::<2>()
+ assert_fixed_row_kernels_match_dynamic::<3>()
+ assert_fixed_row_kernels_match_dynamic::<4>();
assert_eq!(checked, 10);
}
pub(crate) fn dense_direct_system(n: usize, d: usize, k: usize) -> ArrowSchurSystem {
let mut sys = ArrowSchurSystem::new(n, d, k);
for (i, row) in sys.rows.iter_mut().enumerate() {
for r in 0..d {
for c in 0..d {
row.htt[[r, c]] = if r == c { 4.0 + (i % 3) as f64 } else { 0.1 };
}
row.gt[r] = 0.05 * ((i + r + 1) as f64).sin();
for c in 0..k {
row.htbeta[[r, c]] = 0.01 * (((i + 1) * (c + 1)) as f64).cos();
}
}
}
for r in 0..k {
sys.gb[r] = 0.02 * ((r + 1) as f64).cos();
for c in 0..k {
sys.hbb[[r, c]] = if r == c { 6.0 } else { 0.0 };
}
}
sys.refresh_row_hessian_fingerprint();
sys
}
#[test]
pub(crate) fn device_dispatch_predicate_gates_on_work_not_rows() {
let policy = crate::gpu::policy::GpuDispatchPolicy::default();
assert!(!policy.dense_hessian_work_target_is_gpu(300, 8));
assert!(policy.dense_hessian_work_target_is_gpu(2_000, 4_096));
}
#[test]
pub(crate) fn matvec_gate_engages_for_llm_shape_off_for_tiny() {
let policy = crate::gpu::policy::GpuDispatchPolicy::default();
let options = ArrowSolveOptions::inexact_pcg();
let cg_iters = options
.pcg
.max_iterations
.min(options.trust_region.max_iterations);
assert!(cg_iters > 0);
let (n_llm, k_llm, d_llm) = (2_000_usize, 2_048_usize, 8_usize);
assert!(policy.reduced_schur_matvec_should_offload(n_llm, k_llm, d_llm, cg_iters));
assert!(!policy.reduced_schur_matvec_should_offload(30, 8, 2, cg_iters));
assert!(!policy.reduced_schur_matvec_should_offload(300, 8, 4, cg_iters));
}
#[test]
pub(crate) fn matrix_free_sae_gate_uses_work_predicate_not_dense_floor() {
let policy = crate::gpu::policy::GpuDispatchPolicy::default();
let (n, k, d) = (1_024_usize, 1_024_usize, 8_usize);
let cg_iters = 8usize;
assert!(policy.reduced_schur_matvec_should_offload(n, k, d, cg_iters));
assert!(!policy.reduced_schur_matvec_should_offload(1_000_000, 16, 64, 64));
}
#[test]
pub(crate) fn device_seam_declines_without_gpu_and_matches_cpu() {
if crate::gpu::device_runtime::GpuRuntime::global().is_some() {
return;
}
let sys = dense_direct_system(6, 2, 4);
let options = ArrowSolveOptions::direct();
assert!(try_device_arrow_direct(&sys, 0.0, 0.0, &options).is_none());
assert!(maybe_inject_gpu_schur_matvec(&sys, 0.0, 0.0, &options).is_none());
let (dt_core, db_core, diag) =
solve_arrow_newton_step_core(&sys, 0.0, 0.0, &options).expect("core solve");
assert!(
!diag.used_device_arrow,
"no device present, so the solve must not be flagged device-served"
);
assert!(
!diag.injected_host_procedural_matvec,
"no backend injected, so the host-procedural-matvec flag must stay clear (#1209)"
);
let artifacts =
solve_arrow_newton_step_artifacts(&sys, 0.0, 0.0, &options).expect("artifacts solve");
for (a, b) in dt_core.iter().zip(artifacts.delta_t.iter()) {
assert_eq!(a.to_bits(), b.to_bits(), "Δt must be bit-identical to CPU");
}
for (a, b) in db_core.iter().zip(artifacts.delta_beta.iter()) {
assert_eq!(a.to_bits(), b.to_bits(), "Δβ must be bit-identical to CPU");
}
}
#[test]
pub(crate) fn streaming_mixed_precision_matches_f64_and_keeps_logdet_f64() {
let sys = dense_direct_system(40, 3, 6);
let f64_options = ArrowSolveOptions::direct().with_streaming_chunk_size(Some(8));
let mp_options = f64_options
.clone()
.with_solve_precision_policy(ArrowSolvePrecisionPolicy::certified_mixed());
assert!(matches!(
f64_options.solve_precision,
ArrowSolvePrecisionPolicy::F64Only
));
let mut s_f64 = StreamingArrowSchur::from_system(&sys, 8);
let (_, db_f64, _) = s_f64
.solve(0.0, 0.0, &f64_options)
.expect("f64 streaming solve");
let mut s_mp = StreamingArrowSchur::from_system(&sys, 8);
let (_, db_mp, _) = s_mp
.solve(0.0, 0.0, &mp_options)
.expect("mp streaming solve");
let mut max_abs = 0.0_f64;
for (a, b) in db_f64.iter().zip(db_mp.iter()) {
max_abs = max_abs.max((a - b).abs());
}
assert!(
max_abs < 1e-7,
"mixed-precision Δβ deviates from f64 by {max_abs:e}, above the certified tolerance"
);
let mut ld_f64 = StreamingArrowSchur::from_system(&sys, 8);
let logdet_f64 = ld_f64
.exact_arrow_log_det(0.0, 0.0, &f64_options)
.expect("f64 logdet");
let mut ld_mp = StreamingArrowSchur::from_system(&sys, 8);
let logdet_mp = ld_mp
.exact_arrow_log_det(0.0, 0.0, &mp_options)
.expect("mp logdet");
assert_eq!(
logdet_f64.to_bits(),
logdet_mp.to_bits(),
"evidence log|H| must stay bit-for-bit f64 under the mixed-precision policy"
);
}
#[test]
pub(crate) fn streaming_mixed_precision_default_upgrades_only_off() {
let off = ArrowSolveOptions::direct();
assert!(matches!(
off.with_streaming_solve_precision_default().solve_precision,
ArrowSolvePrecisionPolicy::CertifiedMixed { .. }
));
let pinned =
ArrowSolveOptions::direct().with_solve_precision_policy(ArrowSolvePrecisionPolicy::F64Only);
let custom = ArrowSolveOptions::direct().with_solve_precision_policy(
ArrowSolvePrecisionPolicy::CertifiedMixed {
max_refinement_steps: 1,
residual_relative_tolerance: 1e-6,
kappa_unit_roundoff_margin: 0.25,
},
);
match custom
.with_streaming_solve_precision_default()
.solve_precision
{
ArrowSolvePrecisionPolicy::CertifiedMixed {
max_refinement_steps,
..
} => assert_eq!(max_refinement_steps, 1, "explicit policy preserved"),
ArrowSolvePrecisionPolicy::F64Only => {
panic!("explicit CertifiedMixed must not be downgraded")
}
}
assert!(matches!(
pinned.solve_precision,
ArrowSolvePrecisionPolicy::F64Only
));
}
pub(crate) fn build_ibp_woodbury_fixture() -> (ArrowSchurSystem, IbpCrossRowSource, Vec<Vec<f64>>) {
let n = 3usize;
let d = 2usize;
let k_beta = 2usize;
let r = 2usize; let mut sys = ArrowSchurSystem::new(n, d, k_beta);
sys.rows[0].htt = array![[4.0_f64, 0.5], [0.5, 3.0]];
sys.rows[0].htbeta = array![[1.0_f64, 0.2], [-0.3, 0.7]];
sys.rows[0].gt = array![0.4_f64, -0.7];
sys.rows[1].htt = array![[5.0_f64, -0.4], [-0.4, 2.5]];
sys.rows[1].htbeta = array![[0.6_f64, -0.1], [0.4, 0.9]];
sys.rows[1].gt = array![-0.2_f64, 0.9];
sys.rows[2].htt = array![[3.5_f64, 0.2], [0.2, 4.5]];
sys.rows[2].htbeta = array![[-0.2_f64, 0.5], [0.8, -0.6]];
sys.rows[2].gt = array![1.1_f64, 0.3];
sys.hbb = array![[12.0_f64, 0.7], [0.7, 10.0]];
sys.gb = array![0.5_f64, -0.8];
let d_coef = array![0.6_f64, -0.35];
let zprime = vec![
vec![0.9_f64, 0.4], vec![-0.5, 0.8], vec![0.7, -0.6], ];
let mut entries = Vec::new();
for i in 0..n {
for k in 0..r {
let g = sys.row_offsets[i] + k;
entries.push((g, k, zprime[i][k]));
}
}
for i in 0..n {
for k in 0..r {
sys.rows[i].htt[[k, k]] += d_coef[k] * zprime[i][k] * zprime[i][k];
}
}
let source = IbpCrossRowSource {
r,
d: d_coef,
entries,
};
(sys, source, zprime)
}
pub(crate) fn dense_h_full(
sys: &ArrowSchurSystem,
source: &IbpCrossRowSource,
zprime: &[Vec<f64>],
) -> Array2<f64> {
let n = sys.rows.len();
let d = sys.d;
let k_beta = sys.k;
let dim = n * d + k_beta;
let mut h = Array2::<f64>::zeros((dim, dim));
for i in 0..n {
let base = i * d;
for rr in 0..d {
for cc in 0..d {
h[[base + rr, base + cc]] = sys.rows[i].htt[[rr, cc]];
}
for cc in 0..k_beta {
let v = sys.rows[i].htbeta[[rr, cc]];
h[[base + rr, n * d + cc]] = v;
h[[n * d + cc, base + rr]] = v;
}
}
}
for rr in 0..k_beta {
for cc in 0..k_beta {
h[[n * d + rr, n * d + cc]] = sys.hbb[[rr, cc]];
}
}
for k in 0..source.r {
let dk = source.d[k];
for i in 0..n {
for j in 0..n {
if i == j {
continue;
}
let gi = i * d + k;
let gj = j * d + k;
h[[gi, gj]] += dk * zprime[i][k] * zprime[j][k];
}
}
}
h
}
#[test]
pub(crate) fn ibp_cross_row_woodbury_logdet_matches_dense() {
let (mut sys, source, zprime) = build_ibp_woodbury_fixture();
sys.set_ibp_cross_row_source(source.clone());
let options = ArrowSolveOptions::direct();
let (_dt, _db, cache) = solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("IBP Woodbury cache should factor");
assert!(
cache.cross_row_woodbury.is_some(),
"the cache must carry the cross-row Woodbury"
);
let h_full = dense_h_full(&sys, &source, &zprime);
let l = cholesky_lower(&h_full).expect("H_full must be SPD for this fixture");
let mut dense_logdet = 0.0_f64;
for i in 0..l.nrows() {
dense_logdet += 2.0 * l[[i, i]].ln();
}
let (tt, schur) = cache.arrow_log_det();
let cache_logdet = tt + schur.expect("direct mode has a Schur factor");
assert!(
(cache_logdet - dense_logdet).abs() < 1e-9,
"cache log det H_full {cache_logdet} vs dense {dense_logdet}"
);
let mut h0prime = h_full.clone();
for k in 0..source.r {
for i in 0..sys.rows.len() {
let gi = i * sys.d + k;
for j in 0..sys.rows.len() {
let gj = j * sys.d + k;
h0prime[[gi, gj]] -= source.d[k] * zprime[i][k] * zprime[j][k];
}
}
}
let l0 = cholesky_lower(&h0prime).expect("H₀' SPD");
let mut logdet_h0prime = 0.0_f64;
for i in 0..l0.nrows() {
logdet_h0prime += 2.0 * l0[[i, i]].ln();
}
let correction = cache.cross_row_woodbury_log_det();
assert!(
(correction - (dense_logdet - logdet_h0prime)).abs() < 1e-9,
"Woodbury log det correction {correction} vs (logdet H_full − logdet H₀') {}",
dense_logdet - logdet_h0prime
);
}
#[test]
pub(crate) fn ibp_cross_row_woodbury_full_inverse_and_newton_match_dense() {
let (mut sys, source, zprime) = build_ibp_woodbury_fixture();
sys.set_ibp_cross_row_source(source.clone());
let options = ArrowSolveOptions::direct();
let (delta_t, delta_beta, cache) =
solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("IBP Woodbury cache should factor");
let n = sys.rows.len();
let d = sys.d;
let k_beta = sys.k;
let dim = n * d + k_beta;
let h_full = dense_h_full(&sys, &source, &zprime);
let l = cholesky_lower(&h_full).expect("H_full SPD");
let mut g = Array1::<f64>::zeros(dim);
for i in 0..n {
for j in 0..d {
g[i * d + j] = sys.rows[i].gt[j];
}
}
for c in 0..k_beta {
g[n * d + c] = sys.gb[c];
}
let dense_step = cholesky_solve_vector(&l, &g); for idx in 0..n * d {
assert!(
(delta_t[idx] + dense_step[idx]).abs() < 1e-9,
"Δt[{idx}] {} vs −H_full⁻¹g {}",
delta_t[idx],
-dense_step[idx]
);
}
for c in 0..k_beta {
assert!(
(delta_beta[c] + dense_step[n * d + c]).abs() < 1e-9,
"Δβ[{c}] {} vs −H_full⁻¹g {}",
delta_beta[c],
-dense_step[n * d + c]
);
}
let mut w_full = Array1::<f64>::zeros(dim);
for (idx, v) in w_full.iter_mut().enumerate() {
*v = 0.25 + 0.13 * (idx as f64) * (if idx % 2 == 0 { 1.0 } else { -1.0 });
}
let dense_u = cholesky_solve_vector(&l, &w_full);
let (u_t, u_beta) = cache
.full_inverse_apply(
w_full.slice(ndarray::s![..n * d]),
w_full.slice(ndarray::s![n * d..]),
)
.expect("full_inverse_apply on the Woodbury cache");
for idx in 0..n * d {
assert!(
(u_t[idx] - dense_u[idx]).abs() < 1e-9,
"H_full⁻¹w t[{idx}] {} vs dense {}",
u_t[idx],
dense_u[idx]
);
}
for c in 0..k_beta {
assert!(
(u_beta[c] - dense_u[n * d + c]).abs() < 1e-9,
"H_full⁻¹w beta[{c}] {} vs dense {}",
u_beta[c],
dense_u[n * d + c]
);
}
let mut h_full_inv = Array2::<f64>::zeros((dim, dim));
let mut e = Array1::<f64>::zeros(dim);
for col in 0..dim {
e.fill(0.0);
e[col] = 1.0;
let sol = cholesky_solve_vector(&l, &e);
for rrow in 0..dim {
h_full_inv[[rrow, col]] = sol[rrow];
}
}
let diag = cache
.latent_block_inverse_diagonal()
.expect("latent_block_inverse_diagonal on the Woodbury cache");
for idx in 0..n * d {
assert!(
(diag[idx] - h_full_inv[[idx, idx]]).abs() < 1e-9,
"diag (H_full⁻¹)_tt[{idx}] {} vs dense {}",
diag[idx],
h_full_inv[[idx, idx]]
);
}
}
#[test]
pub(crate) fn ibp_cross_row_woodbury_absent_is_strict_noop() {
let (sys, _source, zprime) = build_ibp_woodbury_fixture();
let options = ArrowSolveOptions::direct();
let (_dt, _db, cache) = solve_arrow_newton_step_with_options(&sys, 0.0, 0.0, &options)
.expect("bare cache should factor");
assert!(
cache.cross_row_woodbury.is_none(),
"no source ⇒ no Woodbury carrier"
);
assert_eq!(
cache.cross_row_woodbury_log_det(),
0.0,
"absent Woodbury contributes exactly zero to the log-determinant"
);
let dim = sys.rows.len() * sys.d + sys.k;
let mut h0 = Array2::<f64>::zeros((dim, dim));
let n = sys.rows.len();
let d = sys.d;
for i in 0..n {
let base = i * d;
for rr in 0..d {
for cc in 0..d {
h0[[base + rr, base + cc]] = sys.rows[i].htt[[rr, cc]];
}
for cc in 0..sys.k {
let v = sys.rows[i].htbeta[[rr, cc]];
h0[[base + rr, n * d + cc]] = v;
h0[[n * d + cc, base + rr]] = v;
}
}
}
for rr in 0..sys.k {
for cc in 0..sys.k {
h0[[n * d + rr, n * d + cc]] = sys.hbb[[rr, cc]];
}
}
let l = cholesky_lower(&h0).expect("H₀ SPD");
let mut dense_logdet = 0.0_f64;
for i in 0..l.nrows() {
dense_logdet += 2.0 * l[[i, i]].ln();
}
let (tt, schur) = cache.arrow_log_det();
let cache_logdet = tt + schur.expect("direct Schur");
assert!(
(cache_logdet - dense_logdet).abs() < 1e-9,
"bare cache log det {cache_logdet} vs dense H₀ {dense_logdet}"
);
assert_eq!(zprime.len(), n);
}
#[test]
pub(crate) fn ibp_cross_row_streaming_logdet_refuses() {
let (mut sys, source, _zprime) = build_ibp_woodbury_fixture();
sys.set_ibp_cross_row_source(source);
let mut streaming = StreamingArrowSchur::from_system(&sys, 2);
let options = ArrowSolveOptions::direct();
let err = streaming.reduced_schur_and_log_det_tt(0.0, 0.0, &options);
assert!(
err.is_err(),
"streaming arrow log-det must refuse an IBP-active system"
);
}
pub(crate) fn dense_arrow_system(n: usize, d: usize, k: usize) -> ArrowSchurSystem {
let mut sys = ArrowSchurSystem::new(n, d, k);
for i in 0..n {
let mut htt = Array2::<f64>::zeros((d, d));
for r in 0..d {
for c in 0..d {
let s = ((i + 1) * (r + 2) * (c + 3)) as f64;
htt[[r, c]] = if r == c {
4.0 + (s % 7.0)
} else {
0.1 * ((s % 5.0) - 2.0)
};
}
}
let mut sym = &htt + &htt.t();
for r in 0..d {
sym[[r, r]] = sym[[r, r]].abs() + (d as f64) + 2.0;
}
sys.rows[i].htt = sym;
let mut htb = Array2::<f64>::zeros((d, k));
for r in 0..d {
for c in 0..k {
let s = ((i + 1) * (r + 1) + 3 * (c + 1)) as f64;
htb[[r, c]] = 0.05 * ((s % 11.0) - 5.0);
}
}
sys.rows[i].htbeta = htb;
sys.rows[i].gt = Array1::<f64>::zeros(d);
}
let mut hbb = Array2::<f64>::zeros((k, k));
for r in 0..k {
for c in 0..k {
let s = ((r + 1) * (c + 1)) as f64;
hbb[[r, c]] = if r == c {
(k as f64) + 6.0 + (s % 3.0)
} else {
0.02 * ((s % 7.0) - 3.0)
};
}
}
sys.hbb = hbb;
sys.gb = Array1::<f64>::zeros(k);
sys
}
pub(crate) fn schur_matvec_sequential_ref<B: BatchedBlockSolver>(
sys: &ArrowSchurSystem,
htt_factors: &ArrowFactorSlab,
ridge_beta: f64,
x: &Array1<f64>,
backend: &B,
) -> Array1<f64> {
let k = sys.k;
let mut out = Array1::<f64>::zeros(k);
{
let xs = x.as_slice().unwrap();
let os = out.as_slice_mut().unwrap();
sys.penalty_matvec_add(xs, os);
for a in 0..k {
os[a] += ridge_beta * xs[a];
}
}
let mut local = Array1::<f64>::zeros(sys.d);
let mut neg = Array1::<f64>::zeros(k);
for i in 0..sys.rows.len() {
neg.fill(0.0);
schur_matvec_row_into(sys, htt_factors, x, backend, i, &mut local, &mut neg);
for a in 0..k {
out[a] -= neg[a];
}
}
out
}
#[test]
pub(crate) fn parallel_schur_matvec_deterministic_and_matches_sequential() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64; let d = 6usize;
let k = 96usize;
let sys = dense_arrow_system(n, d, k);
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, d, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let x = Array1::from_iter((0..k).map(|a| 0.3 * (a as f64).sin() - 0.1));
let mut out_a = Array1::<f64>::zeros(k);
let mut out_b = Array1::<f64>::zeros(k);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_a,
&backend,
None,
);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_b,
&backend,
None,
);
for a in 0..k {
assert_eq!(
out_a[a].to_bits(),
out_b[a].to_bits(),
"parallel Schur matvec must be deterministic run-to-run at index {a}"
);
}
let out_seq = schur_matvec_sequential_ref(&sys, &htt_factors, ridge_beta, &x, &backend);
let scale = out_seq
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for a in 0..k {
let rel = (out_a[a] - out_seq[a]).abs() / scale;
assert!(
rel < 1e-12,
"parallel vs sequential Schur matvec must agree to reassociation error \
at index {a}: {} vs {} (rel {rel:e})",
out_a[a],
out_seq[a]
);
}
}
#[test]
pub(crate) fn parallel_dense_schur_reduction_deterministic_and_matches_sequential() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64; let d = 5usize;
let k = 48usize;
let sys = dense_direct_system(n, d, k);
let backend = CpuBatchedBlockSolver;
let ridge_beta = 1e-6;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, d, false)
.expect("SPD per-row blocks must factor");
for kind in [SchurReductionKind::Direct, SchurReductionKind::SqrtBa] {
let seed = || {
let mut s = sys.effective_penalty_op().to_dense();
for j in 0..k {
s[[j, j]] += ridge_beta;
}
s
};
let mut s_a = seed();
reduce_row_schur_contributions(&sys, &htt_factors, &backend, kind, &mut s_a)
.expect("parallel reduction a");
let mut s_b = seed();
reduce_row_schur_contributions(&sys, &htt_factors, &backend, kind, &mut s_b)
.expect("parallel reduction b");
for a in 0..k {
for b in 0..k {
assert_eq!(
s_a[[a, b]].to_bits(),
s_b[[a, b]].to_bits(),
"{kind:?}: parallel dense-Schur reduction must be deterministic \
run-to-run at ({a},{b})"
);
}
}
let mut s_ser = seed();
for (i, row) in sys.rows.iter().enumerate() {
subtract_row_schur_contribution(
&sys,
i,
row,
htt_factors.factor(i),
&backend,
kind,
&mut s_ser,
)
.expect("serial per-row reduction");
}
let scale = s_ser.iter().fold(0.0_f64, |m, &v| m.max(v.abs())).max(1.0);
let mut max_rel = 0.0_f64;
for a in 0..k {
for b in 0..k {
max_rel = max_rel.max((s_a[[a, b]] - s_ser[[a, b]]).abs() / scale);
}
}
assert!(
max_rel < 1e-12,
"{kind:?}: parallel vs serial dense-Schur reduction must agree to \
reassociation error (rel {max_rel:e})"
);
}
}
#[test]
pub(crate) fn cluster_jacobi_build_deterministic_and_matches_serial() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64; let d = 5usize;
let k = 48usize; let sys = dense_direct_system(n, d, k);
let backend = CpuBatchedBlockSolver;
let ridge_beta = 1e-6;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, d, false)
.expect("SPD per-row blocks must factor");
let cols: Vec<usize> = (0..k).collect();
let col_groups = vec![cols.clone()];
let r: Array1<f64> =
Array1::from_iter((0..k).map(|j| 0.1 * ((j + 1) as f64).sin() - 0.03 * j as f64));
let p_a = ClusterJacobiPreconditioner::build_from_column_groups(
&sys,
&htt_factors,
ridge_beta,
&backend,
&col_groups,
)
.expect("cluster build a");
let p_b = ClusterJacobiPreconditioner::build_from_column_groups(
&sys,
&htt_factors,
ridge_beta,
&backend,
&col_groups,
)
.expect("cluster build b");
let out_a = p_a.apply(&r);
let out_b = p_b.apply(&r);
for j in 0..k {
assert_eq!(
out_a[j].to_bits(),
out_b[j].to_bits(),
"cluster-Jacobi build must be deterministic run-to-run at {j}"
);
}
let b = k;
let mut s_ref = Array2::<f64>::zeros((b, b));
sys.penalty_subblock_add(&cols, &mut s_ref);
for bi in 0..b {
s_ref[[bi, bi]] += ridge_beta;
}
let mut col_vec = Array1::<f64>::zeros(d);
let mut solved_cols = Array2::<f64>::zeros((d, b));
for (row_idx, row) in sys.rows.iter().enumerate() {
for bj in 0..b {
let gj = cols[bj];
for c in 0..d {
col_vec[c] = row.htbeta[[c, gj]];
}
let solved = backend.solve_block_vector(htt_factors.factor(row_idx), col_vec.view());
for c in 0..d {
solved_cols[[c, bj]] = solved[c];
}
}
for bi in 0..b {
let gi = cols[bi];
for bj in 0..b {
let mut acc = 0.0;
for c in 0..d {
acc += row.htbeta[[c, gi]] * solved_cols[[c, bj]];
}
s_ref[[bi, bj]] -= acc;
}
}
}
for i in 0..b {
for j in 0..i {
let v = 0.5 * (s_ref[[i, j]] + s_ref[[j, i]]);
s_ref[[i, j]] = v;
s_ref[[j, i]] = v;
}
}
let llt = {
use faer::Side;
let view = FaerArrayView::new(&s_ref);
FaerLlt::new(view.as_ref(), Side::Lower).expect("reference Schur block must be PD")
};
let solved_ref = {
use faer::linalg::solvers::Solve;
let mut rhs = r.clone();
let stride = rhs.strides()[0];
let len = rhs.len();
let rhs_mat = unsafe { faer::MatRef::from_raw_parts(rhs.as_mut_ptr(), len, 1, stride, 0) };
let s = llt.solve(rhs_mat);
Array1::from_iter((0..b).map(|i| s[(i, 0)]))
};
let scale = solved_ref
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
let mut max_rel = 0.0_f64;
for j in 0..k {
max_rel = max_rel.max((out_a[j] - solved_ref[j]).abs() / scale);
}
assert!(
max_rel < 1e-12,
"parallel cluster-Jacobi apply must match the serial row-order reference \
to reassociation error (rel {max_rel:e})"
);
}
pub(crate) fn cross_row_matvec_sequential_ref(
sys: &ArrowSchurSystem,
ridge_t: f64,
ridge_beta: f64,
x_t: ArrayView1<'_, f64>,
x_beta: ArrayView1<'_, f64>,
) -> (Array1<f64>, Array1<f64>) {
let n = sys.rows.len();
let k = sys.k;
let total_dt = sys.row_offsets[n];
let mut y_t = Array1::<f64>::zeros(total_dt);
let mut y_beta = Array1::<f64>::zeros(k);
for i in 0..n {
let di = sys.row_dims[i];
let base = sys.row_offsets[i];
let row = &sys.rows[i];
for a in 0..di {
let mut acc = ridge_t * x_t[base + a];
for b in 0..di {
acc += row.htt[[a, b]] * x_t[base + b];
}
y_t[base + a] = acc;
}
let mut slab = Array1::<f64>::zeros(di);
sys_htbeta_apply_row(sys, i, row, x_beta, &mut slab);
for c in 0..di {
y_t[base + c] += slab[c];
}
let x_ti = x_t.slice(ndarray::s![base..base + di]).to_owned();
sys_htbeta_accumulate_transpose(sys, i, row, x_ti.view(), &mut y_beta);
}
{
let x_beta_slice = x_beta.as_slice().expect("x_beta contiguous");
let y_beta_slice = y_beta.as_slice_mut().expect("y_beta contiguous");
sys.penalty_matvec_add(x_beta_slice, y_beta_slice);
}
for a in 0..k {
y_beta[a] += ridge_beta * x_beta[a];
}
sys.apply_cross_row_penalty_hessian(x_t, &mut y_t);
(y_t, y_beta)
}
#[test]
pub(crate) fn parallel_cross_row_matvec_deterministic_and_matches_sequential() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 96; let d = 5usize;
let k = 80usize;
let sys = dense_arrow_system(n, d, k);
let total_dt = sys.row_offsets[n];
let ridge_t = 1e-5;
let ridge_beta = 1e-6;
let x_t = Array1::from_iter((0..total_dt).map(|i| 0.2 * (i as f64).cos() + 0.05));
let x_beta = Array1::from_iter((0..k).map(|a| 0.3 * (a as f64).sin() - 0.1));
let (yt_a, yb_a) = arrow_cross_row_matvec(&sys, ridge_t, ridge_beta, x_t.view(), x_beta.view());
let (yt_b, yb_b) = arrow_cross_row_matvec(&sys, ridge_t, ridge_beta, x_t.view(), x_beta.view());
for i in 0..total_dt {
assert_eq!(
yt_a[i].to_bits(),
yt_b[i].to_bits(),
"parallel cross-row matvec y_t must be deterministic at {i}"
);
}
for a in 0..k {
assert_eq!(
yb_a[a].to_bits(),
yb_b[a].to_bits(),
"parallel cross-row matvec y_beta must be deterministic at {a}"
);
}
let (yt_seq, yb_seq) =
cross_row_matvec_sequential_ref(&sys, ridge_t, ridge_beta, x_t.view(), x_beta.view());
for i in 0..total_dt {
assert_eq!(
yt_a[i].to_bits(),
yt_seq[i].to_bits(),
"parallel cross-row matvec y_t must match the sequential fold bit-for-bit at {i}"
);
}
let scale = yb_seq.iter().fold(0.0_f64, |m, &v| m.max(v.abs())).max(1.0);
for a in 0..k {
let rel = (yb_a[a] - yb_seq[a]).abs() / scale;
assert!(
rel < 1e-12,
"parallel vs sequential cross-row matvec y_beta must agree to reassociation \
error at {a}: {} vs {} (rel {rel:e})",
yb_a[a],
yb_seq[a]
);
}
}
#[test]
pub(crate) fn parallel_block_diag_inverse_apply_deterministic_and_solves() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64; let d = 4usize;
let k = 72usize;
let sys = dense_arrow_system(n, d, k);
let backend = CpuBatchedBlockSolver;
let ridge_t = 1e-4;
let ridge_beta = 1e-5;
let precond = ArrowBlockDiagInverse::build(&sys, ridge_t, ridge_beta, false, &backend)
.expect("block-diagonal inverse must build");
let total_dt = sys.row_offsets[n];
let r_t = Array1::from_iter((0..total_dt).map(|i| 0.15 * (i as f64).sin() + 0.02));
let r_beta = Array1::from_iter((0..k).map(|a| 0.25 * (a as f64).cos() - 0.05));
let (xt_a, xb_a) = precond.apply(r_t.view(), r_beta.view());
let (xt_b, xb_b) = precond.apply(r_t.view(), r_beta.view());
for i in 0..total_dt {
assert_eq!(
xt_a[i].to_bits(),
xt_b[i].to_bits(),
"preconditioner x_t must be deterministic at {i}"
);
}
for a in 0..k {
assert_eq!(
xb_a[a].to_bits(),
xb_b[a].to_bits(),
"preconditioner x_beta must be deterministic at {a}"
);
}
let (yt, yb) = arrow_cross_row_matvec(&sys, ridge_t, ridge_beta, xt_a.view(), xb_a.view());
let scale_t = r_t.iter().fold(0.0_f64, |m, &v| m.max(v.abs())).max(1.0);
for i in 0..total_dt {
let rel = (yt[i] - r_t[i]).abs() / scale_t;
assert!(
rel < 1e-9,
"preconditioner round-trip y_t at {i}: rel {rel:e}"
);
}
let scale_b = r_beta.iter().fold(0.0_f64, |m, &v| m.max(v.abs())).max(1.0);
for a in 0..k {
let rel = (yb[a] - r_beta[a]).abs() / scale_b;
assert!(
rel < 1e-9,
"preconditioner round-trip y_beta at {a}: rel {rel:e}"
);
}
}
#[test]
pub(crate) fn parallel_arrow_operator_apply_deterministic_and_matches_sequential() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 48; let d = 6usize;
let k = 64usize;
let sys = dense_arrow_system(n, d, k);
let total_dt = sys.row_offsets[n];
let ridge_t = 2e-5;
let ridge_beta = 3e-6;
let x_t = Array1::from_iter((0..total_dt).map(|i| 0.17 * (i as f64).sin() - 0.03));
let x_beta = Array1::from_iter((0..k).map(|a| 0.21 * (a as f64).cos() + 0.04));
let (yt_a, yb_a) = arrow_operator_apply(&sys, ridge_t, ridge_beta, x_t.view(), x_beta.view());
let (yt_b, yb_b) = arrow_operator_apply(&sys, ridge_t, ridge_beta, x_t.view(), x_beta.view());
for i in 0..total_dt {
assert_eq!(
yt_a[i].to_bits(),
yt_b[i].to_bits(),
"arrow_operator_apply y_t must be deterministic at {i}"
);
}
for a in 0..k {
assert_eq!(
yb_a[a].to_bits(),
yb_b[a].to_bits(),
"arrow_operator_apply y_beta must be deterministic at {a}"
);
}
let (yt_seq, yb_seq) =
cross_row_matvec_sequential_ref(&sys, ridge_t, ridge_beta, x_t.view(), x_beta.view());
for i in 0..total_dt {
assert_eq!(
yt_a[i].to_bits(),
yt_seq[i].to_bits(),
"arrow_operator_apply y_t must match the sequential fold bit-for-bit at {i}"
);
}
let scale = yb_seq.iter().fold(0.0_f64, |m, &v| m.max(v.abs())).max(1.0);
for a in 0..k {
let rel = (yb_a[a] - yb_seq[a]).abs() / scale;
assert!(
rel < 1e-12,
"arrow_operator_apply y_beta vs sequential at {a}: rel {rel:e}"
);
}
}
#[test]
pub(crate) fn parallel_penalty_prologue_bit_identical_to_serial() {
let k = 576usize; assert!(
k >= SCHUR_PROLOGUE_PARALLEL_K_MIN,
"test border must exceed the prologue parallel threshold"
);
let d = 4usize;
let n = 8usize;
assert!(n < SCHUR_MATVEC_PARALLEL_ROW_MIN);
let sys = dense_arrow_system(n, d, k);
let ridge = 7.5e-3;
let x = Array1::from_iter((0..k).map(|a| 0.4 * (a as f64 * 0.31).cos() - 0.17));
let xs = x.as_slice().unwrap();
let mut serial = vec![0.0_f64; k];
sys.penalty_matvec_add(xs, &mut serial);
for a in 0..k {
serial[a] += ridge * xs[a];
}
let mut par = vec![0.0_f64; k];
sys.penalty_ridge_prologue_into(xs, ridge, &mut par, true);
let mut ser_branch = vec![0.0_f64; k];
sys.penalty_ridge_prologue_into(xs, ridge, &mut ser_branch, false);
for a in 0..k {
assert_eq!(
par[a].to_bits(),
serial[a].to_bits(),
"parallel penalty prologue must be bit-identical to serial at index {a}"
);
assert_eq!(
ser_branch[a].to_bits(),
serial[a].to_bits(),
"serial prologue branch must match the reference at index {a}"
);
}
}
pub(crate) fn sae_structured_system(
n: usize,
q: usize,
p: usize,
n_atoms: usize,
m_active: usize,
) -> (ArrowSchurSystem, Vec<Vec<(usize, f64)>>, Vec<Vec<f64>>) {
let k = n_atoms * p;
let mut sys = ArrowSchurSystem::new(n, q, k);
let mut a_phi: Vec<Vec<(usize, f64)>> = Vec::with_capacity(n);
let mut local_jac: Vec<Vec<f64>> = Vec::with_capacity(n);
for i in 0..n {
let mut htt = Array2::<f64>::zeros((q, q));
for r in 0..q {
for c in 0..q {
let s = ((i + 1) * (r + 2) * (c + 3)) as f64;
htt[[r, c]] = 0.1 * ((s % 5.0) - 2.0);
}
}
let mut sym = &htt + &htt.t();
for r in 0..q {
sym[[r, r]] = sym[[r, r]].abs() + (q as f64) + 3.0;
}
sys.rows[i].htt = sym;
sys.rows[i].gt = Array1::<f64>::zeros(q);
let mut jac = vec![0.0_f64; q * p];
for c in 0..q {
for j in 0..p {
let s = ((i + 1) + 2 * (c + 1) + 3 * (j + 1)) as f64;
jac[c * p + j] = 0.1 * ((s % 7.0) - 3.0);
}
}
local_jac.push(jac);
let mut support = Vec::with_capacity(m_active);
for s in 0..m_active {
let atom = ((i * 3 + s * 5) % n_atoms).min(n_atoms - 1);
let phi = 0.5 + 0.25 * (((i + s) % 4) as f64);
support.push((atom * p, phi));
}
a_phi.push(support);
}
let mut hbb = Array2::<f64>::zeros((k, k));
for r in 0..k {
hbb[[r, r]] = (k as f64) + 4.0;
}
sys.hbb = hbb;
sys.gb = Array1::<f64>::zeros(k);
let a_phi_f = a_phi.clone();
let jac_f = local_jac.clone();
let a_phi_t = a_phi.clone();
let jac_t = local_jac.clone();
let p_f = p;
sys.set_row_htbeta_operator(
move |row, x, out| {
let mut u_p = vec![0.0_f64; p_f];
for &(base, phi) in &a_phi_f[row] {
for j in 0..p_f {
u_p[j] += phi * x[base + j];
}
}
let jac = &jac_f[row];
let qi = jac.len() / p_f;
for c in 0..qi {
let mut acc = 0.0;
for j in 0..p_f {
acc += jac[c * p_f + j] * u_p[j];
}
out[c] = acc;
}
},
move |row, v, out| {
let jac = &jac_t[row];
let qi = jac.len() / p_f;
let mut u_p = vec![0.0_f64; p_f];
for c in 0..qi {
let vc = v[c];
for j in 0..p_f {
u_p[j] += jac[c * p_f + j] * vc;
}
}
for &(base, phi) in &a_phi_t[row] {
for j in 0..p_f {
out[base + j] += phi * u_p[j];
}
}
},
);
sys.set_device_sae_pcg_data(DeviceSaePcgData {
p,
beta_dim: k,
a_phi: std::sync::Arc::from(a_phi.clone().into_boxed_slice()),
local_jac: std::sync::Arc::from(local_jac.clone().into_boxed_slice()),
smooth_blocks: Vec::new(),
sparse_g_blocks: Vec::new(),
frame: None,
});
(sys, a_phi, local_jac)
}
#[test]
pub(crate) fn resident_sae_matvec_matches_generic() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 96; let q = 4usize;
let p = 6usize;
let n_atoms = 32usize;
let m_active = 5usize;
let (sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let k = sys.k;
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let x = Array1::from_iter((0..k).map(|a| 0.2 * ((a as f64) * 0.013).cos() - 0.05));
let mut out_generic = Array1::<f64>::zeros(k);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_generic,
&backend,
None,
);
let resident = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend)
.expect("SAE structure must yield a resident operator");
let mut out_resident = Array1::<f64>::zeros(k);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_resident,
&backend,
Some(&resident),
);
let scale = out_generic
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for a in 0..k {
let rel = (out_resident[a] - out_generic[a]).abs() / scale;
assert!(
rel < 1e-10,
"resident vs generic SAE Schur matvec must agree at index {a}: \
{} vs {} (rel {rel:e})",
out_resident[a],
out_generic[a]
);
}
let resident2 = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend).unwrap();
let mut out_resident2 = Array1::<f64>::zeros(k);
schur_matvec(
&sys,
&htt_factors,
ridge_beta,
&x,
&mut out_resident2,
&backend,
Some(&resident2),
);
for a in 0..k {
assert_eq!(
out_resident[a].to_bits(),
out_resident2[a].to_bits(),
"resident SAE matvec must be deterministic run-to-run at index {a}"
);
}
}
#[test]
pub(crate) fn sae_kron_and_device_share_backing_alloc_1033() {
use crate::terms::sae::manifold::SaeKroneckerRows;
let p = 6usize;
let a_phi: std::sync::Arc<[Vec<(usize, f64)>]> = std::sync::Arc::from(
vec![vec![(0usize, 2.0f64), (12, 1.0)], vec![(0, 0.5)]].into_boxed_slice(),
);
let jac: std::sync::Arc<[Vec<f64>]> =
std::sync::Arc::from(vec![vec![1.0; 4 * p], vec![2.0; 4 * p]].into_boxed_slice());
let host = SaeKroneckerRows::new(
p,
std::sync::Arc::clone(&a_phi),
std::sync::Arc::clone(&jac),
);
let device = DeviceSaePcgData {
p,
beta_dim: 6,
a_phi: std::sync::Arc::clone(&a_phi),
local_jac: std::sync::Arc::clone(&jac),
smooth_blocks: Vec::new(),
sparse_g_blocks: Vec::new(),
frame: None,
};
let reshare = device.a_phi_shared();
assert!(
std::sync::Arc::ptr_eq(&reshare, &device.a_phi),
"a_phi_shared must hand back the SAME allocation, not a re-clone"
);
assert!(
std::sync::Arc::ptr_eq(&host.local_jac, &device.local_jac),
"host SaeKroneckerRows and DeviceSaePcgData must share one local_jac alloc"
);
assert!(
std::sync::Arc::ptr_eq(&host.a_phi, &device.a_phi),
"host SaeKroneckerRows and DeviceSaePcgData must share one a_phi alloc"
);
assert_eq!(
std::sync::Arc::strong_count(&jac),
3,
"exactly three references (original, host, device) share the Jacobian"
);
}
#[test]
pub(crate) fn resident_scalar_jacobi_matches_generic() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64;
let q = 4usize;
let p = 5usize;
let n_atoms = 20usize;
let m_active = 4usize;
let (sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let generic =
JacobiPreconditioner::build_scalar_jacobi(&sys, &htt_factors, ridge_beta, &backend)
.expect("generic scalar Jacobi must build");
let resident = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend)
.expect("SAE structure must yield a resident operator");
let resident_jac =
JacobiPreconditioner::build_scalar_jacobi_resident(&sys, ridge_beta, &resident)
.expect("resident scalar Jacobi must build");
let k = sys.k;
let r = Array1::from_iter((0..k).map(|a| 0.3 * ((a as f64) * 0.021).sin() + 0.07));
let out_generic = generic.apply(&r);
let out_resident = resident_jac.apply(&r);
let scale = out_generic
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for a in 0..k {
let rel = (out_resident[a] - out_generic[a]).abs() / scale;
assert!(
rel < 1e-9,
"resident vs generic SAE scalar Jacobi must agree at index {a}: \
{} vs {} (rel {rel:e})",
out_resident[a],
out_generic[a]
);
}
}
#[test]
pub(crate) fn resident_scalar_jacobi_col_dot_hoist_bit_identical() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64;
let q = 4usize;
let p = 5usize;
let n_atoms = 20usize;
let m_active = 4usize; let (sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let k = sys.k;
let resident = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend)
.expect("SAE structure must yield a resident operator");
let mut diag_ref = Array1::<f64>::zeros(k);
{
let slice = diag_ref.as_slice_mut().unwrap();
sys.penalty_diagonal_add(slice);
}
for a in 0..k {
diag_ref[a] += ridge_beta;
}
for row in 0..resident.rows.len() {
let rf = &resident.rows[row];
let di = rf.di;
if di == 0 {
continue;
}
let support = &resident.a_phi[row];
let l_i = &resident.local_jac[row];
for &(beta_base, phi) in support {
if phi == 0.0 {
continue;
}
let phi2 = phi * phi;
for j in 0..p {
let mut col_dot = 0.0_f64;
for r in 0..di {
let idx = r * p + j;
col_dot += l_i[idx] * rf.y[idx];
}
diag_ref[beta_base + j] -= phi2 * col_dot;
}
}
}
let r = Array1::from_iter((0..k).map(|a| 0.4 * ((a as f64) * 0.013).cos() + 0.06));
let one_thread = rayon::ThreadPoolBuilder::new()
.num_threads(1)
.build()
.expect("one-thread pool");
let out_resident = one_thread.install(|| {
JacobiPreconditioner::build_scalar_jacobi_resident(&sys, ridge_beta, &resident)
.expect("resident scalar Jacobi")
.apply(&r)
});
let scale = (0..k).fold(1.0_f64, |m, a| m.max((r[a] / diag_ref[a]).abs()));
let mut max_rel = 0.0_f64;
for a in 0..k {
let want = r[a] / diag_ref[a];
max_rel = max_rel.max((out_resident[a] - want).abs() / scale);
}
assert!(
max_rel < 1e-12,
"col-dot hoist must match inner-recompute to reassociation error \
(rel {max_rel:e})"
);
}
#[test]
pub(crate) fn parallel_resident_scalar_jacobi_deterministic_and_matches_serial() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64;
let q = 4usize;
let p = 5usize;
let n_atoms = 20usize;
let m_active = 4usize;
let (sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let resident = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend)
.expect("SAE structure must yield a resident operator");
let ridge_beta = 1e-6;
let k = sys.k;
let r = Array1::from_iter((0..k).map(|a| 0.3 * ((a as f64) * 0.019).sin() + 0.05));
let par_a = JacobiPreconditioner::build_scalar_jacobi_resident(&sys, ridge_beta, &resident)
.expect("resident scalar Jacobi a");
let par_b = JacobiPreconditioner::build_scalar_jacobi_resident(&sys, ridge_beta, &resident)
.expect("resident scalar Jacobi b");
let out_a = par_a.apply(&r);
let out_b = par_b.apply(&r);
for a in 0..k {
assert_eq!(
out_a[a].to_bits(),
out_b[a].to_bits(),
"parallel resident scalar Jacobi must apply deterministically at {a}"
);
}
let one_thread = rayon::ThreadPoolBuilder::new()
.num_threads(1)
.build()
.expect("one-thread pool");
let out_serial = one_thread.install(|| {
JacobiPreconditioner::build_scalar_jacobi_resident(&sys, ridge_beta, &resident)
.expect("serial resident scalar Jacobi")
.apply(&r)
});
let scale = out_serial
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for a in 0..k {
let rel = (out_a[a] - out_serial[a]).abs() / scale;
assert!(
rel < 1e-12,
"parallel chunk-ordered fold must match the serial subtraction to \
reassociation at {a}: {} vs {} (rel {rel:e})",
out_a[a],
out_serial[a]
);
}
}
#[test]
pub(crate) fn resident_block_jacobi_deterministic_and_matches_generic() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64;
let q = 3usize;
let p = 6usize;
let n_atoms = 18usize;
let m_active = 4usize;
let (mut sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let offsets: Vec<std::ops::Range<usize>> =
(0..n_atoms).map(|atom| atom * p..(atom + 1) * p).collect();
sys.set_block_offsets(offsets.into());
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let r = Array1::from_iter((0..sys.k).map(|a| 0.3 * ((a as f64) * 0.017).sin() + 0.08));
let generic =
JacobiPreconditioner::build_block_jacobi(&sys, &htt_factors, ridge_beta, &backend)
.expect("generic block Jacobi must build");
let resident = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend)
.expect("SAE structure must yield a resident operator");
let resident_a = JacobiPreconditioner::build_block_jacobi_resident(&sys, ridge_beta, &resident)
.expect("resident block Jacobi a");
let resident_b = JacobiPreconditioner::build_block_jacobi_resident(&sys, ridge_beta, &resident)
.expect("resident block Jacobi b");
let out_generic = generic.apply(&r);
let out_a = resident_a.apply(&r);
let out_b = resident_b.apply(&r);
for a in 0..sys.k {
assert_eq!(
out_a[a].to_bits(),
out_b[a].to_bits(),
"resident block Jacobi must apply deterministically at {a}"
);
}
let scale = out_generic
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for a in 0..sys.k {
let rel = (out_a[a] - out_generic[a]).abs() / scale;
assert!(
rel < 1e-10,
"resident vs generic block Jacobi must agree at index {a}: \
{} vs {} (rel {rel:e})",
out_a[a],
out_generic[a]
);
}
}
#[test]
pub(crate) fn factored_residency_matches_dense_g_block() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 40;
let q = 3usize;
let p = 7usize;
let n_atoms = 24usize;
let m_active = 4usize;
let (sys, _a_phi, _jac) = sae_structured_system(n, q, p, n_atoms, m_active);
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, q, false)
.expect("SPD per-row blocks must factor");
let resident = SaeResidentReducedSchur::build(&sys, &htt_factors, &backend)
.expect("SAE structure must yield a resident operator");
for row in 0..n {
let rf = &resident.rows[row];
if rf.di == 0 {
continue;
}
let di = rf.di;
let l_i = &resident.local_jac[row];
let l = ArrayView2::from_shape((di, p), l_i.as_slice()).unwrap();
let y = ArrayView2::from_shape((di, p), &rf.y).unwrap();
let g_dense = l.t().dot(&y);
let g_vec: Vec<f64> = (0..p)
.map(|j| 0.4 * ((row + j) as f64 * 0.11).sin() - 0.07)
.collect();
let mut prod_ref = vec![0.0_f64; p];
for r in 0..p {
let mut s = 0.0;
for c in 0..p {
s += g_dense[(r, c)] * g_vec[c];
}
prod_ref[r] = s;
}
let mut w = vec![0.0_f64; di];
for r in 0..di {
let yrow = &rf.y[r * p..r * p + p];
w[r] = (0..p).map(|c| yrow[c] * g_vec[c]).sum();
}
let mut prod = vec![0.0_f64; p];
for r in 0..di {
let lrow = &l_i[r * p..r * p + p];
for j in 0..p {
prod[j] += lrow[j] * w[r];
}
}
let scale = prod_ref
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
for j in 0..p {
let rel = (prod[j] - prod_ref[j]).abs() / scale;
assert!(
rel < 1e-10,
"factored G_i apply must match dense G_i at row {row} idx {j}: \
{} vs {} (rel {rel:e})",
prod[j],
prod_ref[j]
);
}
}
let factored_entries: usize = resident
.rows
.iter()
.enumerate()
.map(|(row, r)| resident.local_jac[row].len() + r.y.len())
.sum();
let dense_entries: usize = resident.rows.iter().filter(|r| r.di > 0).count() * p * p;
assert!(
factored_entries < dense_entries,
"factored residency must store fewer entries than the dense p×p form \
({factored_entries} vs {dense_entries})"
);
let data = sys
.device_sae_pcg
.as_ref()
.expect("structured SAE system must carry device_sae_pcg");
assert!(
std::sync::Arc::ptr_eq(&resident.local_jac, &data.local_jac),
"resident operator must SHARE the assembler's local_jac slab (#1033), not copy it"
);
}
#[test]
pub(crate) fn parallel_streaming_assembly_deterministic_and_matches_sequential() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64; let d = 4usize;
let k = 24usize;
let sys = dense_direct_system(n, d, k);
let options = ArrowSolveOptions::direct();
let mut s_a = StreamingArrowSchur::from_system(&sys, n); let (dt_a, db_a, _) = s_a.solve(0.0, 0.0, &options).expect("parallel solve a");
let mut s_b = StreamingArrowSchur::from_system(&sys, n);
let (dt_b, db_b, _) = s_b.solve(0.0, 0.0, &options).expect("parallel solve b");
for j in 0..k {
assert_eq!(
db_a[j].to_bits(),
db_b[j].to_bits(),
"streaming Δβ must be deterministic run-to-run at {j}"
);
}
for i in 0..dt_a.len() {
assert_eq!(
dt_a[i].to_bits(),
dt_b[i].to_bits(),
"streaming Δt must be deterministic run-to-run at {i}"
);
}
let mut par = StreamingArrowSchur::from_system(&sys, n);
par.reset_accumulator(0.0).expect("reset par");
par.accumulate_chunk(0, n, 0.0, ArrowSolverMode::Direct)
.expect("parallel accumulate");
let (s_par, rhs_par) = par.take_accumulators();
let mut ser = StreamingArrowSchur::from_system(&sys, 8);
ser.reset_accumulator(0.0).expect("reset ser");
for start in (0..n).step_by(8) {
let end = (start + 8).min(n);
assert!(end - start < SCHUR_MATVEC_PARALLEL_ROW_MIN); ser.accumulate_chunk(start, end, 0.0, ArrowSolverMode::Direct)
.expect("serial accumulate");
}
let (s_ser, rhs_ser) = ser.take_accumulators();
let s_scale = s_ser.iter().fold(0.0_f64, |m, &v| m.max(v.abs())).max(1.0);
let mut s_max = 0.0_f64;
for (a, b) in s_par.iter().zip(s_ser.iter()) {
s_max = s_max.max((a - b).abs());
}
assert!(
s_max / s_scale < 1e-12,
"parallel vs serial reduced-Schur block diverges by rel {:e}",
s_max / s_scale
);
let r_scale = rhs_ser
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
let mut r_max = 0.0_f64;
for (a, b) in rhs_par.iter().zip(rhs_ser.iter()) {
r_max = r_max.max((a - b).abs());
}
assert!(
r_max / r_scale < 1e-12,
"parallel vs serial reduced-RHS diverges by rel {:e}",
r_max / r_scale
);
let s_bs = StreamingArrowSchur::from_system(&sys, n);
let delta_t = s_bs
.back_substitute(0.0, db_a.view())
.expect("parallel back_substitute");
let backend = CpuBatchedBlockSolver;
let total_len: usize = (0..n).map(|i| sys.rows[i].htt.nrows()).sum();
let mut ref_dt = Array1::<f64>::zeros(total_len);
let mut base = 0usize;
for i in 0..n {
let row = &sys.rows[i];
let di = row.htt.nrows();
let factor = factor_one_row(row, 0.0, di, i, false).expect("factor row");
let mut rhs = Array1::<f64>::zeros(di);
for c in 0..di {
let mut acc = 0.0_f64;
for a in 0..k {
acc += row.htbeta[[c, a]] * db_a[a];
}
rhs[c] = row.gt[c] + acc;
}
let dt_i = backend.solve_block_vector(factor.view(), rhs.view());
for c in 0..di {
ref_dt[base + c] = -dt_i[c];
}
base += di;
}
for i in 0..total_len {
assert_eq!(
delta_t[i].to_bits(),
ref_dt[i].to_bits(),
"parallel back_substitute must match sequential bit-for-bit at {i}"
);
}
}
#[test]
pub(crate) fn parallel_block_jacobi_deterministic_and_matches_sequential() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64; let d = 4usize;
let k = 24usize;
let mut sys = dense_direct_system(n, d, k);
let offsets: Vec<std::ops::Range<usize>> = (0..k).step_by(6).map(|s| s..(s + 6)).collect();
sys.set_block_offsets(offsets.into());
let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, d, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let r = Array1::from_iter((0..k).map(|a| 0.4 * ((a as f64) * 0.019).cos() - 0.05));
let p_a = JacobiPreconditioner::build_block_jacobi(&sys, &htt_factors, ridge_beta, &backend)
.expect("block Jacobi build a");
let p_b = JacobiPreconditioner::build_block_jacobi(&sys, &htt_factors, ridge_beta, &backend)
.expect("block Jacobi build b");
let out_a = p_a.apply(&r);
let out_b = p_b.apply(&r);
for a in 0..k {
assert_eq!(
out_a[a].to_bits(),
out_b[a].to_bits(),
"parallel block Jacobi must apply deterministically at {a}"
);
}
let mut ref_blocks: Vec<Array2<f64>> = Vec::new();
for range in sys.block_offsets.iter() {
let b = range.end - range.start;
let mut blk = Array2::<f64>::zeros((b, b));
for bi in 0..b {
blk[[bi, bi]] = sys.hbb[[range.start + bi, range.start + bi]] + ridge_beta;
}
ref_blocks.push(blk);
}
for i in 0..n {
let row = &sys.rows[i];
let di = row.htt.nrows();
let factor = factor_one_row(row, 0.0, di, i, false).expect("factor row");
for (bidx, range) in sys.block_offsets.iter().enumerate() {
let b = range.end - range.start;
let mut solved_cols = Array2::<f64>::zeros((di, b));
for bj in 0..b {
let gj = range.start + bj;
let rhs = row.htbeta.column(gj).to_owned();
let solved = backend.solve_block_vector(factor.view(), rhs.view());
for c in 0..di {
solved_cols[[c, bj]] = solved[c];
}
}
for bi in 0..b {
let gi = range.start + bi;
for bj in 0..b {
let mut acc = 0.0;
for c in 0..di {
acc += row.htbeta[[c, gi]] * solved_cols[[c, bj]];
}
ref_blocks[bidx][[bi, bj]] -= acc;
}
}
}
}
let mut ref_out = Array1::<f64>::zeros(k);
for (bidx, range) in sys.block_offsets.iter().enumerate() {
let b = range.end - range.start;
let llt = {
use faer::Side;
let view = crate::linalg::faer_ndarray::FaerArrayView::new(&ref_blocks[bidx]);
crate::linalg::faer_ndarray::FaerLlt::new(view.as_ref(), Side::Lower)
.expect("ref block must be PD")
};
let rhs = Array1::from_iter((0..b).map(|bi| r[range.start + bi]));
use faer::linalg::solvers::Solve;
let stride = rhs.strides()[0];
let len = rhs.len();
let rhs_mat = unsafe { faer::MatRef::from_raw_parts(rhs.as_ptr(), len, 1, stride, 0) };
let solved = llt.solve(rhs_mat);
for bi in 0..b {
ref_out[range.start + bi] = solved[(bi, 0)];
}
}
let scale = ref_out
.iter()
.fold(0.0_f64, |m, &v| m.max(v.abs()))
.max(1.0);
let mut max_abs = 0.0_f64;
for a in 0..k {
max_abs = max_abs.max((out_a[a] - ref_out[a]).abs());
}
assert!(
max_abs / scale < 1e-10,
"parallel block Jacobi apply diverges from sequential by rel {:e}",
max_abs / scale
);
}
#[test]
pub(crate) fn parallel_scalar_jacobi_deterministic() {
let n = SCHUR_MATVEC_PARALLEL_ROW_MIN + 64;
let d = 4usize;
let k = 24usize;
let sys = dense_direct_system(n, d, k); let backend = CpuBatchedBlockSolver;
let htt_factors = backend
.factor_blocks(&sys.rows, 0.0, d, false)
.expect("SPD per-row blocks must factor");
let ridge_beta = 1e-6;
let r = Array1::from_iter((0..k).map(|a| 0.3 * ((a as f64) * 0.023).sin() + 0.11));
let p_a = JacobiPreconditioner::build_scalar_jacobi(&sys, &htt_factors, ridge_beta, &backend)
.expect("scalar Jacobi a");
let p_b = JacobiPreconditioner::build_scalar_jacobi(&sys, &htt_factors, ridge_beta, &backend)
.expect("scalar Jacobi b");
let out_a = p_a.apply(&r);
let out_b = p_b.apply(&r);
for a in 0..k {
assert_eq!(
out_a[a].to_bits(),
out_b[a].to_bits(),
"parallel scalar Jacobi must apply deterministically at {a}"
);
}
}
#[test]
pub(crate) fn arrow_operator_infinity_norm_matches_dense_assembly() {
let n = 12usize;
let d = 3usize;
let k = 7usize;
let sys = dense_direct_system(n, d, k);
let ridge_t = 0.3_f64;
let ridge_beta = 0.2_f64;
let got = arrow_operator_infinity_norm(&sys, ridge_t, ridge_beta).expect("inf-norm");
let total = n * d + k;
let mut full = Array2::<f64>::zeros((total, total));
let hbb = sys.effective_penalty_op().to_dense();
for i in 0..n {
let base = i * d;
let row = &sys.rows[i];
let htbeta = sys_htbeta_materialize_row(&sys, i, row).expect("materialize");
for a in 0..d {
for b in 0..d {
full[[base + a, base + b]] = row.htt[[a, b]];
}
full[[base + a, base + a]] += ridge_t;
for bc in 0..k {
let v = htbeta[[a, bc]];
full[[base + a, n * d + bc]] = v; full[[n * d + bc, base + a]] = v; }
}
}
for br in 0..k {
for bc in 0..k {
full[[n * d + br, n * d + bc]] += hbb[[br, bc]];
}
full[[n * d + br, n * d + br]] += ridge_beta;
}
let mut want = 0.0_f64;
for r in 0..total {
let mut s = 0.0_f64;
for c in 0..total {
s += full[[r, c]].abs();
}
want = want.max(s);
}
let scale = want.max(1.0);
assert!(
(got - want).abs() / scale < 1e-12,
"arrow inf-norm {got} != dense assembly {want} (rel {:e})",
(got - want).abs() / scale
);
}