use crate::linalg::amd::{inv_permute_vec, permute_sym_upper, permute_vec};
use crate::linalg::ldl::LdlError;
use crate::sparse::CscMatrix;
use std::time::Instant;
use twofloat::TwoFloat;
type LdlDdParts = (Vec<usize>, Vec<TwoFloat>, Vec<TwoFloat>);
const DELTA: f64 = 1e-8;
const EPSILON: f64 = 1e-13;
pub struct LdlFactorizationDdAmd {
n: usize,
perm: Vec<usize>,
l_col_ptr: Vec<usize>,
l_row_ind: Vec<usize>,
l_values: Vec<TwoFloat>,
d: Vec<TwoFloat>,
}
impl LdlFactorizationDdAmd {
pub fn nnz_l(&self) -> usize {
self.l_values.len()
}
pub fn solve(&self, rhs: &[f64], sol: &mut [f64]) {
let n = self.n;
let b_p = permute_vec(rhs, &self.perm);
let mut x: Vec<TwoFloat> = b_p.iter().map(|&v| TwoFloat::from(v)).collect();
for j in 0..n {
let yj = x[j];
for p in self.l_col_ptr[j]..self.l_col_ptr[j + 1] {
let i = self.l_row_ind[p];
x[i] -= self.l_values[p] * yj;
}
}
for j in 0..n {
x[j] /= self.d[j];
}
for j in (0..n).rev() {
let mut sum = TwoFloat::from(0.0);
for p in self.l_col_ptr[j]..self.l_col_ptr[j + 1] {
let i = self.l_row_ind[p];
sum += self.l_values[p] * x[i];
}
x[j] -= sum;
}
let x_f64: Vec<f64> = x.iter().map(|&v| f64::from(v)).collect();
let out = inv_permute_vec(&x_f64, &self.perm);
sol.copy_from_slice(&out);
}
}
fn ldl_symbolic(
n: usize,
a_col_ptr: &[usize],
a_row_ind: &[usize],
) -> (Vec<isize>, Vec<usize>) {
let mut parent = vec![-1isize; n];
let mut flag = vec![usize::MAX; n];
let mut lnz = vec![0usize; n];
for k in 0..n {
flag[k] = k;
for p in a_col_ptr[k]..a_col_ptr[k + 1] {
let i_orig = a_row_ind[p];
if i_orig >= k {
continue;
}
let mut i = i_orig;
loop {
if flag[i] == k {
break;
}
if parent[i] == -1 {
parent[i] = k as isize;
}
lnz[i] += 1;
flag[i] = k;
i = parent[i] as usize;
}
}
}
let mut l_col_ptr = vec![0usize; n + 1];
for k in 0..n {
l_col_ptr[k + 1] = l_col_ptr[k] + lnz[k];
}
(parent, l_col_ptr)
}
fn ldl_numeric_dd(
n: usize,
a_col_ptr: &[usize],
a_row_ind: &[usize],
a_values: &[f64],
parent: &[isize],
l_col_ptr: &[usize],
signs: Option<&[i8]>,
) -> Result<LdlDdParts, LdlError> {
let l_nnz_total = l_col_ptr[n];
let mut l_row_ind = vec![0usize; l_nnz_total];
let mut l_values = vec![TwoFloat::from(0.0); l_nnz_total];
let mut d = vec![TwoFloat::from(0.0); n];
let mut y = vec![TwoFloat::from(0.0); n]; let mut pattern = vec![0usize; n]; let mut flag = vec![usize::MAX; n];
let mut lnz_running = vec![0usize; n];
for k in 0..n {
let mut top = n;
flag[k] = k;
debug_assert_eq!(f64::from(y[k]), 0.0);
for p in a_col_ptr[k]..a_col_ptr[k + 1] {
let i_orig = a_row_ind[p];
if i_orig > k {
continue; }
y[i_orig] += a_values[p];
let mut i = i_orig;
let mut len = 0usize;
loop {
if flag[i] == k {
break;
}
pattern[len] = i;
len += 1;
flag[i] = k;
if parent[i] == -1 {
break; }
i = parent[i] as usize;
}
while len > 0 {
len -= 1;
top -= 1;
pattern[top] = pattern[len];
}
}
let mut dk = y[k];
y[k] = TwoFloat::from(0.0);
let mut t = top;
while t < n {
let i = pattern[t];
let yi = y[i];
y[i] = TwoFloat::from(0.0);
let cs = l_col_ptr[i];
let ce = cs + lnz_running[i];
for p in cs..ce {
let row = l_row_ind[p];
y[row] -= l_values[p] * yi;
}
let l_ki = yi / d[i];
dk -= l_ki * yi;
let pos = l_col_ptr[i] + lnz_running[i];
l_row_ind[pos] = k;
l_values[pos] = l_ki;
lnz_running[i] += 1;
t += 1;
}
let dk_f64 = f64::from(dk);
if let Some(signs) = signs {
let s = signs[k];
if s >= 0 && dk_f64 < EPSILON {
dk = TwoFloat::from(DELTA);
} else if s < 0 && dk_f64 > -EPSILON {
dk = TwoFloat::from(-DELTA);
}
} else if dk_f64.abs() < EPSILON {
return Err(LdlError::SingularOrIndefinite);
}
d[k] = dk;
}
Ok((l_row_ind, l_values, d))
}
pub fn factorize_quasidefinite_with_cached_perm_dd(
mat: &CscMatrix,
perm: &[usize],
deadline: Option<Instant>,
) -> Result<LdlFactorizationDdAmd, LdlError> {
if let Some(d) = deadline {
if Instant::now() >= d {
return Err(LdlError::DeadlineExceeded);
}
}
let n = mat.nrows;
let (col_ptr, row_ind, values) =
permute_sym_upper(n, &mat.col_ptr, &mat.row_ind, &mat.values, perm);
let (parent, l_col_ptr) = ldl_symbolic(n, &col_ptr, &row_ind);
if let Some(d) = deadline {
if Instant::now() >= d {
return Err(LdlError::DeadlineExceeded);
}
}
let signs = crate::linalg::ldl::extract_diagonal_signs(n, &col_ptr, &row_ind, &values);
let (l_row_ind, l_values, d_vec) =
ldl_numeric_dd(n, &col_ptr, &row_ind, &values, &parent, &l_col_ptr, Some(&signs))?;
Ok(LdlFactorizationDdAmd {
n,
perm: perm.to_vec(),
l_col_ptr,
l_row_ind,
l_values,
d: d_vec,
})
}
#[cfg(test)]
#[allow(clippy::print_stdout, clippy::print_stderr)]
mod tests {
use super::*;
use crate::sparse::CscMatrix;
fn upper_tri_csc(n: usize, entries: &[(usize, usize, f64)]) -> CscMatrix {
let mut cols: Vec<Vec<(usize, f64)>> = vec![vec![]; n];
for &(row, col, val) in entries {
assert!(row <= col, "upper triangle only: row={row} col={col}");
cols[col].push((row, val));
}
for c in cols.iter_mut() {
c.sort_by_key(|&(r, _)| r);
}
let nnz: usize = cols.iter().map(|c| c.len()).sum();
let mut col_ptr = vec![0usize; n + 1];
for j in 0..n {
col_ptr[j + 1] = col_ptr[j] + cols[j].len();
}
let mut row_ind = vec![0usize; nnz];
let mut values = vec![0.0f64; nnz];
for j in 0..n {
let s = col_ptr[j];
for (k, &(r, v)) in cols[j].iter().enumerate() {
row_ind[s + k] = r;
values[s + k] = v;
}
}
CscMatrix {
col_ptr,
row_ind,
values,
nrows: n,
ncols: n,
}
}
fn residual_inf(full: &[(usize, usize, f64)], x: &[f64], b: &[f64]) -> f64 {
let n = b.len();
let mut r = vec![0.0f64; n];
for &(row, col, val) in full {
r[row] += val * x[col];
}
r.iter()
.zip(b.iter())
.fold(0.0f64, |a, (&ri, &bi)| a.max((ri - bi).abs()))
}
#[test]
fn dd_ldl_pd_3x3() {
let mat = upper_tri_csc(
3,
&[
(0, 0, 4.0),
(0, 1, 1.0),
(1, 1, 3.0),
(1, 2, 2.0),
(2, 2, 5.0),
],
);
let fac = factorize_quasidefinite_with_cached_perm_dd(&mat, &[0, 1, 2], None).unwrap();
let b = [1.0f64, 2.0, 3.0];
let mut x = [0.0f64; 3];
fac.solve(&b, &mut x);
let full: &[(usize, usize, f64)] = &[
(0, 0, 4.0),
(0, 1, 1.0),
(1, 0, 1.0),
(1, 1, 3.0),
(1, 2, 2.0),
(2, 1, 2.0),
(2, 2, 5.0),
];
assert!(residual_inf(full, &x, &b) < 1e-14);
}
#[test]
fn dd_ldl_quasidefinite_2x2() {
let mat = upper_tri_csc(2, &[(0, 0, 3.0), (0, 1, 1.0), (1, 1, -2.0)]);
let fac = factorize_quasidefinite_with_cached_perm_dd(&mat, &[0, 1], None).unwrap();
let b = [1.0f64, 2.0];
let mut x = [0.0f64; 2];
fac.solve(&b, &mut x);
let full: &[(usize, usize, f64)] =
&[(0, 0, 3.0), (0, 1, 1.0), (1, 0, 1.0), (1, 1, -2.0)];
assert!(residual_inf(full, &x, &b) < 1e-14);
}
#[test]
fn dd_ldl_quasidefinite_5x5_with_amd() {
let delta = 1e-4f64;
let mat = upper_tri_csc(
5,
&[
(0, 0, 1.0 + delta),
(1, 1, 2.0 + delta),
(2, 2, -delta),
(3, 3, -delta),
(4, 4, -delta),
(0, 2, 1.0),
(1, 3, 1.0),
(0, 4, 1.0),
(1, 4, 1.0),
],
);
let fac = factorize_quasidefinite_with_cached_perm_dd(&mat, &[0, 1, 2, 3, 4], None).unwrap();
let b = [1.0f64, 2.0, 0.5, -0.5, 1.0];
let mut x = [0.0f64; 5];
fac.solve(&b, &mut x);
let full: &[(usize, usize, f64)] = &[
(0, 0, 1.0 + delta),
(1, 1, 2.0 + delta),
(2, 2, -delta),
(3, 3, -delta),
(4, 4, -delta),
(0, 2, 1.0),
(2, 0, 1.0),
(1, 3, 1.0),
(3, 1, 1.0),
(0, 4, 1.0),
(4, 0, 1.0),
(1, 4, 1.0),
(4, 1, 1.0),
];
assert!(residual_inf(full, &x, &b) < 1e-12);
}
#[test]
fn dd_ldl_outperforms_f64_for_ill_conditioned() {
use crate::linalg::ldl::factorize_quasidefinite_with_cached_perm_budget_par;
let n = 4;
let mut entries = Vec::new();
for i in 0..n {
for j in i..n {
entries.push((i, j, 1.0 / (i + j + 1) as f64));
}
}
let mat = upper_tri_csc(n, &entries);
let perm: Vec<usize> = (0..n).collect();
let fac_dd =
factorize_quasidefinite_with_cached_perm_dd(&mat, &perm, None).expect("dd factorize");
let fac_f64 =
factorize_quasidefinite_with_cached_perm_budget_par(&mat, &perm, None, None, faer::Par::Seq).expect("f64 factorize");
let b = vec![1.0f64; n];
let mut x_dd = vec![0.0f64; n];
let mut x_f64 = vec![0.0f64; n];
fac_dd.solve(&b, &mut x_dd);
fac_f64.solve(&b, &mut x_f64);
let full: Vec<(usize, usize, f64)> = {
let mut v = Vec::new();
for i in 0..n {
for j in 0..n {
v.push((i, j, 1.0 / (i + j + 1) as f64));
}
}
v
};
let r_dd = residual_inf(&full, &x_dd, &b);
let r_f64 = residual_inf(&full, &x_f64, &b);
assert!(r_dd <= r_f64 * 1.01 + 1e-15, "DD residual {r_dd:.3e} should not exceed f64 {r_f64:.3e}");
assert!(r_dd < 1e-13, "DD residual {r_dd:.3e}");
eprintln!("Hilbert n=4: f64 residual={r_f64:.3e}, DD residual={r_dd:.3e}");
}
#[test]
fn dd_ldl_diagonal() {
let n = 5;
let entries: Vec<(usize, usize, f64)> = (0..n).map(|i| (i, i, (i + 1) as f64)).collect();
let mat = upper_tri_csc(n, &entries);
let fac =
factorize_quasidefinite_with_cached_perm_dd(&mat, &(0..n).collect::<Vec<_>>(), None)
.unwrap();
let b: Vec<f64> = (0..n).map(|i| (i + 1) as f64).collect();
let mut x = vec![0.0f64; n];
fac.solve(&b, &mut x);
for i in 0..n {
assert!((x[i] - 1.0).abs() < 1e-15, "x[{i}]={}", x[i]);
}
}
#[test]
fn dd_ldl_etree_simple_chain() {
let n = 4;
let mut entries = Vec::new();
for i in 0..n {
entries.push((i, i, 4.0));
}
for i in 0..n - 1 {
entries.push((i, i + 1, 1.0));
}
let mat = upper_tri_csc(n, &entries);
let (parent, l_col_ptr) = ldl_symbolic(n, &mat.col_ptr, &mat.row_ind);
assert_eq!(parent, vec![1, 2, 3, -1]);
assert_eq!(l_col_ptr, vec![0, 1, 2, 3, 3]);
}
}