use crate::solver::EPS;
use crate::sparse::{Error, Perm, ScatteredVec, SparseMat, TriangleMat};
use std::cmp::Ordering;
#[derive(Clone)]
pub struct LUFactors {
lower: TriangleMat,
upper: TriangleMat,
row_perm: Option<Perm>,
col_perm: Option<Perm>,
}
#[derive(Clone, Debug)]
pub struct ScratchSpace {
rhs: ScatteredVec,
dense_rhs: Vec<f64>,
mark_nonzero: MarkNonzero,
}
impl ScratchSpace {
pub fn with_capacity(n: usize) -> ScratchSpace {
ScratchSpace {
rhs: ScatteredVec::empty(n),
dense_rhs: vec![0.0; n],
mark_nonzero: MarkNonzero::with_capacity(n),
}
}
pub(crate) fn clear_sparse(&mut self, size: usize) {
self.rhs.clear_and_resize(size);
self.mark_nonzero.clear_and_resize(size);
}
}
impl std::fmt::Debug for LUFactors {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(f, "L:\n{:?}", self.lower)?;
writeln!(f, "U:\n{:?}", self.upper)?;
writeln!(
f,
"row_perm.new2orig: {:?}",
self.row_perm.as_ref().map(|p| &p.new2orig)
)?;
writeln!(
f,
"col_perm.new2orig: {:?}",
self.col_perm.as_ref().map(|p| &p.new2orig)
)?;
Ok(())
}
}
impl LUFactors {
pub fn nnz(&self) -> usize {
self.lower.nondiag.nnz() + self.upper.nondiag.nnz() + self.lower.cols()
}
pub fn solve_dense(&self, rhs: &mut [f64], scratch: &mut ScratchSpace) {
scratch.dense_rhs.resize(rhs.len(), 0.0);
if let Some(row_perm) = &self.row_perm {
for (i, rhs_el) in rhs.iter().enumerate() {
scratch.dense_rhs[row_perm.orig2new[i]] = *rhs_el;
}
} else {
scratch.dense_rhs.copy_from_slice(rhs);
}
tri_solve_dense(&self.lower, Triangle::Lower, &mut scratch.dense_rhs);
tri_solve_dense(&self.upper, Triangle::Upper, &mut scratch.dense_rhs);
if let Some(col_perm) = &self.col_perm {
for i in 0..rhs.len() {
rhs[col_perm.new2orig[i]] = scratch.dense_rhs[i];
}
} else {
rhs.copy_from_slice(&scratch.dense_rhs);
}
}
pub fn solve(&self, rhs: &mut ScatteredVec, scratch: &mut ScratchSpace) {
if let Some(row_perm) = &self.row_perm {
scratch.rhs.clear();
for &i in &rhs.nonzero {
let new_i = row_perm.orig2new[i];
scratch.rhs.nonzero.push(new_i);
scratch.rhs.is_nonzero[new_i] = true;
scratch.rhs.values[new_i] = rhs.values[i];
}
} else {
std::mem::swap(&mut scratch.rhs, rhs);
}
tri_solve_sparse(&self.lower, scratch);
tri_solve_sparse(&self.upper, scratch);
if let Some(col_perm) = &self.col_perm {
rhs.clear();
for &i in &scratch.rhs.nonzero {
let new_i = col_perm.new2orig[i];
rhs.nonzero.push(new_i);
rhs.is_nonzero[new_i] = true;
rhs.values[new_i] = scratch.rhs.values[i];
}
} else {
std::mem::swap(rhs, &mut scratch.rhs);
}
}
pub fn transpose(&self) -> LUFactors {
LUFactors {
lower: self.upper.transpose(),
upper: self.lower.transpose(),
row_perm: self.col_perm.clone(),
col_perm: self.row_perm.clone(),
}
}
}
pub fn lu_factorize<'a>(
size: usize,
get_col: impl Fn(usize) -> (&'a [usize], &'a [f64]),
stability_coeff: f64,
scratch: &mut ScratchSpace,
) -> Result<LUFactors, Error> {
let mat_nnz = (0..size).map(|c| get_col(c).0.len()).sum::<usize>();
trace!(
"lu_factorize: starting, matrix size: {}, nnz: {} (excess: {})",
size,
mat_nnz,
mat_nnz - size,
);
let col_perm = super::ordering::order_simple(size, |c| get_col(c).0);
let mut orig_row2elt_count = vec![0; size];
for col_rows in (0..size).map(|c| get_col(c).0) {
for &orig_r in col_rows {
orig_row2elt_count[orig_r] += 1;
}
}
scratch.clear_sparse(size);
let mut lower = SparseMat::new(size);
let mut upper = SparseMat::new(size);
let mut upper_diag = Vec::with_capacity(size);
let mut new2orig_row = (0..size).collect::<Vec<_>>();
let mut orig2new_row = new2orig_row.clone();
for i_col in 0..size {
let mat_col = get_col(col_perm.new2orig[i_col]);
scratch.rhs.set(mat_col.0.iter().copied().zip(mat_col.1));
scratch.mark_nonzero.run(
&mut scratch.rhs,
|new_i| lower.col_rows(new_i),
|new_i| new_i < i_col,
|orig_r| orig2new_row[orig_r],
);
for &orig_i in scratch.mark_nonzero.visited.iter().rev() {
let new_i = orig2new_row[orig_i];
if new_i < i_col {
let x_val = scratch.rhs.values[orig_i];
for (orig_r, coeff) in lower.col_iter(new_i) {
scratch.rhs.values[orig_r] -= x_val * coeff;
}
}
}
let pivot_orig_r = {
let mut max_abs = 0.0;
for &orig_r in &scratch.rhs.nonzero {
if orig2new_row[orig_r] < i_col {
continue;
}
let abs = f64::abs(scratch.rhs.values[orig_r]);
if abs > max_abs {
max_abs = abs;
}
}
if max_abs < EPS {
return Err(Error::SingularMatrix);
}
assert!(max_abs.is_normal());
let mut best_orig_r = None;
let mut best_elt_count = None;
for &orig_r in &scratch.rhs.nonzero {
if orig2new_row[orig_r] < i_col {
continue;
}
if f64::abs(scratch.rhs.values[orig_r]) >= stability_coeff * max_abs {
let elt_count = orig_row2elt_count[orig_r];
if best_elt_count.is_none() || best_elt_count.unwrap() > elt_count {
best_orig_r = Some(orig_r);
best_elt_count = Some(elt_count);
}
}
}
best_orig_r.unwrap()
};
let pivot_val = scratch.rhs.values[pivot_orig_r];
{
let row = i_col;
let orig_row = new2orig_row[row];
let pivot_row = orig2new_row[pivot_orig_r];
new2orig_row.swap(row, pivot_row);
orig2new_row.swap(orig_row, pivot_orig_r);
}
for &orig_r in &scratch.rhs.nonzero {
let val = scratch.rhs.values[orig_r];
if val == 0.0 {
continue;
}
let new_r = orig2new_row[orig_r];
match new_r.cmp(&i_col) {
Ordering::Less => upper.push(new_r, val),
Ordering::Equal => upper_diag.push(pivot_val),
Ordering::Greater => lower.push(orig_r, val / pivot_val),
}
}
upper.seal_column();
lower.seal_column();
}
for i_col in 0..lower.cols() {
for r in lower.col_rows_mut(i_col) {
*r = orig2new_row[*r];
}
}
let lower_nnz = lower.nnz();
let upper_nnz = upper.nnz();
trace!(
"lu_factorize: done, lower nnz: {} (excess: {}), upper nnz: {} (excess: {}), additional fill-in: {}",
lower_nnz + size,
lower_nnz,
upper_nnz + size,
upper_nnz,
(lower_nnz + upper_nnz + size) as isize - mat_nnz as isize,
);
let res = LUFactors {
lower: TriangleMat {
nondiag: lower,
diag: None,
},
upper: TriangleMat {
nondiag: upper,
diag: Some(upper_diag),
},
row_perm: Some(Perm {
orig2new: orig2new_row,
new2orig: new2orig_row,
}),
col_perm: Some(col_perm),
};
Ok(res)
}
#[derive(Clone, Debug)]
struct MarkNonzero {
dfs_stack: Vec<DfsStep>,
is_visited: Vec<bool>,
visited: Vec<usize>, }
#[derive(Clone, Debug)]
struct DfsStep {
orig_i: usize,
cur_child: usize,
}
impl MarkNonzero {
fn with_capacity(n: usize) -> MarkNonzero {
MarkNonzero {
dfs_stack: Vec::with_capacity(n),
is_visited: vec![false; n],
visited: vec![],
}
}
fn clear(&mut self) {
assert!(self.dfs_stack.is_empty());
for &i in &self.visited {
self.is_visited[i] = false;
}
self.visited.clear();
}
fn clear_and_resize(&mut self, n: usize) {
self.clear();
self.dfs_stack.reserve(n);
self.is_visited.resize(n, false);
}
fn run<'a>(
&mut self,
rhs: &mut ScatteredVec,
get_children: impl Fn(usize) -> &'a [usize] + 'a,
filter: impl Fn(usize) -> bool,
orig2new_row: impl Fn(usize) -> usize,
) {
self.clear();
for &orig_r in &rhs.nonzero {
let new_r = orig2new_row(orig_r);
if !filter(new_r) {
continue;
}
if self.is_visited[orig_r] {
continue;
}
self.dfs_stack.push(DfsStep {
orig_i: orig_r,
cur_child: 0,
});
while !self.dfs_stack.is_empty() {
let cur_step = self.dfs_stack.last_mut().unwrap();
let new_i = orig2new_row(cur_step.orig_i);
let children = if filter(new_i) {
get_children(new_i)
} else {
&[]
};
if !self.is_visited[cur_step.orig_i] {
self.is_visited[cur_step.orig_i] = true;
} else {
cur_step.cur_child += 1;
}
while cur_step.cur_child < children.len() {
let child_orig_r = children[cur_step.cur_child];
if !self.is_visited[child_orig_r] {
break;
}
cur_step.cur_child += 1;
}
if cur_step.cur_child < children.len() {
let i_child = cur_step.cur_child;
self.dfs_stack.push(DfsStep {
orig_i: children[i_child],
cur_child: 0,
});
} else {
self.visited.push(cur_step.orig_i);
self.dfs_stack.pop();
}
}
}
for &i in &self.visited {
if !rhs.is_nonzero[i] {
rhs.is_nonzero[i] = true;
rhs.nonzero.push(i)
}
}
}
}
enum Triangle {
Lower,
Upper,
}
fn tri_solve_dense(tri_mat: &TriangleMat, triangle: Triangle, rhs: &mut [f64]) {
assert_eq!(tri_mat.rows(), rhs.len());
match triangle {
Triangle::Lower => {
for col in 0..tri_mat.cols() {
tri_solve_process_col(tri_mat, col, rhs);
}
}
Triangle::Upper => {
for col in (0..tri_mat.cols()).rev() {
tri_solve_process_col(tri_mat, col, rhs);
}
}
};
}
fn tri_solve_sparse(tri_mat: &TriangleMat, scratch: &mut ScratchSpace) {
assert_eq!(tri_mat.rows(), scratch.rhs.len());
scratch.mark_nonzero.run(
&mut scratch.rhs,
|col| tri_mat.nondiag.col_rows(col),
|_| true,
|orig_i| orig_i,
);
for &col in scratch.mark_nonzero.visited.iter().rev() {
tri_solve_process_col(tri_mat, col, &mut scratch.rhs.values);
}
}
fn tri_solve_process_col(tri_mat: &TriangleMat, col: usize, rhs: &mut [f64]) {
let x_val = if let Some(diag) = tri_mat.diag.as_ref() {
rhs[col] / diag[col]
} else {
rhs[col]
};
rhs[col] = x_val;
for (r, &coeff) in tri_mat.nondiag.col_iter(col) {
rhs[r] -= x_val * coeff;
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::helpers::{assert_matrix_eq, to_dense, to_sparse};
use sprs::{CsMat, CsVec, TriMat};
fn init() {
let _ = env_logger::builder().is_test(true).try_init();
}
fn mat_from_triplets(rows: usize, cols: usize, triplets: &[(usize, usize, f64)]) -> CsMat<f64> {
let mut mat = TriMat::with_capacity((rows, cols), triplets.len());
for (r, c, val) in triplets {
mat.add_triplet(*r, *c, *val);
}
mat.to_csc()
}
#[test]
fn lu_simple() {
init();
let mat = mat_from_triplets(
3,
4,
&[
(0, 1, 2.0),
(0, 0, 2.0),
(0, 2, 123.0),
(1, 2, 456.0),
(1, 3, 1.0),
(2, 1, 4.0),
(2, 0, 3.0),
(2, 2, 789.0),
(2, 3, 1.0),
],
);
let mut scratch = ScratchSpace::with_capacity(mat.rows());
let lu = lu_factorize(
mat.rows(),
|c| mat.outer_view([1, 0, 3][c]).unwrap().into_raw_storage(),
0.9,
&mut scratch,
)
.unwrap();
let lu_transp = lu.transpose();
let l_nondiag_ref = [
vec![0.0, 0.0, 0.0],
vec![0.5, 0.0, 0.0],
vec![0.0, 0.0, 0.0],
];
assert_matrix_eq(&lu.lower.nondiag.to_csmat(), &l_nondiag_ref);
assert_eq!(lu.lower.diag, None);
let u_nondiag_ref = [
vec![0.0, 3.0, 1.0],
vec![0.0, 0.0, -0.5],
vec![0.0, 0.0, 0.0],
];
let u_diag_ref = [4.0, 0.5, 1.0];
assert_matrix_eq(&lu.upper.nondiag.to_csmat(), &u_nondiag_ref);
assert_eq!(lu.upper.diag.as_ref().unwrap(), &u_diag_ref);
assert_eq!(lu.row_perm.as_ref().unwrap().new2orig, &[2, 0, 1]);
assert_eq!(lu.col_perm.as_ref().unwrap().new2orig, &[0, 1, 2]);
{
let mut rhs_dense = [6.0, 3.0, 13.0];
lu.solve_dense(&mut rhs_dense, &mut scratch);
assert_eq!(&rhs_dense, &[1.0, 2.0, 3.0]);
}
{
let mut rhs_dense_t = [14.0, 11.0, 5.0];
lu_transp.solve_dense(&mut rhs_dense_t, &mut scratch);
assert_eq!(&rhs_dense_t, &[1.0, 2.0, 3.0]);
}
{
let mut rhs = ScatteredVec::empty(3);
rhs.set(to_sparse(&[0.0, -1.0, 0.0]).iter());
lu.solve(&mut rhs, &mut scratch);
assert_eq!(to_dense(&rhs.to_csvec()), vec![1.0, -1.0, -1.0]);
}
{
let mut rhs = ScatteredVec::empty(3);
rhs.set(to_sparse(&[0.0, -1.0, 1.0]).iter());
lu_transp.solve(&mut rhs, &mut scratch);
assert_eq!(to_dense(&rhs.to_csvec()), vec![-2.0, 0.0, 1.0]);
}
}
#[test]
fn lu_singular() {
init();
let size = 3;
{
let symbolically_singular = mat_from_triplets(
size,
size,
&[(0, 0, 1.0), (1, 0, 1.0), (1, 1, 2.0), (1, 2, 3.0)],
);
let mut scratch = ScratchSpace::with_capacity(size);
let err = lu_factorize(
size,
|c| {
symbolically_singular
.outer_view(c)
.unwrap()
.into_raw_storage()
},
0.9,
&mut scratch,
);
assert_eq!(err.unwrap_err(), Error::SingularMatrix);
}
{
let numerically_singular = mat_from_triplets(
size,
size,
&[
(0, 0, 1.0),
(1, 0, 1.0),
(1, 1, 2.0),
(1, 2, 3.0),
(2, 0, 2.0),
(2, 1, 2.0),
(2, 2, 3.0),
],
);
let mut scratch = ScratchSpace::with_capacity(size);
let err = lu_factorize(
size,
|c| {
numerically_singular
.outer_view(c)
.unwrap()
.into_raw_storage()
},
0.9,
&mut scratch,
);
assert_eq!(err.unwrap_err(), Error::SingularMatrix);
}
}
#[test]
fn lu_rand() {
init();
let size = 10;
let mut rng = rand_pcg::Pcg64::seed_from_u64(12345);
use rand::prelude::*;
let mut mat = TriMat::new((size, size));
for r in 0..size {
for c in 0..size {
if rng.gen_range(0..2) == 0 {
mat.add_triplet(r, c, rng.gen_range(0.0..1.0));
}
}
}
let mat: CsMat<f64> = mat.to_csc();
let mut scratch = ScratchSpace::with_capacity(mat.rows());
let cols: Vec<_> = (0..size).collect();
let lu = lu_factorize(
size,
|c| mat.outer_view(cols[c]).unwrap().into_raw_storage(),
0.1,
&mut scratch,
)
.unwrap();
let lu_transp = lu.transpose();
let multiplied = &lu.lower.to_csmat() * &lu.upper.to_csmat();
assert!(multiplied.is_csc());
for (i, &c) in cols.iter().enumerate() {
let permuted = {
let mut is = vec![];
let mut vs = vec![];
for (i, &val) in mat.outer_view(c).unwrap().iter() {
is.push(lu.row_perm.as_ref().unwrap().orig2new[i]);
vs.push(val);
}
CsVec::new_from_unsorted(size, is, vs).unwrap()
};
let diff = &multiplied
.outer_view(lu.col_perm.as_ref().unwrap().orig2new[i])
.unwrap()
- &permuted;
assert!(diff.norm(1.0) < 1e-5);
}
let sparse_rhs = {
let mut res = CsVec::empty(size);
for i in 0..size {
if rng.gen_range(0..3) == 0 {
res.append(i, rng.gen_range(0.0..1.0));
}
}
res
};
{
let mut rhs = ScatteredVec::empty(size);
rhs.set(sparse_rhs.iter());
lu.solve(&mut rhs, &mut scratch);
let diff = &sparse_rhs - &(&mat * &rhs.to_csvec());
assert!(diff.norm(1.0) < 1e-5);
}
{
let mut rhs_t = ScatteredVec::empty(size);
rhs_t.set(sparse_rhs.iter());
lu_transp.solve(&mut rhs_t, &mut scratch);
let diff = &sparse_rhs - &(&mat.transpose_view() * &rhs_t.to_csvec());
assert!(diff.norm(1.0) < 1e-5);
}
}
}