#![allow(non_snake_case)]
use crate::algebra::*;
use core::cmp::{max, min};
use derive_builder::Builder;
use std::iter::zip;
use thiserror::Error;
#[derive(Error, Debug)]
pub enum QDLDLError {
#[error("Matrix dimension fields are incompatible")]
IncompatibleDimension,
#[error("Matrix has a zero column")]
EmptyColumn,
#[error("Matrix is not upper triangular")]
NotUpperTriangular,
#[error("Matrix factorization produced a zero pivot")]
ZeroPivot,
#[error("Invalid permutation vector")]
InvalidPermutation,
}
#[derive(Builder, Debug, Clone)]
#[allow(missing_docs)]
pub struct QDLDLSettings<T: FloatT> {
#[builder(default = "1.0")]
pub amd_dense_scale: f64,
#[builder(default = "None", setter(strip_option))]
pub perm: Option<Vec<usize>>,
#[builder(default = "false")]
pub logical: bool,
#[builder(default = "None", setter(strip_option))]
pub Dsigns: Option<Vec<i8>>,
#[builder(default = "true")]
pub regularize_enable: bool,
#[builder(default = "(1e-12).as_T()")]
pub regularize_eps: T,
#[builder(default = "(1e-7).as_T()")]
pub regularize_delta: T,
}
impl<T> Default for QDLDLSettings<T>
where
T: FloatT,
{
fn default() -> QDLDLSettings<T> {
QDLDLSettingsBuilder::<T>::default().build().unwrap()
}
}
#[derive(Debug)]
pub struct QDLDLFactorisation<T = f64> {
pub perm: Vec<usize>,
#[allow(dead_code)] iperm: Vec<usize>,
pub L: CscMatrix<T>,
pub D: Vec<T>,
pub Dinv: Vec<T>,
workspace: QDLDLWorkspace<T>,
is_symbolic: bool,
}
impl<T> QDLDLFactorisation<T>
where
T: FloatT,
{
pub fn new(
Ain: &CscMatrix<T>,
opts: Option<QDLDLSettings<T>>,
) -> Result<QDLDLFactorisation<T>, QDLDLError> {
check_structure(Ain)?;
_qdldl_new(Ain, opts)
}
pub fn positive_inertia(&self) -> usize {
self.workspace.positive_inertia
}
pub fn regularize_count(&self) -> usize {
self.workspace.regularize_count
}
pub fn solve(&mut self, b: &mut [T]) {
assert!(!self.is_symbolic);
assert_eq!(b.len(), self.D.len());
let tmp = &mut self.workspace.fwork;
permute(tmp, b, &self.perm);
_solve(
&self.L.colptr,
&self.L.rowval,
&self.L.nzval,
&self.Dinv,
tmp,
);
ipermute(b, tmp, &self.perm);
}
pub fn update_values(&mut self, indices: &[usize], values: &[T]) {
let nzval = &mut self.workspace.triuA.nzval; let AtoPAPt = &self.workspace.AtoPAPt;
for (i, &idx) in indices.iter().enumerate() {
nzval[AtoPAPt[idx]] = values[i];
}
}
pub fn scale_values(&mut self, indices: &[usize], scale: T) {
let nzval = &mut self.workspace.triuA.nzval; let AtoPAPt = &self.workspace.AtoPAPt;
for &idx in indices.iter() {
nzval[AtoPAPt[idx]] *= scale;
}
}
pub fn offset_values(&mut self, indices: &[usize], offset: T, signs: &[i8]) {
assert_eq!(indices.len(), signs.len());
let nzval = &mut self.workspace.triuA.nzval; let AtoPAPt = &self.workspace.AtoPAPt;
for (&idx, &sign) in zip(indices, signs) {
match sign.signum() {
1 => {
nzval[AtoPAPt[idx]] += offset;
}
-1 => {
nzval[AtoPAPt[idx]] -= offset;
}
_ => {}
}
}
}
pub fn refactor(&mut self) -> Result<(), QDLDLError> {
self.is_symbolic = false;
_factor(
&mut self.L,
&mut self.D,
&mut self.Dinv,
&mut self.workspace,
self.is_symbolic,
)
}
pub fn nnzA(&self) -> usize {
self.workspace.triuA.nnz()
}
pub fn nnzL(&self) -> usize {
self.L.nnz()
}
}
fn check_structure<T: FloatT>(A: &CscMatrix<T>) -> Result<(), QDLDLError> {
if !A.is_square() {
return Err(QDLDLError::IncompatibleDimension);
}
if !A.is_triu() {
return Err(QDLDLError::NotUpperTriangular);
}
if !A.colptr.windows(2).all(|c| c[0] < c[1]) {
return Err(QDLDLError::EmptyColumn);
}
Ok(())
}
fn _qdldl_new<T: FloatT>(
Ain: &CscMatrix<T>,
opts: Option<QDLDLSettings<T>>,
) -> Result<QDLDLFactorisation<T>, QDLDLError> {
let n = Ain.nrows();
let opts = opts.unwrap_or_default();
let (perm, iperm);
if let Some(_perm) = opts.perm {
iperm = _invperm(&_perm)?;
perm = _perm;
} else {
(perm, iperm, _) = get_amd_ordering(Ain, opts.amd_dense_scale);
}
let (A, AtoPAPt) = permute_symmetric(Ain, &iperm);
let mut Dsigns = vec![1_i8; n];
if let Some(ds) = opts.Dsigns {
Dsigns = vec![1_i8; n];
permute(&mut Dsigns, &ds, &perm);
}
let mut workspace = QDLDLWorkspace::<T>::new(
A,
AtoPAPt,
Dsigns,
opts.regularize_enable,
opts.regularize_eps,
opts.regularize_delta,
)?;
let sumLnz = workspace.Lnz.iter().sum();
let mut L = CscMatrix::spalloc((n, n), sumLnz);
let mut D = vec![T::zero(); n];
let mut Dinv = vec![T::zero(); n];
_factor(&mut L, &mut D, &mut Dinv, &mut workspace, opts.logical)?;
Ok(QDLDLFactorisation {
perm,
iperm,
L,
D,
Dinv,
workspace,
is_symbolic: opts.logical,
})
}
#[derive(Debug)]
struct QDLDLWorkspace<T> {
etree: Vec<usize>,
Lnz: Vec<usize>,
iwork: Vec<usize>,
bwork: Vec<bool>,
fwork: Vec<T>,
positive_inertia: usize,
triuA: CscMatrix<T>,
AtoPAPt: Vec<usize>,
Dsigns: Vec<i8>,
regularize_enable: bool,
regularize_eps: T,
regularize_delta: T,
regularize_count: usize,
}
impl<T> QDLDLWorkspace<T>
where
T: FloatT,
{
pub fn new(
triuA: CscMatrix<T>,
AtoPAPt: Vec<usize>,
Dsigns: Vec<i8>,
regularize_enable: bool,
regularize_eps: T,
regularize_delta: T,
) -> Result<Self, QDLDLError> {
let mut etree = vec![0; triuA.ncols()];
let mut Lnz = vec![0; triuA.ncols()]; let mut iwork = vec![0; triuA.ncols() * 3];
let bwork = vec![false; triuA.ncols()];
let fwork = vec![T::zero(); triuA.ncols()];
_etree(
triuA.nrows(),
&triuA.colptr,
&triuA.rowval,
&mut iwork,
&mut Lnz,
&mut etree,
)?;
let positive_inertia = 0;
let regularize_count = 0;
Ok(Self {
etree,
Lnz,
iwork,
bwork,
fwork,
positive_inertia,
triuA,
AtoPAPt,
Dsigns,
regularize_enable,
regularize_eps,
regularize_delta,
regularize_count,
})
}
}
fn _factor<T: FloatT>(
L: &mut CscMatrix<T>,
D: &mut [T],
Dinv: &mut [T],
workspace: &mut QDLDLWorkspace<T>,
logical: bool,
) -> Result<(), QDLDLError> {
if logical {
L.nzval.fill(T::one());
D.fill(T::one());
Dinv.fill(T::one());
}
let A = &workspace.triuA;
let pos_d_count = _factor_inner(
A.n,
&A.colptr,
&A.rowval,
&A.nzval,
&mut L.colptr,
&mut L.rowval,
&mut L.nzval,
D,
Dinv,
&workspace.Lnz,
&workspace.etree,
&mut workspace.bwork,
&mut workspace.iwork,
&mut workspace.fwork,
logical,
&workspace.Dsigns,
workspace.regularize_enable,
workspace.regularize_eps,
workspace.regularize_delta,
&mut workspace.regularize_count,
)?;
workspace.positive_inertia = pos_d_count;
Ok(())
}
const QDLDL_UNKNOWN: usize = usize::MAX;
const QDLDL_USED: bool = true;
const QDLDL_UNUSED: bool = false;
fn _etree(
n: usize,
Ap: &[usize],
Ai: &[usize],
work: &mut [usize],
Lnz: &mut [usize],
etree: &mut [usize],
) -> Result<usize, QDLDLError> {
work.fill(0);
Lnz.fill(0);
etree.fill(QDLDL_UNKNOWN);
for j in 0..n {
work[j] = j;
for istart in Ai.iter().take(Ap[j + 1]).skip(Ap[j]) {
let mut i = *istart;
while work[i] != j {
if etree[i] == QDLDL_UNKNOWN {
etree[i] = j;
}
Lnz[i] += 1; work[i] = j;
i = etree[i];
}
}
}
Ok(0)
}
#[allow(clippy::too_many_arguments)]
fn _factor_inner<T: FloatT>(
n: usize,
Ap: &[usize],
Ai: &[usize],
Ax: &[T],
Lp: &mut [usize],
Li: &mut [usize],
Lx: &mut [T],
D: &mut [T],
Dinv: &mut [T],
Lnz: &[usize],
etree: &[usize],
bwork: &mut [bool],
iwork: &mut [usize],
fwork: &mut [T],
logical_factor: bool,
Dsigns: &[i8],
regularize_enable: bool,
regularize_eps: T,
regularize_delta: T,
regularize_count: &mut usize,
) -> Result<usize, QDLDLError> {
*regularize_count = 0;
let mut positiveValuesInD = 0;
let y_markers = bwork;
let (y_idx, iwork) = iwork.split_at_mut(n);
let (elim_buffer, next_colspace) = iwork.split_at_mut(n);
let y_vals = fwork;
Lp[0] = 0;
let mut acc = 0;
for (Lp, Lnz) in zip(&mut Lp[1..], Lnz) {
acc += Lnz;
*Lp = acc;
}
y_markers.fill(QDLDL_UNUSED);
y_vals.fill(T::zero());
D.fill(T::zero());
next_colspace.copy_from_slice(&Lp[0..Lp.len() - 1]);
if !logical_factor {
D[0] = Ax[0];
if regularize_enable {
let sign = T::from_i8(Dsigns[0]).unwrap();
if D[0] * sign < regularize_eps {
D[0] = regularize_delta * sign;
*regularize_count += 1;
}
}
if D[0].is_zero() {
return Err(QDLDLError::ZeroPivot);
}
if D[0] > T::zero() {
positiveValuesInD += 1;
}
Dinv[0] = T::recip(D[0]);
}
for k in 1..n {
let mut nnz_y = 0;
for i in Ap[k]..Ap[k + 1] {
let bidx = Ai[i];
if bidx == k {
D[k] = Ax[i];
continue;
}
y_vals[bidx] = Ax[i];
let next_idx = bidx;
if y_markers[next_idx] == QDLDL_UNUSED {
y_markers[next_idx] = QDLDL_USED; elim_buffer[0] = next_idx; let mut nnz_e = 1;
let mut next_idx = etree[bidx];
while next_idx != QDLDL_UNKNOWN && next_idx < k {
if y_markers[next_idx] == QDLDL_USED {
break;
}
y_markers[next_idx] = QDLDL_USED; elim_buffer[nnz_e] = next_idx; next_idx = etree[next_idx]; nnz_e += 1; }
while nnz_e != 0 {
nnz_e -= 1;
y_idx[nnz_y] = elim_buffer[nnz_e];
nnz_y += 1;
}
}
}
for i in (0..nnz_y).rev() {
let cidx = y_idx[i];
let tmp_idx = next_colspace[cidx];
if !logical_factor {
let y_vals_cidx = y_vals[cidx];
let (f, l) = (Lp[cidx], tmp_idx);
unsafe {
for (&Lxj, &Lij) in zip(&Lx[f..l], &Li[f..l]) {
*(y_vals.get_unchecked_mut(Lij)) -= Lxj * y_vals_cidx;
}
let Lx_tmp_idx = y_vals_cidx * *Dinv.get_unchecked(cidx);
*Lx.get_unchecked_mut(tmp_idx) = Lx_tmp_idx;
*D.get_unchecked_mut(k) -= y_vals_cidx * Lx_tmp_idx;
}
}
Li[tmp_idx] = k;
next_colspace[cidx] += 1;
y_vals[cidx] = T::zero();
y_markers[cidx] = QDLDL_UNUSED;
}
if !logical_factor {
if regularize_enable {
let sign = T::from_i8(Dsigns[k]).unwrap();
if D[k] * sign < regularize_eps {
D[k] = regularize_delta * sign;
*regularize_count += 1;
}
}
if D[k].is_zero() {
return Err(QDLDLError::ZeroPivot);
}
if D[k] > T::zero() {
positiveValuesInD += 1;
}
Dinv[k] = T::recip(D[k]);
}
}
Ok(positiveValuesInD)
}
fn _lsolve_safe<T: FloatT>(Lp: &[usize], Li: &[usize], Lx: &[T], x: &mut [T]) {
for i in 0..x.len() {
let xi = x[i];
let (f, l) = (Lp[i], Lp[i + 1]);
let Lx = &Lx[f..l];
let Li = &Li[f..l];
for (&Lij, &Lxj) in zip(Li, Lx) {
x[Lij] -= Lxj * xi;
}
}
}
fn _ltsolve_safe<T: FloatT>(Lp: &[usize], Li: &[usize], Lx: &[T], x: &mut [T]) {
for i in (0..x.len()).rev() {
let mut s = T::zero();
let (f, l) = (Lp[i], Lp[i + 1]);
let Lx = &Lx[f..l];
let Li = &Li[f..l];
for (&Lij, &Lxj) in zip(Li, Lx) {
s += Lxj * x[Lij];
}
x[i] -= s;
}
}
fn _lsolve_unsafe<T: FloatT>(Lp: &[usize], Li: &[usize], Lx: &[T], x: &mut [T]) {
unsafe {
for i in 0..x.len() {
let xi = *x.get_unchecked(i);
let f = *Lp.get_unchecked(i);
let l = *Lp.get_unchecked(i + 1);
for (&Lxj, &Lij) in zip(&Lx[f..l], &Li[f..l]) {
*(x.get_unchecked_mut(Lij)) -= Lxj * xi;
}
}
}
}
fn _ltsolve_unsafe<T: FloatT>(Lp: &[usize], Li: &[usize], Lx: &[T], x: &mut [T]) {
unsafe {
for i in (0..x.len()).rev() {
let mut s = T::zero();
let f = *Lp.get_unchecked(i);
let l = *Lp.get_unchecked(i + 1);
for (&Lxj, &Lij) in zip(&Lx[f..l], &Li[f..l]) {
s += Lxj * (*x.get_unchecked(Lij));
}
*x.get_unchecked_mut(i) -= s;
}
}
}
fn _dltsolve_unsafe<T: FloatT>(Lp: &[usize], Li: &[usize], Lx: &[T], Dinv: &[T], x: &mut [T]) {
unsafe {
for i in (0..x.len()).rev() {
let mut s = T::zero();
let f = *Lp.get_unchecked(i);
let l = *Lp.get_unchecked(i + 1);
for (&Lxj, &Lij) in zip(&Lx[f..l], &Li[f..l]) {
s += Lxj * (*x.get_unchecked(Lij));
}
let xi = x.get_unchecked_mut(i);
*xi *= *Dinv.get_unchecked(i);
*xi -= s;
}
}
}
fn _solve<T: FloatT>(Lp: &[usize], Li: &[usize], Lx: &[T], Dinv: &[T], b: &mut [T]) {
_lsolve_unsafe(Lp, Li, Lx, b);
_dltsolve_unsafe(Lp, Li, Lx, Dinv, b);
}
fn _invperm(p: &[usize]) -> Result<Vec<usize>, QDLDLError> {
let mut b = vec![0; p.len()];
for (i, j) in p.iter().enumerate() {
if *j < p.len() && b[*j] == 0 {
b[*j] = i;
} else {
return Err(QDLDLError::InvalidPermutation);
}
}
Ok(b)
}
pub(crate) fn permute<T: Copy>(x: &mut [T], b: &[T], p: &[usize]) {
debug_assert!(p.is_empty() || *p.iter().max().unwrap() < x.len());
unsafe {
zip(p, x).for_each(|(p, x)| *x = *b.get_unchecked(*p));
}
}
pub(crate) fn ipermute<T: Copy>(x: &mut [T], b: &[T], p: &[usize]) {
debug_assert!(p.is_empty() || *p.iter().max().unwrap() < x.len());
unsafe {
zip(p, b).for_each(|(p, b)| *x.get_unchecked_mut(*p) = *b);
}
}
pub(crate) fn permute_symmetric<T: FloatT>(
A: &CscMatrix<T>,
iperm: &[usize],
) -> (CscMatrix<T>, Vec<usize>) {
let (_m, n) = A.size();
let mut P = CscMatrix::<T>::spalloc((n, n), A.nnz());
let mut AtoPAPt = vec![0; A.nnz()];
_permute_symmetric_inner(
A,
&mut AtoPAPt,
iperm,
&mut P.rowval,
&mut P.colptr,
&mut P.nzval,
);
(P, AtoPAPt)
}
fn _permute_symmetric_inner<T: FloatT>(
A: &CscMatrix<T>,
AtoPAPt: &mut [usize],
iperm: &[usize],
Pr: &mut [usize],
Pc: &mut [usize],
Pv: &mut [T],
) {
let n = A.nrows();
let mut num_entries = vec![0; n];
let Ar = &A.rowval;
let Ac = &A.colptr;
let Av = &A.nzval;
for colA in 0..n {
let colP = iperm[colA];
for rowA in Ar.iter().take(Ac[colA + 1]).skip(Ac[colA]) {
let rowP = iperm[*rowA];
if *rowA <= colA {
let col_idx = max(rowP, colP);
num_entries[col_idx] += 1;
}
}
}
Pc[0] = 0;
let mut acc = 0;
for (Pckp1, ne) in zip(&mut Pc[1..], &num_entries) {
*Pckp1 = acc + ne;
acc = *Pckp1;
}
num_entries.copy_from_slice(&Pc[0..n]);
let mut row_starts = num_entries;
for colA in 0..n {
let colP = iperm[colA];
for rowA_idx in Ac[colA]..Ac[colA + 1] {
let rowA = Ar[rowA_idx];
if rowA <= colA {
let rowP = iperm[rowA];
let col_idx = max(colP, rowP);
let rowP_idx = row_starts[col_idx];
Pr[rowP_idx] = min(colP, rowP);
Pv[rowP_idx] = Av[rowA_idx];
AtoPAPt[rowA_idx] = rowP_idx;
row_starts[col_idx] += 1;
}
}
}
}
pub(crate) fn get_amd_ordering<T: FloatT>(
A: &CscMatrix<T>,
amd_dense_scale: f64,
) -> (Vec<usize>, Vec<usize>, amd::Info) {
let mut control = amd::Control::default();
control.dense *= amd_dense_scale; let (perm, iperm, info) = amd::order(A.nrows(), &A.colptr, &A.rowval, &control).unwrap();
(perm, iperm, info)
}
#[path = "test.rs"]
#[cfg(test)]
mod test;