use crate::error::{SparseError, SparseResult};
fn csr_matvec(
row_ptr: &[usize],
col_ind: &[usize],
val: &[f64],
x: &[f64],
n: usize,
) -> Vec<f64> {
let mut y = vec![0.0f64; n];
for i in 0..n {
let mut acc = 0.0f64;
for pos in row_ptr[i]..row_ptr[i + 1] {
acc += val[pos] * x[col_ind[pos]];
}
y[i] = acc;
}
y
}
fn dense_lu(a: &mut Vec<Vec<f64>>, n: usize) -> Option<Vec<usize>> {
let mut perm: Vec<usize> = (0..n).collect();
for k in 0..n {
let mut max_val = a[k][k].abs();
let mut max_row = k;
for i in (k + 1)..n {
let v = a[i][k].abs();
if v > max_val {
max_val = v;
max_row = i;
}
}
if max_val < 1e-300 {
return None;
}
a.swap(k, max_row);
perm.swap(k, max_row);
let pivot = a[k][k];
for i in (k + 1)..n {
a[i][k] /= pivot;
for j in (k + 1)..n {
let l = a[i][k];
a[i][j] -= l * a[k][j];
}
}
}
Some(perm)
}
fn dense_lu_solve(lu: &[Vec<f64>], perm: &[usize], b: &[f64], n: usize) -> Vec<f64> {
let mut y: Vec<f64> = perm.iter().map(|&p| b[p]).collect();
for i in 0..n {
for j in 0..i {
y[i] -= lu[i][j] * y[j];
}
}
let mut x = y;
for ii in 0..n {
let i = n - 1 - ii;
for j in (i + 1)..n {
x[i] -= lu[i][j] * x[j];
}
x[i] /= lu[i][i];
}
x
}
pub struct BlockJacobiF64 {
lu_factors: Vec<Vec<Vec<f64>>>,
lu_perms: Vec<Vec<usize>>,
block_starts: Vec<usize>,
block_sizes: Vec<usize>,
n: usize,
}
impl BlockJacobiF64 {
pub fn new_uniform(
csr_val: &[f64],
csr_row_ptr: &[usize],
csr_col_ind: &[usize],
n: usize,
block_size: usize,
) -> SparseResult<Self> {
if block_size == 0 {
return Err(SparseError::InvalidArgument(
"block_size must be positive".to_string(),
));
}
let mut starts = Vec::new();
let mut i = 0;
while i < n {
starts.push(i);
i += block_size;
}
Self::new_with_partition(csr_val, csr_row_ptr, csr_col_ind, n, starts)
}
pub fn new_with_partition(
csr_val: &[f64],
csr_row_ptr: &[usize],
csr_col_ind: &[usize],
n: usize,
block_starts: Vec<usize>,
) -> SparseResult<Self> {
if csr_row_ptr.len() != n + 1 {
return Err(SparseError::InconsistentData {
reason: format!(
"csr_row_ptr.len() = {} != n+1 = {}",
csr_row_ptr.len(),
n + 1
),
});
}
if block_starts.is_empty() || block_starts[0] != 0 {
return Err(SparseError::InvalidArgument(
"block_starts must start with 0".to_string(),
));
}
let nb = block_starts.len();
let mut lu_factors = Vec::with_capacity(nb);
let mut lu_perms = Vec::with_capacity(nb);
let mut block_sizes = Vec::with_capacity(nb);
for b in 0..nb {
let start = block_starts[b];
let end = if b + 1 < nb { block_starts[b + 1] } else { n };
let bs = end - start;
block_sizes.push(bs);
let mut block = vec![vec![0.0f64; bs]; bs];
for (local_row, global_row) in (start..end).enumerate() {
for pos in csr_row_ptr[global_row]..csr_row_ptr[global_row + 1] {
let col = csr_col_ind[pos];
if col >= start && col < end {
block[local_row][col - start] = csr_val[pos];
}
}
}
let perm = dense_lu(&mut block, bs).ok_or_else(|| {
SparseError::SingularMatrix(format!(
"block {b} (rows {start}..{end}) is singular"
))
})?;
lu_factors.push(block);
lu_perms.push(perm);
}
Ok(Self {
lu_factors,
lu_perms,
block_starts,
block_sizes,
n,
})
}
pub fn apply(&self, x: &[f64]) -> Vec<f64> {
let mut y = vec![0.0f64; self.n];
for (b, &start) in self.block_starts.iter().enumerate() {
let bs = self.block_sizes[b];
let x_block = &x[start..start + bs];
let y_block =
dense_lu_solve(&self.lu_factors[b], &self.lu_perms[b], x_block, bs);
y[start..start + bs].copy_from_slice(&y_block);
}
y
}
pub fn size(&self) -> usize {
self.n
}
pub fn num_blocks(&self) -> usize {
self.block_starts.len()
}
}
fn local_lstsq(a_col_major: &[f64], b: &[f64], nrows: usize, ncols: usize) -> Vec<f64> {
if nrows == 0 || ncols == 0 {
return vec![0.0f64; ncols];
}
let mut a = a_col_major.to_vec();
let mut rhs = b.to_vec();
let k = ncols.min(nrows);
for j in 0..k {
let col_start = j * nrows + j;
let col_len = nrows - j;
if col_len == 0 {
break;
}
let col_slice = &a[col_start..col_start + col_len];
let sigma: f64 = col_slice.iter().map(|x| x * x).sum::<f64>().sqrt();
if sigma < 1e-300 {
continue;
}
let mut v = col_slice.to_vec();
let sign = if v[0] >= 0.0 { 1.0 } else { -1.0 };
v[0] += sign * sigma;
let v_norm_sq: f64 = v.iter().map(|x| x * x).sum();
if v_norm_sq < 1e-300 {
continue;
}
let beta = 2.0 / v_norm_sq;
for jj in j..ncols {
let s = jj * nrows + j;
let dot: f64 = v.iter().zip(a[s..s + col_len].iter()).map(|(a, b)| a * b).sum();
let scale = beta * dot;
for ii in 0..col_len {
a[jj * nrows + j + ii] -= scale * v[ii];
}
}
let dot: f64 = v.iter().zip(rhs[j..j + col_len].iter()).map(|(a, b)| a * b).sum();
let scale = beta * dot;
for ii in 0..col_len {
rhs[j + ii] -= scale * v[ii];
}
}
let mut x = vec![0.0f64; ncols];
for ii in 0..k {
let i = k - 1 - ii;
let r_ii = a[i * nrows + i];
if r_ii.abs() < 1e-14 {
x[i] = 0.0;
continue;
}
let mut s = rhs[i];
for jj in (i + 1)..ncols {
s -= a[jj * nrows + i] * x[jj];
}
x[i] = s / r_ii;
}
x
}
pub struct SpaiPrecondF64 {
m_rows: Vec<Vec<(usize, f64)>>,
n: usize,
}
impl SpaiPrecondF64 {
pub fn new(
csr_val: &[f64],
csr_row_ptr: &[usize],
csr_col_ind: &[usize],
n: usize,
_max_iter: usize,
tolerance: f64,
) -> SparseResult<Self> {
if csr_row_ptr.len() != n + 1 {
return Err(SparseError::InconsistentData {
reason: format!(
"csr_row_ptr.len() = {} != n+1 = {}",
csr_row_ptr.len(),
n + 1
),
});
}
let nnz = csr_val.len();
let mut col_counts = vec![0usize; n];
for &c in csr_col_ind {
col_counts[c] += 1;
}
let mut csc_ptr = vec![0usize; n + 1];
for c in 0..n {
csc_ptr[c + 1] = csc_ptr[c] + col_counts[c];
}
let mut csc_row = vec![0usize; nnz];
let mut csc_val = vec![0.0f64; nnz];
let mut cur = csc_ptr[..n].to_vec();
for i in 0..n {
for pos in csr_row_ptr[i]..csr_row_ptr[i + 1] {
let c = csr_col_ind[pos];
let dst = cur[c];
csc_row[dst] = i;
csc_val[dst] = csr_val[pos];
cur[c] += 1;
}
}
let mut m_rows: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
for k in 0..n {
let s = csc_ptr[k];
let e = csc_ptr[k + 1];
let mut pattern: Vec<usize> = csc_row[s..e].to_vec();
if !pattern.contains(&k) {
pattern.push(k);
}
pattern.sort_unstable();
pattern.dedup();
let np = pattern.len();
if np == 0 {
continue;
}
let mut row_set: Vec<usize> = Vec::new();
for &j in &pattern {
for &r in &csc_row[csc_ptr[j]..csc_ptr[j + 1]] {
if !row_set.contains(&r) {
row_set.push(r);
}
}
}
row_set.sort_unstable();
let nr = row_set.len();
let mut a_local = vec![0.0f64; nr * np];
for (col_loc, &j) in pattern.iter().enumerate() {
for pos in csc_ptr[j]..csc_ptr[j + 1] {
let global_row = csc_row[pos];
if let Ok(row_loc) = row_set.binary_search(&global_row) {
a_local[col_loc * nr + row_loc] = csc_val[pos];
}
}
}
let mut rhs = vec![0.0f64; nr];
if let Ok(pos_k) = row_set.binary_search(&k) {
rhs[pos_k] = 1.0;
}
let x_local = local_lstsq(&a_local, &rhs, nr, np);
let mut resid_sq = 0.0f64;
for row_loc in 0..nr {
let mut v = -rhs[row_loc];
for col_loc in 0..np {
v += a_local[col_loc * nr + row_loc] * x_local[col_loc];
}
resid_sq += v * v;
}
let _ = tolerance;
for (p, &row_idx) in pattern.iter().enumerate() {
if x_local[p].abs() > 1e-15 {
m_rows[row_idx].push((k, x_local[p]));
}
}
let _ = resid_sq; }
Ok(Self { m_rows, n })
}
pub fn apply(&self, x: &[f64]) -> Vec<f64> {
let mut y = vec![0.0f64; self.n];
for (i, row) in self.m_rows.iter().enumerate() {
let mut acc = 0.0f64;
for &(j, v) in row {
acc += v * x[j];
}
y[i] = acc;
}
y
}
pub fn size(&self) -> usize {
self.n
}
}
pub struct PolynomialPrecondF64 {
coefficients: Vec<f64>,
csr_val: Vec<f64>,
csr_row_ptr: Vec<usize>,
csr_col_ind: Vec<usize>,
n: usize,
}
impl PolynomialPrecondF64 {
pub fn neumann(
csr_val: &[f64],
csr_row_ptr: &[usize],
csr_col_ind: &[usize],
n: usize,
degree: usize,
spectral_radius_est: f64,
) -> SparseResult<Self> {
if csr_row_ptr.len() != n + 1 {
return Err(SparseError::InconsistentData {
reason: format!(
"csr_row_ptr.len() = {} != n+1 = {}",
csr_row_ptr.len(),
n + 1
),
});
}
let alpha = if spectral_radius_est.abs() > 1e-300 {
1.0 / spectral_radius_est
} else {
1.0
};
let coefficients = vec![alpha; degree + 1];
Ok(Self {
coefficients,
csr_val: csr_val.to_vec(),
csr_row_ptr: csr_row_ptr.to_vec(),
csr_col_ind: csr_col_ind.to_vec(),
n,
})
}
pub fn apply(&self, x: &[f64]) -> Vec<f64> {
let n = self.n;
let degree = self.coefficients.len().saturating_sub(1);
let alpha = self.coefficients[0];
let b_apply = |v: &[f64]| -> Vec<f64> {
let av = csr_matvec(&self.csr_row_ptr, &self.csr_col_ind, &self.csr_val, v, n);
(0..n).map(|i| v[i] - alpha * av[i]).collect()
};
let mut acc = x.to_vec();
for _ in 0..degree {
let b_acc = b_apply(&acc);
for i in 0..n {
acc[i] = x[i] + b_acc[i];
}
}
acc.iter_mut().for_each(|v| *v *= alpha);
acc
}
pub fn degree(&self) -> usize {
self.coefficients.len().saturating_sub(1)
}
pub fn shift(&self) -> f64 {
*self.coefficients.first().unwrap_or(&1.0)
}
pub fn size(&self) -> usize {
self.n
}
}
#[cfg(test)]
mod tests {
use super::*;
fn tridiag4() -> (Vec<f64>, Vec<usize>, Vec<usize>, usize) {
let n = 4usize;
let row_ptr = vec![0, 2, 5, 8, 10];
let col_ind = vec![0, 1, 0, 1, 2, 1, 2, 3, 2, 3];
let val = vec![4.0, -1.0, -1.0, 4.0, -1.0, -1.0, 4.0, -1.0, -1.0, 4.0];
(val, row_ptr, col_ind, n)
}
#[test]
fn test_block_jacobi_uniform() {
let (val, rp, ci, n) = tridiag4();
let prec = BlockJacobiF64::new_uniform(&val, &rp, &ci, n, 2)
.expect("BlockJacobiF64::new_uniform");
assert_eq!(prec.size(), n);
assert_eq!(prec.num_blocks(), 2);
let b = vec![1.0, 0.0, 0.0, 1.0];
let y = prec.apply(&b);
assert_eq!(y.len(), n);
let r0 = 4.0 * y[0] - 1.0 * y[1] - b[0];
let r1 = -1.0 * y[0] + 4.0 * y[1] - b[1];
assert!(r0.abs() < 1e-12, "block 0 row 0 residual {r0}");
assert!(r1.abs() < 1e-12, "block 0 row 1 residual {r1}");
}
#[test]
fn test_block_jacobi_partition() {
let (val, rp, ci, n) = tridiag4();
let starts = vec![0, 1, 3]; let prec = BlockJacobiF64::new_with_partition(&val, &rp, &ci, n, starts)
.expect("BlockJacobiF64::new_with_partition");
assert_eq!(prec.num_blocks(), 3);
let y = prec.apply(&[4.0, 4.0, 4.0, 4.0]);
assert_eq!(y.len(), n);
assert!((y[0] - 1.0).abs() < 1e-12, "y[0]={}", y[0]);
}
#[test]
fn test_block_jacobi_singular_error() {
let n = 2;
let row_ptr = vec![0, 1, 2];
let col_ind = vec![0, 0]; let val = vec![1.0, 1.0];
let result = BlockJacobiF64::new_uniform(&val, &row_ptr, &col_ind, n, 2);
assert!(result.is_err(), "should detect singular block");
}
#[test]
fn test_spai_apply() {
let (val, rp, ci, n) = tridiag4();
let prec = SpaiPrecondF64::new(&val, &rp, &ci, n, 5, 1e-8)
.expect("SpaiPrecondF64::new");
assert_eq!(prec.size(), n);
let b = vec![1.0, 2.0, 3.0, 4.0];
let y = prec.apply(&b);
assert_eq!(y.len(), n);
let norm: f64 = y.iter().map(|x| x * x).sum::<f64>().sqrt();
assert!(norm > 1e-10, "SPAI apply should give non-zero result");
}
#[test]
fn test_polynomial_neumann_apply() {
let (val, rp, ci, n) = tridiag4();
let prec = PolynomialPrecondF64::neumann(&val, &rp, &ci, n, 4, 6.0)
.expect("PolynomialPrecondF64::neumann");
assert_eq!(prec.size(), n);
assert_eq!(prec.degree(), 4);
let b = vec![1.0, 0.0, 0.0, 0.0];
let y = prec.apply(&b);
assert_eq!(y.len(), n);
let norm: f64 = y.iter().map(|x| x * x).sum::<f64>().sqrt();
assert!(norm > 1e-10, "Neumann apply should give non-zero result");
}
#[test]
fn test_polynomial_shift() {
let (val, rp, ci, n) = tridiag4();
let prec = PolynomialPrecondF64::neumann(&val, &rp, &ci, n, 2, 5.0)
.expect("neumann");
let expected_shift = 1.0 / 5.0;
assert!((prec.shift() - expected_shift).abs() < 1e-14);
}
}