use crate::csr::CsrMatrix;
use crate::error::{SparseError, SparseResult};
use scirs2_core::numeric::SparseElement;
use scirs2_core::parallel_ops::*;
use std::fmt::Debug;
/// Splits the rows of a CSR matrix into contiguous, roughly
/// nonzero-balanced ranges so each range can be processed by one worker.
#[derive(Debug, Clone)]
pub struct RowPartitioner {
    /// Partition boundaries; partition `i` covers rows
    /// `partitions[i]..partitions[i + 1]`. Length is `num_partitions + 1`.
    pub partitions: Vec<usize>,
    /// Number of row ranges in this partition.
    pub num_partitions: usize,
}
impl RowPartitioner {
    /// Build a partition of `nrows` rows into `num_partitions` contiguous
    /// ranges, greedily balancing the nonzero count per range.
    ///
    /// `indptr` is the CSR row-pointer array; `indptr[nrows]` holds the
    /// total nonzero count. Trailing partitions may be empty when the
    /// nonzeros are exhausted early.
    ///
    /// # Errors
    /// Returns `SparseError::ValueError` when `num_partitions` is zero or
    /// `indptr` is shorter than `nrows + 1`.
    pub fn new(indptr: &[usize], nrows: usize, num_partitions: usize) -> SparseResult<Self> {
        if num_partitions == 0 {
            return Err(SparseError::ValueError(
                "num_partitions must be at least 1".to_string(),
            ));
        }
        if indptr.len() < nrows + 1 {
            return Err(SparseError::ValueError(
                "indptr length must be at least nrows + 1".to_string(),
            ));
        }
        let total_nnz = indptr[nrows];
        // Ceiling division: ideal nonzero load per partition.
        let target = (total_nnz + num_partitions - 1) / num_partitions;
        let mut partitions = vec![0usize; num_partitions + 1];
        let mut part = 0usize;
        let mut running = 0usize;
        for row in 0..nrows {
            running += indptr[row + 1] - indptr[row];
            // Close the current partition once it reaches the target,
            // as long as at least one partition slot remains open.
            if running >= target && part + 1 < num_partitions {
                part += 1;
                partitions[part] = row + 1;
                running = 0;
            }
        }
        // Every remaining boundary (including the final one) ends at nrows,
        // so unused trailing partitions become empty ranges.
        for boundary in partitions.iter_mut().skip(part + 1) {
            *boundary = nrows;
        }
        Ok(Self {
            partitions,
            num_partitions,
        })
    }

    /// Row range covered by partition `i`.
    /// Panics if `i >= num_partitions`.
    pub fn range(&self, i: usize) -> std::ops::Range<usize> {
        self.partitions[i]..self.partitions[i + 1]
    }

    /// Partition sized to the current thread pool (at least one partition).
    pub fn auto(indptr: &[usize], nrows: usize) -> SparseResult<Self> {
        Self::new(indptr, nrows, get_num_threads().max(1))
    }
}
/// Sparse matrix–vector product `y = A * x`, computed over row blocks in
/// parallel.
///
/// Rows are split into nnz-balanced blocks; each block computes its
/// `(row, value)` pairs independently and the pairs are scattered into the
/// output vector afterwards.
///
/// # Errors
/// Returns `SparseError::DimensionMismatch` when `x.len()` differs from
/// the matrix column count.
pub fn parallel_spmv(a: &CsrMatrix<f64>, x: &[f64]) -> SparseResult<Vec<f64>> {
    let (m, n) = a.shape();
    if x.len() != n {
        return Err(SparseError::DimensionMismatch {
            expected: n,
            found: x.len(),
        });
    }
    // One contiguous, nnz-balanced row block per worker thread.
    let partitioner = RowPartitioner::auto(&a.indptr, m)?;
    let ranges: Vec<(usize, usize)> = (0..partitioner.num_partitions)
        .map(|i| {
            let r = partitioner.range(i);
            (r.start, r.end)
        })
        .collect();
    let chunks: Vec<Vec<(usize, f64)>> = parallel_map(&ranges, |&(start, end)| {
        (start..end)
            .map(|row| {
                // Dot product of row `row` with x, in stored order.
                let dot = (a.indptr[row]..a.indptr[row + 1])
                    .map(|j| a.data[j] * x[a.indices[j]])
                    .sum::<f64>();
                (row, dot)
            })
            .collect()
    });
    // Each row appears exactly once across all chunks.
    let mut y = vec![0.0f64; m];
    for (row, val) in chunks.into_iter().flatten() {
        y[row] = val;
    }
    Ok(y)
}
pub fn parallel_spmm(a: &CsrMatrix<f64>, b: &CsrMatrix<f64>) -> SparseResult<CsrMatrix<f64>> {
let (m, k) = a.shape();
let (brows, n) = b.shape();
if k != brows {
return Err(SparseError::DimensionMismatch {
expected: k,
found: brows,
});
}
let partitioner = RowPartitioner::auto(&a.indptr, m)?;
let num_parts = partitioner.num_partitions;
let ranges: Vec<(usize, usize)> = (0..num_parts)
.map(|i| {
let r = partitioner.range(i);
(r.start, r.end)
})
.collect();
let chunks: Vec<Vec<(usize, Vec<(usize, f64)>)>> = parallel_map(&ranges, |(start, end)| {
let mut rows_out = Vec::with_capacity(end - start);
let mut acc = vec![0.0f64; n];
let mut marker = vec![false; n];
let mut nz_cols: Vec<usize> = Vec::new();
for row in *start..*end {
for &col in &nz_cols {
acc[col] = 0.0;
marker[col] = false;
}
nz_cols.clear();
for ja in a.indptr[row]..a.indptr[row + 1] {
let a_col = a.indices[ja];
let a_val = a.data[ja];
for jb in b.indptr[a_col]..b.indptr[a_col + 1] {
let b_col = b.indices[jb];
let b_val = b.data[jb];
acc[b_col] += a_val * b_val;
if !marker[b_col] {
marker[b_col] = true;
nz_cols.push(b_col);
}
}
}
let mut row_nz: Vec<(usize, f64)> = nz_cols
.iter()
.filter_map(|&col| {
let v = acc[col];
if v != 0.0 {
Some((col, v))
} else {
None
}
})
.collect();
row_nz.sort_unstable_by_key(|&(col, _)| col);
rows_out.push((row, row_nz));
}
rows_out
});
let mut indptr = vec![0usize; m + 1];
let mut all_rows: Vec<(usize, Vec<(usize, f64)>)> = chunks.into_iter().flatten().collect();
all_rows.sort_unstable_by_key(|&(row, _)| row);
for (row, ref nz) in &all_rows {
indptr[row + 1] = nz.len();
}
for i in 1..=m {
indptr[i] += indptr[i - 1];
}
let total_nnz = indptr[m];
let mut indices = vec![0usize; total_nnz];
let mut data = vec![0.0f64; total_nnz];
for (row, nz) in all_rows {
let start = indptr[row];
for (k2, (col, val)) in nz.into_iter().enumerate() {
indices[start + k2] = col;
data[start + k2] = val;
}
}
CsrMatrix::from_raw_csr(data, indptr, indices, (m, n))
}
pub fn parallel_csr_add(a: &CsrMatrix<f64>, b: &CsrMatrix<f64>) -> SparseResult<CsrMatrix<f64>> {
let (am, an) = a.shape();
let (bm, bn) = b.shape();
if am != bm || an != bn {
return Err(SparseError::ShapeMismatch {
expected: (am, an),
found: (bm, bn),
});
}
let m = am;
let n = an;
let partitioner = RowPartitioner::auto(&a.indptr, m)?;
let num_parts = partitioner.num_partitions;
let ranges: Vec<(usize, usize)> = (0..num_parts)
.map(|i| {
let r = partitioner.range(i);
(r.start, r.end)
})
.collect();
let chunks: Vec<Vec<(usize, Vec<(usize, f64)>)>> = parallel_map(&ranges, |(start, end)| {
let mut rows_out = Vec::with_capacity(end - start);
let mut acc = vec![0.0f64; n];
let mut marker = vec![false; n];
let mut nz_cols: Vec<usize> = Vec::new();
for row in *start..*end {
for &col in &nz_cols {
acc[col] = 0.0;
marker[col] = false;
}
nz_cols.clear();
for ja in a.indptr[row]..a.indptr[row + 1] {
let col = a.indices[ja];
acc[col] += a.data[ja];
if !marker[col] {
marker[col] = true;
nz_cols.push(col);
}
}
for jb in b.indptr[row]..b.indptr[row + 1] {
let col = b.indices[jb];
acc[col] += b.data[jb];
if !marker[col] {
marker[col] = true;
nz_cols.push(col);
}
}
let mut row_nz: Vec<(usize, f64)> = nz_cols
.iter()
.filter_map(|&col| {
let v = acc[col];
if v != 0.0 {
Some((col, v))
} else {
None
}
})
.collect();
row_nz.sort_unstable_by_key(|&(col, _)| col);
rows_out.push((row, row_nz));
}
rows_out
});
let mut all_rows: Vec<(usize, Vec<(usize, f64)>)> = chunks.into_iter().flatten().collect();
all_rows.sort_unstable_by_key(|&(row, _)| row);
let mut indptr = vec![0usize; m + 1];
for (row, ref nz) in &all_rows {
indptr[row + 1] = nz.len();
}
for i in 1..=m {
indptr[i] += indptr[i - 1];
}
let total_nnz = indptr[m];
let mut indices_out = vec![0usize; total_nnz];
let mut data_out = vec![0.0f64; total_nnz];
for (row, nz) in all_rows {
let start = indptr[row];
for (k, (col, val)) in nz.into_iter().enumerate() {
indices_out[start + k] = col;
data_out[start + k] = val;
}
}
CsrMatrix::from_raw_csr(data_out, indptr, indices_out, (m, n))
}
/// Triangular factors produced by an ILU(0) factorization (`A ≈ L U`).
#[derive(Debug, Clone)]
pub struct ILUFactor {
    /// Lower-triangular factor in CSR form, with an explicit unit diagonal.
    pub l: CsrMatrix<f64>,
    /// Upper-triangular factor in CSR form, diagonal (pivot) included.
    pub u: CsrMatrix<f64>,
    /// Row permutation applied to the right-hand side in `forward_solve`.
    /// NOTE(review): `parallel_ilu_factor` always produces the identity
    /// permutation (no pivoting) — confirm before relying on reordering.
    pub perm: Vec<usize>,
    /// Rows grouped by dependency level of `l`; rows within one level do
    /// not depend on each other and can be solved in parallel.
    pub level_sets: Vec<Vec<usize>>,
}
impl ILUFactor {
    /// Solve `L y = P b` (forward substitution) with level-scheduled
    /// parallelism.
    ///
    /// The right-hand side is first permuted by `perm`. Level sets are
    /// processed in order; within one level all rows are evaluated by
    /// `parallel_map` against the `x` values from before the level, and
    /// the results are written back only after the whole level finishes,
    /// so rows inside a level must not depend on each other. Entries of
    /// `L` with column >= row are skipped (the unit diagonal is implicit
    /// here even though it is stored).
    ///
    /// # Errors
    /// `DimensionMismatch` when `b.len()` differs from the order of `L`.
    pub fn forward_solve(&self, b: &[f64]) -> SparseResult<Vec<f64>> {
        let n = b.len();
        if self.l.rows() != n {
            return Err(SparseError::DimensionMismatch {
                expected: self.l.rows(),
                found: n,
            });
        }
        // Apply the row permutation to the right-hand side.
        let mut x: Vec<f64> = (0..n).map(|i| b[self.perm[i]]).collect();
        for level in &self.level_sets {
            let updates: Vec<(usize, f64)> = parallel_map(level, |&row| {
                let mut s = x[row];
                for j in self.l.indptr[row]..self.l.indptr[row + 1] {
                    let col = self.l.indices[j];
                    // Strictly-lower entries only; L's diagonal is 1.
                    if col < row {
                        s -= self.l.data[j] * x[col];
                    }
                }
                (row, s)
            });
            // Deferred write-back: reads above never observe partial
            // updates from the same level.
            for (row, val) in updates {
                x[row] = val;
            }
        }
        Ok(x)
    }
    /// Solve `U x = y` (backward substitution) by walking the level sets
    /// in reverse order.
    ///
    /// NOTE(review): the level sets were derived from the dependency
    /// structure of `L`; reversing them is only guaranteed safe for `U`
    /// when the factor sparsity pattern is structurally symmetric —
    /// confirm this holds for the matrices fed to `parallel_ilu_factor`.
    /// No permutation is applied here (the `perm` produced by
    /// `parallel_ilu_factor` is the identity).
    ///
    /// # Errors
    /// `DimensionMismatch` when `b.len()` differs from the order of `U`.
    pub fn backward_solve(&self, b: &[f64]) -> SparseResult<Vec<f64>> {
        let n = b.len();
        if self.u.rows() != n {
            return Err(SparseError::DimensionMismatch {
                expected: self.u.rows(),
                found: n,
            });
        }
        let mut x = b.to_vec();
        for level in self.level_sets.iter().rev() {
            let updates: Vec<(usize, f64)> = parallel_map(level, |&row| {
                let mut s = x[row];
                // Fallback divisor when a row stores no diagonal entry.
                let mut diag = 1.0f64;
                for j in self.u.indptr[row]..self.u.indptr[row + 1] {
                    let col = self.u.indices[j];
                    if col == row {
                        diag = self.u.data[j];
                    } else if col > row {
                        s -= self.u.data[j] * x[col];
                    }
                }
                (row, s / diag)
            });
            // Deferred write-back, as in forward_solve.
            for (row, val) in updates {
                x[row] = val;
            }
        }
        Ok(x)
    }
}
/// Partition rows `0..n` into dependency level sets for parallel
/// triangular solves.
///
/// `deps[i]` lists the rows that row `i` depends on. Every row in level
/// `k` depends only on rows in levels `< k`, so all rows of one level can
/// be processed concurrently. Correct when dependencies point at earlier
/// rows (`d < i`), as produced from a lower-triangular CSR structure.
fn build_level_sets(n: usize, deps: &[Vec<usize>]) -> Vec<Vec<usize>> {
    // level_of[i] = length of the longest dependency chain ending at i.
    let mut level_of = vec![0usize; n];
    for i in 0..n {
        for &d in deps[i].iter() {
            level_of[i] = level_of[i].max(level_of[d] + 1);
        }
    }
    let depth = level_of.iter().copied().max().unwrap_or(0) + 1;
    // Scatter each row into the bucket for its level.
    let mut buckets: Vec<Vec<usize>> = vec![Vec::new(); depth];
    for (row, &lvl) in level_of.iter().enumerate() {
        buckets[lvl].push(row);
    }
    buckets
}
/// Compute an ILU(0) factorization `A ≈ L U` (no fill-in: L and U share
/// A's sparsity pattern) together with the level sets needed for parallel
/// triangular solves.
///
/// The elimination itself runs sequentially in IKJ order; "parallel"
/// refers to the level-set metadata produced for `ILUFactor`'s solves.
/// The returned `perm` is always the identity (no pivoting).
///
/// NOTE(review): the early `break` on `k >= i` assumes each row's column
/// indices are sorted ascending (canonical CSR) — confirm `CsrMatrix`
/// guarantees this; otherwise lower-triangular entries would be skipped.
///
/// # Errors
/// * `ValueError` for a non-square input.
/// * `SingularMatrix` when a pivot magnitude falls below `1e-300`.
pub fn parallel_ilu_factor(a: &CsrMatrix<f64>) -> SparseResult<ILUFactor> {
    let (m, n) = a.shape();
    if m != n {
        return Err(SparseError::ValueError(
            "ILU factorization requires a square matrix".to_string(),
        ));
    }
    let nnz = a.nnz();
    // Factor in place on a copy of A's values; the sparsity structure
    // (indptr/indices) is never modified — that is what makes it ILU(0).
    let mut work_data = a.data.clone();
    let indptr = a.indptr.clone();
    let indices = a.indices.clone();
    // Per-row map: column index -> flat position in work_data, for O(1)
    // lookup of update targets during elimination.
    let mut row_col_to_pos: Vec<std::collections::HashMap<usize, usize>> =
        vec![std::collections::HashMap::new(); n];
    for row in 0..n {
        for j in indptr[row]..indptr[row + 1] {
            row_col_to_pos[row].insert(indices[j], j);
        }
    }
    // Identity permutation: no pivoting is performed.
    let perm: Vec<usize> = (0..n).collect();
    // IKJ elimination: for each row i, eliminate against every earlier
    // row k that appears as a lower-triangular entry of row i.
    for i in 1..n {
        for ji in indptr[i]..indptr[i + 1] {
            let k = indices[ji];
            if k >= i {
                break;
            }
            let u_kk_pos = match row_col_to_pos[k].get(&k) {
                Some(&pos) => pos,
                // Structurally missing diagonal in row k: skip this pivot.
                None => continue,
            };
            let u_kk = work_data[u_kk_pos];
            if u_kk.abs() < 1e-300 {
                return Err(SparseError::SingularMatrix(format!(
                    "Near-zero pivot encountered at row {}",
                    k
                )));
            }
            // The L entry l_ik overwrites a_ik in place.
            let multiplier = work_data[ji] / u_kk;
            work_data[ji] = multiplier;
            // Subtract multiplier * (U part of row k) from row i, but only
            // where row i already has a structural nonzero (drop fill-in).
            for jk in indptr[k]..indptr[k + 1] {
                let j = indices[jk];
                if j <= k {
                    continue;
                }
                if let Some(&pos_ij) = row_col_to_pos[i].get(&j) {
                    work_data[pos_ij] -= multiplier * work_data[jk];
                }
            }
        }
    }
    // Split the factored values into L (strictly lower entries plus an
    // explicit unit diagonal) and U (diagonal plus strictly upper
    // entries), recording each row's lower dependencies for scheduling.
    let mut l_indptr = vec![0usize; n + 1];
    let mut u_indptr = vec![0usize; n + 1];
    let mut l_indices: Vec<usize> = Vec::with_capacity(nnz);
    let mut l_data: Vec<f64> = Vec::with_capacity(nnz);
    let mut u_indices: Vec<usize> = Vec::with_capacity(nnz);
    let mut u_data: Vec<f64> = Vec::with_capacity(nnz);
    let mut deps: Vec<Vec<usize>> = vec![Vec::new(); n];
    for row in 0..n {
        for j in indptr[row]..indptr[row + 1] {
            let col = indices[j];
            let val = work_data[j];
            if col < row {
                l_indices.push(col);
                l_data.push(val);
                l_indptr[row + 1] += 1;
                deps[row].push(col);
            } else if col == row {
                // Diagonal: stored as 1 in L and as the pivot in U.
                l_indices.push(col);
                l_data.push(1.0);
                l_indptr[row + 1] += 1;
                u_indices.push(col);
                u_data.push(val);
                u_indptr[row + 1] += 1;
            } else {
                u_indices.push(col);
                u_data.push(val);
                u_indptr[row + 1] += 1;
            }
        }
    }
    // Convert per-row counts into CSR row pointers (prefix sums).
    for i in 1..=n {
        l_indptr[i] += l_indptr[i - 1];
        u_indptr[i] += u_indptr[i - 1];
    }
    let l = CsrMatrix::from_raw_csr(l_data, l_indptr, l_indices, (n, n))?;
    let u = CsrMatrix::from_raw_csr(u_data, u_indptr, u_indices, (n, n))?;
    let level_sets = build_level_sets(n, &deps);
    Ok(ILUFactor {
        l,
        u,
        perm,
        level_sets,
    })
}
/// Graph coloring of a matrix's rows for parallel Gauss-Seidel sweeps:
/// rows sharing a color have no structural nonzero linking them (as seen
/// from each row's own column list) and are updated concurrently.
#[derive(Debug, Clone)]
pub struct ColoredGaussSeidel {
    /// Color index assigned to each row.
    pub color: Vec<usize>,
    /// Rows grouped by color: `color_sets[c]` lists the rows of color `c`.
    pub color_sets: Vec<Vec<usize>>,
    /// Total number of distinct colors used.
    pub num_colors: usize,
}
impl ColoredGaussSeidel {
    /// Greedy first-fit coloring of the matrix adjacency structure.
    ///
    /// Each row is assigned the smallest color not already used by any of
    /// its structural neighbors (nonzero columns of its own row,
    /// excluding the diagonal).
    ///
    /// NOTE(review): only each row's own column list is inspected, so the
    /// coloring is conflict-free for sweeps only when the sparsity
    /// pattern is structurally symmetric — confirm for the intended
    /// inputs.
    pub fn from_matrix<T>(a: &CsrMatrix<T>) -> SparseResult<Self>
    where
        T: Clone + Copy + SparseElement + scirs2_core::numeric::Zero + std::cmp::PartialEq + Debug,
    {
        let (n, _) = a.shape();
        let mut color = vec![usize::MAX; n];
        // Scratch table marking colors taken by already-colored neighbors;
        // grown on demand and wiped after each row.
        let mut forbidden: Vec<bool> = Vec::new();
        for row in 0..n {
            let neighbor_colors: Vec<usize> = (a.indptr[row]..a.indptr[row + 1])
                .map(|j| a.indices[j])
                .filter(|&nbr| nbr != row)
                .map(|nbr| color[nbr])
                .filter(|&c| c != usize::MAX)
                .collect();
            if let Some(&max_c) = neighbor_colors.iter().max() {
                if forbidden.len() < max_c + 1 {
                    forbidden.resize(max_c + 1, false);
                }
            }
            for &c in &neighbor_colors {
                forbidden[c] = true;
            }
            // First-fit: smallest color not used by any neighbor.
            let mut chosen = 0usize;
            while chosen < forbidden.len() && forbidden[chosen] {
                chosen += 1;
            }
            color[row] = chosen;
            // Undo the scratch marks so the table is clean for the next row.
            for &c in &neighbor_colors {
                forbidden[c] = false;
            }
        }
        let num_colors = color
            .iter()
            .copied()
            .filter(|&c| c != usize::MAX)
            .max()
            .map_or(0, |c| c + 1);
        let mut color_sets: Vec<Vec<usize>> = vec![Vec::new(); num_colors];
        for (row, &c) in color.iter().enumerate() {
            if c < num_colors {
                color_sets[c].push(row);
            }
        }
        Ok(Self {
            color,
            color_sets,
            num_colors,
        })
    }
    /// One weighted (SOR-style) Gauss-Seidel sweep over all colors.
    ///
    /// Rows inside a color class are evaluated in parallel against the
    /// `x` values from before that class's update; the results are
    /// written back before the next color starts. `omega` is the
    /// relaxation factor (1.0 gives plain Gauss-Seidel). A row whose
    /// diagonal magnitude is at most `1e-300` is left unchanged.
    ///
    /// # Errors
    /// `ValueError` for a non-square matrix; `DimensionMismatch` when `b`
    /// or `x` has the wrong length.
    pub fn sweep(
        &self,
        a: &CsrMatrix<f64>,
        b: &[f64],
        x: &mut Vec<f64>,
        omega: f64,
    ) -> SparseResult<()> {
        let (n, nc) = a.shape();
        if n != nc {
            return Err(SparseError::ValueError(
                "Matrix must be square for Gauss-Seidel".to_string(),
            ));
        }
        if b.len() != n || x.len() != n {
            return Err(SparseError::DimensionMismatch {
                expected: n,
                found: b.len().min(x.len()),
            });
        }
        for color_set in &self.color_sets {
            let updates: Vec<(usize, f64)> = parallel_map(color_set, |&row| {
                // sigma = b[row] - sum of off-diagonal contributions.
                let mut sigma = b[row];
                let mut a_ii = 0.0f64;
                for j in a.indptr[row]..a.indptr[row + 1] {
                    let (col, val) = (a.indices[j], a.data[j]);
                    if col == row {
                        a_ii = val;
                    } else {
                        sigma -= val * x[col];
                    }
                }
                let x_new = if a_ii.abs() > 1e-300 {
                    sigma / a_ii
                } else {
                    // (Near-)zero diagonal: keep the current value.
                    x[row]
                };
                // Relaxed update: blend old and new by omega.
                (row, x[row] + omega * (x_new - x[row]))
            });
            // Deferred write-back after the whole color finishes.
            for (row, val) in updates {
                x[row] = val;
            }
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    /// 4x4 symmetric tridiagonal fixture:
    /// [[ 2, -1,  0,  0],
    ///  [-1,  3, -1,  0],
    ///  [ 0, -1,  3, -1],
    ///  [ 0,  0, -1,  2]]
    fn small_csr() -> CsrMatrix<f64> {
        let rows = vec![0, 0, 1, 1, 1, 2, 2, 2, 3, 3];
        let cols = vec![0, 1, 0, 1, 2, 1, 2, 3, 2, 3];
        let vals = vec![2.0, -1.0, -1.0, 3.0, -1.0, -1.0, 3.0, -1.0, -1.0, 2.0];
        CsrMatrix::new(vals, rows, cols, (4, 4)).expect("build small_csr")
    }
    /// Two partitions must be contiguous (no gap/overlap) and the last
    /// one must end at the final row.
    #[test]
    fn test_row_partitioner() {
        let a = small_csr();
        let p = RowPartitioner::new(&a.indptr, 4, 2).expect("partition");
        assert_eq!(p.num_partitions, 2);
        let r0 = p.range(0);
        let r1 = p.range(1);
        assert!(r0.start < r0.end);
        assert_eq!(r1.end, 4);
        assert_eq!(r0.end, r1.start);
    }
    /// y = A * x computed by hand for x = [1, 2, 3, 4].
    #[test]
    fn test_parallel_spmv() {
        let a = small_csr();
        let x = vec![1.0, 2.0, 3.0, 4.0];
        let y = parallel_spmv(&a, &x).expect("spmv");
        assert_relative_eq!(y[0], 0.0, epsilon = 1e-12);
        assert_relative_eq!(y[1], 2.0, epsilon = 1e-12);
        assert_relative_eq!(y[2], 3.0, epsilon = 1e-12);
        assert_relative_eq!(y[3], 5.0, epsilon = 1e-12);
    }
    /// (A + A) x must equal 2 * (A x), checked via SpMV.
    #[test]
    fn test_parallel_csr_add() {
        let a = small_csr();
        let b = small_csr();
        let c = parallel_csr_add(&a, &b).expect("add");
        let x = vec![1.0, 2.0, 3.0, 4.0];
        let ya = parallel_spmv(&a, &x).expect("ya");
        let yc = parallel_spmv(&c, &x).expect("yc");
        for i in 0..4 {
            assert_relative_eq!(yc[i], 2.0 * ya[i], epsilon = 1e-12);
        }
    }
    /// (A * A) x must match A (A x) computed as two successive SpMVs.
    #[test]
    fn test_parallel_spmm() {
        let a = small_csr();
        let c = parallel_spmm(&a, &a).expect("spmm");
        let x = vec![1.0, 2.0, 3.0, 4.0];
        let ax = parallel_spmv(&a, &x).expect("ax");
        let aax_ref = parallel_spmv(&a, &ax).expect("aax_ref");
        let yc = parallel_spmv(&c, &x).expect("yc");
        for i in 0..4 {
            assert_relative_eq!(yc[i], aax_ref[i], epsilon = 1e-10);
        }
    }
    /// On this bidiagonal-pattern matrix ILU(0) incurs no dropped
    /// fill-in, so it equals the exact LU and the forward/backward solves
    /// must reproduce a solution of A x = b to tight tolerance.
    #[test]
    fn test_parallel_ilu_factor() {
        let rows = vec![0, 0, 1, 1, 2, 2, 3, 3];
        let cols = vec![0, 1, 0, 1, 1, 2, 2, 3];
        let vals = vec![4.0, -1.0, -1.0, 4.0, -1.0, 4.0, -1.0, 4.0];
        let a = CsrMatrix::new(vals, rows, cols, (4, 4)).expect("build ilu test matrix");
        let ilu = parallel_ilu_factor(&a).expect("ilu factor");
        assert!(ilu.level_sets.len() > 0);
        let b = vec![1.0; 4];
        // Solve L y = b, then U x = y, and verify A x == b.
        let ly = ilu.forward_solve(&b).expect("forward solve");
        let x = ilu.backward_solve(&ly).expect("backward solve");
        let ax = parallel_spmv(&a, &x).expect("verify");
        for i in 0..4 {
            assert_relative_eq!(ax[i], b[i], epsilon = 1e-10);
        }
    }
}