// scirs2_sparse/gpu_preconditioner.rs
//! Mixed CPU/GPU preconditioning for sparse linear systems.
//!
//! ILU(0) factorization runs on CPU; SpMV applies the preconditioner.
//! The mixed strategy exploits CPU's suitability for sequential triangular solves
//! and GPU's throughput for parallel sparse matrix-vector products.
//!
//! # References
//!
//! - Saad, Y. (2003). *Iterative Methods for Sparse Linear Systems*, 2nd ed. SIAM. §10.3.
//! - Benzi, M. (2002). Preconditioning techniques for large linear systems. J. Comput. Phys.

use crate::error::{SparseError, SparseResult};
use scirs2_core::ndarray::Array1;

// ---------------------------------------------------------------------------
// Lightweight f64 CSR used internally by this module.
// We deliberately keep this separate from the generic `csr::CsrMatrix<T>` to
// avoid the heavyweight trait bounds and stay self-contained.
// ---------------------------------------------------------------------------

/// Sparse matrix in CSR format (row-major compressed), f64 values only.
///
/// This is a lightweight internal type used by the GPU preconditioner.
/// For the general sparse matrix API use [`crate::csr::CsrMatrix`].
#[derive(Debug, Clone)]
pub struct GpuPrecondCsr {
    /// Number of rows.
    pub nrows: usize,
    /// Number of columns.
    pub ncols: usize,
    /// Row pointers: `row_ptr[i]..row_ptr[i+1]` indexes row `i`.
    /// Length is `nrows + 1`; `row_ptr[nrows]` equals the total nnz.
    pub row_ptr: Vec<usize>,
    /// Column indices of non-zeros (parallel to `values`).
    pub col_idx: Vec<usize>,
    /// Non-zero values (parallel to `col_idx`).
    pub values: Vec<f64>,
}
39impl GpuPrecondCsr {
40 /// Create from raw CSR data.
41 pub fn new(
42 nrows: usize,
43 ncols: usize,
44 row_ptr: Vec<usize>,
45 col_idx: Vec<usize>,
46 values: Vec<f64>,
47 ) -> Self {
48 Self {
49 nrows,
50 ncols,
51 row_ptr,
52 col_idx,
53 values,
54 }
55 }
56
57 /// Create from a dense `ndarray::Array2<f64>` (useful for testing).
58 pub fn from_dense(dense: &scirs2_core::ndarray::Array2<f64>) -> Self {
59 let (m, n) = (dense.nrows(), dense.ncols());
60 let mut row_ptr = vec![0usize];
61 let mut col_idx = Vec::new();
62 let mut values = Vec::new();
63 for i in 0..m {
64 for j in 0..n {
65 let v = dense[[i, j]];
66 if v.abs() > 1e-14 {
67 col_idx.push(j);
68 values.push(v);
69 }
70 }
71 row_ptr.push(col_idx.len());
72 }
73 Self {
74 nrows: m,
75 ncols: n,
76 row_ptr,
77 col_idx,
78 values,
79 }
80 }
81
82 /// Sparse matrix-vector product `y = A * x`.
83 pub fn spmv(&self, x: &Array1<f64>) -> Array1<f64> {
84 let mut y = Array1::zeros(self.nrows);
85 for i in 0..self.nrows {
86 let mut sum = 0.0_f64;
87 for k in self.row_ptr[i]..self.row_ptr[i + 1] {
88 sum += self.values[k] * x[self.col_idx[k]];
89 }
90 y[i] = sum;
91 }
92 y
93 }
94
95 /// Number of non-zeros.
96 pub fn nnz(&self) -> usize {
97 self.values.len()
98 }
99}

// ---------------------------------------------------------------------------
// ILU(0) factorization
// ---------------------------------------------------------------------------

/// ILU(0) preconditioner — zero fill-in incomplete LU factorization.
///
/// Computes factors L and U such that LU ≈ A on the same sparsity pattern
/// as A. The factorization is performed row by row (IKJ ordering; see
/// Saad, *Iterative Methods for Sparse Linear Systems*, Alg. 10.4).
///
/// Both factors share the input's `row_ptr`/`col_idx` pattern; the split
/// into L and U is by column position relative to the diagonal, with the
/// unused half of each factor padded with zeros.
#[derive(Debug, Clone)]
pub struct Ilu0Preconditioner {
    /// Dimension of the (square) factored system.
    n: usize,
    /// L factor values stored in the combined `row_ptr`/`col_idx` layout.
    /// Entries with `col_idx[k] < i` are L; `col_idx[k] == i` is L diagonal
    /// (always 1.0); entries with `col_idx[k] > i` are 0.0 here.
    l_values: Vec<f64>,
    /// U factor values stored in the same layout.
    /// Entries with `col_idx[k] >= i` are U; `col_idx[k] < i` is 0.0 here.
    u_values: Vec<f64>,
    /// Row pointers, copied from the input matrix.
    row_ptr: Vec<usize>,
    /// Column indices, copied from the input matrix.
    col_idx: Vec<usize>,
}
122impl Ilu0Preconditioner {
123 /// Compute ILU(0) factorization of `a` (must be square with non-zero diagonal).
124 ///
125 /// # Errors
126 ///
127 /// Returns [`SparseError::ValueError`] if the matrix is non-square or if a
128 /// zero pivot is encountered during factorization.
129 pub fn compute(a: &GpuPrecondCsr) -> SparseResult<Self> {
130 if a.nrows != a.ncols {
131 return Err(SparseError::ValueError(
132 "ILU(0) requires a square matrix".to_string(),
133 ));
134 }
135 let n = a.nrows;
136 if n == 0 {
137 return Ok(Self {
138 n,
139 l_values: Vec::new(),
140 u_values: Vec::new(),
141 row_ptr: vec![0],
142 col_idx: Vec::new(),
143 });
144 }
145
146 // Working copy of values; we overwrite in-place during elimination.
147 let mut values = a.values.clone();
148 let row_ptr = a.row_ptr.clone();
149 let col_idx = a.col_idx.clone();
150
151 // Locate diagonal entries for each row (needed for pivot access).
152 let mut diag_idx = vec![usize::MAX; n];
153 for i in 0..n {
154 for k in row_ptr[i]..row_ptr[i + 1] {
155 if col_idx[k] == i {
156 diag_idx[i] = k;
157 break;
158 }
159 }
160 if diag_idx[i] == usize::MAX {
161 return Err(SparseError::ValueError(format!(
162 "ILU(0): missing diagonal entry in row {i}"
163 )));
164 }
165 }
166
167 // Row-by-row ILU(0) Gaussian elimination (keeps original sparsity).
168 for i in 1..n {
169 for k in row_ptr[i]..row_ptr[i + 1] {
170 let j = col_idx[k];
171 if j >= i {
172 break; // columns are sorted; strictly lower part is done
173 }
174 let u_jj = values[diag_idx[j]];
175 if u_jj.abs() < 1e-300 {
176 return Err(SparseError::ValueError(format!(
177 "ILU(0): zero pivot at diagonal ({j},{j})"
178 )));
179 }
180 // L[i,j] = A[i,j] / U[j,j]
181 values[k] /= u_jj;
182 let l_ij = values[k];
183
184 // Eliminate: A[i, col] -= L[i,j] * U[j, col] for col > j
185 // walking row i and row j in lock-step (both sorted by column).
186 let mut ki = k + 1; // position in row i, col > j
187 let mut kj = diag_idx[j] + 1; // position in row j, col > j
188 while ki < row_ptr[i + 1] && kj < row_ptr[j + 1] {
189 let ci = col_idx[ki];
190 let cj = col_idx[kj];
191 match ci.cmp(&cj) {
192 std::cmp::Ordering::Equal => {
193 // Pattern match: both rows have column ci=cj; subtract.
194 values[ki] -= l_ij * values[kj];
195 ki += 1;
196 kj += 1;
197 }
198 std::cmp::Ordering::Less => ki += 1,
199 std::cmp::Ordering::Greater => kj += 1,
200 }
201 }
202 }
203 }
204
205 // Split into L (lower, unit diagonal) and U (upper + diagonal).
206 let nnz = a.nnz();
207 let mut l_values = vec![0.0_f64; nnz];
208 let mut u_values = vec![0.0_f64; nnz];
209
210 for i in 0..n {
211 for k in row_ptr[i]..row_ptr[i + 1] {
212 let j = col_idx[k];
213 if j < i {
214 l_values[k] = values[k]; // L factor
215 } else if j == i {
216 l_values[k] = 1.0; // unit diagonal for L
217 u_values[k] = values[k]; // diagonal of U
218 } else {
219 u_values[k] = values[k]; // strictly upper part of U
220 }
221 }
222 }
223
224 Ok(Self {
225 n,
226 l_values,
227 u_values,
228 row_ptr,
229 col_idx,
230 })
231 }
232
233 /// Apply preconditioner: solve `(LU) z = r`.
234 ///
235 /// Performs forward solve `Ly = r` then backward solve `Uz = y`.
236 pub fn apply(&self, r: &Array1<f64>) -> Array1<f64> {
237 // Forward solve: L y = r (unit lower triangular)
238 let mut y = r.clone();
239 for i in 0..self.n {
240 for k in self.row_ptr[i]..self.row_ptr[i + 1] {
241 let j = self.col_idx[k];
242 if j < i {
243 let ly = self.l_values[k] * y[j];
244 y[i] -= ly;
245 }
246 }
247 }
248
249 // Backward solve: U z = y (upper triangular with explicit diagonal)
250 let mut z = y;
251 for ii in 0..self.n {
252 let i = self.n - 1 - ii;
253 let mut diag_val = 1.0_f64;
254 for k in self.row_ptr[i]..self.row_ptr[i + 1] {
255 let j = self.col_idx[k];
256 if j > i {
257 let uz = self.u_values[k] * z[j];
258 z[i] -= uz;
259 } else if j == i {
260 diag_val = self.u_values[k];
261 }
262 }
263 if diag_val.abs() > 1e-300 {
264 z[i] /= diag_val;
265 }
266 }
267 z
268 }
269}

// ---------------------------------------------------------------------------
// Mixed preconditioned CG
// ---------------------------------------------------------------------------

275/// Mixed CPU/GPU preconditioned conjugate gradient solver.
276///
277/// The preconditioner (ILU(0)) runs entirely on CPU; SpMV is the hot path
278/// that would execute on GPU in a real deployment. In this pure-Rust
279/// implementation SpMV uses [`GpuPrecondCsr::spmv`], which simulates the GPU
280/// compute path without an actual GPU dependency.
281pub struct MixedPreconditionedCg {
282 preconditioner: Ilu0Preconditioner,
283 max_iter: usize,
284 tol: f64,
285}
287impl MixedPreconditionedCg {
288 /// Create a new solver, pre-computing ILU(0) from matrix `a`.
289 ///
290 /// # Errors
291 ///
292 /// Propagates any error from [`Ilu0Preconditioner::compute`].
293 pub fn new(a: &GpuPrecondCsr, max_iter: usize, tol: f64) -> SparseResult<Self> {
294 let preconditioner = Ilu0Preconditioner::compute(a)?;
295 Ok(Self {
296 preconditioner,
297 max_iter,
298 tol,
299 })
300 }
301
302 /// Solve `Ax = b` using preconditioned CG (Fletcher-Reeves with ILU(0)).
303 ///
304 /// Returns `(x, iterations_used)`.
305 ///
306 /// # Errors
307 ///
308 /// Returns [`SparseError::ValueError`] if `b.len() != a.nrows`.
309 pub fn solve(&self, a: &GpuPrecondCsr, b: &Array1<f64>) -> SparseResult<(Array1<f64>, usize)> {
310 let n = b.len();
311 if n != a.nrows {
312 return Err(SparseError::ValueError(format!(
313 "solve: b length {n} does not match matrix nrows {}",
314 a.nrows
315 )));
316 }
317
318 // Initial residual r = b - A*x (start from zero)
319 let mut x = Array1::zeros(n);
320 let ax0 = a.spmv(&x);
321 let mut r: Array1<f64> = b - &ax0;
322 let mut z = self.preconditioner.apply(&r);
323 let mut p = z.clone();
324 let mut rz = r.dot(&z);
325
326 for iter in 0..self.max_iter {
327 let ap = a.spmv(&p);
328 let pap = p.dot(&ap);
329 if pap.abs() < 1e-300 {
330 break;
331 }
332 let alpha = rz / pap;
333 x = &x + &(&p * alpha);
334 r = &r - &(&ap * alpha);
335
336 let res_norm = r.dot(&r).sqrt();
337 if res_norm < self.tol {
338 return Ok((x, iter + 1));
339 }
340
341 z = self.preconditioner.apply(&r);
342 let rz_new = r.dot(&z);
343 if rz.abs() < 1e-300 {
344 break;
345 }
346 let beta = rz_new / rz;
347 p = &z + &(&p * beta);
348 rz = rz_new;
349 }
350
351 Ok((x, self.max_iter))
352 }
353}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// Build a tridiagonal matrix [-1, 2, -1] of size n×n.
    fn tridiag(n: usize) -> GpuPrecondCsr {
        let mut row_ptr = vec![0usize];
        let mut col_idx = Vec::new();
        let mut values = Vec::new();
        for i in 0..n {
            if i > 0 {
                col_idx.push(i - 1);
                values.push(-1.0_f64);
            }
            col_idx.push(i);
            values.push(2.0_f64);
            if i + 1 < n {
                col_idx.push(i + 1);
                values.push(-1.0_f64);
            }
            row_ptr.push(col_idx.len());
        }
        GpuPrecondCsr::new(n, n, row_ptr, col_idx, values)
    }

    /// Build a diagonally dominant SPD matrix of size n×n for CG testing.
    fn spd_matrix(n: usize) -> GpuPrecondCsr {
        let mut row_ptr = vec![0usize];
        let mut col_idx = Vec::new();
        let mut values = Vec::new();
        for i in 0..n {
            if i > 0 {
                col_idx.push(i - 1);
                values.push(-1.0_f64);
            }
            col_idx.push(i);
            // Diagonal is (n + 2.0) to ensure strict diagonal dominance.
            values.push((n as f64) + 2.0);
            if i + 1 < n {
                col_idx.push(i + 1);
                values.push(-1.0_f64);
            }
            row_ptr.push(col_idx.len());
        }
        GpuPrecondCsr::new(n, n, row_ptr, col_idx, values)
    }

    #[test]
    fn test_spmv() {
        let n = 4;
        let a = tridiag(n);
        // x = [1, 1, 1, 1] => A*x = [1, 0, 0, 1] for tridiag(2,-1)
        let x = Array1::ones(n);
        let y = a.spmv(&x);
        // row 0: 2*1 + (-1)*1 = 1
        // row 1: (-1)*1 + 2*1 + (-1)*1 = 0
        // row 2: (-1)*1 + 2*1 + (-1)*1 = 0
        // row 3: (-1)*1 + 2*1 = 1
        assert!((y[0] - 1.0).abs() < 1e-12, "y[0]={}", y[0]);
        assert!((y[1]).abs() < 1e-12, "y[1]={}", y[1]);
        assert!((y[2]).abs() < 1e-12, "y[2]={}", y[2]);
        assert!((y[3] - 1.0).abs() < 1e-12, "y[3]={}", y[3]);
    }

    #[test]
    fn test_ilu0_compute() {
        let a = tridiag(4);
        let ilu = Ilu0Preconditioner::compute(&a).expect("ILU(0) on tridiagonal must succeed");
        // Verify we got a proper object (n = 4).
        assert_eq!(ilu.n, 4);
        assert_eq!(ilu.row_ptr.len(), 5);
    }

    #[test]
    fn test_ilu0_apply() {
        let a = tridiag(6);
        let ilu = Ilu0Preconditioner::compute(&a).expect("ILU(0) should succeed");
        let r = Array1::from(vec![1.0_f64; 6]);
        let z = ilu.apply(&r);
        // Shape check
        assert_eq!(z.len(), 6);
        // All-ones r should produce a finite, non-NaN result.
        for (i, &v) in z.iter().enumerate() {
            assert!(v.is_finite(), "z[{i}] is not finite: {v}");
        }
    }

    #[test]
    fn test_from_dense_roundtrip() {
        let mut dense = Array2::zeros((3, 3));
        dense[[0, 0]] = 4.0;
        dense[[0, 1]] = -1.0;
        dense[[1, 0]] = -1.0;
        dense[[1, 1]] = 4.0;
        dense[[1, 2]] = -1.0;
        dense[[2, 1]] = -1.0;
        dense[[2, 2]] = 4.0;

        let a = GpuPrecondCsr::from_dense(&dense);
        assert_eq!(a.nrows, 3);
        assert_eq!(a.ncols, 3);
        assert_eq!(a.nnz(), 7);

        let x = Array1::from(vec![1.0, 1.0, 1.0]);
        let y = a.spmv(&x);
        // Row 0: 4 - 1 = 3
        assert!((y[0] - 3.0).abs() < 1e-12);
        // Row 1: -1 + 4 - 1 = 2
        assert!((y[1] - 2.0).abs() < 1e-12);
        // Row 2: -1 + 4 = 3
        assert!((y[2] - 3.0).abs() < 1e-12);
    }

    #[test]
    fn test_preconditioned_cg_solve() {
        let n = 10;
        let a = spd_matrix(n);
        // b = A * x_exact, x_exact = [1, 2, ..., n]
        let x_exact = Array1::from_iter((1..=n).map(|i| i as f64));
        let b = a.spmv(&x_exact);

        let solver = MixedPreconditionedCg::new(&a, 200, 1e-10)
            .expect("MixedPreconditionedCg construction must succeed");
        let (x_sol, iters) = solver.solve(&a, &b).expect("PCG solve must succeed");

        assert!(iters <= 200, "solver used more iterations than max");
        // Verify ||x_sol - x_exact||_inf < 1e-6
        for i in 0..n {
            assert!(
                (x_sol[i] - x_exact[i]).abs() < 1e-6,
                "x_sol[{i}]={} vs x_exact[{i}]={}",
                x_sol[i],
                x_exact[i]
            );
        }
    }
}