Skip to main content

oxiphysics_gpu/
gpu_sparse_solver.rs

1#![allow(clippy::needless_range_loop, clippy::ptr_arg)]
2// Copyright 2026 COOLJAPAN OU (Team KitaSan)
3// SPDX-License-Identifier: Apache-2.0
4
5//! GPU sparse linear system solver (CPU mock implementation).
6//!
7//! Provides CSR sparse matrix storage, SpMV, dot product, AXPY, conjugate
8//! gradient (CG), and preconditioned CG solvers that mirror GPU dispatch
9//! semantics while executing on the CPU.
10
11#![allow(dead_code)]
12#![allow(clippy::too_many_arguments)]
13
14// ── CSR Sparse Matrix ────────────────────────────────────────────────────────
15
16/// Compressed Sparse Row (CSR) matrix stored in GPU-friendly layout.
17///
18/// Rows have entries in `col_idx[row_ptr[i]..row_ptr[i+1]]` with
19/// corresponding values in `values[row_ptr[i]..row_ptr[i+1]]`.
20#[derive(Debug, Clone)]
21pub struct SparseMatrixGpu {
22    /// Number of rows (and columns — assumed square).
23    pub n: usize,
24    /// Number of non-zero entries.
25    pub nnz: usize,
26    /// Row pointer array (length n+1).
27    pub row_ptr: Vec<usize>,
28    /// Column index array (length nnz).
29    pub col_idx: Vec<usize>,
30    /// Value array (length nnz).
31    pub values: Vec<f64>,
32}
33
34impl SparseMatrixGpu {
35    /// Create a new empty n×n sparse matrix.
36    pub fn new(n: usize) -> Self {
37        Self {
38            n,
39            nnz: 0,
40            row_ptr: vec![0usize; n + 1],
41            col_idx: Vec::new(),
42            values: Vec::new(),
43        }
44    }
45
46    /// Construct from pre-built CSR arrays.
47    ///
48    /// # Panics
49    /// Panics in debug mode if `row_ptr.len() != n + 1`.
50    pub fn from_csr(n: usize, row_ptr: Vec<usize>, col_idx: Vec<usize>, values: Vec<f64>) -> Self {
51        debug_assert_eq!(row_ptr.len(), n + 1);
52        let nnz = values.len();
53        Self {
54            n,
55            nnz,
56            row_ptr,
57            col_idx,
58            values,
59        }
60    }
61
62    /// Append a single entry `(row, col, val)` in COO style.
63    ///
64    /// This invalidates the CSR structure until [`finalize`](Self::finalize) is called.
65    pub fn add_entry(&mut self, row: usize, col: usize, val: f64) {
66        // Store as a pending triplet encoded in the arrays.
67        self.col_idx.push(col);
68        self.values.push(val);
69        // Use row_ptr[row+1] as a count (finalize will prefix-sum them).
70        if row + 1 < self.row_ptr.len() {
71            self.row_ptr[row + 1] += 1;
72        }
73        self.nnz += 1;
74    }
75
76    /// Convert count-per-row in `row_ptr` to proper CSR offsets.
77    ///
78    /// Must be called after a series of [`add_entry`](Self::add_entry) calls
79    /// and before any SpMV or solver operations.
80    pub fn finalize(&mut self) {
81        // Prefix-sum row_ptr in-place.
82        for i in 1..=self.n {
83            self.row_ptr[i] += self.row_ptr[i - 1];
84        }
85        self.nnz = *self.row_ptr.last().unwrap_or(&0);
86    }
87
88    /// Return the number of non-zero entries.
89    pub fn nnz(&self) -> usize {
90        self.nnz
91    }
92
93    /// Return the number of rows.
94    pub fn rows(&self) -> usize {
95        self.n
96    }
97
98    /// Extract the main diagonal as a dense vector.
99    ///
100    /// Missing diagonal entries are filled with zero.
101    pub fn diagonal(&self) -> Vec<f64> {
102        let mut diag = vec![0.0f64; self.n];
103        for row in 0..self.n {
104            let start = self.row_ptr[row];
105            let end = self.row_ptr[row + 1];
106            for k in start..end {
107                if self.col_idx[k] == row {
108                    diag[row] = self.values[k];
109                }
110            }
111        }
112        diag
113    }
114
115    /// Check whether the matrix is structurally symmetric (A\[i,j\] ↔ A\[j,i\]).
116    pub fn is_symmetric(&self) -> bool {
117        for row in 0..self.n {
118            let start = self.row_ptr[row];
119            let end = self.row_ptr[row + 1];
120            for k in start..end {
121                let col = self.col_idx[k];
122                let val = self.values[k];
123                // Look for the transpose entry.
124                let mut found = false;
125                let cs = self.row_ptr[col];
126                let ce = self.row_ptr[col + 1];
127                for m in cs..ce {
128                    if self.col_idx[m] == row {
129                        if (self.values[m] - val).abs() > 1e-12 {
130                            return false;
131                        }
132                        found = true;
133                        break;
134                    }
135                }
136                if !found {
137                    return false;
138                }
139            }
140        }
141        true
142    }
143
144    /// Frobenius norm: √(∑ aᵢⱼ²).
145    pub fn frobenius_norm(&self) -> f64 {
146        self.values.iter().map(|v| v * v).sum::<f64>().sqrt()
147    }
148}
149
150// ── Utility constructors ─────────────────────────────────────────────────────
151
152/// Build an n×n identity matrix in CSR format.
153pub fn sparse_identity(n: usize) -> SparseMatrixGpu {
154    let row_ptr: Vec<usize> = (0..=n).collect();
155    let col_idx: Vec<usize> = (0..n).collect();
156    let values = vec![1.0f64; n];
157    SparseMatrixGpu::from_csr(n, row_ptr, col_idx, values)
158}
159
160/// Build a diagonal matrix from a dense vector.
161pub fn sparse_diagonal_matrix(diag: &[f64]) -> SparseMatrixGpu {
162    let n = diag.len();
163    let row_ptr: Vec<usize> = (0..=n).collect();
164    let col_idx: Vec<usize> = (0..n).collect();
165    let values = diag.to_vec();
166    SparseMatrixGpu::from_csr(n, row_ptr, col_idx, values)
167}
168
169// ── BLAS-like primitives ─────────────────────────────────────────────────────
170
171/// Sparse matrix–vector product: y = A · x.
172///
173/// # Panics
174/// Panics if `x.len() != mat.n`.
175pub fn gpu_spmv(mat: &SparseMatrixGpu, x: &[f64]) -> Vec<f64> {
176    assert_eq!(x.len(), mat.n, "gpu_spmv: x length mismatch");
177    let mut y = vec![0.0f64; mat.n];
178    for row in 0..mat.n {
179        let start = mat.row_ptr[row];
180        let end = mat.row_ptr[row + 1];
181        let mut sum = 0.0f64;
182        for k in start..end {
183            sum += mat.values[k] * x[mat.col_idx[k]];
184        }
185        y[row] = sum;
186    }
187    y
188}
189
190/// Dense dot product: ∑ aᵢ bᵢ.
191///
192/// Returns 0 for empty slices.
193pub fn gpu_dot(a: &[f64], b: &[f64]) -> f64 {
194    a.iter().zip(b.iter()).map(|(ai, bi)| ai * bi).sum()
195}
196
197/// In-place AXPY: y ← a·x + y.
198///
199/// # Panics
200/// Panics if `x.len() != y.len()`.
201pub fn gpu_axpy(a: f64, x: &[f64], y: &mut Vec<f64>) {
202    assert_eq!(x.len(), y.len(), "gpu_axpy: length mismatch");
203    for (yi, &xi) in y.iter_mut().zip(x.iter()) {
204        *yi += a * xi;
205    }
206}
207
208// ── Solvers ──────────────────────────────────────────────────────────────────
209
210/// Conjugate gradient solver for symmetric positive-definite systems A·x = b.
211///
212/// Returns `(x, iterations, final_residual_norm)`.
213/// Terminates when the relative residual ‖r‖/‖b‖ < `tol` or after `max_iter`
214/// steps.
215pub fn gpu_cg_solver(
216    mat: &SparseMatrixGpu,
217    b: &[f64],
218    max_iter: usize,
219    tol: f64,
220) -> (Vec<f64>, usize, f64) {
221    let n = mat.n;
222    let mut x = vec![0.0f64; n];
223    let mut r = b.to_vec(); // r = b - A*x; x=0 so r=b
224    let mut p = r.clone();
225    let mut rr = gpu_dot(&r, &r);
226    let b_norm = rr.sqrt().max(1e-100);
227
228    for iter in 0..max_iter {
229        if rr.sqrt() / b_norm < tol {
230            return (x, iter, rr.sqrt());
231        }
232        let ap = gpu_spmv(mat, &p);
233        let pap = gpu_dot(&p, &ap);
234        if pap.abs() < 1e-300 {
235            break;
236        }
237        let alpha = rr / pap;
238        gpu_axpy(alpha, &p, &mut x);
239        gpu_axpy(-alpha, &ap, &mut r);
240        let rr_new = gpu_dot(&r, &r);
241        let beta = rr_new / rr.max(1e-300);
242        // p = r + beta*p
243        for i in 0..n {
244            p[i] = r[i] + beta * p[i];
245        }
246        rr = rr_new;
247    }
248    (x, max_iter, rr.sqrt())
249}
250
251/// Build the Jacobi (diagonal) preconditioner for matrix `mat`.
252///
253/// Returns a vector `M⁻¹` where `M⁻¹[i] = 1/A[i,i]` (or 1 if the diagonal
254/// entry is zero).
255pub fn gpu_jacobi_preconditioner(mat: &SparseMatrixGpu) -> Vec<f64> {
256    mat.diagonal()
257        .iter()
258        .map(|&d| if d.abs() > 1e-15 { 1.0 / d } else { 1.0 })
259        .collect()
260}
261
262/// Preconditioned conjugate gradient solver.
263///
264/// `precond` should be a vector of reciprocal diagonal entries (from
265/// [`gpu_jacobi_preconditioner`]).  Returns `(x, iterations, residual_norm)`.
266pub fn gpu_pcg_solver(
267    mat: &SparseMatrixGpu,
268    b: &[f64],
269    precond: &[f64],
270    max_iter: usize,
271    tol: f64,
272) -> (Vec<f64>, usize, f64) {
273    let n = mat.n;
274    let mut x = vec![0.0f64; n];
275    let mut r = b.to_vec();
276    // z = M⁻¹ r
277    let mut z: Vec<f64> = r
278        .iter()
279        .zip(precond.iter())
280        .map(|(ri, mi)| ri * mi)
281        .collect();
282    let mut p = z.clone();
283    let mut rz = gpu_dot(&r, &z);
284    let b_norm = gpu_dot(b, b).sqrt().max(1e-100);
285
286    for iter in 0..max_iter {
287        if gpu_dot(&r, &r).sqrt() / b_norm < tol {
288            return (x, iter, gpu_dot(&r, &r).sqrt());
289        }
290        let ap = gpu_spmv(mat, &p);
291        let pap = gpu_dot(&p, &ap);
292        if pap.abs() < 1e-300 {
293            break;
294        }
295        let alpha = rz / pap;
296        gpu_axpy(alpha, &p, &mut x);
297        gpu_axpy(-alpha, &ap, &mut r);
298        z = r
299            .iter()
300            .zip(precond.iter())
301            .map(|(ri, mi)| ri * mi)
302            .collect();
303        let rz_new = gpu_dot(&r, &z);
304        let beta = rz_new / rz.max(1e-300);
305        for i in 0..n {
306            p[i] = z[i] + beta * p[i];
307        }
308        rz = rz_new;
309    }
310    (x, max_iter, gpu_dot(&r, &r).sqrt())
311}
312
313// ── Stats ────────────────────────────────────────────────────────────────────
314
315/// Performance statistics for a GPU sparse solve.
316#[derive(Debug, Clone)]
317pub struct GpuSparseSolverStats {
318    /// Number of solver iterations performed.
319    pub iterations: usize,
320    /// ‖r‖ after the final iteration.
321    pub final_residual: f64,
322    /// Whether the solver converged within the requested tolerance.
323    pub converged: bool,
324    /// Wall-clock time in milliseconds (mock — always 0.0 in CPU simulation).
325    pub time_ms: f64,
326}
327
328impl GpuSparseSolverStats {
329    /// Create a new stats record.
330    pub fn new(iterations: usize, final_residual: f64, converged: bool, time_ms: f64) -> Self {
331        Self {
332            iterations,
333            final_residual,
334            converged,
335            time_ms,
336        }
337    }
338}
339
340// ============================================================
341// Tests
342// ============================================================
343#[cfg(test)]
344mod tests {
345    use super::*;
346
347    // ── helpers ─────────────────────────────────────────────────────────────
348
349    /// Build a simple 3×3 SPD matrix:
350    /// \[ 4 -1  0 \]
351    /// \[-1  4 -1 \]
352    /// \[ 0 -1  4 \]
353    fn build_3x3_spd() -> SparseMatrixGpu {
354        let row_ptr = vec![0, 2, 5, 7];
355        let col_idx = vec![0, 1, 0, 1, 2, 1, 2];
356        let values = vec![4.0, -1.0, -1.0, 4.0, -1.0, -1.0, 4.0];
357        SparseMatrixGpu::from_csr(3, row_ptr, col_idx, values)
358    }
359
360    // ── identity ─────────────────────────────────────────────────────────────
361
362    #[test]
363    fn test_identity_spmv_returns_input() {
364        let id = sparse_identity(4);
365        let x = vec![1.0, 2.0, 3.0, 4.0];
366        let y = gpu_spmv(&id, &x);
367        for (yi, xi) in y.iter().zip(x.iter()) {
368            assert!((yi - xi).abs() < 1e-12);
369        }
370    }
371
372    #[test]
373    fn test_identity_nnz() {
374        let id = sparse_identity(5);
375        assert_eq!(id.nnz(), 5);
376    }
377
378    #[test]
379    fn test_identity_rows() {
380        let id = sparse_identity(7);
381        assert_eq!(id.rows(), 7);
382    }
383
384    #[test]
385    fn test_identity_diagonal() {
386        let id = sparse_identity(4);
387        let diag = id.diagonal();
388        assert_eq!(diag, vec![1.0; 4]);
389    }
390
391    #[test]
392    fn test_identity_frobenius() {
393        let n = 9usize;
394        let id = sparse_identity(n);
395        let expected = (n as f64).sqrt();
396        assert!((id.frobenius_norm() - expected).abs() < 1e-10);
397    }
398
399    #[test]
400    fn test_identity_is_symmetric() {
401        assert!(sparse_identity(4).is_symmetric());
402    }
403
404    // ── diagonal matrix ──────────────────────────────────────────────────────
405
406    #[test]
407    fn test_diagonal_matrix_spmv() {
408        let d = vec![2.0, 3.0, 5.0];
409        let mat = sparse_diagonal_matrix(&d);
410        let x = vec![1.0, 1.0, 1.0];
411        let y = gpu_spmv(&mat, &x);
412        assert!((y[0] - 2.0).abs() < 1e-12);
413        assert!((y[1] - 3.0).abs() < 1e-12);
414        assert!((y[2] - 5.0).abs() < 1e-12);
415    }
416
417    #[test]
418    fn test_diagonal_matrix_frobenius() {
419        let d = vec![3.0, 4.0];
420        let mat = sparse_diagonal_matrix(&d);
421        assert!((mat.frobenius_norm() - 5.0).abs() < 1e-10);
422    }
423
424    #[test]
425    fn test_diagonal_matrix_is_symmetric() {
426        let mat = sparse_diagonal_matrix(&[1.0, 2.0, 3.0]);
427        assert!(mat.is_symmetric());
428    }
429
430    #[test]
431    fn test_sparse_identity_zero_size() {
432        let id = sparse_identity(0);
433        assert_eq!(id.nnz(), 0);
434        assert_eq!(id.rows(), 0);
435    }
436
437    // ── dot product ──────────────────────────────────────────────────────────
438
439    #[test]
440    fn test_gpu_dot_basic() {
441        assert!((gpu_dot(&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]) - 32.0).abs() < 1e-12);
442    }
443
444    #[test]
445    fn test_gpu_dot_empty() {
446        assert!((gpu_dot(&[], &[])).abs() < 1e-15);
447    }
448
449    #[test]
450    fn test_gpu_dot_orthogonal() {
451        assert!((gpu_dot(&[1.0, 0.0], &[0.0, 1.0])).abs() < 1e-15);
452    }
453
454    // ── axpy ─────────────────────────────────────────────────────────────────
455
456    #[test]
457    fn test_gpu_axpy_basic() {
458        let x = vec![1.0, 2.0, 3.0];
459        let mut y = vec![4.0, 5.0, 6.0];
460        gpu_axpy(2.0, &x, &mut y);
461        assert!((y[0] - 6.0).abs() < 1e-12);
462        assert!((y[1] - 9.0).abs() < 1e-12);
463        assert!((y[2] - 12.0).abs() < 1e-12);
464    }
465
466    #[test]
467    fn test_gpu_axpy_zero_alpha() {
468        let x = vec![1.0, 2.0];
469        let mut y = vec![3.0, 4.0];
470        gpu_axpy(0.0, &x, &mut y);
471        assert!((y[0] - 3.0).abs() < 1e-12);
472        assert!((y[1] - 4.0).abs() < 1e-12);
473    }
474
475    // ── spmv ─────────────────────────────────────────────────────────────────
476
477    #[test]
478    fn test_spmv_3x3_spd() {
479        let mat = build_3x3_spd();
480        let x = vec![1.0, 0.0, 0.0];
481        let y = gpu_spmv(&mat, &x);
482        assert!((y[0] - 4.0).abs() < 1e-12);
483        assert!((y[1] + 1.0).abs() < 1e-12);
484        assert!((y[2]).abs() < 1e-12);
485    }
486
487    #[test]
488    fn test_spmv_zeros_input() {
489        let mat = build_3x3_spd();
490        let y = gpu_spmv(&mat, &[0.0, 0.0, 0.0]);
491        for yi in y {
492            assert!(yi.abs() < 1e-15);
493        }
494    }
495
496    // ── from_csr / add_entry / finalize ──────────────────────────────────────
497
498    #[test]
499    fn test_from_csr_nnz() {
500        let mat = build_3x3_spd();
501        assert_eq!(mat.nnz(), 7);
502    }
503
504    #[test]
505    fn test_add_entry_finalize() {
506        let mut mat = SparseMatrixGpu::new(2);
507        mat.add_entry(0, 0, 2.0);
508        mat.add_entry(0, 1, -1.0);
509        mat.add_entry(1, 0, -1.0);
510        mat.add_entry(1, 1, 2.0);
511        mat.finalize();
512        assert_eq!(mat.nnz(), 4);
513        let y = gpu_spmv(&mat, &[1.0, 0.0]);
514        assert!((y[0] - 2.0).abs() < 1e-12);
515        assert!((y[1] + 1.0).abs() < 1e-12);
516    }
517
518    // ── CG solver ────────────────────────────────────────────────────────────
519
520    #[test]
521    fn test_cg_converges_3x3() {
522        let mat = build_3x3_spd();
523        let b = vec![1.0, 0.0, 0.0];
524        let (x, iters, res) = gpu_cg_solver(&mat, &b, 100, 1e-10);
525        assert!(iters < 100, "CG did not converge: iters={iters}");
526        // Verify A*x ≈ b
527        let ax = gpu_spmv(&mat, &x);
528        for (ai, bi) in ax.iter().zip(b.iter()) {
529            assert!((ai - bi).abs() < 1e-8, "residual too large");
530        }
531        let _ = res; // residual checked via A*x
532    }
533
534    #[test]
535    fn test_cg_identity_system() {
536        let id = sparse_identity(3);
537        let b = vec![1.0, 2.0, 3.0];
538        let (x, _iters, _res) = gpu_cg_solver(&id, &b, 50, 1e-10);
539        for (xi, bi) in x.iter().zip(b.iter()) {
540            assert!((xi - bi).abs() < 1e-8);
541        }
542    }
543
544    // ── Jacobi preconditioner ────────────────────────────────────────────────
545
546    #[test]
547    fn test_jacobi_preconditioner_diagonal() {
548        let d = vec![2.0, 4.0, 5.0];
549        let mat = sparse_diagonal_matrix(&d);
550        let prec = gpu_jacobi_preconditioner(&mat);
551        assert!((prec[0] - 0.5).abs() < 1e-12);
552        assert!((prec[1] - 0.25).abs() < 1e-12);
553        assert!((prec[2] - 0.2).abs() < 1e-12);
554    }
555
556    #[test]
557    fn test_jacobi_preconditioner_identity() {
558        let id = sparse_identity(3);
559        let prec = gpu_jacobi_preconditioner(&id);
560        for p in prec {
561            assert!((p - 1.0).abs() < 1e-12);
562        }
563    }
564
565    // ── PCG solver ───────────────────────────────────────────────────────────
566
567    #[test]
568    fn test_pcg_converges_3x3() {
569        let mat = build_3x3_spd();
570        let b = vec![1.0, 0.0, 1.0];
571        let prec = gpu_jacobi_preconditioner(&mat);
572        let (x, iters, _res) = gpu_pcg_solver(&mat, &b, &prec, 100, 1e-10);
573        assert!(iters < 100);
574        let ax = gpu_spmv(&mat, &x);
575        for (ai, bi) in ax.iter().zip(b.iter()) {
576            assert!((ai - bi).abs() < 1e-7);
577        }
578    }
579
580    #[test]
581    fn test_pcg_identity_trivial() {
582        let id = sparse_identity(4);
583        let b = vec![1.0, 2.0, 3.0, 4.0];
584        let prec = gpu_jacobi_preconditioner(&id);
585        let (x, _iters, _res) = gpu_pcg_solver(&id, &b, &prec, 10, 1e-12);
586        for (xi, bi) in x.iter().zip(b.iter()) {
587            assert!((xi - bi).abs() < 1e-8);
588        }
589    }
590
591    // ── stats ────────────────────────────────────────────────────────────────
592
593    #[test]
594    fn test_solver_stats_fields() {
595        let s = GpuSparseSolverStats::new(5, 1e-8, true, 0.0);
596        assert_eq!(s.iterations, 5);
597        assert!(s.converged);
598        assert!((s.final_residual - 1e-8).abs() < 1e-20);
599    }
600
601    // ── is_symmetric ────────────────────────────────────────────────────────
602
603    #[test]
604    fn test_asymmetric_matrix() {
605        // Upper triangular only — not symmetric
606        let row_ptr = vec![0, 2, 3, 3];
607        let col_idx = vec![0, 1, 1, 2];
608        let values = vec![1.0, 2.0, 3.0, 4.0];
609        let mat = SparseMatrixGpu::from_csr(3, row_ptr, col_idx, values);
610        assert!(!mat.is_symmetric());
611    }
612
613    #[test]
614    fn test_frobenius_zero_matrix() {
615        let mat = SparseMatrixGpu::new(4);
616        assert!((mat.frobenius_norm()).abs() < 1e-15);
617    }
618
619    // ── new() ────────────────────────────────────────────────────────────────
620
621    #[test]
622    fn test_new_empty_matrix() {
623        let mat = SparseMatrixGpu::new(3);
624        assert_eq!(mat.n, 3);
625        assert_eq!(mat.nnz, 0);
626        assert_eq!(mat.row_ptr, vec![0; 4]);
627    }
628
629    #[test]
630    fn test_diagonal_of_empty_matrix() {
631        let mat = SparseMatrixGpu::new(3);
632        let diag = mat.diagonal();
633        assert_eq!(diag, vec![0.0; 3]);
634    }
635
636    #[test]
637    fn test_cg_zero_rhs() {
638        let id = sparse_identity(3);
639        let b = vec![0.0; 3];
640        let (x, _iters, res) = gpu_cg_solver(&id, &b, 20, 1e-12);
641        for xi in &x {
642            assert!(xi.abs() < 1e-12);
643        }
644        assert!(res < 1e-10);
645    }
646}