1#![allow(clippy::needless_range_loop, clippy::ptr_arg)]
2#![allow(dead_code)]
12#![allow(clippy::too_many_arguments)]
13
14#[derive(Debug, Clone)]
21pub struct SparseMatrixGpu {
22 pub n: usize,
24 pub nnz: usize,
26 pub row_ptr: Vec<usize>,
28 pub col_idx: Vec<usize>,
30 pub values: Vec<f64>,
32}
33
34impl SparseMatrixGpu {
35 pub fn new(n: usize) -> Self {
37 Self {
38 n,
39 nnz: 0,
40 row_ptr: vec![0usize; n + 1],
41 col_idx: Vec::new(),
42 values: Vec::new(),
43 }
44 }
45
46 pub fn from_csr(n: usize, row_ptr: Vec<usize>, col_idx: Vec<usize>, values: Vec<f64>) -> Self {
51 debug_assert_eq!(row_ptr.len(), n + 1);
52 let nnz = values.len();
53 Self {
54 n,
55 nnz,
56 row_ptr,
57 col_idx,
58 values,
59 }
60 }
61
62 pub fn add_entry(&mut self, row: usize, col: usize, val: f64) {
66 self.col_idx.push(col);
68 self.values.push(val);
69 if row + 1 < self.row_ptr.len() {
71 self.row_ptr[row + 1] += 1;
72 }
73 self.nnz += 1;
74 }
75
76 pub fn finalize(&mut self) {
81 for i in 1..=self.n {
83 self.row_ptr[i] += self.row_ptr[i - 1];
84 }
85 self.nnz = *self.row_ptr.last().unwrap_or(&0);
86 }
87
88 pub fn nnz(&self) -> usize {
90 self.nnz
91 }
92
93 pub fn rows(&self) -> usize {
95 self.n
96 }
97
98 pub fn diagonal(&self) -> Vec<f64> {
102 let mut diag = vec![0.0f64; self.n];
103 for row in 0..self.n {
104 let start = self.row_ptr[row];
105 let end = self.row_ptr[row + 1];
106 for k in start..end {
107 if self.col_idx[k] == row {
108 diag[row] = self.values[k];
109 }
110 }
111 }
112 diag
113 }
114
115 pub fn is_symmetric(&self) -> bool {
117 for row in 0..self.n {
118 let start = self.row_ptr[row];
119 let end = self.row_ptr[row + 1];
120 for k in start..end {
121 let col = self.col_idx[k];
122 let val = self.values[k];
123 let mut found = false;
125 let cs = self.row_ptr[col];
126 let ce = self.row_ptr[col + 1];
127 for m in cs..ce {
128 if self.col_idx[m] == row {
129 if (self.values[m] - val).abs() > 1e-12 {
130 return false;
131 }
132 found = true;
133 break;
134 }
135 }
136 if !found {
137 return false;
138 }
139 }
140 }
141 true
142 }
143
144 pub fn frobenius_norm(&self) -> f64 {
146 self.values.iter().map(|v| v * v).sum::<f64>().sqrt()
147 }
148}
149
150pub fn sparse_identity(n: usize) -> SparseMatrixGpu {
154 let row_ptr: Vec<usize> = (0..=n).collect();
155 let col_idx: Vec<usize> = (0..n).collect();
156 let values = vec![1.0f64; n];
157 SparseMatrixGpu::from_csr(n, row_ptr, col_idx, values)
158}
159
160pub fn sparse_diagonal_matrix(diag: &[f64]) -> SparseMatrixGpu {
162 let n = diag.len();
163 let row_ptr: Vec<usize> = (0..=n).collect();
164 let col_idx: Vec<usize> = (0..n).collect();
165 let values = diag.to_vec();
166 SparseMatrixGpu::from_csr(n, row_ptr, col_idx, values)
167}
168
169pub fn gpu_spmv(mat: &SparseMatrixGpu, x: &[f64]) -> Vec<f64> {
176 assert_eq!(x.len(), mat.n, "gpu_spmv: x length mismatch");
177 let mut y = vec![0.0f64; mat.n];
178 for row in 0..mat.n {
179 let start = mat.row_ptr[row];
180 let end = mat.row_ptr[row + 1];
181 let mut sum = 0.0f64;
182 for k in start..end {
183 sum += mat.values[k] * x[mat.col_idx[k]];
184 }
185 y[row] = sum;
186 }
187 y
188}
189
190pub fn gpu_dot(a: &[f64], b: &[f64]) -> f64 {
194 a.iter().zip(b.iter()).map(|(ai, bi)| ai * bi).sum()
195}
196
197pub fn gpu_axpy(a: f64, x: &[f64], y: &mut Vec<f64>) {
202 assert_eq!(x.len(), y.len(), "gpu_axpy: length mismatch");
203 for (yi, &xi) in y.iter_mut().zip(x.iter()) {
204 *yi += a * xi;
205 }
206}
207
208pub fn gpu_cg_solver(
216 mat: &SparseMatrixGpu,
217 b: &[f64],
218 max_iter: usize,
219 tol: f64,
220) -> (Vec<f64>, usize, f64) {
221 let n = mat.n;
222 let mut x = vec![0.0f64; n];
223 let mut r = b.to_vec(); let mut p = r.clone();
225 let mut rr = gpu_dot(&r, &r);
226 let b_norm = rr.sqrt().max(1e-100);
227
228 for iter in 0..max_iter {
229 if rr.sqrt() / b_norm < tol {
230 return (x, iter, rr.sqrt());
231 }
232 let ap = gpu_spmv(mat, &p);
233 let pap = gpu_dot(&p, &ap);
234 if pap.abs() < 1e-300 {
235 break;
236 }
237 let alpha = rr / pap;
238 gpu_axpy(alpha, &p, &mut x);
239 gpu_axpy(-alpha, &ap, &mut r);
240 let rr_new = gpu_dot(&r, &r);
241 let beta = rr_new / rr.max(1e-300);
242 for i in 0..n {
244 p[i] = r[i] + beta * p[i];
245 }
246 rr = rr_new;
247 }
248 (x, max_iter, rr.sqrt())
249}
250
251pub fn gpu_jacobi_preconditioner(mat: &SparseMatrixGpu) -> Vec<f64> {
256 mat.diagonal()
257 .iter()
258 .map(|&d| if d.abs() > 1e-15 { 1.0 / d } else { 1.0 })
259 .collect()
260}
261
262pub fn gpu_pcg_solver(
267 mat: &SparseMatrixGpu,
268 b: &[f64],
269 precond: &[f64],
270 max_iter: usize,
271 tol: f64,
272) -> (Vec<f64>, usize, f64) {
273 let n = mat.n;
274 let mut x = vec![0.0f64; n];
275 let mut r = b.to_vec();
276 let mut z: Vec<f64> = r
278 .iter()
279 .zip(precond.iter())
280 .map(|(ri, mi)| ri * mi)
281 .collect();
282 let mut p = z.clone();
283 let mut rz = gpu_dot(&r, &z);
284 let b_norm = gpu_dot(b, b).sqrt().max(1e-100);
285
286 for iter in 0..max_iter {
287 if gpu_dot(&r, &r).sqrt() / b_norm < tol {
288 return (x, iter, gpu_dot(&r, &r).sqrt());
289 }
290 let ap = gpu_spmv(mat, &p);
291 let pap = gpu_dot(&p, &ap);
292 if pap.abs() < 1e-300 {
293 break;
294 }
295 let alpha = rz / pap;
296 gpu_axpy(alpha, &p, &mut x);
297 gpu_axpy(-alpha, &ap, &mut r);
298 z = r
299 .iter()
300 .zip(precond.iter())
301 .map(|(ri, mi)| ri * mi)
302 .collect();
303 let rz_new = gpu_dot(&r, &z);
304 let beta = rz_new / rz.max(1e-300);
305 for i in 0..n {
306 p[i] = z[i] + beta * p[i];
307 }
308 rz = rz_new;
309 }
310 (x, max_iter, gpu_dot(&r, &r).sqrt())
311}
312
313#[derive(Debug, Clone)]
317pub struct GpuSparseSolverStats {
318 pub iterations: usize,
320 pub final_residual: f64,
322 pub converged: bool,
324 pub time_ms: f64,
326}
327
328impl GpuSparseSolverStats {
329 pub fn new(iterations: usize, final_residual: f64, converged: bool, time_ms: f64) -> Self {
331 Self {
332 iterations,
333 final_residual,
334 converged,
335 time_ms,
336 }
337 }
338}
339
340#[cfg(test)]
344mod tests {
345 use super::*;
346
347 fn build_3x3_spd() -> SparseMatrixGpu {
354 let row_ptr = vec![0, 2, 5, 7];
355 let col_idx = vec![0, 1, 0, 1, 2, 1, 2];
356 let values = vec![4.0, -1.0, -1.0, 4.0, -1.0, -1.0, 4.0];
357 SparseMatrixGpu::from_csr(3, row_ptr, col_idx, values)
358 }
359
360 #[test]
363 fn test_identity_spmv_returns_input() {
364 let id = sparse_identity(4);
365 let x = vec![1.0, 2.0, 3.0, 4.0];
366 let y = gpu_spmv(&id, &x);
367 for (yi, xi) in y.iter().zip(x.iter()) {
368 assert!((yi - xi).abs() < 1e-12);
369 }
370 }
371
372 #[test]
373 fn test_identity_nnz() {
374 let id = sparse_identity(5);
375 assert_eq!(id.nnz(), 5);
376 }
377
378 #[test]
379 fn test_identity_rows() {
380 let id = sparse_identity(7);
381 assert_eq!(id.rows(), 7);
382 }
383
384 #[test]
385 fn test_identity_diagonal() {
386 let id = sparse_identity(4);
387 let diag = id.diagonal();
388 assert_eq!(diag, vec![1.0; 4]);
389 }
390
391 #[test]
392 fn test_identity_frobenius() {
393 let n = 9usize;
394 let id = sparse_identity(n);
395 let expected = (n as f64).sqrt();
396 assert!((id.frobenius_norm() - expected).abs() < 1e-10);
397 }
398
399 #[test]
400 fn test_identity_is_symmetric() {
401 assert!(sparse_identity(4).is_symmetric());
402 }
403
404 #[test]
407 fn test_diagonal_matrix_spmv() {
408 let d = vec![2.0, 3.0, 5.0];
409 let mat = sparse_diagonal_matrix(&d);
410 let x = vec![1.0, 1.0, 1.0];
411 let y = gpu_spmv(&mat, &x);
412 assert!((y[0] - 2.0).abs() < 1e-12);
413 assert!((y[1] - 3.0).abs() < 1e-12);
414 assert!((y[2] - 5.0).abs() < 1e-12);
415 }
416
417 #[test]
418 fn test_diagonal_matrix_frobenius() {
419 let d = vec![3.0, 4.0];
420 let mat = sparse_diagonal_matrix(&d);
421 assert!((mat.frobenius_norm() - 5.0).abs() < 1e-10);
422 }
423
424 #[test]
425 fn test_diagonal_matrix_is_symmetric() {
426 let mat = sparse_diagonal_matrix(&[1.0, 2.0, 3.0]);
427 assert!(mat.is_symmetric());
428 }
429
430 #[test]
431 fn test_sparse_identity_zero_size() {
432 let id = sparse_identity(0);
433 assert_eq!(id.nnz(), 0);
434 assert_eq!(id.rows(), 0);
435 }
436
437 #[test]
440 fn test_gpu_dot_basic() {
441 assert!((gpu_dot(&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]) - 32.0).abs() < 1e-12);
442 }
443
444 #[test]
445 fn test_gpu_dot_empty() {
446 assert!((gpu_dot(&[], &[])).abs() < 1e-15);
447 }
448
449 #[test]
450 fn test_gpu_dot_orthogonal() {
451 assert!((gpu_dot(&[1.0, 0.0], &[0.0, 1.0])).abs() < 1e-15);
452 }
453
454 #[test]
457 fn test_gpu_axpy_basic() {
458 let x = vec![1.0, 2.0, 3.0];
459 let mut y = vec![4.0, 5.0, 6.0];
460 gpu_axpy(2.0, &x, &mut y);
461 assert!((y[0] - 6.0).abs() < 1e-12);
462 assert!((y[1] - 9.0).abs() < 1e-12);
463 assert!((y[2] - 12.0).abs() < 1e-12);
464 }
465
466 #[test]
467 fn test_gpu_axpy_zero_alpha() {
468 let x = vec![1.0, 2.0];
469 let mut y = vec![3.0, 4.0];
470 gpu_axpy(0.0, &x, &mut y);
471 assert!((y[0] - 3.0).abs() < 1e-12);
472 assert!((y[1] - 4.0).abs() < 1e-12);
473 }
474
475 #[test]
478 fn test_spmv_3x3_spd() {
479 let mat = build_3x3_spd();
480 let x = vec![1.0, 0.0, 0.0];
481 let y = gpu_spmv(&mat, &x);
482 assert!((y[0] - 4.0).abs() < 1e-12);
483 assert!((y[1] + 1.0).abs() < 1e-12);
484 assert!((y[2]).abs() < 1e-12);
485 }
486
487 #[test]
488 fn test_spmv_zeros_input() {
489 let mat = build_3x3_spd();
490 let y = gpu_spmv(&mat, &[0.0, 0.0, 0.0]);
491 for yi in y {
492 assert!(yi.abs() < 1e-15);
493 }
494 }
495
496 #[test]
499 fn test_from_csr_nnz() {
500 let mat = build_3x3_spd();
501 assert_eq!(mat.nnz(), 7);
502 }
503
504 #[test]
505 fn test_add_entry_finalize() {
506 let mut mat = SparseMatrixGpu::new(2);
507 mat.add_entry(0, 0, 2.0);
508 mat.add_entry(0, 1, -1.0);
509 mat.add_entry(1, 0, -1.0);
510 mat.add_entry(1, 1, 2.0);
511 mat.finalize();
512 assert_eq!(mat.nnz(), 4);
513 let y = gpu_spmv(&mat, &[1.0, 0.0]);
514 assert!((y[0] - 2.0).abs() < 1e-12);
515 assert!((y[1] + 1.0).abs() < 1e-12);
516 }
517
518 #[test]
521 fn test_cg_converges_3x3() {
522 let mat = build_3x3_spd();
523 let b = vec![1.0, 0.0, 0.0];
524 let (x, iters, res) = gpu_cg_solver(&mat, &b, 100, 1e-10);
525 assert!(iters < 100, "CG did not converge: iters={iters}");
526 let ax = gpu_spmv(&mat, &x);
528 for (ai, bi) in ax.iter().zip(b.iter()) {
529 assert!((ai - bi).abs() < 1e-8, "residual too large");
530 }
531 let _ = res; }
533
534 #[test]
535 fn test_cg_identity_system() {
536 let id = sparse_identity(3);
537 let b = vec![1.0, 2.0, 3.0];
538 let (x, _iters, _res) = gpu_cg_solver(&id, &b, 50, 1e-10);
539 for (xi, bi) in x.iter().zip(b.iter()) {
540 assert!((xi - bi).abs() < 1e-8);
541 }
542 }
543
544 #[test]
547 fn test_jacobi_preconditioner_diagonal() {
548 let d = vec![2.0, 4.0, 5.0];
549 let mat = sparse_diagonal_matrix(&d);
550 let prec = gpu_jacobi_preconditioner(&mat);
551 assert!((prec[0] - 0.5).abs() < 1e-12);
552 assert!((prec[1] - 0.25).abs() < 1e-12);
553 assert!((prec[2] - 0.2).abs() < 1e-12);
554 }
555
556 #[test]
557 fn test_jacobi_preconditioner_identity() {
558 let id = sparse_identity(3);
559 let prec = gpu_jacobi_preconditioner(&id);
560 for p in prec {
561 assert!((p - 1.0).abs() < 1e-12);
562 }
563 }
564
565 #[test]
568 fn test_pcg_converges_3x3() {
569 let mat = build_3x3_spd();
570 let b = vec![1.0, 0.0, 1.0];
571 let prec = gpu_jacobi_preconditioner(&mat);
572 let (x, iters, _res) = gpu_pcg_solver(&mat, &b, &prec, 100, 1e-10);
573 assert!(iters < 100);
574 let ax = gpu_spmv(&mat, &x);
575 for (ai, bi) in ax.iter().zip(b.iter()) {
576 assert!((ai - bi).abs() < 1e-7);
577 }
578 }
579
580 #[test]
581 fn test_pcg_identity_trivial() {
582 let id = sparse_identity(4);
583 let b = vec![1.0, 2.0, 3.0, 4.0];
584 let prec = gpu_jacobi_preconditioner(&id);
585 let (x, _iters, _res) = gpu_pcg_solver(&id, &b, &prec, 10, 1e-12);
586 for (xi, bi) in x.iter().zip(b.iter()) {
587 assert!((xi - bi).abs() < 1e-8);
588 }
589 }
590
591 #[test]
594 fn test_solver_stats_fields() {
595 let s = GpuSparseSolverStats::new(5, 1e-8, true, 0.0);
596 assert_eq!(s.iterations, 5);
597 assert!(s.converged);
598 assert!((s.final_residual - 1e-8).abs() < 1e-20);
599 }
600
601 #[test]
604 fn test_asymmetric_matrix() {
605 let row_ptr = vec![0, 2, 3, 3];
607 let col_idx = vec![0, 1, 1, 2];
608 let values = vec![1.0, 2.0, 3.0, 4.0];
609 let mat = SparseMatrixGpu::from_csr(3, row_ptr, col_idx, values);
610 assert!(!mat.is_symmetric());
611 }
612
613 #[test]
614 fn test_frobenius_zero_matrix() {
615 let mat = SparseMatrixGpu::new(4);
616 assert!((mat.frobenius_norm()).abs() < 1e-15);
617 }
618
619 #[test]
622 fn test_new_empty_matrix() {
623 let mat = SparseMatrixGpu::new(3);
624 assert_eq!(mat.n, 3);
625 assert_eq!(mat.nnz, 0);
626 assert_eq!(mat.row_ptr, vec![0; 4]);
627 }
628
629 #[test]
630 fn test_diagonal_of_empty_matrix() {
631 let mat = SparseMatrixGpu::new(3);
632 let diag = mat.diagonal();
633 assert_eq!(diag, vec![0.0; 3]);
634 }
635
636 #[test]
637 fn test_cg_zero_rhs() {
638 let id = sparse_identity(3);
639 let b = vec![0.0; 3];
640 let (x, _iters, res) = gpu_cg_solver(&id, &b, 20, 1e-12);
641 for xi in &x {
642 assert!(xi.abs() < 1e-12);
643 }
644 assert!(res < 1e-10);
645 }
646}