#![allow(clippy::needless_range_loop, clippy::ptr_arg)]
#![allow(dead_code)]
#![allow(clippy::too_many_arguments)]
#[derive(Debug, Clone)]
pub struct SparseMatrixGpu {
pub n: usize,
pub nnz: usize,
pub row_ptr: Vec<usize>,
pub col_idx: Vec<usize>,
pub values: Vec<f64>,
}
impl SparseMatrixGpu {
pub fn new(n: usize) -> Self {
Self {
n,
nnz: 0,
row_ptr: vec![0usize; n + 1],
col_idx: Vec::new(),
values: Vec::new(),
}
}
pub fn from_csr(n: usize, row_ptr: Vec<usize>, col_idx: Vec<usize>, values: Vec<f64>) -> Self {
debug_assert_eq!(row_ptr.len(), n + 1);
let nnz = values.len();
Self {
n,
nnz,
row_ptr,
col_idx,
values,
}
}
pub fn add_entry(&mut self, row: usize, col: usize, val: f64) {
self.col_idx.push(col);
self.values.push(val);
if row + 1 < self.row_ptr.len() {
self.row_ptr[row + 1] += 1;
}
self.nnz += 1;
}
pub fn finalize(&mut self) {
for i in 1..=self.n {
self.row_ptr[i] += self.row_ptr[i - 1];
}
self.nnz = *self.row_ptr.last().unwrap_or(&0);
}
pub fn nnz(&self) -> usize {
self.nnz
}
pub fn rows(&self) -> usize {
self.n
}
pub fn diagonal(&self) -> Vec<f64> {
let mut diag = vec![0.0f64; self.n];
for row in 0..self.n {
let start = self.row_ptr[row];
let end = self.row_ptr[row + 1];
for k in start..end {
if self.col_idx[k] == row {
diag[row] = self.values[k];
}
}
}
diag
}
pub fn is_symmetric(&self) -> bool {
for row in 0..self.n {
let start = self.row_ptr[row];
let end = self.row_ptr[row + 1];
for k in start..end {
let col = self.col_idx[k];
let val = self.values[k];
let mut found = false;
let cs = self.row_ptr[col];
let ce = self.row_ptr[col + 1];
for m in cs..ce {
if self.col_idx[m] == row {
if (self.values[m] - val).abs() > 1e-12 {
return false;
}
found = true;
break;
}
}
if !found {
return false;
}
}
}
true
}
pub fn frobenius_norm(&self) -> f64 {
self.values.iter().map(|v| v * v).sum::<f64>().sqrt()
}
}
pub fn sparse_identity(n: usize) -> SparseMatrixGpu {
let row_ptr: Vec<usize> = (0..=n).collect();
let col_idx: Vec<usize> = (0..n).collect();
let values = vec![1.0f64; n];
SparseMatrixGpu::from_csr(n, row_ptr, col_idx, values)
}
pub fn sparse_diagonal_matrix(diag: &[f64]) -> SparseMatrixGpu {
let n = diag.len();
let row_ptr: Vec<usize> = (0..=n).collect();
let col_idx: Vec<usize> = (0..n).collect();
let values = diag.to_vec();
SparseMatrixGpu::from_csr(n, row_ptr, col_idx, values)
}
pub fn gpu_spmv(mat: &SparseMatrixGpu, x: &[f64]) -> Vec<f64> {
assert_eq!(x.len(), mat.n, "gpu_spmv: x length mismatch");
let mut y = vec![0.0f64; mat.n];
for row in 0..mat.n {
let start = mat.row_ptr[row];
let end = mat.row_ptr[row + 1];
let mut sum = 0.0f64;
for k in start..end {
sum += mat.values[k] * x[mat.col_idx[k]];
}
y[row] = sum;
}
y
}
pub fn gpu_dot(a: &[f64], b: &[f64]) -> f64 {
a.iter().zip(b.iter()).map(|(ai, bi)| ai * bi).sum()
}
pub fn gpu_axpy(a: f64, x: &[f64], y: &mut Vec<f64>) {
assert_eq!(x.len(), y.len(), "gpu_axpy: length mismatch");
for (yi, &xi) in y.iter_mut().zip(x.iter()) {
*yi += a * xi;
}
}
pub fn gpu_cg_solver(
mat: &SparseMatrixGpu,
b: &[f64],
max_iter: usize,
tol: f64,
) -> (Vec<f64>, usize, f64) {
let n = mat.n;
let mut x = vec![0.0f64; n];
let mut r = b.to_vec(); let mut p = r.clone();
let mut rr = gpu_dot(&r, &r);
let b_norm = rr.sqrt().max(1e-100);
for iter in 0..max_iter {
if rr.sqrt() / b_norm < tol {
return (x, iter, rr.sqrt());
}
let ap = gpu_spmv(mat, &p);
let pap = gpu_dot(&p, &ap);
if pap.abs() < 1e-300 {
break;
}
let alpha = rr / pap;
gpu_axpy(alpha, &p, &mut x);
gpu_axpy(-alpha, &ap, &mut r);
let rr_new = gpu_dot(&r, &r);
let beta = rr_new / rr.max(1e-300);
for i in 0..n {
p[i] = r[i] + beta * p[i];
}
rr = rr_new;
}
(x, max_iter, rr.sqrt())
}
pub fn gpu_jacobi_preconditioner(mat: &SparseMatrixGpu) -> Vec<f64> {
mat.diagonal()
.iter()
.map(|&d| if d.abs() > 1e-15 { 1.0 / d } else { 1.0 })
.collect()
}
pub fn gpu_pcg_solver(
mat: &SparseMatrixGpu,
b: &[f64],
precond: &[f64],
max_iter: usize,
tol: f64,
) -> (Vec<f64>, usize, f64) {
let n = mat.n;
let mut x = vec![0.0f64; n];
let mut r = b.to_vec();
let mut z: Vec<f64> = r
.iter()
.zip(precond.iter())
.map(|(ri, mi)| ri * mi)
.collect();
let mut p = z.clone();
let mut rz = gpu_dot(&r, &z);
let b_norm = gpu_dot(b, b).sqrt().max(1e-100);
for iter in 0..max_iter {
if gpu_dot(&r, &r).sqrt() / b_norm < tol {
return (x, iter, gpu_dot(&r, &r).sqrt());
}
let ap = gpu_spmv(mat, &p);
let pap = gpu_dot(&p, &ap);
if pap.abs() < 1e-300 {
break;
}
let alpha = rz / pap;
gpu_axpy(alpha, &p, &mut x);
gpu_axpy(-alpha, &ap, &mut r);
z = r
.iter()
.zip(precond.iter())
.map(|(ri, mi)| ri * mi)
.collect();
let rz_new = gpu_dot(&r, &z);
let beta = rz_new / rz.max(1e-300);
for i in 0..n {
p[i] = z[i] + beta * p[i];
}
rz = rz_new;
}
(x, max_iter, gpu_dot(&r, &r).sqrt())
}
#[derive(Debug, Clone)]
pub struct GpuSparseSolverStats {
pub iterations: usize,
pub final_residual: f64,
pub converged: bool,
pub time_ms: f64,
}
impl GpuSparseSolverStats {
pub fn new(iterations: usize, final_residual: f64, converged: bool, time_ms: f64) -> Self {
Self {
iterations,
final_residual,
converged,
time_ms,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn build_3x3_spd() -> SparseMatrixGpu {
let row_ptr = vec![0, 2, 5, 7];
let col_idx = vec![0, 1, 0, 1, 2, 1, 2];
let values = vec![4.0, -1.0, -1.0, 4.0, -1.0, -1.0, 4.0];
SparseMatrixGpu::from_csr(3, row_ptr, col_idx, values)
}
#[test]
fn test_identity_spmv_returns_input() {
let id = sparse_identity(4);
let x = vec![1.0, 2.0, 3.0, 4.0];
let y = gpu_spmv(&id, &x);
for (yi, xi) in y.iter().zip(x.iter()) {
assert!((yi - xi).abs() < 1e-12);
}
}
#[test]
fn test_identity_nnz() {
let id = sparse_identity(5);
assert_eq!(id.nnz(), 5);
}
#[test]
fn test_identity_rows() {
let id = sparse_identity(7);
assert_eq!(id.rows(), 7);
}
#[test]
fn test_identity_diagonal() {
let id = sparse_identity(4);
let diag = id.diagonal();
assert_eq!(diag, vec![1.0; 4]);
}
#[test]
fn test_identity_frobenius() {
let n = 9usize;
let id = sparse_identity(n);
let expected = (n as f64).sqrt();
assert!((id.frobenius_norm() - expected).abs() < 1e-10);
}
#[test]
fn test_identity_is_symmetric() {
assert!(sparse_identity(4).is_symmetric());
}
#[test]
fn test_diagonal_matrix_spmv() {
let d = vec![2.0, 3.0, 5.0];
let mat = sparse_diagonal_matrix(&d);
let x = vec![1.0, 1.0, 1.0];
let y = gpu_spmv(&mat, &x);
assert!((y[0] - 2.0).abs() < 1e-12);
assert!((y[1] - 3.0).abs() < 1e-12);
assert!((y[2] - 5.0).abs() < 1e-12);
}
#[test]
fn test_diagonal_matrix_frobenius() {
let d = vec![3.0, 4.0];
let mat = sparse_diagonal_matrix(&d);
assert!((mat.frobenius_norm() - 5.0).abs() < 1e-10);
}
#[test]
fn test_diagonal_matrix_is_symmetric() {
let mat = sparse_diagonal_matrix(&[1.0, 2.0, 3.0]);
assert!(mat.is_symmetric());
}
#[test]
fn test_sparse_identity_zero_size() {
let id = sparse_identity(0);
assert_eq!(id.nnz(), 0);
assert_eq!(id.rows(), 0);
}
#[test]
fn test_gpu_dot_basic() {
assert!((gpu_dot(&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]) - 32.0).abs() < 1e-12);
}
#[test]
fn test_gpu_dot_empty() {
assert!((gpu_dot(&[], &[])).abs() < 1e-15);
}
#[test]
fn test_gpu_dot_orthogonal() {
assert!((gpu_dot(&[1.0, 0.0], &[0.0, 1.0])).abs() < 1e-15);
}
#[test]
fn test_gpu_axpy_basic() {
let x = vec![1.0, 2.0, 3.0];
let mut y = vec![4.0, 5.0, 6.0];
gpu_axpy(2.0, &x, &mut y);
assert!((y[0] - 6.0).abs() < 1e-12);
assert!((y[1] - 9.0).abs() < 1e-12);
assert!((y[2] - 12.0).abs() < 1e-12);
}
#[test]
fn test_gpu_axpy_zero_alpha() {
let x = vec![1.0, 2.0];
let mut y = vec![3.0, 4.0];
gpu_axpy(0.0, &x, &mut y);
assert!((y[0] - 3.0).abs() < 1e-12);
assert!((y[1] - 4.0).abs() < 1e-12);
}
#[test]
fn test_spmv_3x3_spd() {
let mat = build_3x3_spd();
let x = vec![1.0, 0.0, 0.0];
let y = gpu_spmv(&mat, &x);
assert!((y[0] - 4.0).abs() < 1e-12);
assert!((y[1] + 1.0).abs() < 1e-12);
assert!((y[2]).abs() < 1e-12);
}
#[test]
fn test_spmv_zeros_input() {
let mat = build_3x3_spd();
let y = gpu_spmv(&mat, &[0.0, 0.0, 0.0]);
for yi in y {
assert!(yi.abs() < 1e-15);
}
}
#[test]
fn test_from_csr_nnz() {
let mat = build_3x3_spd();
assert_eq!(mat.nnz(), 7);
}
#[test]
fn test_add_entry_finalize() {
let mut mat = SparseMatrixGpu::new(2);
mat.add_entry(0, 0, 2.0);
mat.add_entry(0, 1, -1.0);
mat.add_entry(1, 0, -1.0);
mat.add_entry(1, 1, 2.0);
mat.finalize();
assert_eq!(mat.nnz(), 4);
let y = gpu_spmv(&mat, &[1.0, 0.0]);
assert!((y[0] - 2.0).abs() < 1e-12);
assert!((y[1] + 1.0).abs() < 1e-12);
}
#[test]
fn test_cg_converges_3x3() {
let mat = build_3x3_spd();
let b = vec![1.0, 0.0, 0.0];
let (x, iters, res) = gpu_cg_solver(&mat, &b, 100, 1e-10);
assert!(iters < 100, "CG did not converge: iters={iters}");
let ax = gpu_spmv(&mat, &x);
for (ai, bi) in ax.iter().zip(b.iter()) {
assert!((ai - bi).abs() < 1e-8, "residual too large");
}
let _ = res; }
#[test]
fn test_cg_identity_system() {
let id = sparse_identity(3);
let b = vec![1.0, 2.0, 3.0];
let (x, _iters, _res) = gpu_cg_solver(&id, &b, 50, 1e-10);
for (xi, bi) in x.iter().zip(b.iter()) {
assert!((xi - bi).abs() < 1e-8);
}
}
#[test]
fn test_jacobi_preconditioner_diagonal() {
let d = vec![2.0, 4.0, 5.0];
let mat = sparse_diagonal_matrix(&d);
let prec = gpu_jacobi_preconditioner(&mat);
assert!((prec[0] - 0.5).abs() < 1e-12);
assert!((prec[1] - 0.25).abs() < 1e-12);
assert!((prec[2] - 0.2).abs() < 1e-12);
}
#[test]
fn test_jacobi_preconditioner_identity() {
let id = sparse_identity(3);
let prec = gpu_jacobi_preconditioner(&id);
for p in prec {
assert!((p - 1.0).abs() < 1e-12);
}
}
#[test]
fn test_pcg_converges_3x3() {
let mat = build_3x3_spd();
let b = vec![1.0, 0.0, 1.0];
let prec = gpu_jacobi_preconditioner(&mat);
let (x, iters, _res) = gpu_pcg_solver(&mat, &b, &prec, 100, 1e-10);
assert!(iters < 100);
let ax = gpu_spmv(&mat, &x);
for (ai, bi) in ax.iter().zip(b.iter()) {
assert!((ai - bi).abs() < 1e-7);
}
}
#[test]
fn test_pcg_identity_trivial() {
let id = sparse_identity(4);
let b = vec![1.0, 2.0, 3.0, 4.0];
let prec = gpu_jacobi_preconditioner(&id);
let (x, _iters, _res) = gpu_pcg_solver(&id, &b, &prec, 10, 1e-12);
for (xi, bi) in x.iter().zip(b.iter()) {
assert!((xi - bi).abs() < 1e-8);
}
}
#[test]
fn test_solver_stats_fields() {
let s = GpuSparseSolverStats::new(5, 1e-8, true, 0.0);
assert_eq!(s.iterations, 5);
assert!(s.converged);
assert!((s.final_residual - 1e-8).abs() < 1e-20);
}
#[test]
fn test_asymmetric_matrix() {
let row_ptr = vec![0, 2, 3, 3];
let col_idx = vec![0, 1, 1, 2];
let values = vec![1.0, 2.0, 3.0, 4.0];
let mat = SparseMatrixGpu::from_csr(3, row_ptr, col_idx, values);
assert!(!mat.is_symmetric());
}
#[test]
fn test_frobenius_zero_matrix() {
let mat = SparseMatrixGpu::new(4);
assert!((mat.frobenius_norm()).abs() < 1e-15);
}
#[test]
fn test_new_empty_matrix() {
let mat = SparseMatrixGpu::new(3);
assert_eq!(mat.n, 3);
assert_eq!(mat.nnz, 0);
assert_eq!(mat.row_ptr, vec![0; 4]);
}
#[test]
fn test_diagonal_of_empty_matrix() {
let mat = SparseMatrixGpu::new(3);
let diag = mat.diagonal();
assert_eq!(diag, vec![0.0; 3]);
}
#[test]
fn test_cg_zero_rhs() {
let id = sparse_identity(3);
let b = vec![0.0; 3];
let (x, _iters, res) = gpu_cg_solver(&id, &b, 20, 1e-12);
for xi in &x {
assert!(xi.abs() < 1e-12);
}
assert!(res < 1e-10);
}
}