Skip to main content

scirs2_sparse/gpu/
construction.rs

1//! GPU-accelerated COO/CSR/CSC construction from triplets.
2//!
3//! Provides standalone `CooMatrix`, `CsrMatrix`, and `CscMatrix` types that
4//! are designed for GPU-friendly construction workflows (sort-then-scan
5//! patterns matching a compute shader pipeline).
6
7use crate::error::{SparseError, SparseResult};
8use crate::gpu::spmv::{csr_spmv, GpuSpMvConfig};
9use scirs2_core::ndarray::Array2;
10
11// ============================================================
12// CooMatrix — coordinate format
13// ============================================================
14
15/// Sparse matrix in coordinate (COO) format.
16///
17/// Entries can be pushed in arbitrary order; call `to_csr` or
18/// `to_csc` to produce a compressed representation.  Duplicate
19/// `(row, col)` entries are **summed** during conversion.
20#[derive(Debug, Clone)]
21pub struct GpuCooMatrix {
22    /// Row indices of non-zero entries.
23    pub row_idx: Vec<usize>,
24    /// Column indices of non-zero entries.
25    pub col_idx: Vec<usize>,
26    /// Values of non-zero entries.
27    pub values: Vec<f64>,
28    /// Number of rows.
29    pub n_rows: usize,
30    /// Number of columns.
31    pub n_cols: usize,
32}
33
34impl GpuCooMatrix {
35    /// Create an empty COO matrix of the given shape.
36    pub fn new(n_rows: usize, n_cols: usize) -> Self {
37        Self {
38            row_idx: Vec::new(),
39            col_idx: Vec::new(),
40            values: Vec::new(),
41            n_rows,
42            n_cols,
43        }
44    }
45
46    /// Append a single triplet `(row, col, val)`.
47    ///
48    /// Panics in debug mode when indices are out of bounds.
49    pub fn push(&mut self, row: usize, col: usize, val: f64) {
50        debug_assert!(row < self.n_rows, "row index out of bounds");
51        debug_assert!(col < self.n_cols, "col index out of bounds");
52        self.row_idx.push(row);
53        self.col_idx.push(col);
54        self.values.push(val);
55    }
56
57    /// Construct a COO matrix from parallel triplet slices.
58    ///
59    /// # Errors
60    ///
61    /// Returns [`SparseError::InconsistentData`] when slice lengths differ, or
62    /// [`SparseError::IndexOutOfBounds`] when any index exceeds the declared
63    /// dimensions.
64    pub fn from_triplets(
65        n_rows: usize,
66        n_cols: usize,
67        rows: &[usize],
68        cols: &[usize],
69        vals: &[f64],
70    ) -> SparseResult<Self> {
71        if rows.len() != cols.len() || cols.len() != vals.len() {
72            return Err(SparseError::InconsistentData {
73                reason: format!(
74                    "triplet slice lengths do not match: rows={} cols={} vals={}",
75                    rows.len(),
76                    cols.len(),
77                    vals.len()
78                ),
79            });
80        }
81        for (i, (&r, &c)) in rows.iter().zip(cols.iter()).enumerate() {
82            if r >= n_rows || c >= n_cols {
83                return Err(SparseError::IndexOutOfBounds {
84                    index: (r, c),
85                    shape: (n_rows, n_cols),
86                });
87            }
88            let _ = i; // suppress unused warning
89        }
90        Ok(Self {
91            row_idx: rows.to_vec(),
92            col_idx: cols.to_vec(),
93            values: vals.to_vec(),
94            n_rows,
95            n_cols,
96        })
97    }
98
99    /// Convert to CSR by sorting triplets in row-major order and summing
100    /// duplicates.
101    pub fn to_csr(&self) -> GpuCsrMatrix {
102        let nnz = self.row_idx.len();
103        // Build sort order: sort by (row, col)
104        let mut order: Vec<usize> = (0..nnz).collect();
105        order.sort_unstable_by_key(|&k| (self.row_idx[k], self.col_idx[k]));
106
107        // Merge duplicates
108        let mut merged_rows: Vec<usize> = Vec::with_capacity(nnz);
109        let mut merged_cols: Vec<usize> = Vec::with_capacity(nnz);
110        let mut merged_vals: Vec<f64> = Vec::with_capacity(nnz);
111
112        for &k in &order {
113            let r = self.row_idx[k];
114            let c = self.col_idx[k];
115            let v = self.values[k];
116            if let Some(&last_r) = merged_rows.last() {
117                let last_c = *merged_cols.last().expect("cols non-empty");
118                if last_r == r && last_c == c {
119                    *merged_vals.last_mut().expect("vals non-empty") += v;
120                    continue;
121                }
122            }
123            merged_rows.push(r);
124            merged_cols.push(c);
125            merged_vals.push(v);
126        }
127
128        // Build row_ptr via prefix sum
129        let merged_nnz = merged_rows.len();
130        let mut row_ptr = vec![0usize; self.n_rows + 1];
131        for &r in &merged_rows {
132            row_ptr[r + 1] += 1;
133        }
134        for i in 0..self.n_rows {
135            row_ptr[i + 1] += row_ptr[i];
136        }
137
138        GpuCsrMatrix {
139            row_ptr,
140            col_idx: merged_cols,
141            values: merged_vals,
142            n_rows: self.n_rows,
143            n_cols: self.n_cols,
144        }
145    }
146
147    /// Convert to CSC by sorting triplets in column-major order and summing
148    /// duplicates.
149    pub fn to_csc(&self) -> GpuCscMatrix {
150        let nnz = self.row_idx.len();
151        let mut order: Vec<usize> = (0..nnz).collect();
152        order.sort_unstable_by_key(|&k| (self.col_idx[k], self.row_idx[k]));
153
154        let mut merged_rows: Vec<usize> = Vec::with_capacity(nnz);
155        let mut merged_cols: Vec<usize> = Vec::with_capacity(nnz);
156        let mut merged_vals: Vec<f64> = Vec::with_capacity(nnz);
157
158        for &k in &order {
159            let r = self.row_idx[k];
160            let c = self.col_idx[k];
161            let v = self.values[k];
162            if let Some(&last_c) = merged_cols.last() {
163                let last_r = *merged_rows.last().expect("rows non-empty");
164                if last_c == c && last_r == r {
165                    *merged_vals.last_mut().expect("vals non-empty") += v;
166                    continue;
167                }
168            }
169            merged_rows.push(r);
170            merged_cols.push(c);
171            merged_vals.push(v);
172        }
173
174        // Build col_ptr via prefix sum
175        let mut col_ptr = vec![0usize; self.n_cols + 1];
176        for &c in &merged_cols {
177            col_ptr[c + 1] += 1;
178        }
179        for i in 0..self.n_cols {
180            col_ptr[i + 1] += col_ptr[i];
181        }
182
183        GpuCscMatrix {
184            col_ptr,
185            row_idx: merged_rows,
186            values: merged_vals,
187            n_rows: self.n_rows,
188            n_cols: self.n_cols,
189        }
190    }
191}
192
193// ============================================================
194// GpuCsrMatrix — compressed sparse row
195// ============================================================
196
197/// Sparse matrix in compressed sparse row (CSR) format.
198///
199/// This is a lightweight, GPU-friendly structure with direct access to the
200/// three CSR arrays.  For richer functionality use the existing
201/// [`crate::csr::CsrMatrix`].
202#[derive(Debug, Clone)]
203pub struct GpuCsrMatrix {
204    /// Row pointer array of length `n_rows + 1`.
205    pub row_ptr: Vec<usize>,
206    /// Column indices of non-zero entries.
207    pub col_idx: Vec<usize>,
208    /// Values of non-zero entries.
209    pub values: Vec<f64>,
210    /// Number of rows.
211    pub n_rows: usize,
212    /// Number of columns.
213    pub n_cols: usize,
214}
215
216impl GpuCsrMatrix {
217    /// Return the number of stored non-zeros.
218    pub fn n_nnz(&self) -> usize {
219        self.values.len()
220    }
221
222    /// Return the fill density: `nnz / (n_rows * n_cols)`.
223    pub fn density(&self) -> f64 {
224        let total = self.n_rows * self.n_cols;
225        if total == 0 {
226            return 0.0;
227        }
228        self.n_nnz() as f64 / total as f64
229    }
230
231    /// Compute `y = A * x`.
232    ///
233    /// # Errors
234    ///
235    /// Returns [`SparseError::DimensionMismatch`] when `x.len() != n_cols`.
236    pub fn spmv(&self, x: &[f64]) -> SparseResult<Vec<f64>> {
237        if x.len() != self.n_cols {
238            return Err(SparseError::DimensionMismatch {
239                expected: self.n_cols,
240                found: x.len(),
241            });
242        }
243        csr_spmv(
244            &self.row_ptr,
245            &self.col_idx,
246            &self.values,
247            x,
248            &GpuSpMvConfig::default(),
249        )
250    }
251
252    /// Compute the transpose `A^T`.
253    pub fn transpose(&self) -> GpuCsrMatrix {
254        // Build COO for A^T by swapping row and col, then convert.
255        let mut coo = GpuCooMatrix::new(self.n_cols, self.n_rows);
256        for row in 0..self.n_rows {
257            let start = self.row_ptr[row];
258            let end = self.row_ptr[row + 1];
259            for k in start..end {
260                coo.push(self.col_idx[k], row, self.values[k]);
261            }
262        }
263        coo.to_csr()
264    }
265
266    /// Convert to a dense `n_rows × n_cols` matrix.
267    pub fn to_dense(&self) -> Array2<f64> {
268        let mut dense = Array2::zeros((self.n_rows, self.n_cols));
269        for row in 0..self.n_rows {
270            let start = self.row_ptr[row];
271            let end = self.row_ptr[row + 1];
272            for k in start..end {
273                dense[[row, self.col_idx[k]]] = self.values[k];
274            }
275        }
276        dense
277    }
278}
279
280// ============================================================
281// GpuCscMatrix — compressed sparse column
282// ============================================================
283
284/// Sparse matrix in compressed sparse column (CSC) format.
285#[derive(Debug, Clone)]
286pub struct GpuCscMatrix {
287    /// Column pointer array of length `n_cols + 1`.
288    pub col_ptr: Vec<usize>,
289    /// Row indices of non-zero entries.
290    pub row_idx: Vec<usize>,
291    /// Values of non-zero entries.
292    pub values: Vec<f64>,
293    /// Number of rows.
294    pub n_rows: usize,
295    /// Number of columns.
296    pub n_cols: usize,
297}
298
299impl GpuCscMatrix {
300    /// Return the number of stored non-zeros.
301    pub fn n_nnz(&self) -> usize {
302        self.values.len()
303    }
304
305    /// Convert to CSR format.
306    pub fn to_csr(&self) -> GpuCsrMatrix {
307        // Build COO and delegate.
308        let mut coo = GpuCooMatrix::new(self.n_rows, self.n_cols);
309        for col in 0..self.n_cols {
310            let start = self.col_ptr[col];
311            let end = self.col_ptr[col + 1];
312            for k in start..end {
313                coo.push(self.row_idx[k], col, self.values[k]);
314            }
315        }
316        coo.to_csr()
317    }
318}
319
320// ============================================================
321// Tests
322// ============================================================
323
324#[cfg(test)]
325mod tests {
326    use super::*;
327
328    #[test]
329    fn test_coo_to_csr_basic() {
330        // 3×3 matrix
331        // [1 0 2]
332        // [0 3 0]
333        // [4 5 0]
334        let rows = vec![0, 0, 1, 2, 2];
335        let cols = vec![0, 2, 1, 0, 1];
336        let vals = vec![1.0, 2.0, 3.0, 4.0, 5.0];
337        let coo =
338            GpuCooMatrix::from_triplets(3, 3, &rows, &cols, &vals).expect("from_triplets failed");
339        let csr = coo.to_csr();
340        assert_eq!(csr.n_rows, 3);
341        assert_eq!(csr.n_cols, 3);
342        assert_eq!(csr.n_nnz(), 5);
343        assert_eq!(csr.row_ptr, vec![0, 2, 3, 5]);
344    }
345
346    #[test]
347    fn test_coo_duplicate_sum() {
348        // Duplicate (0,0) entries → should be summed
349        let rows = vec![0, 0];
350        let cols = vec![0, 0];
351        let vals = vec![1.0, 2.0];
352        let coo =
353            GpuCooMatrix::from_triplets(2, 2, &rows, &cols, &vals).expect("from_triplets failed");
354        let csr = coo.to_csr();
355        assert_eq!(csr.n_nnz(), 1);
356        assert!((csr.values[0] - 3.0).abs() < 1e-12);
357    }
358
359    #[test]
360    fn test_csr_spmv_identity() {
361        let n = 4;
362        let row_ptr: Vec<usize> = (0..=n).collect();
363        let col_idx: Vec<usize> = (0..n).collect();
364        let values = vec![1.0; n];
365        let mat = GpuCsrMatrix {
366            row_ptr,
367            col_idx,
368            values,
369            n_rows: n,
370            n_cols: n,
371        };
372        let x = vec![1.0, 2.0, 3.0, 4.0];
373        let y = mat.spmv(&x).expect("spmv failed");
374        assert_eq!(y, x);
375    }
376
377    #[test]
378    fn test_csr_transpose_round_trip() {
379        // (A^T)^T = A
380        let rows = vec![0, 0, 1];
381        let cols = vec![0, 1, 2];
382        let vals = vec![1.0, 2.0, 3.0];
383        let coo =
384            GpuCooMatrix::from_triplets(2, 3, &rows, &cols, &vals).expect("from_triplets failed");
385        let csr = coo.to_csr();
386        let csr_tt = csr.transpose().transpose();
387        assert_eq!(csr.row_ptr, csr_tt.row_ptr);
388        assert_eq!(csr.col_idx, csr_tt.col_idx);
389        for (a, b) in csr.values.iter().zip(csr_tt.values.iter()) {
390            assert!((a - b).abs() < 1e-12);
391        }
392    }
393
394    #[test]
395    fn test_coo_from_triplets_sorted_csr() {
396        // Out-of-order triplets → CSR should be sorted
397        let rows = vec![2, 0, 1];
398        let cols = vec![0, 0, 0];
399        let vals = vec![3.0, 1.0, 2.0];
400        let coo =
401            GpuCooMatrix::from_triplets(3, 2, &rows, &cols, &vals).expect("from_triplets failed");
402        let csr = coo.to_csr();
403        // row_ptr must be monotonically non-decreasing
404        for w in csr.row_ptr.windows(2) {
405            assert!(w[0] <= w[1]);
406        }
407    }
408
409    #[test]
410    fn test_coo_to_csc() {
411        let rows = vec![0, 1, 0];
412        let cols = vec![0, 0, 1];
413        let vals = vec![1.0, 2.0, 3.0];
414        let coo =
415            GpuCooMatrix::from_triplets(2, 2, &rows, &cols, &vals).expect("from_triplets failed");
416        let csc = coo.to_csc();
417        assert_eq!(csc.n_nnz(), 3);
418        // col_ptr[1] should be 2 (two entries in col 0)
419        assert_eq!(csc.col_ptr[1], 2);
420    }
421
422    #[test]
423    fn test_density() {
424        let rows = vec![0, 1];
425        let cols = vec![0, 1];
426        let vals = vec![1.0, 1.0];
427        let coo =
428            GpuCooMatrix::from_triplets(2, 2, &rows, &cols, &vals).expect("from_triplets failed");
429        let csr = coo.to_csr();
430        assert!((csr.density() - 0.5).abs() < 1e-12);
431    }
432
433    #[test]
434    fn test_to_dense() {
435        let rows = vec![0, 1];
436        let cols = vec![1, 0];
437        let vals = vec![5.0, 7.0];
438        let coo =
439            GpuCooMatrix::from_triplets(2, 2, &rows, &cols, &vals).expect("from_triplets failed");
440        let csr = coo.to_csr();
441        let dense = csr.to_dense();
442        assert!((dense[[0, 1]] - 5.0).abs() < 1e-12);
443        assert!((dense[[1, 0]] - 7.0).abs() < 1e-12);
444        assert!((dense[[0, 0]]).abs() < 1e-12);
445    }
446
447    #[test]
448    fn test_empty_matrix() {
449        let coo = GpuCooMatrix::new(0, 0);
450        let csr = coo.to_csr();
451        assert_eq!(csr.n_nnz(), 0);
452        assert_eq!(csr.density(), 0.0);
453    }
454}