sklears_utils/array_utils/
sparse.rs

1//! Sparse array operations
2
3use crate::{UtilsError, UtilsResult};
4use scirs2_core::ndarray::{Array2, ArrayView1};
5use scirs2_core::numeric::{Float, Zero};
6
7/// Type alias for sparse matrix addition result
8type SparseAddResult<T> = UtilsResult<(Vec<T>, Vec<(usize, usize)>)>;
9
10/// Sparse-dense dot product with bounds checking
11pub fn safe_sparse_dot<T>(
12    sparse_indices: &[usize],
13    sparse_values: &[T],
14    dense: &ArrayView1<T>,
15) -> UtilsResult<T>
16where
17    T: Float + Clone,
18{
19    if sparse_indices.len() != sparse_values.len() {
20        return Err(UtilsError::ShapeMismatch {
21            expected: vec![sparse_indices.len()],
22            actual: vec![sparse_values.len()],
23        });
24    }
25
26    let mut result = T::zero();
27    for (&idx, &val) in sparse_indices.iter().zip(sparse_values.iter()) {
28        if idx >= dense.len() {
29            return Err(UtilsError::InvalidParameter(format!(
30                "Sparse index {} out of bounds for dense array of length {}",
31                idx,
32                dense.len()
33            )));
34        }
35        result = result + val * dense[idx];
36    }
37
38    Ok(result)
39}
40
41/// Specialized f64 sparse-dense dot product
42pub fn safe_sparse_dot_f64(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> UtilsResult<f64> {
43    if a.len() != b.len() {
44        return Err(UtilsError::ShapeMismatch {
45            expected: vec![a.len()],
46            actual: vec![b.len()],
47        });
48    }
49
50    let mut result = 0.0;
51    for (ai, bi) in a.iter().zip(b.iter()) {
52        result += ai * bi;
53    }
54    Ok(result)
55}
56
57/// Specialized f32 sparse-dense dot product
58pub fn safe_sparse_dot_f32(a: &ArrayView1<f32>, b: &ArrayView1<f32>) -> UtilsResult<f32> {
59    if a.len() != b.len() {
60        return Err(UtilsError::ShapeMismatch {
61            expected: vec![a.len()],
62            actual: vec![b.len()],
63        });
64    }
65
66    let mut result = 0.0;
67    for (ai, bi) in a.iter().zip(b.iter()) {
68        result += ai * bi;
69    }
70    Ok(result)
71}
72
73/// Sparse matrix transpose (CSR format)
74pub fn sparse_transpose<T: Clone + Zero + PartialEq>(
75    values: &[T],
76    row_indices: &[usize],
77    col_indices: &[usize],
78    shape: (usize, usize),
79) -> UtilsResult<(Vec<T>, Vec<usize>, Vec<usize>)> {
80    let (_nrows, ncols) = shape;
81
82    // Count non-zeros per column for the transposed matrix
83    let mut col_counts = vec![0; ncols];
84    for &col_idx in col_indices {
85        if col_idx >= ncols {
86            return Err(UtilsError::InvalidParameter(format!(
87                "Column index {} exceeds matrix width {}",
88                col_idx, ncols
89            )));
90        }
91        col_counts[col_idx] += 1;
92    }
93
94    // Build transpose in COO format first
95    let mut transpose_values = Vec::with_capacity(values.len());
96    let mut transpose_rows = Vec::with_capacity(values.len());
97    let mut transpose_cols = Vec::with_capacity(values.len());
98
99    for (idx, (value, &row_idx)) in values.iter().zip(row_indices.iter()).enumerate() {
100        let col_idx = col_indices[idx];
101        transpose_values.push(value.clone());
102        transpose_rows.push(col_idx); // Original column becomes row
103        transpose_cols.push(row_idx); // Original row becomes column
104    }
105
106    Ok((transpose_values, transpose_rows, transpose_cols))
107}
108
109/// Sparse matrix addition
110pub fn sparse_add<T>(
111    a_values: &[T],
112    a_indices: &[(usize, usize)],
113    b_values: &[T],
114    b_indices: &[(usize, usize)],
115) -> SparseAddResult<T>
116where
117    T: Float + Clone,
118{
119    let mut result_map = std::collections::HashMap::new();
120
121    // Add values from matrix A
122    for (idx, &value) in a_indices.iter().zip(a_values.iter()) {
123        *result_map.entry(idx).or_insert(T::zero()) =
124            *result_map.get(&idx).unwrap_or(&T::zero()) + value;
125    }
126
127    // Add values from matrix B
128    for (idx, &value) in b_indices.iter().zip(b_values.iter()) {
129        *result_map.entry(idx).or_insert(T::zero()) =
130            *result_map.get(&idx).unwrap_or(&T::zero()) + value;
131    }
132
133    // Convert back to arrays, filtering out zeros
134    let mut result_values = Vec::new();
135    let mut result_indices = Vec::new();
136
137    for (idx, value) in result_map {
138        if value.abs() > T::from(1e-12).unwrap() {
139            // Filter near-zero values
140            result_values.push(value);
141            result_indices.push(*idx);
142        }
143    }
144
145    Ok((result_values, result_indices))
146}
147
148/// Create sparse diagonal matrix
149pub fn sparse_diag<T: Clone + Zero>(
150    diagonal: &[T],
151) -> UtilsResult<(Vec<T>, Vec<usize>, Vec<usize>)> {
152    let _n = diagonal.len();
153    let mut values = Vec::new();
154    let mut row_indices = Vec::new();
155    let mut col_indices = Vec::new();
156
157    for (i, value) in diagonal.iter().enumerate() {
158        if !value.is_zero() {
159            values.push(value.clone());
160            row_indices.push(i);
161            col_indices.push(i);
162        }
163    }
164
165    Ok((values, row_indices, col_indices))
166}
167
168/// Convert sparse to dense if density exceeds threshold
169pub fn densify_threshold<T>(
170    values: &[T],
171    indices: &[(usize, usize)],
172    shape: (usize, usize),
173    threshold: f64,
174) -> UtilsResult<Option<Array2<T>>>
175where
176    T: Clone + Zero,
177{
178    let (nrows, ncols) = shape;
179    let total_elements = nrows * ncols;
180    let nnz = values.len();
181
182    let density = nnz as f64 / total_elements as f64;
183
184    if density > threshold {
185        let mut dense = Array2::zeros(shape);
186
187        for (idx, value) in indices.iter().zip(values.iter()) {
188            let (row, col) = idx;
189            if *row >= nrows || *col >= ncols {
190                return Err(UtilsError::InvalidParameter(format!(
191                    "Index ({}, {}) out of bounds for shape ({}, {})",
192                    row, col, nrows, ncols
193                )));
194            }
195            dense[[*row, *col]] = value.clone();
196        }
197
198        Ok(Some(dense))
199    } else {
200        Ok(None)
201    }
202}