sprs_rssn/sparse.rs
1use crate::array_backend::Array2;
2use crate::errors::StructureError;
3use crate::indexing::SpIndex;
4use crate::IndPtrBase;
5use std::ops::Deref;
6
7#[cfg(feature = "serde")]
8mod serde_traits;
9#[cfg(feature = "serde")]
10use serde_traits::{CsMatBaseShadow, CsVecBaseShadow, Deserialize, Serialize};
11
12pub use self::csmat::CompressedStorage;
13
14/// Compressed matrix in the CSR or CSC format, with sorted indices.
15///
16/// This sparse matrix format is the preferred format for performing arithmetic
17/// operations. Constructing a sparse matrix directly in this format requires
18/// a deep knowledge of its internals. For easier matrix construction, the
19/// [triplet format](struct.TriMatBase.html) is preferred.
20///
21/// The `CsMatBase` type is parameterized by the scalar type `N`, the indexing
22/// type `I`, the indexing storage backend types `IptrStorage` and `IndStorage`,
23/// and the value storage backend type `DataStorage`. Convenient aliases are
24/// available to specify frequent variants: [`CsMat`] refers to a sparse matrix
25/// that owns its data, similar to `Vec<T>`; [`CsMatView`] refers to a sparse matrix
26/// that borrows its data, similar to `& [T]`; and [`CsMatViewMut`] refers to a sparse
27/// matrix borrowing its data, with a mutable borrow for its values. No mutable
28/// borrow is allowed for the structure of the matrix, allowing the invariants
29/// to be preserved.
30///
31/// Additionaly, the type aliases [`CsMatI`], [`CsMatViewI`] and
32/// [`CsMatViewMutI`] can be used to choose an index type different from the
33/// default `usize`.
34///
35/// [`CsMat`]: type.CsMat.html
36/// [`CsMatView`]: type.CsMatView.html
37/// [`CsMatViewMut`]: type.CsMatViewMut.html
38/// [`CsMatI`]: type.CsMatI.html
39/// [`CsMatViewI`]: type.CsMatViewI.html
40/// [`CsMatViewMutI`]: type.CsMatViewMutI.html
41///
42/// ## Storage format
43///
44/// In the compressed storage format, the non-zero values of a sparse matrix
45/// are stored as the row and column location of the non-zero values, with
46/// a compression along the rows (CSR) or columns (CSC) indices. The dimension
47/// along which the storage is compressed is referred to as the *outer dimension*,
48/// the other dimension is called the *inner dimension*. For clarity, the
49/// remaining explanation will assume a CSR matrix, but the information stands
50/// for CSC matrices as well.
51///
52/// ### Indptr
53///
54/// An index pointer array `indptr` of size corresponding to the number of rows
55/// stores the cumulative sum of non-zero elements for each row. For instance,
56/// the number of non-zero elements of the i-th row can be obtained by computing
57/// `indptr[i + 1] - indptr[i]`. The total number of non-zero elements is thus
58/// `nnz = indptr[nb_rows + 1]`. This index pointer array can then be used to
59/// efficiently index the `indices` and `data` array, which respectively contain
60/// the column indices and the values of the non-zero elements.
61///
62/// ### Indices and data
63///
64/// The non-zero locations and values are stored in arrays of size `nnz`, `indices`
65/// and `data`. For row `i`, the non-zeros are located in the slices
66/// `indices[indptr[i]..indptr[i+1]]` and `data[indptr[i]..indptr[i+1]]`. We
67/// require and enforce sorted indices for each row.
68///
69/// ## Construction
70///
71/// A sparse matrix can be directly constructed by providing its index pointer,
72/// indices and data arrays. The coherence of the provided structure is then
73/// verified.
74///
75/// For situations where the compressed structure is hard to figure out up front,
76/// the [triplet format](struct.TriMatBase.html) can be used. A matrix in the
77/// triplet format can then be efficiently converted to a `CsMat`.
78///
79/// Alternately, a sparse matrix can be constructed from other sparse matrices
80/// using [`vstack`], [`hstack`] or [`bmat`].
81///
82/// [`vstack`]: fn.vstack.html
83/// [`hstack`]: fn.hstack.html
84/// [`bmat`]: fn.bmat.html
85
86#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
87#[cfg_attr(feature = "serde", derive(Deserialize))]
88#[cfg_attr(
89 feature = "serde",
90 serde(
91 try_from = "CsMatBaseShadow<N, I, IptrStorage, IndStorage, DataStorage, Iptr>"
92 )
93)]
94pub struct CsMatBase<N, I, IptrStorage, IndStorage, DataStorage, Iptr = I>
95where
96 I: SpIndex,
97 Iptr: SpIndex,
98 IptrStorage: Deref<Target = [Iptr]>,
99 IndStorage: Deref<Target = [I]>,
100 DataStorage: Deref<Target = [N]>,
101{
102 storage: CompressedStorage,
103 nrows: usize,
104 ncols: usize,
105 #[cfg_attr(feature = "serde", serde(flatten))]
106 indptr: IndPtrBase<Iptr, IptrStorage>,
107 indices: IndStorage,
108 data: DataStorage,
109}
110
111pub type CsMatI<N, I, Iptr = I> =
112 CsMatBase<N, I, Vec<Iptr>, Vec<I>, Vec<N>, Iptr>;
113pub type CsMatViewI<'a, N, I, Iptr = I> =
114 CsMatBase<N, I, &'a [Iptr], &'a [I], &'a [N], Iptr>;
115pub type CsMatViewMutI<'a, N, I, Iptr = I> =
116 CsMatBase<N, I, &'a [Iptr], &'a [I], &'a mut [N], Iptr>;
117pub type CsMatVecView_<'a, N, I, Iptr = I> =
118 CsMatBase<N, I, Array2<Iptr>, &'a [I], &'a [N], Iptr>;
119
120pub type CsMat<N> = CsMatI<N, usize>;
121pub type CsMatView<'a, N> = CsMatViewI<'a, N, usize>;
122pub type CsMatViewMut<'a, N> = CsMatViewMutI<'a, N, usize>;
123// FIXME: a fixed size array would be better, but no Deref impl
124pub type CsMatVecView<'a, N> = CsMatVecView_<'a, N, usize>;
125
126pub type CsStructureViewI<'a, I, Iptr = I> = CsMatViewI<'a, (), I, Iptr>;
127pub type CsStructureView<'a> = CsStructureViewI<'a, usize>;
128pub type CsStructureI<I, Iptr = I> = CsMatI<(), I, Iptr>;
129pub type CsStructure = CsStructureI<usize>;
130
131/// A sparse vector, storing the indices of its non-zero data.
132///
133/// A `CsVec` represents a sparse vector by storing a sorted `indices()` array
134/// containing the locations of the non-zero values and a `data()` array
135/// containing the corresponding values. The format is compatible with `CsMat`,
136/// ie a `CsVec` can represent the row of a CSR matrix without any copying.
137///
138/// Similar to [`CsMat`] and [`TriMat`], the `CsVecBase` type is parameterized
139/// over the indexing storage backend `IStorage` and the data storage backend
140/// `DStorage`. Type aliases are provided for common cases: [`CsVec`] represents
141/// a sparse vector owning its data, with `Vec`s as storage backends;
142/// [`CsVecView`] represents a sparse vector borrowing its data, using slices
143/// as storage backends; and [`CsVecViewMut`] represents a sparse vector that
144/// mutably borrows its data (but immutably borrows its indices).
145///
146/// Additionaly, the type aliases [`CsVecI`], [`CsVecViewI`], and
147/// [`CsVecViewMutI`] can be used to choose an index type different from the
148/// default `usize`.
149///
150/// [`CsMat`]: struct.CsMatBase.html
151/// [`TriMat`]: struct.TriMatBase.html
152/// [`CsVec`]: type.CsVec.html
153/// [`CsVecView`]: type.CsVecView.html
154/// [`CsVecViewMut`]: type.CsVecViewMut.html
155/// [`CsVecI`]: type.CsVecI.html
156/// [`CsVecViewI`]: type.CsVecViewI.html
157/// [`CsVecViewMutI`]: type.CsVecViewMutI.html
158
159#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
160#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
161#[cfg_attr(
162 feature = "serde",
163 serde(try_from = "CsVecBaseShadow<IStorage, DStorage, N, I>")
164)]
165pub struct CsVecBase<IStorage, DStorage, N, I: SpIndex = usize>
166where
167 IStorage: Deref<Target = [I]>,
168 DStorage: Deref<Target = [N]>,
169{
170 dim: usize,
171 indices: IStorage,
172 data: DStorage,
173}
174
175pub type CsVecI<N, I = usize> = CsVecBase<Vec<I>, Vec<N>, N, I>;
176pub type CsVecViewI<'a, N, I = usize> = CsVecBase<&'a [I], &'a [N], N, I>;
177pub type CsVecViewMutI<'a, N, I = usize> =
178 CsVecBase<&'a [I], &'a mut [N], N, I>;
179
180pub type CsVecView<'a, N> = CsVecViewI<'a, N>;
181pub type CsVecViewMut<'a, N> = CsVecViewMutI<'a, N>;
182pub type CsVec<N> = CsVecI<N>;
183
184/// Sparse matrix in the triplet format.
185///
186/// Sparse matrices in the triplet format use three arrays of equal sizes (accessible through the
187/// methods [`row_inds`], [`col_inds`], [`data`]), the first one
188/// storing the row indices of non-zero values, the second storing the
189/// corresponding column indices and the last array storing the corresponding
190/// scalar value. If a non-zero location is repeated in the arrays, the
191/// non-zero value is taken as the sum of the corresponding scalar entries.
192///
193/// [`row_inds`]: struct.TriMatBase.html#method.row_inds
194/// [`col_inds`]: struct.TriMatBase.html#method.col_inds
195/// [`data`]: struct.TriMatBase.html#method.data
196///
197/// This format is useful for iteratively building a sparse matrix, since the
198/// various non-zero entries can be specified in any order, or even partially
199/// as is common in physics with partial derivatives equations.
200///
201/// This format cannot be used for arithmetic operations. Arithmetic operations
202/// are more efficient in the [compressed format](struct.CsMatBase.html).
203/// A matrix in the triplet format can be converted to the compressed format
204/// using the methods [`to_csc`] and [`to_csr`].
205///
206/// [`to_csc`]: struct.TriMatBase.html#method.to_csc
207/// [`to_csr`]: struct.TriMatBase.html#method.to_csr
208///
209/// The `TriMatBase` type is parameterized by the storage type for the row and
210/// column indices, `IStorage`, and by the storage type for the non-zero values
211/// `DStorage`. Convenient aliases are availaible to specify frequent variant:
212/// [`TriMat`] refers to a triplet matrix owning the storage of its indices and
213/// and values, [`TriMatView`] refers to a triplet matrix with slices to store
214/// its indices and values, while [`TriMatViewMut`] refers to a a triplet matrix
215/// using mutable slices.
216///
217/// Additionaly, the type aliases [`TriMatI`], [`TriMatViewI`] and
218/// [`TriMatViewMutI`] can be used to choose an index type different from the
219/// default `usize`.
220///
221/// [`TriMat`]: type.TriMat.html
222/// [`TriMatView`]: type.TriMatView.html
223/// [`TriMatViewMut`]: type.TriMatViewMut.html
224/// [`TriMatI`]: type.TriMatI.html
225/// [`TriMatViewI`]: type.TriMatViewI.html
226/// [`TriMatViewMutI`]: type.TriMatViewMutI.html
227#[derive(PartialEq, Eq, Debug, Hash)]
228pub struct TriMatBase<IStorage, DStorage> {
229 rows: usize,
230 cols: usize,
231 row_inds: IStorage,
232 col_inds: IStorage,
233 data: DStorage,
234}
235
236pub type TriMatI<N, I> = TriMatBase<Vec<I>, Vec<N>>;
237pub type TriMatViewI<'a, N, I> = TriMatBase<&'a [I], &'a [N]>;
238pub type TriMatViewMutI<'a, N, I> = TriMatBase<&'a mut [I], &'a mut [N]>;
239
240pub type TriMat<N> = TriMatI<N, usize>;
241pub type TriMatView<'a, N> = TriMatViewI<'a, N, usize>;
242pub type TriMatViewMut<'a, N> = TriMatViewMutI<'a, N, usize>;
243
244/// An iterator over elements of a sparse matrix, in the triplet format
245///
246/// The dataypes RI, CI, and DI are iterators yielding the row, column and
247/// values of non-zero entries.
248///
249/// As in `TriMat`, no order guarantee is provided and the same location can
250/// appear multiple times. The non-zero value is then considered as the sum
251/// of all the entries sharing its location.
252#[derive(PartialEq, Eq, Debug, Clone)]
253pub struct TriMatIter<RI, CI, DI> {
254 rows: usize,
255 cols: usize,
256 nnz: usize,
257 row_inds: RI,
258 col_inds: CI,
259 data: DI,
260}
261
262mod prelude {
263 #[allow(unused_imports)]
264 pub use super::{
265 CsMat, CsMatBase, CsMatI, CsMatVecView, CsMatVecView_, CsMatView,
266 CsMatViewI, CsMatViewMut, CsMatViewMutI, CsStructure, CsStructureI,
267 CsStructureView, CsStructureViewI, CsVec, CsVecBase, CsVecI, CsVecView,
268 CsVecViewI, CsVecViewMut, CsVecViewMutI, SparseMat, TriMat, TriMatBase,
269 TriMatI, TriMatIter, TriMatView, TriMatViewI, TriMatViewMut,
270 TriMatViewMutI,
271 };
272}
273
274/// A trait for common members of sparse matrices
275pub trait SparseMat {
276 /// The number of rows of this matrix
277 fn rows(&self) -> usize;
278
279 /// The number of columns of this matrix
280 fn cols(&self) -> usize;
281
282 /// The number of nonzeros of this matrix
283 fn nnz(&self) -> usize;
284}
285
286pub(crate) mod utils {
287 use super::*;
288 use ndarray::Axis;
289 use std::convert::TryInto;
290
291 /// Check the structure of `CsMat` components
292 /// This will ensure that:
293 /// * indptr is of length `outer_dim() + 1`
294 /// * indices and data have the same length, `nnz == indptr[outer_dims()]`
295 /// * indptr is sorted
296 /// * indptr values do not exceed [`usize::MAX`](usize::MAX)`/ 2`, as that would mean
297 /// indices and indptr would take more space than the addressable memory
298 /// * indices is sorted for each outer slice
299 /// * indices are lower than `inner_dims()`
300 pub(crate) fn check_compressed_structure<I: SpIndex, Iptr: SpIndex>(
301 inner: usize,
302 outer: usize,
303 indptr: &[Iptr],
304 indices: &[I],
305 ) -> Result<(), StructureError> {
306 let indptr =
307 crate::IndPtrView::new_checked(indptr).map_err(|(_, e)| e)?;
308 if indptr.len() != outer + 1 {
309 return Err(StructureError::SizeMismatch(
310 "Indptr length does not match dimension",
311 ));
312 }
313 // Make sure Iptr and I can represent all types for this size
314 if I::from(inner).is_none() {
315 return Err(StructureError::OutOfRange(
316 "Index type not large enough for this matrix",
317 ));
318 }
319 if Iptr::from(outer + 1).is_none() {
320 return Err(StructureError::OutOfRange(
321 "Iptr type not large enough for this matrix",
322 ));
323 }
324 // Make sure indices can be converted to usize
325 // this could happen if index is negative for sized types
326 for i in indices.iter() {
327 if i.try_index().is_none() {
328 return Err(StructureError::OutOfRange(
329 "Indices value out of range of usize",
330 ));
331 }
332 }
333 let nnz = indices.len();
334 if nnz != indptr.nnz() {
335 return Err(StructureError::SizeMismatch(
336 "Indices length and inpdtr's nnz do not match",
337 ));
338 }
339
340 // check that the indices are sorted for each row
341 for range in indptr.iter_outer_sz() {
342 let indices = &indices[range];
343 // Indices must be monotonically increasing
344 if !sorted_indices(indices) {
345 return Err(StructureError::Unsorted("Indices are not sorted"));
346 }
347 // Last index (which is the largest) must be in bounds
348 if let Some(i) = indices.last() {
349 if i.to_usize().unwrap() >= inner {
350 return Err(StructureError::OutOfRange(
351 "Indice is larger than inner dimension",
352 ));
353 }
354 }
355 }
356
357 Ok(())
358 }
359
360 pub fn sorted_indices<I: SpIndex>(indices: &[I]) -> bool {
361 for w in indices.windows(2) {
362 // w will always be a size 2
363 let &[i1, i2]: &[I; 2] = w.try_into().unwrap();
364 if i2 <= i1 {
365 return false;
366 }
367 }
368 true
369 }
370
371 pub fn sort_indices_data_slices<N: Clone, I: SpIndex>(
372 indices: &mut [I],
373 data: &mut [N],
374 buf: &mut Vec<(I, N)>,
375 ) {
376 let len = indices.len();
377 assert_eq!(len, data.len());
378 let indices = &mut indices[..len];
379 let data = &mut data[..len];
380 buf.clear();
381 buf.reserve_exact(len);
382 for (i, v) in indices.iter().zip(data.iter()) {
383 buf.push((*i, v.clone()));
384 }
385
386 buf.sort_unstable_by_key(|x| x.0);
387
388 for ((i, x), (ind, v)) in buf
389 .iter()
390 .cloned()
391 .zip(indices.iter_mut().zip(data.iter_mut()))
392 {
393 *ind = i;
394 *v = x;
395 }
396 }
397 /// Return the axis which varies the fastest. For a
398 /// `C`-style matrix this will be `Axis(1)`, for an
399 /// `F`-style matrix this will be `Axis(0)`
400 pub(crate) fn fastest_axis<T>(mat: ndarray::ArrayView2<T>) -> Axis {
401 if mat.strides()[1] > mat.strides()[0] {
402 Axis(0)
403 } else {
404 Axis(1)
405 }
406 }
407 pub(crate) fn slowest_axis<T>(mat: ndarray::ArrayView2<T>) -> Axis {
408 match fastest_axis(mat) {
409 Axis(1) => Axis(0),
410 Axis(0) => Axis(1),
411 _ => unreachable!(),
412 }
413 }
414}
415
416pub mod binop;
417pub mod compressed;
418pub mod construct;
419pub mod csmat;
420pub mod indptr;
421pub mod kronecker;
422pub mod linalg;
423pub mod permutation;
424pub mod prod;
425pub mod slicing;
426pub mod smmp;
427pub mod special_mats;
428pub mod symmetric;
429pub mod to_dense;
430pub mod triplet;
431pub mod triplet_iter;
432pub mod vec;
433pub mod visu;
434
435#[cfg(test)]
436mod test {
437
438 use super::utils;
439 #[test]
440 fn test_sort_indices() {
441 let mut idx: Vec<usize> = vec![4, 1, 6, 2];
442 let mut dat: Vec<i32> = vec![4, -1, 2, -3];
443 let mut buf: Vec<(usize, i32)> = Vec::new();
444 utils::sort_indices_data_slices(&mut idx[..], &mut dat[..], &mut buf);
445 assert_eq!(idx, vec![1, 2, 4, 6]);
446 assert_eq!(dat, vec![-1, -3, 4, 2]);
447 }
448
449 #[test]
450 fn test_sorted_indices() {
451 use utils::sorted_indices;
452 assert!(sorted_indices(&[1, 2, 3]));
453 assert!(sorted_indices(&[1, 2, 8]));
454 assert!(!sorted_indices(&[1, 1, 3]));
455 assert!(!sorted_indices(&[2, 1, 3]));
456 assert!(sorted_indices(&[1, 2]));
457 assert!(sorted_indices(&[1]));
458 }
459
460 #[test]
461 fn test_fastest_axis() {
462 use ndarray::{arr2, s, Array2, Axis, ShapeBuilder};
463 use utils::fastest_axis;
464 let arr = arr2(&[[1, 2], [3, 4]]);
465 assert_eq!(fastest_axis(arr.view()), Axis(1));
466
467 let arr = Array2::<i32>::zeros((10, 9));
468 assert_eq!(fastest_axis(arr.view()), Axis(1));
469 let arrslice = arr.slice(s![..;2, ..;3]);
470 assert_eq!(fastest_axis(arrslice.view()), Axis(1));
471
472 let arr = Array2::<i32>::zeros((10, 9).f());
473 assert_eq!(fastest_axis(arr.view()), Axis(0));
474 let arrslice = arr.slice(s![..;2, ..;3]);
475 assert_eq!(fastest_axis(arrslice.view()), Axis(0));
476 }
477}