sprs_rssn/
sparse.rs

1use crate::array_backend::Array2;
2use crate::errors::StructureError;
3use crate::indexing::SpIndex;
4use crate::IndPtrBase;
5use std::ops::Deref;
6
7#[cfg(feature = "serde")]
8mod serde_traits;
9#[cfg(feature = "serde")]
10use serde_traits::{CsMatBaseShadow, CsVecBaseShadow, Deserialize, Serialize};
11
12pub use self::csmat::CompressedStorage;
13
14/// Compressed matrix in the CSR or CSC format, with sorted indices.
15///
16/// This sparse matrix format is the preferred format for performing arithmetic
17/// operations. Constructing a sparse matrix directly in this format requires
18/// a deep knowledge of its internals. For easier matrix construction, the
19/// [triplet format](struct.TriMatBase.html) is preferred.
20///
21/// The `CsMatBase` type is parameterized by the scalar type `N`, the indexing
22/// type `I`, the indexing storage backend types `IptrStorage` and `IndStorage`,
23/// and the value storage backend type `DataStorage`. Convenient aliases are
24/// available to specify frequent variants: [`CsMat`] refers to a sparse matrix
25/// that owns its data, similar to `Vec<T>`; [`CsMatView`] refers to a sparse matrix
26/// that borrows its data, similar to `& [T]`; and [`CsMatViewMut`] refers to a sparse
27/// matrix borrowing its data, with a mutable borrow for its values. No mutable
28/// borrow is allowed for the structure of the matrix, allowing the invariants
29/// to be preserved.
30///
31/// Additionaly, the type aliases [`CsMatI`], [`CsMatViewI`] and
32/// [`CsMatViewMutI`] can be used to choose an index type different from the
33/// default `usize`.
34///
35/// [`CsMat`]: type.CsMat.html
36/// [`CsMatView`]: type.CsMatView.html
37/// [`CsMatViewMut`]: type.CsMatViewMut.html
38/// [`CsMatI`]: type.CsMatI.html
39/// [`CsMatViewI`]: type.CsMatViewI.html
40/// [`CsMatViewMutI`]: type.CsMatViewMutI.html
41///
42/// ## Storage format
43///
44/// In the compressed storage format, the non-zero values of a sparse matrix
45/// are stored as the row and column location of the non-zero values, with
46/// a compression along the rows (CSR) or columns (CSC) indices. The dimension
47/// along which the storage is compressed is referred to as the *outer dimension*,
48/// the other dimension is called the *inner dimension*. For clarity, the
49/// remaining explanation will assume a CSR matrix, but the information stands
50/// for CSC matrices as well.
51///
52/// ### Indptr
53///
54/// An index pointer array `indptr` of size corresponding to the number of rows
55/// stores the cumulative sum of non-zero elements for each row. For instance,
56/// the number of non-zero elements of the i-th row can be obtained by computing
57/// `indptr[i + 1] - indptr[i]`. The total number of non-zero elements is thus
58/// `nnz = indptr[nb_rows + 1]`. This index pointer array can then be used to
59/// efficiently index the `indices` and `data` array, which respectively contain
60/// the column indices and the values of the non-zero elements.
61///
62/// ### Indices and data
63///
64/// The non-zero locations and values are stored in arrays of size `nnz`, `indices`
65/// and `data`. For row `i`, the non-zeros are located in the slices
66/// `indices[indptr[i]..indptr[i+1]]` and `data[indptr[i]..indptr[i+1]]`. We
67/// require and enforce sorted indices for each row.
68///
69/// ## Construction
70///
71/// A sparse matrix can be directly constructed by providing its index pointer,
72/// indices and data arrays. The coherence of the provided structure is then
73/// verified.
74///
75/// For situations where the compressed structure is hard to figure out up front,
76/// the [triplet format](struct.TriMatBase.html) can be used. A matrix in the
77/// triplet format can then be efficiently converted to a `CsMat`.
78///
79/// Alternately, a sparse matrix can be constructed from other sparse matrices
80/// using [`vstack`], [`hstack`] or [`bmat`].
81///
82/// [`vstack`]: fn.vstack.html
83/// [`hstack`]: fn.hstack.html
84/// [`bmat`]: fn.bmat.html
85
86#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
87#[cfg_attr(feature = "serde", derive(Deserialize))]
88#[cfg_attr(
89    feature = "serde",
90    serde(
91        try_from = "CsMatBaseShadow<N, I, IptrStorage, IndStorage, DataStorage, Iptr>"
92    )
93)]
94pub struct CsMatBase<N, I, IptrStorage, IndStorage, DataStorage, Iptr = I>
95where
96    I: SpIndex,
97    Iptr: SpIndex,
98    IptrStorage: Deref<Target = [Iptr]>,
99    IndStorage: Deref<Target = [I]>,
100    DataStorage: Deref<Target = [N]>,
101{
102    storage: CompressedStorage,
103    nrows: usize,
104    ncols: usize,
105    #[cfg_attr(feature = "serde", serde(flatten))]
106    indptr: IndPtrBase<Iptr, IptrStorage>,
107    indices: IndStorage,
108    data: DataStorage,
109}
110
111pub type CsMatI<N, I, Iptr = I> =
112    CsMatBase<N, I, Vec<Iptr>, Vec<I>, Vec<N>, Iptr>;
113pub type CsMatViewI<'a, N, I, Iptr = I> =
114    CsMatBase<N, I, &'a [Iptr], &'a [I], &'a [N], Iptr>;
115pub type CsMatViewMutI<'a, N, I, Iptr = I> =
116    CsMatBase<N, I, &'a [Iptr], &'a [I], &'a mut [N], Iptr>;
117pub type CsMatVecView_<'a, N, I, Iptr = I> =
118    CsMatBase<N, I, Array2<Iptr>, &'a [I], &'a [N], Iptr>;
119
120pub type CsMat<N> = CsMatI<N, usize>;
121pub type CsMatView<'a, N> = CsMatViewI<'a, N, usize>;
122pub type CsMatViewMut<'a, N> = CsMatViewMutI<'a, N, usize>;
123// FIXME: a fixed size array would be better, but no Deref impl
124pub type CsMatVecView<'a, N> = CsMatVecView_<'a, N, usize>;
125
126pub type CsStructureViewI<'a, I, Iptr = I> = CsMatViewI<'a, (), I, Iptr>;
127pub type CsStructureView<'a> = CsStructureViewI<'a, usize>;
128pub type CsStructureI<I, Iptr = I> = CsMatI<(), I, Iptr>;
129pub type CsStructure = CsStructureI<usize>;
130
131/// A sparse vector, storing the indices of its non-zero data.
132///
133/// A `CsVec` represents a sparse vector by storing a sorted `indices()` array
134/// containing the locations of the non-zero values and a `data()` array
135/// containing the corresponding values. The format is compatible with `CsMat`,
136/// ie a `CsVec` can represent the row of a CSR matrix without any copying.
137///
138/// Similar to [`CsMat`] and [`TriMat`], the `CsVecBase` type is parameterized
139/// over the indexing storage backend `IStorage` and the data storage backend
140/// `DStorage`. Type aliases are provided for common cases: [`CsVec`] represents
141/// a sparse vector owning its data, with `Vec`s as storage backends;
142/// [`CsVecView`] represents a sparse vector borrowing its data, using slices
143/// as storage backends; and [`CsVecViewMut`] represents a sparse vector that
144/// mutably borrows its data (but immutably borrows its indices).
145///
146/// Additionaly, the type aliases [`CsVecI`], [`CsVecViewI`], and
147/// [`CsVecViewMutI`] can be used to choose an index type different from the
148/// default `usize`.
149///
150/// [`CsMat`]: struct.CsMatBase.html
151/// [`TriMat`]: struct.TriMatBase.html
152/// [`CsVec`]: type.CsVec.html
153/// [`CsVecView`]: type.CsVecView.html
154/// [`CsVecViewMut`]: type.CsVecViewMut.html
155/// [`CsVecI`]: type.CsVecI.html
156/// [`CsVecViewI`]: type.CsVecViewI.html
157/// [`CsVecViewMutI`]: type.CsVecViewMutI.html
158
159#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
160#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
161#[cfg_attr(
162    feature = "serde",
163    serde(try_from = "CsVecBaseShadow<IStorage, DStorage, N, I>")
164)]
165pub struct CsVecBase<IStorage, DStorage, N, I: SpIndex = usize>
166where
167    IStorage: Deref<Target = [I]>,
168    DStorage: Deref<Target = [N]>,
169{
170    dim: usize,
171    indices: IStorage,
172    data: DStorage,
173}
174
175pub type CsVecI<N, I = usize> = CsVecBase<Vec<I>, Vec<N>, N, I>;
176pub type CsVecViewI<'a, N, I = usize> = CsVecBase<&'a [I], &'a [N], N, I>;
177pub type CsVecViewMutI<'a, N, I = usize> =
178    CsVecBase<&'a [I], &'a mut [N], N, I>;
179
180pub type CsVecView<'a, N> = CsVecViewI<'a, N>;
181pub type CsVecViewMut<'a, N> = CsVecViewMutI<'a, N>;
182pub type CsVec<N> = CsVecI<N>;
183
184/// Sparse matrix in the triplet format.
185///
186/// Sparse matrices in the triplet format use three arrays of equal sizes (accessible through the
187/// methods [`row_inds`], [`col_inds`], [`data`]), the first one
188/// storing the row indices of non-zero values, the second storing the
189/// corresponding column indices and the last array storing the corresponding
190/// scalar value. If a non-zero location is repeated in the arrays, the
191/// non-zero value is taken as the sum of the corresponding scalar entries.
192///
193/// [`row_inds`]: struct.TriMatBase.html#method.row_inds
194/// [`col_inds`]: struct.TriMatBase.html#method.col_inds
195/// [`data`]: struct.TriMatBase.html#method.data
196///
197/// This format is useful for iteratively building a sparse matrix, since the
198/// various non-zero entries can be specified in any order, or even partially
199/// as is common in physics with partial derivatives equations.
200///
201/// This format cannot be used for arithmetic operations. Arithmetic operations
202/// are more efficient in the [compressed format](struct.CsMatBase.html).
203/// A matrix in the triplet format can be converted to the compressed format
204/// using the methods [`to_csc`] and [`to_csr`].
205///
206/// [`to_csc`]: struct.TriMatBase.html#method.to_csc
207/// [`to_csr`]: struct.TriMatBase.html#method.to_csr
208///
209/// The `TriMatBase` type is parameterized by the storage type for the row and
210/// column indices, `IStorage`, and by the storage type for the non-zero values
211/// `DStorage`. Convenient aliases are availaible to specify frequent variant:
212/// [`TriMat`] refers to a triplet matrix owning the storage of its indices and
213/// and values, [`TriMatView`] refers to a triplet matrix with slices to store
214/// its indices and values, while [`TriMatViewMut`] refers to a a triplet matrix
215/// using mutable slices.
216///
217/// Additionaly, the type aliases [`TriMatI`], [`TriMatViewI`] and
218/// [`TriMatViewMutI`] can be used to choose an index type different from the
219/// default `usize`.
220///
221/// [`TriMat`]: type.TriMat.html
222/// [`TriMatView`]: type.TriMatView.html
223/// [`TriMatViewMut`]: type.TriMatViewMut.html
224/// [`TriMatI`]: type.TriMatI.html
225/// [`TriMatViewI`]: type.TriMatViewI.html
226/// [`TriMatViewMutI`]: type.TriMatViewMutI.html
227#[derive(PartialEq, Eq, Debug, Hash)]
228pub struct TriMatBase<IStorage, DStorage> {
229    rows: usize,
230    cols: usize,
231    row_inds: IStorage,
232    col_inds: IStorage,
233    data: DStorage,
234}
235
236pub type TriMatI<N, I> = TriMatBase<Vec<I>, Vec<N>>;
237pub type TriMatViewI<'a, N, I> = TriMatBase<&'a [I], &'a [N]>;
238pub type TriMatViewMutI<'a, N, I> = TriMatBase<&'a mut [I], &'a mut [N]>;
239
240pub type TriMat<N> = TriMatI<N, usize>;
241pub type TriMatView<'a, N> = TriMatViewI<'a, N, usize>;
242pub type TriMatViewMut<'a, N> = TriMatViewMutI<'a, N, usize>;
243
244/// An iterator over elements of a sparse matrix, in the triplet format
245///
246/// The dataypes RI, CI, and DI are iterators yielding the row, column and
247/// values of non-zero entries.
248///
249/// As in `TriMat`, no order guarantee is provided and the same location can
250/// appear multiple times. The non-zero value is then considered as the sum
251/// of all the entries sharing its location.
252#[derive(PartialEq, Eq, Debug, Clone)]
253pub struct TriMatIter<RI, CI, DI> {
254    rows: usize,
255    cols: usize,
256    nnz: usize,
257    row_inds: RI,
258    col_inds: CI,
259    data: DI,
260}
261
262mod prelude {
263    #[allow(unused_imports)]
264    pub use super::{
265        CsMat, CsMatBase, CsMatI, CsMatVecView, CsMatVecView_, CsMatView,
266        CsMatViewI, CsMatViewMut, CsMatViewMutI, CsStructure, CsStructureI,
267        CsStructureView, CsStructureViewI, CsVec, CsVecBase, CsVecI, CsVecView,
268        CsVecViewI, CsVecViewMut, CsVecViewMutI, SparseMat, TriMat, TriMatBase,
269        TriMatI, TriMatIter, TriMatView, TriMatViewI, TriMatViewMut,
270        TriMatViewMutI,
271    };
272}
273
274/// A trait for common members of sparse matrices
275pub trait SparseMat {
276    /// The number of rows of this matrix
277    fn rows(&self) -> usize;
278
279    /// The number of columns of this matrix
280    fn cols(&self) -> usize;
281
282    /// The number of nonzeros of this matrix
283    fn nnz(&self) -> usize;
284}
285
286pub(crate) mod utils {
287    use super::*;
288    use ndarray::Axis;
289    use std::convert::TryInto;
290
291    /// Check the structure of `CsMat` components
292    /// This will ensure that:
293    /// * indptr is of length `outer_dim() + 1`
294    /// * indices and data have the same length, `nnz == indptr[outer_dims()]`
295    /// * indptr is sorted
296    /// * indptr values do not exceed [`usize::MAX`](usize::MAX)`/ 2`, as that would mean
297    ///   indices and indptr would take more space than the addressable memory
298    /// * indices is sorted for each outer slice
299    /// * indices are lower than `inner_dims()`
300    pub(crate) fn check_compressed_structure<I: SpIndex, Iptr: SpIndex>(
301        inner: usize,
302        outer: usize,
303        indptr: &[Iptr],
304        indices: &[I],
305    ) -> Result<(), StructureError> {
306        let indptr =
307            crate::IndPtrView::new_checked(indptr).map_err(|(_, e)| e)?;
308        if indptr.len() != outer + 1 {
309            return Err(StructureError::SizeMismatch(
310                "Indptr length does not match dimension",
311            ));
312        }
313        // Make sure Iptr and I can represent all types for this size
314        if I::from(inner).is_none() {
315            return Err(StructureError::OutOfRange(
316                "Index type not large enough for this matrix",
317            ));
318        }
319        if Iptr::from(outer + 1).is_none() {
320            return Err(StructureError::OutOfRange(
321                "Iptr type not large enough for this matrix",
322            ));
323        }
324        // Make sure indices can be converted to usize
325        // this could happen if index is negative for sized types
326        for i in indices.iter() {
327            if i.try_index().is_none() {
328                return Err(StructureError::OutOfRange(
329                    "Indices value out of range of usize",
330                ));
331            }
332        }
333        let nnz = indices.len();
334        if nnz != indptr.nnz() {
335            return Err(StructureError::SizeMismatch(
336                "Indices length and inpdtr's nnz do not match",
337            ));
338        }
339
340        // check that the indices are sorted for each row
341        for range in indptr.iter_outer_sz() {
342            let indices = &indices[range];
343            // Indices must be monotonically increasing
344            if !sorted_indices(indices) {
345                return Err(StructureError::Unsorted("Indices are not sorted"));
346            }
347            // Last index (which is the largest) must be in bounds
348            if let Some(i) = indices.last() {
349                if i.to_usize().unwrap() >= inner {
350                    return Err(StructureError::OutOfRange(
351                        "Indice is larger than inner dimension",
352                    ));
353                }
354            }
355        }
356
357        Ok(())
358    }
359
360    pub fn sorted_indices<I: SpIndex>(indices: &[I]) -> bool {
361        for w in indices.windows(2) {
362            // w will always be a size 2
363            let &[i1, i2]: &[I; 2] = w.try_into().unwrap();
364            if i2 <= i1 {
365                return false;
366            }
367        }
368        true
369    }
370
371    pub fn sort_indices_data_slices<N: Clone, I: SpIndex>(
372        indices: &mut [I],
373        data: &mut [N],
374        buf: &mut Vec<(I, N)>,
375    ) {
376        let len = indices.len();
377        assert_eq!(len, data.len());
378        let indices = &mut indices[..len];
379        let data = &mut data[..len];
380        buf.clear();
381        buf.reserve_exact(len);
382        for (i, v) in indices.iter().zip(data.iter()) {
383            buf.push((*i, v.clone()));
384        }
385
386        buf.sort_unstable_by_key(|x| x.0);
387
388        for ((i, x), (ind, v)) in buf
389            .iter()
390            .cloned()
391            .zip(indices.iter_mut().zip(data.iter_mut()))
392        {
393            *ind = i;
394            *v = x;
395        }
396    }
397    /// Return the axis which varies the fastest. For a
398    /// `C`-style matrix this will be `Axis(1)`, for an
399    /// `F`-style matrix this will be `Axis(0)`
400    pub(crate) fn fastest_axis<T>(mat: ndarray::ArrayView2<T>) -> Axis {
401        if mat.strides()[1] > mat.strides()[0] {
402            Axis(0)
403        } else {
404            Axis(1)
405        }
406    }
407    pub(crate) fn slowest_axis<T>(mat: ndarray::ArrayView2<T>) -> Axis {
408        match fastest_axis(mat) {
409            Axis(1) => Axis(0),
410            Axis(0) => Axis(1),
411            _ => unreachable!(),
412        }
413    }
414}
415
416pub mod binop;
417pub mod compressed;
418pub mod construct;
419pub mod csmat;
420pub mod indptr;
421pub mod kronecker;
422pub mod linalg;
423pub mod permutation;
424pub mod prod;
425pub mod slicing;
426pub mod smmp;
427pub mod special_mats;
428pub mod symmetric;
429pub mod to_dense;
430pub mod triplet;
431pub mod triplet_iter;
432pub mod vec;
433pub mod visu;
434
435#[cfg(test)]
436mod test {
437
438    use super::utils;
439    #[test]
440    fn test_sort_indices() {
441        let mut idx: Vec<usize> = vec![4, 1, 6, 2];
442        let mut dat: Vec<i32> = vec![4, -1, 2, -3];
443        let mut buf: Vec<(usize, i32)> = Vec::new();
444        utils::sort_indices_data_slices(&mut idx[..], &mut dat[..], &mut buf);
445        assert_eq!(idx, vec![1, 2, 4, 6]);
446        assert_eq!(dat, vec![-1, -3, 4, 2]);
447    }
448
449    #[test]
450    fn test_sorted_indices() {
451        use utils::sorted_indices;
452        assert!(sorted_indices(&[1, 2, 3]));
453        assert!(sorted_indices(&[1, 2, 8]));
454        assert!(!sorted_indices(&[1, 1, 3]));
455        assert!(!sorted_indices(&[2, 1, 3]));
456        assert!(sorted_indices(&[1, 2]));
457        assert!(sorted_indices(&[1]));
458    }
459
460    #[test]
461    fn test_fastest_axis() {
462        use ndarray::{arr2, s, Array2, Axis, ShapeBuilder};
463        use utils::fastest_axis;
464        let arr = arr2(&[[1, 2], [3, 4]]);
465        assert_eq!(fastest_axis(arr.view()), Axis(1));
466
467        let arr = Array2::<i32>::zeros((10, 9));
468        assert_eq!(fastest_axis(arr.view()), Axis(1));
469        let arrslice = arr.slice(s![..;2, ..;3]);
470        assert_eq!(fastest_axis(arrslice.view()), Axis(1));
471
472        let arr = Array2::<i32>::zeros((10, 9).f());
473        assert_eq!(fastest_axis(arr.view()), Axis(0));
474        let arrslice = arr.slice(s![..;2, ..;3]);
475        assert_eq!(fastest_axis(arrslice.view()), Axis(0));
476    }
477}