Skip to main content

timsrust_utils/
vec.rs

1use crate::custom_error;
2
3custom_error!(pub SparseVecError);
4
5#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
6enum SparseVecEnum<T> {
7    /// Dense representation: all elements stored directly.
8    Dense(Vec<T>),
9    /// Sparse representation: unique elements and their offsets.
10    Sparse(Vec<T>, Vec<usize>),
11}
12
13impl<T> Default for SparseVecEnum<T> {
14    fn default() -> Self {
15        SparseVecEnum::Dense(Vec::new())
16    }
17}
18
19/// A vector that can be stored in either dense or sparse format.
20///
21/// Useful for compressing repeated values.
22#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)]
23pub struct SparseVec<T> {
24    inner: SparseVecEnum<T>,
25}
26
27impl<T> SparseVec<T> {
28    /// Creates a new empty sparse vector in dense format.
29    ///
30    /// # Examples
31    /// ```
32    /// use timsrust_utils::vec::SparseVec;
33    /// let sv: SparseVec<u32> = SparseVec::new();
34    /// assert_eq!(sv.len(), 0);
35    /// assert!(sv.is_dense());
36    /// ```
37    pub fn new() -> Self {
38        Self::dense()
39    }
40
41    /// Creates a new empty sparse vector in sparse format.
42    ///
43    /// # Examples
44    /// ```
45    /// use timsrust_utils::vec::SparseVec;
46    /// let sv: SparseVec<u32> = SparseVec::sparse();
47    /// assert_eq!(sv.len(), 0);
48    /// assert!(sv.is_sparse());
49    /// ```
50    pub fn sparse() -> Self {
51        Self {
52            inner: SparseVecEnum::Sparse(Vec::new(), vec![0]),
53        }
54    }
55
56    /// Creates a new sparse vector from values and offsets.
57    ///
58    /// The `vals` vector contains the unique values, and `offsets` specifies the start of each run.
59    /// The length of `offsets` must be one more than the length of `vals`, and must be non-decreasing.
60    ///
61    /// # Examples
62    /// ```
63    /// use timsrust_utils::vec::SparseVec;
64    /// let vals = vec![1, 2];
65    /// let offsets = vec![0, 2, 5];
66    /// let sv = SparseVec::sparse_from_offsets(vals, offsets).unwrap();
67    /// assert_eq!(sv.iter().collect::<Vec<_>>(), vec![1, 1, 2, 2, 2]);
68    /// ```
69    pub fn sparse_from_offsets(
70        vals: Vec<T>,
71        offsets: Vec<usize>,
72    ) -> Result<Self, SparseVecError> {
73        if offsets.len() != vals.len() + 1 {
74            return Err(SparseVecError::new(
75                "Offsets must be one element longer than vals".to_string(),
76            ));
77        }
78        if !offsets.windows(2).all(|w| w[0] <= w[1]) {
79            return Err(SparseVecError::new(
80                "Offsets must be non-decreasing".to_string(),
81            ));
82        }
83        let result = Self {
84            inner: SparseVecEnum::Sparse(vals, offsets),
85        };
86        Ok(result)
87    }
88
89    /// Returns the offsets array if the vector is in sparse format, or `None` if dense.
90    ///
91    /// # Examples
92    /// ```
93    /// use timsrust_utils::vec::SparseVec;
94    /// let vals = vec![1, 2];
95    /// let offsets = vec![0, 2, 5];
96    /// let sv = SparseVec::sparse_from_offsets(vals, offsets.clone()).unwrap();
97    /// assert_eq!(sv.get_offsets(), Some(&offsets[..]));
98    ///
99    /// let dv: SparseVec<u32> = SparseVec::dense();
100    /// assert_eq!(dv.get_offsets(), None);
101    /// ```
102    pub fn get_offsets(&self) -> Option<&[usize]> {
103        match &self.inner {
104            SparseVecEnum::Dense(_) => None,
105            SparseVecEnum::Sparse(_, offsets) => Some(offsets),
106        }
107    }
108
109    /// Creates a new empty sparse vector in dense format.
110    ///
111    /// # Examples
112    /// ```
113    /// use timsrust_utils::vec::SparseVec;
114    /// let sv: SparseVec<u32> = SparseVec::dense();
115    /// assert_eq!(sv.len(), 0);
116    /// assert!(sv.is_dense());
117    /// ```
118    pub fn dense() -> Self {
119        Self {
120            inner: SparseVecEnum::Dense(Vec::new()),
121        }
122    }
123
124    /// Returns the total number of elements in the vector.
125    ///
126    /// # Examples
127    /// ```
128    /// use timsrust_utils::vec::SparseVec;
129    /// let mut sv = SparseVec::dense();
130    /// sv.push(1);
131    /// sv.push(2);
132    /// assert_eq!(sv.len(), 2);
133    /// ```
134    pub fn len(&self) -> usize {
135        match &self.inner {
136            SparseVecEnum::Dense(v) => v.len(),
137            SparseVecEnum::Sparse(_, offsets) => {
138                *offsets.last().expect("cannot be empty")
139            },
140        }
141    }
142
143    /// Appends an element to the vector.
144    ///
145    /// In sparse format, consecutive equal values are compressed.
146    ///
147    /// # Examples
148    /// ```
149    /// use timsrust_utils::vec::SparseVec;
150    /// let mut sv = SparseVec::sparse();
151    /// sv.push(1);
152    /// sv.push(1);
153    /// sv.push(2);
154    /// assert_eq!(sv.len(), 3);
155    /// ```
156    pub fn push(&mut self, val: T)
157    where
158        T: PartialEq,
159    {
160        match &mut self.inner {
161            SparseVecEnum::Dense(v) => v.push(val),
162            SparseVecEnum::Sparse(vals, offsets) => {
163                if vals.last() != Some(&val) {
164                    let offset = offsets.last().expect("cannot be empty");
165                    vals.push(val);
166                    offsets.push(*offset);
167                }
168                *offsets.last_mut().expect("cannot be empty") += 1;
169            },
170        }
171    }
172
173    /// Compresses the vector from dense to sparse format if it saves memory.
174    ///
175    /// Only compresses if the sparse representation uses less memory than the dense one.
176    ///
177    /// # Examples
178    /// ```
179    /// use timsrust_utils::vec::SparseVec;
180    /// let mut sv = SparseVec::new();
181    /// for i in 0..100 {
182    ///     sv.push(i / 25);
183    /// }
184    /// assert_eq!(sv.len(), 100);
185    /// assert!(sv.is_dense());
186    /// let mut sv2 = sv.clone();
187    /// sv2.compress();
188    /// assert!(sv2.is_sparse());
189    /// assert_eq!(Vec::from(sv), Vec::from(sv2));
190    /// ```
191    pub fn compress(&mut self)
192    where
193        T: PartialEq + Copy,
194    {
195        match &mut self.inner {
196            SparseVecEnum::Dense(dense) => {
197                let dense_size = dense.len() * std::mem::size_of::<T>();
198                let mut sparse = SparseVec::sparse();
199                for value in dense.iter() {
200                    sparse.push(*value);
201                    if sparse.size_of() >= dense_size {
202                        return;
203                    }
204                }
205                *self = sparse;
206            },
207            SparseVecEnum::Sparse(_, _) => {},
208        }
209    }
210
211    /// Returns the memory usage in bytes.
212    ///
213    /// # Examples
214    /// ```
215    /// use timsrust_utils::vec::SparseVec;
216    /// let mut dv: SparseVec<u32> = SparseVec::dense();
217    /// for i in 0..100 {
218    ///     dv.push(i / 25);
219    /// }
220    /// let mut sv = dv.clone();
221    /// sv.compress();
222    /// assert!(sv.size_of() < dv.size_of());
223    /// ```
224    pub fn size_of(&self) -> usize {
225        std::mem::size_of::<Self>()
226            + match &self.inner {
227                SparseVecEnum::Dense(v) => v.len() * std::mem::size_of::<T>(),
228                SparseVecEnum::Sparse(vals, _) => {
229                    vals.len()
230                        * (std::mem::size_of::<T>()
231                            + std::mem::size_of::<usize>())
232                        + std::mem::size_of::<usize>()
233                },
234            }
235    }
236
237    /// Returns an iterator over all elements in the vector.
238    ///
239    /// # Examples
240    /// ```
241    /// use timsrust_utils::vec::SparseVec;
242    /// let mut sv = SparseVec::dense();
243    /// sv.push(1);
244    /// sv.push(2);
245    /// let v: Vec<_> = sv.iter().collect();
246    /// assert_eq!(v, vec![1, 2]);
247    /// ```
248    pub fn iter(&self) -> SparseVecIter<'_, T> {
249        SparseVecIter {
250            inner: self,
251            index: 0,
252            offset_index: 0,
253        }
254    }
255
256    /// Returns true if the vector contains no elements.
257    ///
258    /// # Examples
259    /// ```
260    /// use timsrust_utils::vec::SparseVec;
261    /// let sv: SparseVec<u32> = SparseVec::sparse();
262    /// assert_eq!(sv.len(), 0);
263    /// ```
264    pub fn is_empty(&self) -> bool {
265        self.len() == 0
266    }
267
268    /// Returns true if the vector is in dense format.
269    ///
270    /// # Examples
271    /// ```
272    /// use timsrust_utils::vec::SparseVec;
273    /// let mut sv: SparseVec<u32> = SparseVec::dense();
274    /// assert!(sv.is_dense());
275    /// ```
276    pub fn is_dense(&self) -> bool {
277        matches!(self.inner, SparseVecEnum::Dense(_))
278    }
279
280    /// Returns true if the vector is in sparse format.
281    ///
282    /// # Examples
283    /// ```
284    /// use timsrust_utils::vec::SparseVec;
285    /// let sv: SparseVec<u32> = SparseVec::sparse();
286    /// assert!(sv.is_sparse());
287    /// ```
288    pub fn is_sparse(&self) -> bool {
289        matches!(self.inner, SparseVecEnum::Sparse(_, _))
290    }
291
292    /// Returns the indices that would sort the vector.
293    ///
294    /// The returned vector contains the indices of the elements in sorted order.
295    ///
296    /// # Examples
297    /// ```
298    /// use timsrust_utils::vec::SparseVec;
299    /// let mut sv = SparseVec::dense();
300    /// sv.push(3);
301    /// sv.push(1);
302    /// sv.push(2);
303    /// let sorted_indices = sv.argsort();
304    /// assert_eq!(sorted_indices, vec![1, 2, 0]);
305    ///
306    /// let mut sv = SparseVec::sparse();
307    /// sv.push(2);
308    /// sv.push(2);
309    /// sv.push(1);
310    /// sv.push(1);
311    /// sv.push(1);
312    /// let sorted_indices = sv.argsort();
313    /// assert_eq!(sorted_indices, vec![2, 3, 0, 1, 2]);
314    /// ```
315    pub fn argsort(&self) -> Vec<usize>
316    where
317        T: Ord + Copy,
318    {
319        match &self.inner {
320            SparseVecEnum::Dense(dense) => {
321                let mut indices = (0..self.len()).collect::<Vec<_>>();
322                indices.sort_by_key(|&a| dense[a]);
323                indices
324            },
325            SparseVecEnum::Sparse(sparse, offsets) => {
326                let mut indices = (0..sparse.len()).collect::<Vec<_>>();
327                indices.sort_by_key(|&a| sparse[a]);
328                let mut result = Vec::with_capacity(self.len());
329                for (i, pos) in indices.iter().enumerate() {
330                    let mut offset = offsets[*pos];
331                    for _ in offsets[i]..offsets[i + 1] {
332                        result.push(offset);
333                        offset += 1;
334                    }
335                }
336                result
337            },
338        }
339    }
340}
341
342/// Iterator for [SparseVec].
343pub struct SparseVecIter<'a, T> {
344    inner: &'a SparseVec<T>,
345    index: usize,
346    offset_index: usize,
347}
348
349impl<T: Copy> Iterator for SparseVecIter<'_, T> {
350    type Item = T;
351
352    fn next(&mut self) -> Option<Self::Item> {
353        match &self.inner.inner {
354            SparseVecEnum::Dense(v) => {
355                if self.index < v.len() {
356                    self.index += 1;
357                    return Some(v[self.index - 1]);
358                }
359            },
360            SparseVecEnum::Sparse(vals, offsets) => {
361                while self.index < offsets.len() {
362                    if self.offset_index < offsets[self.index] {
363                        self.offset_index += 1;
364                        return Some(vals[self.index - 1]);
365                    }
366                    self.index += 1;
367                }
368            },
369        }
370        None
371    }
372}
373
374impl<T: Copy> From<SparseVec<T>> for Vec<T> {
375    /// Converts a `SparseVec` into a regular `Vec`, expanding any compressed values.
376    ///
377    /// # Examples
378    /// ```
379    /// use timsrust_utils::vec::SparseVec;
380    /// let mut sv = SparseVec::sparse();
381    /// sv.push(1);
382    /// sv.push(1);
383    /// sv.push(2);
384    /// let v: Vec<_> = Vec::from(sv);
385    /// assert_eq!(v, vec![1, 1, 2]);
386    /// ```
387    fn from(sparse: SparseVec<T>) -> Self {
388        sparse.iter().collect()
389    }
390}
391
392impl<T: Copy> From<&SparseVec<T>> for Vec<T> {
393    /// Converts a `SparseVec` into a regular `Vec`, expanding any compressed values.
394    ///
395    /// # Examples
396    /// ```
397    /// use timsrust_utils::vec::SparseVec;
398    /// let mut sv = SparseVec::sparse();
399    /// sv.push(1);
400    /// sv.push(1);
401    /// sv.push(2);
402    /// let v: Vec<_> = Vec::from(sv);
403    /// assert_eq!(v, vec![1, 1, 2]);
404    /// ```
405    fn from(sparse: &SparseVec<T>) -> Self {
406        sparse.iter().collect()
407    }
408}
409
410impl<T: Copy + PartialEq> From<Vec<T>> for SparseVec<T> {
411    /// Converts a regular `Vec` into a compressed `SparseVec`.
412    ///
413    /// # Examples
414    /// ```
415    /// use timsrust_utils::vec::SparseVec;
416    /// let mut v = Vec::new();
417    /// for i in 0..100 {
418    ///     v.push(i / 25);
419    /// }
420    /// let sv: SparseVec<_> = v.clone().into();
421    /// assert_eq!(sv.iter().collect::<Vec<_>>(), v);
422    /// ```
423    fn from(vec: Vec<T>) -> Self {
424        let mut sparse = Self {
425            inner: SparseVecEnum::Dense(vec),
426        };
427        sparse.compress();
428        sparse
429    }
430}
431
432pub fn argsort<T: Ord>(vec: &[T]) -> Vec<usize> {
433    let mut indices: Vec<usize> = (0..vec.len()).collect();
434    indices.sort_by_key(|&i| &vec[i]);
435    indices
436}
437
438pub fn group_and_sum<T: Ord + Copy, U: std::ops::Add<Output = U> + Copy>(
439    groups: Vec<T>,
440    values: Vec<U>,
441) -> (Vec<T>, Vec<U>) {
442    if groups.is_empty() {
443        return (vec![], vec![]);
444    }
445    let order: Vec<usize> = argsort(&groups);
446    let mut new_groups: Vec<T> = Vec::with_capacity(order.len());
447    let mut new_values: Vec<U> = Vec::with_capacity(order.len());
448    let mut current_group: T = groups[order[0]];
449    let mut current_value: U = values[order[0]];
450    for &index in &order[1..] {
451        let group: T = groups[index];
452        let value: U = values[index];
453        if group != current_group {
454            new_groups.push(current_group);
455            new_values.push(current_value);
456            current_group = group;
457            current_value = value;
458        } else {
459            current_value = current_value + value;
460        };
461    }
462    new_groups.push(current_group);
463    new_values.push(current_value);
464    (new_groups, new_values)
465}
466
467pub fn find_sparse_local_maxima_mask(
468    indices: &[u32],
469    values: &[u64],
470    window: u32,
471) -> Vec<bool> {
472    let mut local_maxima: Vec<bool> = vec![true; indices.len()];
473    for (index, sparse_index) in indices.iter().enumerate() {
474        let current_intensity: u64 = values[index];
475        for (_next_index, next_sparse_index) in
476            indices[index + 1..].iter().enumerate()
477        {
478            let next_index: usize = _next_index + index + 1;
479            let next_value: u64 = values[next_index];
480            if (next_sparse_index - sparse_index) <= window {
481                if current_intensity < next_value {
482                    local_maxima[index] = false
483                } else {
484                    local_maxima[next_index] = false
485                }
486            } else {
487                break;
488            }
489        }
490    }
491    local_maxima
492}
493
494pub fn filter_with_mask<T: Copy>(vec: &[T], mask: &[bool]) -> Vec<T> {
495    (0..vec.len())
496        .filter(|&x| mask[x])
497        .map(|x| vec[x])
498        .collect()
499}
500
501pub fn is_strictly_ascending<T: std::cmp::PartialOrd>(vec: &[T]) -> bool {
502    vec.windows(2).all(|w| w[0] < w[1])
503}
504
505pub fn arg_max<T: Ord>(kernel: &[T]) -> Option<usize> {
506    kernel
507        .iter()
508        .enumerate()
509        .max_by(|a, b| a.1.cmp(b.1))
510        .map(|(idx, _)| idx)
511}
512
513pub fn get_top_n<T: PartialOrd>(vec: &[T], n: usize) -> Vec<usize> {
514    let top_n = if n == 0 { vec.len() } else { n.min(vec.len()) };
515    if top_n == vec.len() {
516        return (0..vec.len()).collect();
517    }
518    let mut indexed = vec.iter().enumerate().collect::<Vec<_>>();
519    indexed.select_nth_unstable_by(top_n, |a, b| {
520        b.1.partial_cmp(a.1).unwrap_or(std::cmp::Ordering::Equal)
521    });
522    let mut top_indices =
523        indexed[..top_n].iter().map(|(i, _)| *i).collect::<Vec<_>>();
524    top_indices.sort_unstable();
525    top_indices
526}
527
528pub fn extract_kernel(kernel: &[u64], threshold: f32) -> Option<Vec<f32>> {
529    let max_val = *kernel.iter().max()? as f32;
530    let kernel = kernel
531        .iter()
532        .map(|&x| x as f32 / max_val)
533        .collect::<Vec<_>>();
534    let first = kernel.iter().position(|&x| x > threshold)? - 1;
535    let last = kernel.iter().rposition(|&x| x > threshold)? + 1;
536    let kernel = kernel[first..last + 1].to_vec();
537    Some(kernel)
538}
539
540#[cfg(test)]
541mod tests {
542    use super::*;
543
544    #[test]
545    fn test_dense_push_and_len() {
546        let mut sv = SparseVec::dense();
547        sv.push(1);
548        sv.push(2);
549        assert_eq!(sv.len(), 2);
550        assert_eq!(Vec::from(sv.clone()), vec![1, 2]);
551    }
552
553    #[test]
554    fn test_sparse_push_and_len() {
555        let mut sv = SparseVec::sparse();
556        sv.push(1);
557        sv.push(1);
558        sv.push(2);
559        assert_eq!(sv.len(), 3);
560        assert_eq!(Vec::from(sv.clone()), vec![1, 1, 2]);
561    }
562
563    #[test]
564    fn test_compress_dense_to_sparse() {
565        let mut sv = SparseVec::dense();
566        for i in 0..100 {
567            sv.push(i / 25);
568        }
569        assert!(sv.is_dense());
570        sv.compress();
571        assert!(sv.is_sparse());
572    }
573
574    #[test]
575    fn test_size_of() {
576        let mut sv = SparseVec::dense();
577        for i in 0..100 {
578            sv.push(i / 25);
579        }
580        let dense_size = sv.size_of();
581        sv.compress();
582        let sparse_size = sv.size_of();
583        assert!(sparse_size < dense_size);
584    }
585
586    #[test]
587    fn test_iter_dense() {
588        let mut sv = SparseVec::dense();
589        sv.push(1);
590        sv.push(2);
591        let v: Vec<_> = sv.iter().collect();
592        assert_eq!(v, vec![1, 2]);
593    }
594
595    #[test]
596    fn test_iter_sparse() {
597        let mut sv = SparseVec::sparse();
598        sv.push(1);
599        sv.push(1);
600        sv.push(2);
601        let v: Vec<_> = sv.iter().collect();
602        assert_eq!(v, vec![1, 1, 2]);
603    }
604
605    #[test]
606    fn test_from_vec() {
607        let mut v = Vec::new();
608        for i in 0..123 {
609            v.push(i / 25);
610        }
611        let sv: SparseVec<_> = SparseVec::from(v.clone());
612        assert_eq!(sv.iter().collect::<Vec<_>>(), v);
613    }
614
615    #[test]
616    fn test_empty_sparsevec() {
617        let sv: SparseVec<u32> = SparseVec::sparse();
618        assert_eq!(sv.len(), 0);
619        let v: Vec<_> = sv.iter().collect();
620        assert!(v.is_empty());
621    }
622
623    #[test]
624    fn test_empty_densevec() {
625        let sv: SparseVec<u32> = SparseVec::dense();
626        assert_eq!(sv.len(), 0);
627        let v: Vec<_> = sv.iter().collect();
628        assert!(v.is_empty());
629    }
630}