1use crate::custom_error;
2
3custom_error!(pub SparseVecError);
4
5#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
6enum SparseVecEnum<T> {
7 Dense(Vec<T>),
9 Sparse(Vec<T>, Vec<usize>),
11}
12
13impl<T> Default for SparseVecEnum<T> {
14 fn default() -> Self {
15 SparseVecEnum::Dense(Vec::new())
16 }
17}
18
19#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)]
23pub struct SparseVec<T> {
24 inner: SparseVecEnum<T>,
25}
26
27impl<T> SparseVec<T> {
28 pub fn new() -> Self {
38 Self::dense()
39 }
40
41 pub fn sparse() -> Self {
51 Self {
52 inner: SparseVecEnum::Sparse(Vec::new(), vec![0]),
53 }
54 }
55
56 pub fn sparse_from_offsets(
70 vals: Vec<T>,
71 offsets: Vec<usize>,
72 ) -> Result<Self, SparseVecError> {
73 if offsets.len() != vals.len() + 1 {
74 return Err(SparseVecError::new(
75 "Offsets must be one element longer than vals".to_string(),
76 ));
77 }
78 if !offsets.windows(2).all(|w| w[0] <= w[1]) {
79 return Err(SparseVecError::new(
80 "Offsets must be non-decreasing".to_string(),
81 ));
82 }
83 let result = Self {
84 inner: SparseVecEnum::Sparse(vals, offsets),
85 };
86 Ok(result)
87 }
88
89 pub fn get_offsets(&self) -> Option<&[usize]> {
103 match &self.inner {
104 SparseVecEnum::Dense(_) => None,
105 SparseVecEnum::Sparse(_, offsets) => Some(offsets),
106 }
107 }
108
109 pub fn dense() -> Self {
119 Self {
120 inner: SparseVecEnum::Dense(Vec::new()),
121 }
122 }
123
124 pub fn len(&self) -> usize {
135 match &self.inner {
136 SparseVecEnum::Dense(v) => v.len(),
137 SparseVecEnum::Sparse(_, offsets) => {
138 *offsets.last().expect("cannot be empty")
139 },
140 }
141 }
142
143 pub fn push(&mut self, val: T)
157 where
158 T: PartialEq,
159 {
160 match &mut self.inner {
161 SparseVecEnum::Dense(v) => v.push(val),
162 SparseVecEnum::Sparse(vals, offsets) => {
163 if vals.last() != Some(&val) {
164 let offset = offsets.last().expect("cannot be empty");
165 vals.push(val);
166 offsets.push(*offset);
167 }
168 *offsets.last_mut().expect("cannot be empty") += 1;
169 },
170 }
171 }
172
173 pub fn compress(&mut self)
192 where
193 T: PartialEq + Copy,
194 {
195 match &mut self.inner {
196 SparseVecEnum::Dense(dense) => {
197 let dense_size = dense.len() * std::mem::size_of::<T>();
198 let mut sparse = SparseVec::sparse();
199 for value in dense.iter() {
200 sparse.push(*value);
201 if sparse.size_of() >= dense_size {
202 return;
203 }
204 }
205 *self = sparse;
206 },
207 SparseVecEnum::Sparse(_, _) => {},
208 }
209 }
210
211 pub fn size_of(&self) -> usize {
225 std::mem::size_of::<Self>()
226 + match &self.inner {
227 SparseVecEnum::Dense(v) => v.len() * std::mem::size_of::<T>(),
228 SparseVecEnum::Sparse(vals, _) => {
229 vals.len()
230 * (std::mem::size_of::<T>()
231 + std::mem::size_of::<usize>())
232 + std::mem::size_of::<usize>()
233 },
234 }
235 }
236
237 pub fn iter(&self) -> SparseVecIter<'_, T> {
249 SparseVecIter {
250 inner: self,
251 index: 0,
252 offset_index: 0,
253 }
254 }
255
256 pub fn is_empty(&self) -> bool {
265 self.len() == 0
266 }
267
268 pub fn is_dense(&self) -> bool {
277 matches!(self.inner, SparseVecEnum::Dense(_))
278 }
279
280 pub fn is_sparse(&self) -> bool {
289 matches!(self.inner, SparseVecEnum::Sparse(_, _))
290 }
291
292 pub fn argsort(&self) -> Vec<usize>
316 where
317 T: Ord + Copy,
318 {
319 match &self.inner {
320 SparseVecEnum::Dense(dense) => {
321 let mut indices = (0..self.len()).collect::<Vec<_>>();
322 indices.sort_by_key(|&a| dense[a]);
323 indices
324 },
325 SparseVecEnum::Sparse(sparse, offsets) => {
326 let mut indices = (0..sparse.len()).collect::<Vec<_>>();
327 indices.sort_by_key(|&a| sparse[a]);
328 let mut result = Vec::with_capacity(self.len());
329 for (i, pos) in indices.iter().enumerate() {
330 let mut offset = offsets[*pos];
331 for _ in offsets[i]..offsets[i + 1] {
332 result.push(offset);
333 offset += 1;
334 }
335 }
336 result
337 },
338 }
339 }
340}
341
342pub struct SparseVecIter<'a, T> {
344 inner: &'a SparseVec<T>,
345 index: usize,
346 offset_index: usize,
347}
348
349impl<T: Copy> Iterator for SparseVecIter<'_, T> {
350 type Item = T;
351
352 fn next(&mut self) -> Option<Self::Item> {
353 match &self.inner.inner {
354 SparseVecEnum::Dense(v) => {
355 if self.index < v.len() {
356 self.index += 1;
357 return Some(v[self.index - 1]);
358 }
359 },
360 SparseVecEnum::Sparse(vals, offsets) => {
361 while self.index < offsets.len() {
362 if self.offset_index < offsets[self.index] {
363 self.offset_index += 1;
364 return Some(vals[self.index - 1]);
365 }
366 self.index += 1;
367 }
368 },
369 }
370 None
371 }
372}
373
374impl<T: Copy> From<SparseVec<T>> for Vec<T> {
375 fn from(sparse: SparseVec<T>) -> Self {
388 sparse.iter().collect()
389 }
390}
391
392impl<T: Copy> From<&SparseVec<T>> for Vec<T> {
393 fn from(sparse: &SparseVec<T>) -> Self {
406 sparse.iter().collect()
407 }
408}
409
410impl<T: Copy + PartialEq> From<Vec<T>> for SparseVec<T> {
411 fn from(vec: Vec<T>) -> Self {
424 let mut sparse = Self {
425 inner: SparseVecEnum::Dense(vec),
426 };
427 sparse.compress();
428 sparse
429 }
430}
431
432pub fn argsort<T: Ord>(vec: &[T]) -> Vec<usize> {
433 let mut indices: Vec<usize> = (0..vec.len()).collect();
434 indices.sort_by_key(|&i| &vec[i]);
435 indices
436}
437
438pub fn group_and_sum<T: Ord + Copy, U: std::ops::Add<Output = U> + Copy>(
439 groups: Vec<T>,
440 values: Vec<U>,
441) -> (Vec<T>, Vec<U>) {
442 if groups.is_empty() {
443 return (vec![], vec![]);
444 }
445 let order: Vec<usize> = argsort(&groups);
446 let mut new_groups: Vec<T> = Vec::with_capacity(order.len());
447 let mut new_values: Vec<U> = Vec::with_capacity(order.len());
448 let mut current_group: T = groups[order[0]];
449 let mut current_value: U = values[order[0]];
450 for &index in &order[1..] {
451 let group: T = groups[index];
452 let value: U = values[index];
453 if group != current_group {
454 new_groups.push(current_group);
455 new_values.push(current_value);
456 current_group = group;
457 current_value = value;
458 } else {
459 current_value = current_value + value;
460 };
461 }
462 new_groups.push(current_group);
463 new_values.push(current_value);
464 (new_groups, new_values)
465}
466
467pub fn find_sparse_local_maxima_mask(
468 indices: &[u32],
469 values: &[u64],
470 window: u32,
471) -> Vec<bool> {
472 let mut local_maxima: Vec<bool> = vec![true; indices.len()];
473 for (index, sparse_index) in indices.iter().enumerate() {
474 let current_intensity: u64 = values[index];
475 for (_next_index, next_sparse_index) in
476 indices[index + 1..].iter().enumerate()
477 {
478 let next_index: usize = _next_index + index + 1;
479 let next_value: u64 = values[next_index];
480 if (next_sparse_index - sparse_index) <= window {
481 if current_intensity < next_value {
482 local_maxima[index] = false
483 } else {
484 local_maxima[next_index] = false
485 }
486 } else {
487 break;
488 }
489 }
490 }
491 local_maxima
492}
493
494pub fn filter_with_mask<T: Copy>(vec: &[T], mask: &[bool]) -> Vec<T> {
495 (0..vec.len())
496 .filter(|&x| mask[x])
497 .map(|x| vec[x])
498 .collect()
499}
500
501pub fn is_strictly_ascending<T: std::cmp::PartialOrd>(vec: &[T]) -> bool {
502 vec.windows(2).all(|w| w[0] < w[1])
503}
504
505pub fn arg_max<T: Ord>(kernel: &[T]) -> Option<usize> {
506 kernel
507 .iter()
508 .enumerate()
509 .max_by(|a, b| a.1.cmp(b.1))
510 .map(|(idx, _)| idx)
511}
512
513pub fn get_top_n<T: PartialOrd>(vec: &[T], n: usize) -> Vec<usize> {
514 let top_n = if n == 0 { vec.len() } else { n.min(vec.len()) };
515 if top_n == vec.len() {
516 return (0..vec.len()).collect();
517 }
518 let mut indexed = vec.iter().enumerate().collect::<Vec<_>>();
519 indexed.select_nth_unstable_by(top_n, |a, b| {
520 b.1.partial_cmp(a.1).unwrap_or(std::cmp::Ordering::Equal)
521 });
522 let mut top_indices =
523 indexed[..top_n].iter().map(|(i, _)| *i).collect::<Vec<_>>();
524 top_indices.sort_unstable();
525 top_indices
526}
527
528pub fn extract_kernel(kernel: &[u64], threshold: f32) -> Option<Vec<f32>> {
529 let max_val = *kernel.iter().max()? as f32;
530 let kernel = kernel
531 .iter()
532 .map(|&x| x as f32 / max_val)
533 .collect::<Vec<_>>();
534 let first = kernel.iter().position(|&x| x > threshold)? - 1;
535 let last = kernel.iter().rposition(|&x| x > threshold)? + 1;
536 let kernel = kernel[first..last + 1].to_vec();
537 Some(kernel)
538}
539
540#[cfg(test)]
541mod tests {
542 use super::*;
543
544 #[test]
545 fn test_dense_push_and_len() {
546 let mut sv = SparseVec::dense();
547 sv.push(1);
548 sv.push(2);
549 assert_eq!(sv.len(), 2);
550 assert_eq!(Vec::from(sv.clone()), vec![1, 2]);
551 }
552
553 #[test]
554 fn test_sparse_push_and_len() {
555 let mut sv = SparseVec::sparse();
556 sv.push(1);
557 sv.push(1);
558 sv.push(2);
559 assert_eq!(sv.len(), 3);
560 assert_eq!(Vec::from(sv.clone()), vec![1, 1, 2]);
561 }
562
563 #[test]
564 fn test_compress_dense_to_sparse() {
565 let mut sv = SparseVec::dense();
566 for i in 0..100 {
567 sv.push(i / 25);
568 }
569 assert!(sv.is_dense());
570 sv.compress();
571 assert!(sv.is_sparse());
572 }
573
574 #[test]
575 fn test_size_of() {
576 let mut sv = SparseVec::dense();
577 for i in 0..100 {
578 sv.push(i / 25);
579 }
580 let dense_size = sv.size_of();
581 sv.compress();
582 let sparse_size = sv.size_of();
583 assert!(sparse_size < dense_size);
584 }
585
586 #[test]
587 fn test_iter_dense() {
588 let mut sv = SparseVec::dense();
589 sv.push(1);
590 sv.push(2);
591 let v: Vec<_> = sv.iter().collect();
592 assert_eq!(v, vec![1, 2]);
593 }
594
595 #[test]
596 fn test_iter_sparse() {
597 let mut sv = SparseVec::sparse();
598 sv.push(1);
599 sv.push(1);
600 sv.push(2);
601 let v: Vec<_> = sv.iter().collect();
602 assert_eq!(v, vec![1, 1, 2]);
603 }
604
605 #[test]
606 fn test_from_vec() {
607 let mut v = Vec::new();
608 for i in 0..123 {
609 v.push(i / 25);
610 }
611 let sv: SparseVec<_> = SparseVec::from(v.clone());
612 assert_eq!(sv.iter().collect::<Vec<_>>(), v);
613 }
614
615 #[test]
616 fn test_empty_sparsevec() {
617 let sv: SparseVec<u32> = SparseVec::sparse();
618 assert_eq!(sv.len(), 0);
619 let v: Vec<_> = sv.iter().collect();
620 assert!(v.is_empty());
621 }
622
623 #[test]
624 fn test_empty_densevec() {
625 let sv: SparseVec<u32> = SparseVec::dense();
626 assert_eq!(sv.len(), 0);
627 let v: Vec<_> = sv.iter().collect();
628 assert!(v.is_empty());
629 }
630}