prefix_sum_vec/
lib.rs

1#![doc = include_str!("../README.mkd")]
2
3/// A data structure for space-efficient storage of repeating sequences.
4///
5/// Much like [`Vec`], this is a growable array-like type. This data structure is capaable of
6/// storing a contiguous sequence of repeating values only once, thus “compressing” the
7/// representation of the sequence. Conversely index lookups are `O(log n)`, rather than `O(1)` as
8/// provided by `Vec`.
9///
10/// Note, however, that this data structure can become less space-efficient than a plain `Vec` if
11/// the data is incompressible.
12///
13/// [`Vec`]: std::vec::Vec
14///
15/// # Examples
16///
17/// ```
18/// let mut prefix_sum_vec = prefix_sum_vec::PrefixSumVec::new();
19///
20/// prefix_sum_vec.try_push_many(12, "strings").expect("could not push");
21/// assert_eq!(prefix_sum_vec.max_index(), Some(&11));
22///
23/// assert_eq!(prefix_sum_vec[0], "strings");
24/// assert_eq!(prefix_sum_vec[11], "strings");
25/// ```
26#[derive(Debug)]
27pub struct PrefixSumVec<T, Idx = usize> {
28    /// Keys between ((keys[n-1] + 1) or 0) and keys[n] (both included) have value values[n]
29    indices: Vec<Idx>,
30    values: Vec<T>,
31}
32
33impl<T, Idx> PrefixSumVec<T, Idx> {
34    /// Create a new, empty [`PrefixSumVec`].
35    ///
36    /// Does not allocate.
37    pub fn new() -> Self {
38        Self {
39            indices: vec![],
40            values: vec![],
41        }
42    }
43
44    /// Clears the data structure, removing all contained values.
45    ///
46    /// This method has no effect on the allocated capacity. Due to the compressed representation,
47    /// a repeating value in a contiguous sequence is dropped only once.
48    pub fn clear(&mut self) {
49        // TODO: needs panic safety.
50        self.indices.clear();
51        self.values.clear();
52    }
53
54    /// Get the current maximum index that can be used for indexing this data structure.
55    ///
56    /// Will return `None` while this data structure contains no elements. In practice this is the
57    /// number of elements contained within data structure minus one.
58    ///
59    /// **Complexity**: `O(1)`
60    pub fn max_index(&self) -> Option<&Idx> {
61        self.indices.last()
62    }
63
64    fn grow_if_necessary(&mut self) -> Result<(), std::collections::TryReserveError> {
65        if self.values.len() == self.values.capacity() {
66            self.values.try_reserve(1)?;
67        }
68        if self.indices.len() == self.indices.capacity() {
69            self.values.try_reserve(1)?;
70        }
71        Ok(())
72    }
73}
74
75impl<T, Idx: Ord> PrefixSumVec<T, Idx> {
76    /// Find the value by an index.
77    ///
78    /// If the value at this index is not present in the data structure, `None` is returned.
79    ///
80    /// **Complexity**: `O(log n)`
81    pub fn get(&self, index: &Idx) -> Option<&T> {
82        match self.indices.binary_search(index) {
83            // If this index would be inserted at the end of the list, then the
84            // index is out of bounds and we return a None.
85            //
86            // If `Ok` is returned we found the index exactly, or if `Err` is
87            // returned the position is the one which is the least index
88            // greater than `idx`, which is still the type of `idx` according
89            // to our "compressed" representation. In both cases we access the
90            // list at index `i`.
91            Ok(i) | Err(i) => self.values.get(i),
92        }
93    }
94}
95
96impl<T, Idx: Index> PrefixSumVec<T, Idx> {
97
98    fn new_index(&self, additional_count: Idx) -> Result<Idx, TryPushError> {
99        match self.max_index() {
100            Some(current_max) => additional_count
101                .checked_add(current_max)
102                .ok_or(TryPushError::Overflow),
103            None => Ok(additional_count.decrement()),
104        }
105    }
106
107    /// Append `count` copies of a `value` to the back of the collection.
108    ///
109    /// This method will not inspect the values currently stored in the data structure in search
110    /// for further compression opportunities. Instead the provided value will be stored compressed
111    /// as-if a sequence of `count` repeating `value`s were provided.
112    ///
113    /// **Complexity**: `O(1)` amortized.
114    pub fn try_push_many(&mut self, count: Idx, value: T) -> Result<(), TryPushError> {
115        if count.is_zero() {
116            return Ok(());
117        }
118        let new_index = self.new_index(count)?;
119        self.grow_if_necessary().map_err(TryPushError::Reserve)?;
120        self.indices.push(new_index);
121        self.values.push(value);
122        Ok(())
123    }
124}
125
126impl<T: PartialEq, Idx: Index> PrefixSumVec<T, Idx> {
127    /// Append `count` copies of a `value` to the back of the collection, attempting compression.
128    ///
129    /// This method will not inspect the values currently stored in the data structure in search
130    /// for further compression opportunities. Instead the provided value will be stored compressed
131    /// as-if a sequence of `count` repeating `value`s were provided.
132    ///
133    /// **Complexity**: `O(1)` amortized.
134    pub fn try_push_more(&mut self, count: Idx, value: T) -> Result<(), TryPushError> {
135        if count.is_zero() {
136            return Ok(());
137        }
138        if let Some(lastval) = self.values.last_mut() {
139            if PartialEq::eq(&*lastval, &value) {
140                // We can "just" increment the index.
141                let new_index = self.new_index(count)?;
142                let old_index = self.indices.pop();
143                self.indices.push(new_index);
144                // This drop gives some panic safety. We don’t end up with oddly mismatching
145                // index and value array lengths if dropping the `Idx` panics.
146                drop(old_index);
147                return Ok(());
148            }
149        }
150        self.try_push_many(count, value)
151    }
152}
153
154/// A type suitable for indexing into [`PrefixSumVec`].
155pub trait Index: Sized {
156    /// Is the value `zero`?
157    fn is_zero(&self) -> bool;
158    /// Decrement the index by one.
159    ///
160    /// This will never be called for values for which `is_zero` returns `true`.
161    fn decrement(self) -> Self;
162    /// Add the two indices together, checking for overflow.
163    ///
164    /// In case an overflow occurs, this must return `None`.
165    fn checked_add(self, other: &Self) -> Option<Self>;
166}
167
168macro_rules! impl_primitive_index {
169    ($($ty: ty),*) => {
170        $(impl Index for $ty {
171            fn is_zero(&self) -> bool { *self == 0 }
172            fn decrement(self) -> Self { self - 1 }
173            fn checked_add(self, other: &Self) -> Option<Self> { self.checked_add(*other) }
174        })*
175    }
176}
177
178impl_primitive_index!(u8, u16, u32, u64, u128, usize);
179impl_primitive_index!(i8, i16, i32, i64, i128, isize);
180
181impl<K, Idx: Ord> std::ops::Index<Idx> for PrefixSumVec<K, Idx> {
182    type Output = K;
183
184    fn index(&self, index: Idx) -> &Self::Output {
185        self.get(&index).expect("index out of range")
186    }
187}
188
189impl<K, Idx: Ord> std::ops::Index<&Idx> for PrefixSumVec<K, Idx> {
190    type Output = K;
191
192    fn index(&self, index: &Idx) -> &Self::Output {
193        self.get(index).expect("index out of range")
194    }
195}
196
197#[non_exhaustive]
198#[derive(Debug, PartialEq, Eq, Clone)]
199pub enum TryPushError {
200    /// Reserving the additional space for the storage has failed.
201    Reserve(std::collections::TryReserveError),
202    /// The index cannot contain the values required to represent the count of values.
203    Overflow,
204}
205
206impl std::error::Error for TryPushError {}
207impl std::fmt::Display for TryPushError {
208    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209        f.write_str(match self {
210            TryPushError::Reserve(_) => "could not reserve additional capacity",
211            TryPushError::Overflow => "the index type overflowed",
212        })
213    }
214}
215
216// TODO: should we instead process the element so that it returns (count, value) rather than
217// (prefix_sum, value)?
218pub struct CompressedIter<'a, T, Idx> {
219    inner: std::iter::Zip<std::slice::Iter<'a, Idx>, std::slice::Iter<'a, T>>,
220}
221
222impl<'a, T, Idx> Iterator for CompressedIter<'a, T, Idx> {
223    type Item = (&'a Idx, &'a T);
224
225    fn next(&mut self) -> Option<Self::Item> {
226        self.inner.next()
227    }
228
229    fn size_hint(&self) -> (usize, Option<usize>) {
230        self.inner.size_hint()
231    }
232
233    fn count(self) -> usize {
234        self.inner.count()
235    }
236
237    fn last(self) -> Option<Self::Item> {
238        self.inner.last()
239    }
240
241    fn nth(&mut self, n: usize) -> Option<Self::Item> {
242        self.inner.nth(n)
243    }
244
245    fn for_each<F: FnMut(Self::Item)>(self, f: F) {
246        self.inner.for_each(f)
247    }
248
249    fn collect<B: std::iter::FromIterator<Self::Item>>(self) -> B {
250        self.inner.collect()
251    }
252
253    fn partition<B, F>(self, f: F) -> (B, B)
254    where
255        B: Default + Extend<Self::Item>,
256        F: FnMut(&Self::Item) -> bool,
257    {
258        self.inner.partition(f)
259    }
260
261    fn fold<B, F>(self, init: B, f: F) -> B
262    where
263        F: FnMut(B, Self::Item) -> B,
264    {
265        self.inner.fold(init, f)
266    }
267
268    fn reduce<F>(self, f: F) -> Option<Self::Item>
269    where
270        F: FnMut(Self::Item, Self::Item) -> Self::Item,
271    {
272        self.inner.reduce(f)
273    }
274
275    fn all<F>(&mut self, f: F) -> bool
276    where
277        F: FnMut(Self::Item) -> bool,
278    {
279        self.inner.all(f)
280    }
281
282    fn any<F: FnMut(Self::Item) -> bool>(&mut self, f: F) -> bool {
283        self.inner.any(f)
284    }
285
286    fn find<P: FnMut(&Self::Item) -> bool>(&mut self, predicate: P) -> Option<Self::Item> {
287        self.inner.find(predicate)
288    }
289
290    fn find_map<B, F: FnMut(Self::Item) -> Option<B>>(&mut self, f: F) -> Option<B> {
291        self.inner.find_map(f)
292    }
293
294    fn position<P: FnMut(Self::Item) -> bool>(&mut self, predicate: P) -> Option<usize> {
295        self.inner.position(predicate)
296    }
297
298    fn rposition<P: FnMut(Self::Item) -> bool>(&mut self, predicate: P) -> Option<usize> {
299        self.inner.rposition(predicate)
300    }
301
302    fn max(self) -> Option<Self::Item>
303    where
304        Self::Item: Ord,
305    {
306        self.inner.max()
307    }
308
309    fn min(self) -> Option<Self::Item>
310    where
311        Self::Item: Ord,
312    {
313        self.inner.min()
314    }
315
316    fn max_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
317        self.inner.max_by_key(f)
318    }
319
320    fn max_by<F>(self, compare: F) -> Option<Self::Item>
321    where
322        F: FnMut(&Self::Item, &Self::Item) -> std::cmp::Ordering,
323    {
324        self.inner.max_by(compare)
325    }
326
327    fn min_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
328        self.inner.min_by_key(f)
329    }
330
331    fn min_by<F>(self, compare: F) -> Option<Self::Item>
332    where
333        F: FnMut(&Self::Item, &Self::Item) -> std::cmp::Ordering,
334    {
335        self.inner.min_by(compare)
336    }
337
338    fn sum<S: std::iter::Sum<Self::Item>>(self) -> S {
339        self.inner.sum()
340    }
341
342    fn product<P: std::iter::Product<Self::Item>>(self) -> P {
343        self.inner.product()
344    }
345
346    fn cmp<I>(self, other: I) -> std::cmp::Ordering
347    where
348        I: IntoIterator<Item = Self::Item>,
349        Self::Item: Ord,
350    {
351        self.inner.cmp(other)
352    }
353
354    fn partial_cmp<I>(self, other: I) -> Option<std::cmp::Ordering>
355    where
356        I: IntoIterator,
357        Self::Item: PartialOrd<I::Item>,
358    {
359        self.inner.partial_cmp(other)
360    }
361
362    fn eq<I>(self, other: I) -> bool
363    where
364        I: IntoIterator,
365        Self::Item: PartialEq<I::Item>,
366    {
367        self.inner.eq(other)
368    }
369
370    fn ne<I>(self, other: I) -> bool
371    where
372        I: IntoIterator,
373        Self::Item: PartialEq<I::Item>,
374    {
375        self.inner.ne(other)
376    }
377
378    fn lt<I>(self, other: I) -> bool
379    where
380        I: IntoIterator,
381        Self::Item: PartialOrd<I::Item>,
382    {
383        self.inner.lt(other)
384    }
385
386    fn le<I>(self, other: I) -> bool
387    where
388        I: IntoIterator,
389        Self::Item: PartialOrd<I::Item>,
390        Self: Sized,
391    {
392        self.inner.le(other)
393    }
394
395    fn gt<I>(self, other: I) -> bool
396    where
397        I: IntoIterator,
398        Self::Item: PartialOrd<I::Item>,
399    {
400        self.inner.gt(other)
401    }
402
403    fn ge<I>(self, other: I) -> bool
404    where
405        I: IntoIterator,
406        Self::Item: PartialOrd<I::Item>,
407    {
408        self.inner.ge(other)
409    }
410}
411
412impl<'a, T, Idx> DoubleEndedIterator for CompressedIter<'a, T, Idx> {
413    fn next_back(&mut self) -> Option<Self::Item> {
414        self.inner.next_back()
415    }
416}
417
418impl<'a, T, Idx> ExactSizeIterator for CompressedIter<'a, T, Idx> {
419    fn len(&self) -> usize {
420        self.inner.len()
421    }
422}
423
424impl<'a, T, Idx> std::iter::FusedIterator for CompressedIter<'a, T, Idx> {}
425impl<'a, T, Idx> CompressedIter<'a, T, Idx> {
426    #[allow(dead_code)]
427    fn _assert_fused_iterator(mut self) {
428        fn is_fused<T: std::iter::FusedIterator>(_: T) {}
429        is_fused(&mut self.inner);
430    }
431}
432
433impl<'a, T, Idx> IntoIterator for &'a PrefixSumVec<T, Idx> {
434    type Item = (&'a Idx, &'a T);
435    type IntoIter = CompressedIter<'a, T, Idx>;
436    fn into_iter(self) -> Self::IntoIter {
437        CompressedIter {
438            inner: self.indices.iter().zip(self.values.iter()),
439        }
440    }
441}
442
443// TODO: should we implement something like this (an iterator that “decompresses” the
444// CompressedIter)
445// impl<'a, T, Idx: Clone> CompressedIter<'a, T, Idx> {
446//     fn expand(mut self) -> Iter<'a, T, Idx> {
447//         Iter {
448//             last: self.next().map(|(idx, val)| (idx.clone(), val)),
449//             inner: self,
450//         }
451//     }
452// }
453
454
455#[cfg(test)]
456mod tests {
457    use super::{PrefixSumVec, TryPushError};
458
459    #[test]
460    fn empty_partial_map() {
461        let map = PrefixSumVec::<u32, u32>::new();
462        assert_eq!(None, map.get(&0));
463        assert_eq!(None, map.max_index());
464    }
465
466    #[test]
467    fn basic_function() {
468        let mut map = PrefixSumVec::<u32, u32>::new();
469        assert_eq!(None, map.max_index());
470        for i in 0..10 {
471            map.try_push_many(1, i).unwrap();
472            assert_eq!(Some(&i), map.max_index());
473        }
474        for i in 0..10 {
475            assert_eq!(Some(&i), map.get(&i));
476        }
477        assert_eq!(None, map.get(&10));
478        assert_eq!(None, map.get(&0xFFFF_FFFF));
479    }
480
481    #[test]
482    fn zero_count() {
483        let mut map = PrefixSumVec::<u32, u32>::new();
484        assert_eq!(Ok(()), map.try_push_many(0, 0));
485        assert_eq!(None, map.max_index());
486        assert_eq!(Ok(()), map.try_push_many(10, 42));
487        assert_eq!(Some(&9), map.max_index());
488        assert_eq!(Ok(()), map.try_push_many(0, 43));
489        assert_eq!(Some(&9), map.max_index());
490    }
491
492    #[test]
493    fn close_to_limit() {
494        let mut map = PrefixSumVec::<u32, u32>::new();
495        assert_eq!(Ok(()), map.try_push_many(0xFFFF_FFFE, 42));
496        // we added values 0..=0xFFFF_FFFD
497        assert_eq!(Some(&42), map.get(&0xFFFF_FFFD));
498        assert_eq!(None, map.get(&0xFFFF_FFFE));
499
500        assert_eq!(Err(TryPushError::Overflow), map.try_push_many(100, 93));
501        // overflowing does not change the map
502        assert_eq!(Some(&42), map.get(&0xFFFF_FFFD));
503        assert_eq!(None, map.get(&0xFFFF_FFFE));
504
505        assert_eq!(Ok(()), map.try_push_many(1, 322));
506        // we added value at index 0xFFFF_FFFE (which is the 0xFFFF_FFFFth value)
507        assert_eq!(Some(&42), map.get(&0xFFFF_FFFD));
508        assert_eq!(Some(&322), map.get(&0xFFFF_FFFE));
509        assert_eq!(None, map.get(&0xFFFF_FFFF));
510
511        assert_eq!(Err(TryPushError::Overflow), map.try_push_many(2, 1234));
512        // can't add that much more stuff...
513        assert_eq!(Some(&42), map.get(&0xFFFF_FFFD));
514        assert_eq!(Some(&322), map.get(&0xFFFF_FFFE));
515        assert_eq!(Some(&0xFFFF_FFFE), map.max_index());
516        assert_eq!(None, map.get(&0xFFFF_FFFF));
517
518        assert_eq!(Ok(()), map.try_push_many(1, 1234));
519        // but we can add just one more value still.
520        assert_eq!(Some(&42), map.get(&0xFFFF_FFFD));
521        assert_eq!(Some(&322), map.get(&0xFFFF_FFFE));
522        assert_eq!(Some(&0xFFFF_FFFF), map.max_index());
523        assert_eq!(Some(&1234), map.get(&0xFFFF_FFFF));
524
525        assert_eq!(Ok(()), map.try_push_many(0, 12345));
526        // no more capacity.
527        assert_eq!(Err(TryPushError::Overflow), map.try_push_many(1, 12345));
528    }
529
530    #[test]
531    fn try_push_more() {
532        let mut map = PrefixSumVec::<u32, u32>::new();
533        assert_eq!(Ok(()), map.try_push_more(0, 0));
534        assert_eq!(None, map.max_index());
535        assert_eq!(Ok(()), map.try_push_more(10, 42));
536        assert_eq!(Some(&9), map.max_index());
537        assert_eq!(Ok(()), map.try_push_more(0, 43));
538        assert_eq!(Some(&9), map.max_index());
539        assert_eq!(Ok(()), map.try_push_more(10, 42));
540        assert_eq!(Some(&19), map.max_index());
541        assert_eq!(Ok(()), map.try_push_more(10, 44));
542        assert_eq!(Some(&29), map.max_index());
543        let representation = map.into_iter().map(|(&a, &b)| (a, b)).collect::<Vec<_>>();
544        assert_eq!(vec![(19, 42), (29, 44)], representation);
545    }
546}