flatcontainer/impls/
deduplicate.rs

1//! Simple deduplication of equal consecutive items.
2
3#[cfg(feature = "serde")]
4use serde::{Deserialize, Serialize};
5
6use crate::impls::offsets::{OffsetContainer, OffsetOptimized};
7use crate::{Push, Region, ReserveItems};
8
9/// A region to deduplicate consecutive equal items.
10///
11/// # Examples
12///
13/// The following example shows that two inserts can result in the same index.
14/// ```
15/// use flatcontainer::impls::deduplicate::CollapseSequence;
16/// use flatcontainer::{Push, StringRegion};
17/// let mut r = <CollapseSequence<StringRegion>>::default();
18///
19/// assert_eq!(r.push("abc"), r.push("abc"));
20/// ```
21#[derive(Debug)]
22#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
23pub struct CollapseSequence<R: Region> {
24    /// Inner region.
25    inner: R,
26    /// The index of the last pushed item.
27    last_index: Option<R::Index>,
28}
29
30impl<R: Region + Clone> Clone for CollapseSequence<R> {
31    fn clone(&self) -> Self {
32        Self {
33            inner: self.inner.clone(),
34            last_index: self.last_index,
35        }
36    }
37
38    fn clone_from(&mut self, source: &Self) {
39        self.inner.clone_from(&source.inner);
40        self.last_index = source.last_index;
41    }
42}
43
44impl<R: Region> Default for CollapseSequence<R> {
45    fn default() -> Self {
46        Self {
47            inner: R::default(),
48            last_index: None,
49        }
50    }
51}
52
53impl<R: Region> Region for CollapseSequence<R> {
54    type Owned = R::Owned;
55    type ReadItem<'a> = R::ReadItem<'a> where Self: 'a;
56    type Index = R::Index;
57
58    fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
59    where
60        Self: 'a,
61    {
62        Self {
63            inner: R::merge_regions(regions.map(|r| &r.inner)),
64            last_index: None,
65        }
66    }
67
68    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
69        self.inner.index(index)
70    }
71
72    fn reserve_regions<'a, I>(&mut self, regions: I)
73    where
74        Self: 'a,
75        I: Iterator<Item = &'a Self> + Clone,
76    {
77        self.inner.reserve_regions(regions.map(|r| &r.inner));
78    }
79
80    fn clear(&mut self) {
81        self.inner.clear();
82        self.last_index = None;
83    }
84
85    fn heap_size<F: FnMut(usize, usize)>(&self, callback: F) {
86        self.inner.heap_size(callback);
87    }
88
89    fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b>
90    where
91        Self: 'a,
92    {
93        R::reborrow(item)
94    }
95}
96
97impl<R, T> Push<T> for CollapseSequence<R>
98where
99    R: Region + Push<T>,
100    for<'a> T: PartialEq<R::ReadItem<'a>>,
101{
102    fn push(&mut self, item: T) -> <CollapseSequence<R> as Region>::Index {
103        if let Some(last_index) = self.last_index {
104            if item == self.inner.index(last_index) {
105                return last_index;
106            }
107        }
108        let index = self.inner.push(item);
109        self.last_index = Some(index);
110        index
111    }
112}
113
114/// Transform an index of `(usize, usize)` to a sequence of `0..`. Requires the pairs to
115/// be dense, i.e., `(i, j)` is followed by `(j, k)`.
116///
117/// Defers to region `R` for storing items, and uses offset container `O` to
118/// remeber indices. By default, `O` is `Vec<usize>`.
119///
120/// # Examples
121///
122/// The following example shows that two inserts into a copy region have a collapsible index:
123/// ```
124/// use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs};
125/// use flatcontainer::{Push, OwnedRegion, Region, StringRegion};
126/// let mut r = <ConsecutiveOffsetPairs<OwnedRegion<u8>>>::default();
127///
128/// let index: usize = r.push(&b"abc");
129/// assert_eq!(b"abc", r.index(index));
130/// ```
131#[derive(Debug)]
132#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
133pub struct ConsecutiveOffsetPairs<R, O = OffsetOptimized> {
134    /// Wrapped region
135    inner: R,
136    /// Storage for offsets. Always stores element 0.
137    offsets: O,
138    /// The most recent end of the index pair of region `R`.
139    last_index: usize,
140}
141
142impl<R: Clone, O: Clone> Clone for ConsecutiveOffsetPairs<R, O> {
143    fn clone(&self) -> Self {
144        Self {
145            inner: self.inner.clone(),
146            offsets: self.offsets.clone(),
147            last_index: self.last_index,
148        }
149    }
150
151    fn clone_from(&mut self, source: &Self) {
152        self.inner.clone_from(&source.inner);
153        self.offsets.clone_from(&source.offsets);
154        self.last_index = source.last_index;
155    }
156}
157
158impl<R, O> Default for ConsecutiveOffsetPairs<R, O>
159where
160    R: Default,
161    O: OffsetContainer<usize>,
162{
163    #[inline]
164    fn default() -> Self {
165        let mut d = Self {
166            inner: Default::default(),
167            offsets: Default::default(),
168            last_index: 0,
169        };
170        d.offsets.push(0);
171        d
172    }
173}
174
175impl<R, O> Region for ConsecutiveOffsetPairs<R, O>
176where
177    R: Region<Index = (usize, usize)>,
178    O: OffsetContainer<usize>,
179{
180    type Owned = R::Owned;
181    type ReadItem<'a> = R::ReadItem<'a>
182    where
183        Self: 'a;
184
185    type Index = usize;
186
187    #[inline]
188    fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
189    where
190        Self: 'a,
191    {
192        let mut offsets = O::default();
193        offsets.push(0);
194        Self {
195            inner: R::merge_regions(regions.map(|r| &r.inner)),
196            offsets,
197            last_index: 0,
198        }
199    }
200
201    #[inline]
202    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
203        self.inner
204            .index((self.offsets.index(index), self.offsets.index(index + 1)))
205    }
206
207    #[inline]
208    fn reserve_regions<'a, I>(&mut self, regions: I)
209    where
210        Self: 'a,
211        I: Iterator<Item = &'a Self> + Clone,
212    {
213        self.inner.reserve_regions(regions.map(|r| &r.inner));
214    }
215
216    #[inline]
217    fn clear(&mut self) {
218        self.last_index = 0;
219        self.inner.clear();
220        self.offsets.clear();
221        self.offsets.push(0);
222    }
223
224    #[inline]
225    fn heap_size<F: FnMut(usize, usize)>(&self, mut callback: F) {
226        self.offsets.heap_size(&mut callback);
227        self.inner.heap_size(callback);
228    }
229
230    #[inline]
231    fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b>
232    where
233        Self: 'a,
234    {
235        R::reborrow(item)
236    }
237}
238
239impl<R, O, T> Push<T> for ConsecutiveOffsetPairs<R, O>
240where
241    R: Region<Index = (usize, usize)> + Push<T>,
242    O: OffsetContainer<usize>,
243{
244    #[inline]
245    fn push(&mut self, item: T) -> <ConsecutiveOffsetPairs<R, O> as Region>::Index {
246        let index = self.inner.push(item);
247        debug_assert_eq!(index.0, self.last_index);
248        self.last_index = index.1;
249        self.offsets.push(index.1);
250        self.offsets.len() - 2
251    }
252}
253
254impl<R, O, T> ReserveItems<T> for ConsecutiveOffsetPairs<R, O>
255where
256    R: Region<Index = (usize, usize)> + ReserveItems<T>,
257    O: OffsetContainer<usize>,
258{
259    fn reserve_items<I>(&mut self, items: I)
260    where
261        I: Iterator<Item = T> + Clone,
262    {
263        self.inner.reserve_items(items);
264    }
265}
266
267#[cfg(test)]
268mod tests {
269    use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs};
270    use crate::impls::offsets::OffsetOptimized;
271    use crate::{FlatStack, Push, StringRegion};
272
273    #[test]
274    fn test_dedup_flatstack() {
275        let mut fs = FlatStack::<CollapseSequence<StringRegion>>::default();
276
277        fs.copy("abc");
278        fs.copy("abc");
279
280        assert_eq!(2, fs.len());
281
282        println!("{fs:?}");
283    }
284
285    #[test]
286    fn test_dedup_region() {
287        let mut r = CollapseSequence::<StringRegion>::default();
288
289        assert_eq!(r.push("abc"), r.push("abc"));
290
291        println!("{r:?}");
292    }
293
294    #[test]
295    fn test_offset_optimized() {
296        let mut r =
297            CollapseSequence::<ConsecutiveOffsetPairs<StringRegion, OffsetOptimized>>::default();
298
299        for _ in 0..1000 {
300            let _ = r.push("abc");
301        }
302
303        println!("{r:?}");
304    }
305}