insertion_set/
lib.rs

1//! Performs a set of batched insertions on a vector.
2//!
3//! [`Vec::insert(index, value)`][Vec::insert] takes `O(n)` time to move internal memory,
4//! so calling it in a loop can cause quadratic blowup.
5//!
6//! If you batch multiple values together with an [`InsertionSet`]
7//! you can defer the expensive movement of the vector's
8//! memory till the of the loop.
9//!
10//! This code was originally copied from the first prototype compiler for [DuckLogic].
11//! It was inspired by the way the [B3 JIT] handles insertions.
12//!
13//! [DuckLogic]: https://ducklogic.org/
14//! [B3 JIT]: https://webkit.org/blog/5852/introducing-the-b3-jit-compiler/
15#![deny(missing_docs)]
16use std::fmt::Debug;
17use std::iter::{ExactSizeIterator, FromIterator};
18use std::ops::Range;
19
20mod shift;
21
22use self::shift::BulkShifter;
23
24/// A value that is pending insertion
25#[derive(Debug)]
26pub struct Insertion<T> {
27    /// Where in the original vector to insert this value.
28    ///
29    /// This is equivelant to the index argument in `Vec::insert`
30    pub index: usize,
31    /// The value to be inserted
32    pub element: T,
33}
34impl<T> Insertion<T> {
35    /// Create a new Insertion
36    #[inline]
37    pub fn new(index: usize, element: T) -> Self {
38        Insertion { index, element }
39    }
40}
41impl<T> From<(usize, T)> for Insertion<T> {
42    #[inline]
43    fn from(tuple: (usize, T)) -> Self {
44        Insertion::new(tuple.0, tuple.1)
45    }
46}
47
48/// A set of pending insertions on a Vec
49///
50/// When multiple insertions at a
51///
52/// See module documentation for an overview.
53pub struct InsertionSet<T> {
54    insertions: Vec<Insertion<T>>,
55}
56impl<T> InsertionSet<T> {
57    /// Create a new InsertionSet
58    #[inline]
59    pub fn new() -> Self {
60        InsertionSet {
61            insertions: Vec::new(),
62        }
63    }
64    /// Queue the specified insertion
65    ///
66    /// If there are multiple insertions at the same index,
67    /// they will be applied in the order queued.
68    #[inline]
69    pub fn push(&mut self, insertion: Insertion<T>) {
70        self.insertions.push(insertion)
71    }
72    /// Insert the element to be inserted before the given index
73    ///
74    /// If multiple elements are queued to be inserted at the same index,
75    /// they will be applied in the original order queued.
76    #[inline]
77    pub fn insert(&mut self, index: usize, element: T) {
78        self.push(Insertion { index, element })
79    }
80    /// Apply all of the pending insertions against the specified vector,
81    /// returning the result
82    #[inline]
83    pub fn applied(mut self, mut target: Vec<T>) -> Vec<T> {
84        self.apply(&mut target);
85        target
86    }
87    /// The number of insertions that are currently queued
88    #[inline]
89    pub fn desired_insertions(&self) -> usize {
90        self.insertions.len()
91    }
92    /// List the updated locations of all the elements (both original and newly inserted).
93    ///
94    /// See [Self::compute_updated_locations] for details
95    pub fn list_updated_locations(&mut self, target: &[T]) -> Vec<(OriginalLocation, usize)> {
96        let mut result = Vec::with_capacity(target.len() + self.desired_insertions());
97        self.compute_updated_locations(target, |original, updated| {
98            result.push((original, updated))
99        });
100        result.sort_by_key(|&(_, updated)| updated);
101        result
102    }
103    /// Compute the updated locations of all the elements (both original and newly inserted).
104    ///
105    /// Assumes this set of insertions are being applied against the specified slice,
106    /// invoking the callback on each element (even if the location is unchanged).
107    ///
108    /// If any of the insertion indexes are out of bounds of the original vec,
109    /// then this function will panic.
110    pub fn compute_updated_locations<F>(&mut self, target: &[T], mut func: F)
111    where
112        F: FnMut(OriginalLocation, usize),
113    {
114        self.sort();
115        compute_updated_locations(
116            target,
117            self.insertions
118                .iter()
119                .rev()
120                .map(|insertion| insertion.index),
121            |original, updated| {
122                func(
123                    match original {
124                        OriginalLocation::Original(_) => original,
125                        OriginalLocation::Insertion(reversed_index) => {
126                            // Convert the reversed insertion index back to the original one
127                            OriginalLocation::Insertion(
128                                self.insertions.len() - (reversed_index + 1),
129                            )
130                        }
131                    },
132                    updated,
133                )
134            },
135        )
136    }
137    /// Applies all the insertions to the specified target vector.
138    ///
139    /// This reuses the Vector's existing memory if possible,
140    /// but may require a reallocation (due to new values)
141    ///
142    /// The average runtime of this function is `O(n + m)`,
143    /// where `n` is the number of existing elements and `m` is the number of insertions.
144    /// The worst case running time is `O((k * log(k))` where `k = n + m`.
145    pub fn apply(&mut self, target: &mut Vec<T>) {
146        self.sort();
147        apply_bulk_insertions(target, PoppingIter(&mut self.insertions));
148    }
149    fn sort(&mut self) {
150        /*
151         * In many scenarios, the input is mostly sorted.
152         * In those cases, insertion sort may be better than std::slice::sort.
153         * When the array is already mostly sorted, insertion sort has average running time `O(nk)`,
154         * where `k` is the average distance of each element from its proper position.
155         *
156         * Previous versions of this code used insertion sort for this reason,
157         * because we expected the input to be mostly sorted in the compiler.
158         * However, now that we are a library, we want to support as many use cases as possible.
159         * The documentation guarantees linear behavior,
160         * and we don't want to have quadratic blowup if the user has unexpected input.
161         * See issue #1 for details.
162         *
163         * The big advantage of insertion sort over std::slice::sort is that it avoids allocation.
164         * If the allocation in std::sort becomes a significant performance overhead,
165         * we could try and optimistically perform insertion sort,
166         * falling back to stdlib sort on input that is not already mostly-sorted.
167         * Alternatively, we could try reusing memory or offering the user a choice.
168         */
169        self.insertions.sort_by_key(|insertion| insertion.index);
170    }
171}
172impl<T> FromIterator<Insertion<T>> for InsertionSet<T> {
173    #[inline]
174    fn from_iter<I: IntoIterator<Item = Insertion<T>>>(iter: I) -> Self {
175        InsertionSet {
176            insertions: iter.into_iter().collect(),
177        }
178    }
179}
180impl<T> FromIterator<(usize, T)> for InsertionSet<T> {
181    #[inline]
182    fn from_iter<I: IntoIterator<Item = (usize, T)>>(iter: I) -> Self {
183        iter.into_iter().map(Insertion::from).collect()
184    }
185}
186impl<T> Default for InsertionSet<T> {
187    #[inline]
188    fn default() -> Self {
189        InsertionSet::new()
190    }
191}
192
193struct PoppingIter<'a, T: 'a>(&'a mut Vec<T>);
194impl<'a, T> Iterator for PoppingIter<'a, T> {
195    type Item = T;
196
197    #[inline]
198    fn next(&mut self) -> Option<T> {
199        self.0.pop()
200    }
201
202    #[inline]
203    fn size_hint(&self) -> (usize, Option<usize>) {
204        (self.0.len(), Some(self.0.len()))
205    }
206}
207impl<'a, T> ExactSizeIterator for PoppingIter<'a, T> {}
208
209/// Applies all the specified insertions into the target vector.
210///
211/// The insertion iterator must be sorted in reverse order and give the proper size for its `ExactSizeIterator`.
212/// Violating these constraints will never cause undefined behavior,
213/// since internally we use the completely safe `BulkShifter` abstraction.
214pub fn apply_bulk_insertions<T, I>(target: &mut Vec<T>, mut insertions: I)
215where
216    I: Iterator<Item = Insertion<T>>,
217    I: ExactSizeIterator,
218{
219    let mut shifter = BulkShifter::new(target, insertions.len());
220    /*
221     * We perform insertions in reverse order to reduce moving memory,
222     * and ensure that the function is panic safe.
223     *
224     * For example, given the vector
225     * and the InsertionSet `[(0, 0), (1, 2), (1, 3) (4, 9)]`:
226     *
227     * Since the first (working backwards) insertion is `(4, 9)`,
228     * we need to to shift all elements after our first insertion
229     * to the left 4 places:
230     * `[1, 4, 5, 7, undef, undef, undef, undef, 11]`.
231     * The element `11` will never need to be moved again,
232     * since we've already made room for all future insertions.
233     *
234     * Next, we perform our first insertion (4, 9) at the last `undef` element:
235     * `[1, 4, 5, 7, undef, undef, undef, 9, 11]`.
236     * We only have 3 insertions left to perform,
237     * so all future shifts will only need to move over two.
238     * Then, we handle the group of insertions `[(1, 2), [(1, 3)]`,
239     * and shift all elements past index 1 to the left 3 spaces:
240     * [1, undef, undef, undef, 4, 5, 7, 9, 11].
241     * Then we perform our desired insertions at index 1:
242     * [1, undef, 2, 3, 4, 9, 11].
243     * Finally, we perform the same process for the final insertion (0, 0),
244     * resulting in the desired result: [0, 1, 2, 3, 4, 9, 11].
245     */
246    while !shifter.is_finished() {
247        let Insertion { index, element } = insertions.next().expect("Expected more insertions!");
248        shifter.shift_original(index);
249        shifter.push_shifted(element);
250    }
251    shifter.finish();
252    assert_eq!(insertions.len(), 0, "Unexpected insertions");
253}
254
255/// The original location of an element (before a set of insertions are applied)
256#[derive(Copy, Clone, Debug, Eq, PartialEq)]
257pub enum OriginalLocation {
258    /// The element was a queued insertion with the specified index
259    Insertion(usize),
260    /// The element was originally part of the vector
261    Original(usize),
262}
263
264/// Compute the updated locations of all elements (original + inserted).
265///
266/// See [InsertionSet::compute_updated_locations] for details
267pub fn compute_updated_locations<T, I, F>(target: &[T], mut insertions: I, mut updated: F)
268where
269    I: Iterator<Item = usize>,
270    I: ExactSizeIterator,
271    F: FnMut(OriginalLocation, usize),
272{
273    // This mirrors `apply_bulk_insertions` without actually shifting memory
274    let mut original_len = target.len();
275    let shifted_end = original_len + insertions.len();
276    let mut shifted_start = shifted_end;
277    let mut insertion_id = 0;
278    while original_len != shifted_start {
279        let insertion_index = insertions.next().expect("Expected more insertions!");
280        assert!(
281            insertion_index <= original_len,
282            "Invalid insertion index {} > len {}",
283            insertion_index,
284            original_len
285        );
286        let moved_memory = original_len - insertion_index;
287        if moved_memory > 0 {
288            assert!(
289                shifted_start >= moved_memory && insertion_index <= shifted_start - moved_memory
290            );
291            update_range(
292                insertion_index..original_len,
293                shifted_start - moved_memory,
294                &mut updated,
295            );
296            shifted_start -= moved_memory;
297            original_len = insertion_index;
298        }
299        assert!(shifted_start > original_len);
300        shifted_start -= 1;
301        updated(OriginalLocation::Insertion(insertion_id), shifted_start);
302        insertion_id += 1;
303    }
304    for original_index in 0..original_len {
305        updated(OriginalLocation::Original(original_index), original_index);
306    }
307    assert_eq!(insertions.len(), 0, "Unexpected insertions");
308}
309#[inline]
310fn update_range<F: FnMut(OriginalLocation, usize)>(
311    original: Range<usize>,
312    updated_start: usize,
313    func: &mut F,
314) {
315    let mut updated = updated_start;
316    for original_index in original {
317        func(OriginalLocation::Original(original_index), updated);
318        updated += 1;
319    }
320}
321
322#[cfg(test)]
323mod test {
324    use super::*;
325    #[test]
326    fn basic() {
327        /*
328         * For example, given the vector `[1, 4, 5, 7, 11]`
329         * and the InsertionSet `[(0, 0), (1, 2), (1, 3) (4, 9)]`:
330         */
331        let vector = vec![1, 4, 5, 7, 11];
332        let insertions = [(0, 0), (1, 2), (1, 3), (4, 9)]
333            .iter()
334            .cloned()
335            .collect::<InsertionSet<u32>>();
336        assert_eq!(insertions.applied(vector), vec![0, 1, 2, 3, 4, 5, 7, 9, 11]);
337    }
338    #[test]
339    fn updated_locations() {
340        /*
341         * For example, given the vector `[1, 4, 5, 7, 11]`
342         * and the InsertionSet `[(0, 0), (1, 2), (1, 3) (4, 9)]`:
343         */
344        let vector = vec![1, 4, 5, 7, 11];
345        let mut insertions = [(0, 0), (1, 2), (1, 3), (4, 9)]
346            .iter()
347            .cloned()
348            .collect::<InsertionSet<u32>>();
349        assert_eq!(
350            insertions.list_updated_locations(&vector),
351            vec![
352                (OriginalLocation::Insertion(0), 0),
353                (OriginalLocation::Original(0), 1),
354                (OriginalLocation::Insertion(1), 2),
355                (OriginalLocation::Insertion(2), 3),
356                (OriginalLocation::Original(1), 4),
357                (OriginalLocation::Original(2), 5),
358                (OriginalLocation::Original(3), 6),
359                (OriginalLocation::Insertion(3), 7),
360                (OriginalLocation::Original(4), 8),
361            ]
362        );
363    }
364    #[test]
365    fn empty_updated_locations() {
366        let vector = vec![1, 4, 5, 7, 11];
367        assert_eq!(
368            InsertionSet::new().list_updated_locations(&vector),
369            vec![
370                (OriginalLocation::Original(0), 0),
371                (OriginalLocation::Original(1), 1),
372                (OriginalLocation::Original(2), 2),
373                (OriginalLocation::Original(3), 3),
374                (OriginalLocation::Original(4), 4),
375            ]
376        );
377    }
378}