insertion_set/lib.rs
1//! Performs a set of batched insertions on a vector.
2//!
3//! [`Vec::insert(index, value)`][Vec::insert] takes `O(n)` time to move internal memory,
4//! so calling it in a loop can cause quadratic blowup.
5//!
6//! If you batch multiple values together with an [`InsertionSet`]
7//! you can defer the expensive movement of the vector's
8//! memory till the of the loop.
9//!
10//! This code was originally copied from the first prototype compiler for [DuckLogic].
11//! It was inspired by the way the [B3 JIT] handles insertions.
12//!
13//! [DuckLogic]: https://ducklogic.org/
14//! [B3 JIT]: https://webkit.org/blog/5852/introducing-the-b3-jit-compiler/
15#![deny(missing_docs)]
16use std::fmt::Debug;
17use std::iter::{ExactSizeIterator, FromIterator};
18use std::ops::Range;
19
20mod shift;
21
22use self::shift::BulkShifter;
23
24/// A value that is pending insertion
25#[derive(Debug)]
26pub struct Insertion<T> {
27 /// Where in the original vector to insert this value.
28 ///
29 /// This is equivelant to the index argument in `Vec::insert`
30 pub index: usize,
31 /// The value to be inserted
32 pub element: T,
33}
34impl<T> Insertion<T> {
35 /// Create a new Insertion
36 #[inline]
37 pub fn new(index: usize, element: T) -> Self {
38 Insertion { index, element }
39 }
40}
41impl<T> From<(usize, T)> for Insertion<T> {
42 #[inline]
43 fn from(tuple: (usize, T)) -> Self {
44 Insertion::new(tuple.0, tuple.1)
45 }
46}
47
48/// A set of pending insertions on a Vec
49///
50/// When multiple insertions at a
51///
52/// See module documentation for an overview.
53pub struct InsertionSet<T> {
54 insertions: Vec<Insertion<T>>,
55}
56impl<T> InsertionSet<T> {
57 /// Create a new InsertionSet
58 #[inline]
59 pub fn new() -> Self {
60 InsertionSet {
61 insertions: Vec::new(),
62 }
63 }
64 /// Queue the specified insertion
65 ///
66 /// If there are multiple insertions at the same index,
67 /// they will be applied in the order queued.
68 #[inline]
69 pub fn push(&mut self, insertion: Insertion<T>) {
70 self.insertions.push(insertion)
71 }
72 /// Insert the element to be inserted before the given index
73 ///
74 /// If multiple elements are queued to be inserted at the same index,
75 /// they will be applied in the original order queued.
76 #[inline]
77 pub fn insert(&mut self, index: usize, element: T) {
78 self.push(Insertion { index, element })
79 }
80 /// Apply all of the pending insertions against the specified vector,
81 /// returning the result
82 #[inline]
83 pub fn applied(mut self, mut target: Vec<T>) -> Vec<T> {
84 self.apply(&mut target);
85 target
86 }
87 /// The number of insertions that are currently queued
88 #[inline]
89 pub fn desired_insertions(&self) -> usize {
90 self.insertions.len()
91 }
92 /// List the updated locations of all the elements (both original and newly inserted).
93 ///
94 /// See [Self::compute_updated_locations] for details
95 pub fn list_updated_locations(&mut self, target: &[T]) -> Vec<(OriginalLocation, usize)> {
96 let mut result = Vec::with_capacity(target.len() + self.desired_insertions());
97 self.compute_updated_locations(target, |original, updated| {
98 result.push((original, updated))
99 });
100 result.sort_by_key(|&(_, updated)| updated);
101 result
102 }
103 /// Compute the updated locations of all the elements (both original and newly inserted).
104 ///
105 /// Assumes this set of insertions are being applied against the specified slice,
106 /// invoking the callback on each element (even if the location is unchanged).
107 ///
108 /// If any of the insertion indexes are out of bounds of the original vec,
109 /// then this function will panic.
110 pub fn compute_updated_locations<F>(&mut self, target: &[T], mut func: F)
111 where
112 F: FnMut(OriginalLocation, usize),
113 {
114 self.sort();
115 compute_updated_locations(
116 target,
117 self.insertions
118 .iter()
119 .rev()
120 .map(|insertion| insertion.index),
121 |original, updated| {
122 func(
123 match original {
124 OriginalLocation::Original(_) => original,
125 OriginalLocation::Insertion(reversed_index) => {
126 // Convert the reversed insertion index back to the original one
127 OriginalLocation::Insertion(
128 self.insertions.len() - (reversed_index + 1),
129 )
130 }
131 },
132 updated,
133 )
134 },
135 )
136 }
137 /// Applies all the insertions to the specified target vector.
138 ///
139 /// This reuses the Vector's existing memory if possible,
140 /// but may require a reallocation (due to new values)
141 ///
142 /// The average runtime of this function is `O(n + m)`,
143 /// where `n` is the number of existing elements and `m` is the number of insertions.
144 /// The worst case running time is `O((k * log(k))` where `k = n + m`.
145 pub fn apply(&mut self, target: &mut Vec<T>) {
146 self.sort();
147 apply_bulk_insertions(target, PoppingIter(&mut self.insertions));
148 }
149 fn sort(&mut self) {
150 /*
151 * In many scenarios, the input is mostly sorted.
152 * In those cases, insertion sort may be better than std::slice::sort.
153 * When the array is already mostly sorted, insertion sort has average running time `O(nk)`,
154 * where `k` is the average distance of each element from its proper position.
155 *
156 * Previous versions of this code used insertion sort for this reason,
157 * because we expected the input to be mostly sorted in the compiler.
158 * However, now that we are a library, we want to support as many use cases as possible.
159 * The documentation guarantees linear behavior,
160 * and we don't want to have quadratic blowup if the user has unexpected input.
161 * See issue #1 for details.
162 *
163 * The big advantage of insertion sort over std::slice::sort is that it avoids allocation.
164 * If the allocation in std::sort becomes a significant performance overhead,
165 * we could try and optimistically perform insertion sort,
166 * falling back to stdlib sort on input that is not already mostly-sorted.
167 * Alternatively, we could try reusing memory or offering the user a choice.
168 */
169 self.insertions.sort_by_key(|insertion| insertion.index);
170 }
171}
172impl<T> FromIterator<Insertion<T>> for InsertionSet<T> {
173 #[inline]
174 fn from_iter<I: IntoIterator<Item = Insertion<T>>>(iter: I) -> Self {
175 InsertionSet {
176 insertions: iter.into_iter().collect(),
177 }
178 }
179}
180impl<T> FromIterator<(usize, T)> for InsertionSet<T> {
181 #[inline]
182 fn from_iter<I: IntoIterator<Item = (usize, T)>>(iter: I) -> Self {
183 iter.into_iter().map(Insertion::from).collect()
184 }
185}
186impl<T> Default for InsertionSet<T> {
187 #[inline]
188 fn default() -> Self {
189 InsertionSet::new()
190 }
191}
192
193struct PoppingIter<'a, T: 'a>(&'a mut Vec<T>);
194impl<'a, T> Iterator for PoppingIter<'a, T> {
195 type Item = T;
196
197 #[inline]
198 fn next(&mut self) -> Option<T> {
199 self.0.pop()
200 }
201
202 #[inline]
203 fn size_hint(&self) -> (usize, Option<usize>) {
204 (self.0.len(), Some(self.0.len()))
205 }
206}
207impl<'a, T> ExactSizeIterator for PoppingIter<'a, T> {}
208
209/// Applies all the specified insertions into the target vector.
210///
211/// The insertion iterator must be sorted in reverse order and give the proper size for its `ExactSizeIterator`.
212/// Violating these constraints will never cause undefined behavior,
213/// since internally we use the completely safe `BulkShifter` abstraction.
214pub fn apply_bulk_insertions<T, I>(target: &mut Vec<T>, mut insertions: I)
215where
216 I: Iterator<Item = Insertion<T>>,
217 I: ExactSizeIterator,
218{
219 let mut shifter = BulkShifter::new(target, insertions.len());
220 /*
221 * We perform insertions in reverse order to reduce moving memory,
222 * and ensure that the function is panic safe.
223 *
224 * For example, given the vector
225 * and the InsertionSet `[(0, 0), (1, 2), (1, 3) (4, 9)]`:
226 *
227 * Since the first (working backwards) insertion is `(4, 9)`,
228 * we need to to shift all elements after our first insertion
229 * to the left 4 places:
230 * `[1, 4, 5, 7, undef, undef, undef, undef, 11]`.
231 * The element `11` will never need to be moved again,
232 * since we've already made room for all future insertions.
233 *
234 * Next, we perform our first insertion (4, 9) at the last `undef` element:
235 * `[1, 4, 5, 7, undef, undef, undef, 9, 11]`.
236 * We only have 3 insertions left to perform,
237 * so all future shifts will only need to move over two.
238 * Then, we handle the group of insertions `[(1, 2), [(1, 3)]`,
239 * and shift all elements past index 1 to the left 3 spaces:
240 * [1, undef, undef, undef, 4, 5, 7, 9, 11].
241 * Then we perform our desired insertions at index 1:
242 * [1, undef, 2, 3, 4, 9, 11].
243 * Finally, we perform the same process for the final insertion (0, 0),
244 * resulting in the desired result: [0, 1, 2, 3, 4, 9, 11].
245 */
246 while !shifter.is_finished() {
247 let Insertion { index, element } = insertions.next().expect("Expected more insertions!");
248 shifter.shift_original(index);
249 shifter.push_shifted(element);
250 }
251 shifter.finish();
252 assert_eq!(insertions.len(), 0, "Unexpected insertions");
253}
254
255/// The original location of an element (before a set of insertions are applied)
256#[derive(Copy, Clone, Debug, Eq, PartialEq)]
257pub enum OriginalLocation {
258 /// The element was a queued insertion with the specified index
259 Insertion(usize),
260 /// The element was originally part of the vector
261 Original(usize),
262}
263
264/// Compute the updated locations of all elements (original + inserted).
265///
266/// See [InsertionSet::compute_updated_locations] for details
267pub fn compute_updated_locations<T, I, F>(target: &[T], mut insertions: I, mut updated: F)
268where
269 I: Iterator<Item = usize>,
270 I: ExactSizeIterator,
271 F: FnMut(OriginalLocation, usize),
272{
273 // This mirrors `apply_bulk_insertions` without actually shifting memory
274 let mut original_len = target.len();
275 let shifted_end = original_len + insertions.len();
276 let mut shifted_start = shifted_end;
277 let mut insertion_id = 0;
278 while original_len != shifted_start {
279 let insertion_index = insertions.next().expect("Expected more insertions!");
280 assert!(
281 insertion_index <= original_len,
282 "Invalid insertion index {} > len {}",
283 insertion_index,
284 original_len
285 );
286 let moved_memory = original_len - insertion_index;
287 if moved_memory > 0 {
288 assert!(
289 shifted_start >= moved_memory && insertion_index <= shifted_start - moved_memory
290 );
291 update_range(
292 insertion_index..original_len,
293 shifted_start - moved_memory,
294 &mut updated,
295 );
296 shifted_start -= moved_memory;
297 original_len = insertion_index;
298 }
299 assert!(shifted_start > original_len);
300 shifted_start -= 1;
301 updated(OriginalLocation::Insertion(insertion_id), shifted_start);
302 insertion_id += 1;
303 }
304 for original_index in 0..original_len {
305 updated(OriginalLocation::Original(original_index), original_index);
306 }
307 assert_eq!(insertions.len(), 0, "Unexpected insertions");
308}
309#[inline]
310fn update_range<F: FnMut(OriginalLocation, usize)>(
311 original: Range<usize>,
312 updated_start: usize,
313 func: &mut F,
314) {
315 let mut updated = updated_start;
316 for original_index in original {
317 func(OriginalLocation::Original(original_index), updated);
318 updated += 1;
319 }
320}
321
322#[cfg(test)]
323mod test {
324 use super::*;
325 #[test]
326 fn basic() {
327 /*
328 * For example, given the vector `[1, 4, 5, 7, 11]`
329 * and the InsertionSet `[(0, 0), (1, 2), (1, 3) (4, 9)]`:
330 */
331 let vector = vec![1, 4, 5, 7, 11];
332 let insertions = [(0, 0), (1, 2), (1, 3), (4, 9)]
333 .iter()
334 .cloned()
335 .collect::<InsertionSet<u32>>();
336 assert_eq!(insertions.applied(vector), vec![0, 1, 2, 3, 4, 5, 7, 9, 11]);
337 }
338 #[test]
339 fn updated_locations() {
340 /*
341 * For example, given the vector `[1, 4, 5, 7, 11]`
342 * and the InsertionSet `[(0, 0), (1, 2), (1, 3) (4, 9)]`:
343 */
344 let vector = vec![1, 4, 5, 7, 11];
345 let mut insertions = [(0, 0), (1, 2), (1, 3), (4, 9)]
346 .iter()
347 .cloned()
348 .collect::<InsertionSet<u32>>();
349 assert_eq!(
350 insertions.list_updated_locations(&vector),
351 vec![
352 (OriginalLocation::Insertion(0), 0),
353 (OriginalLocation::Original(0), 1),
354 (OriginalLocation::Insertion(1), 2),
355 (OriginalLocation::Insertion(2), 3),
356 (OriginalLocation::Original(1), 4),
357 (OriginalLocation::Original(2), 5),
358 (OriginalLocation::Original(3), 6),
359 (OriginalLocation::Insertion(3), 7),
360 (OriginalLocation::Original(4), 8),
361 ]
362 );
363 }
364 #[test]
365 fn empty_updated_locations() {
366 let vector = vec![1, 4, 5, 7, 11];
367 assert_eq!(
368 InsertionSet::new().list_updated_locations(&vector),
369 vec![
370 (OriginalLocation::Original(0), 0),
371 (OriginalLocation::Original(1), 1),
372 (OriginalLocation::Original(2), 2),
373 (OriginalLocation::Original(3), 3),
374 (OriginalLocation::Original(4), 4),
375 ]
376 );
377 }
378}