stable_vec/core/
option.rs

1use std::{
2    prelude::v1::*,
3    fmt,
4    hint::unreachable_unchecked,
5    ptr,
6};
7
8use super::Core;
9
10/// A `Core` implementation that is essentially a `Vec<Option<T>>`.
11///
12/// This implementation is quite different from the `BitVecCore`. This one only
13/// manages one allocation, meaning it does not suffer from the same
14/// disadvantages as `BitVecCore`. This can sometimes lead to better memory
15/// access times due to caching effects. However, this implementation has the
16/// major disadvantage of wasting memory in most cases.
17///
18/// Usually, `size_of::<Option<T>>() > size_of::<T>()`. The difference can be
19/// as high as 4 byte due to alignment. But as only one bit is used to store
20/// the `None/Some` information, all that memory is wasted. A worst case
21/// scenario is something like `Option<u32>`: this is 8 byte large (on most
22/// platforms), meaning that a stable vector with `OptionCore` would use twice
23/// as much memory as a `Vec<u32>`. This is not only wasteful, but has a
24/// negative effect on speed as the information is not very densely packed.
25///
26/// In general, only use this implementation in one of these cases:
27///
28/// - Your `T` is `NonNull`, meaning that `Option<T>` has the same size as `T`.
29///   This is then even more memory efficient than the default core
30///   implementation. Iterating over indices of a stable vector is still slower
31///   in this case as the relevant information is further apart.
32/// - Your `T` is very large, meaning that the amount of wasted memory is small
33///   in comparison.
34///
35/// In both cases, switching the implementation from default to this only makes
36/// sense after you measured that you actually gain performance from it. The
37/// interface of both implementations is exactly the same.
38pub struct OptionCore<T> {
39    /// The data and deleted information in one.
40    ///
41    /// The `len` and `capacity` properties of the vector directly correspond
42    /// to `len` and `cap` properties of the `Core` trait. However, as a `Vec`
43    /// assumes that everything beyond `len` is uninitialized, we have to make
44    /// sure to only interact with it in a particular way.
45    ///
46    /// This vector is in a correct state at all times. This means that the
47    /// vector can simply be dropped and it wouldn't access uninitialized
48    /// values or leak memory.
49    ///
50    /// This implementation has one potentially problematic assumption. When we
51    /// allocate new memory, we initialize all slots to `None`. That way we can
52    /// access all slots with indices < cap. However, the `Vec` docs state:
53    ///
54    /// > Its uninitialized memory is scratch space that it may use however it
55    /// > wants. It will generally just do whatever is most efficient or
56    /// > otherwise easy to implement. [...] There is one case which we will
57    /// > not break, however: using `unsafe` code to write to the excess
58    /// > capacity, and then increasing the length to match, is always valid.
59    ///
60    /// This probably says that we cannot rely on the content of the excess
61    /// capacity memory. However, we are careful how we touch the vector and we
62    /// do not use any methods that would benefit in any way from touching that
63    /// memory. Therefore we assume that all slots with indices > len stay
64    /// initialized to `None`. A couple of methods rely on that assumption.
65    data: Vec<Option<T>>,
66}
67
68impl<T> Core<T> for OptionCore<T> {
69    fn new() -> Self {
70        Self {
71            data: Vec::new(),
72        }
73    }
74
75    fn len(&self) -> usize {
76        self.data.len()
77    }
78
79    fn cap(&self) -> usize {
80        self.data.capacity()
81    }
82
83    unsafe fn set_len(&mut self, new_len: usize) {
84        debug_assert!(new_len <= self.cap());
85        // Other precondition is too expensive to test, even in debug:
86        // ∀ i in `new_len..self.cap()` ⇒ `self.has_element_at(i) == false`
87
88        // We can just call `set_len` on the vector as both of that method's
89        // preconditions are held:
90        // - "new_len must be less than or equal to capacity()": this is also a
91        //   direct precondition of this method.
92        // - "The elements at old_len..new_len must be initialized": all slots
93        //   of the vector are always initialized. On allocation, everything is
94        //   initialized to `None`. All slots in `old_len..new_len` are always
95        //   `None` as stated by the `Core` invariant "`len ≤ i < cap`: slots
96        //   with index `i` are always empty".
97        self.data.set_len(new_len)
98    }
99
100    #[inline(never)]
101    #[cold]
102    unsafe fn realloc(&mut self, new_cap: usize) {
103        debug_assert!(new_cap >= self.len());
104        debug_assert!(new_cap <= isize::max_value() as usize);
105
106        // Do different things depending on whether we shrink or grow.
107        let old_cap = self.cap();
108        let initialized_end = if new_cap > old_cap {
109            // ----- We will grow the vector -----
110
111            // We use `reserve_exact` here instead of creating a new vector,
112            // because the former can use `realloc` which is significantly faster
113            // in many cases. See https://stackoverflow.com/a/39562813/2408867
114            let additional = new_cap - self.data.len();
115            self.data.reserve_exact(additional);
116
117            // `Vec` preserves all elements up to its length. Beyond that, the
118            // slots might have become uninitialized by `reserve_exact`. Thus
119            // we need to initialize them again.
120            self.data.len()
121        } else if new_cap < old_cap {
122            // We will shrink the vector. The only tool we have for this is
123            // `shrink_to_fit`. In order to use this, we temporarily have to
124            // set the length of the vector to the new capacity. This is fine:
125            //
126            // - If `new_cap < old_len`, we temporarily remove elements from
127            //   the vector. But these are all `None`s as guaranteed by the
128            //   preconditions.
129            // - If `new_cap > old_len`, we temporarily add elements to the
130            //   vector. But these have all been initialized to `None`.
131            let old_len = self.data.len();
132            self.data.set_len(new_cap);
133            self.data.shrink_to_fit();
134            self.data.set_len(old_len);
135
136            // When calling `shrink_to_fit`, the `Vec` cannot do anything funky
137            // with the elements up to its size (which at that time was
138            // `new_cap`). However, all memory that might exist beyond that
139            // (i.e. if `shrink_to_fit` does not manage to perfectly fit) might
140            // be uninitialized now.
141            new_cap
142        } else {
143            // If the requested capacity is exactly the current one, we do
144            // nothing. We return the current capacity from this expression to
145            // say that all elements are indeed initialized.
146            self.data.capacity()
147        };
148
149        // We now need to potentially initialize some elements to `None`. The
150        // index `initialized_end` tells us the end of the range where all
151        // elements are guaranteed to be initialized. Thus we need to
152        // initialize `initialized_end..self.data.capacity()`.
153        let actual_capacity = self.data.capacity();
154        let mut ptr = self.data.as_mut_ptr().add(initialized_end);
155        let end = self.data.as_mut_ptr().add(actual_capacity);
156        while ptr != end {
157            ptr::write(ptr, None);
158            ptr = ptr.add(1);
159        }
160    }
161
162    /// Assumes that `idx < capacity`
163    unsafe fn has_element_at(&self, idx: usize) -> bool {
164        debug_assert!(idx < self.cap());
165        // Under the precondition that we maintain `None` values,
166        // for all items removed from the array and
167        // all extra capacity not containing items.
168        //
169        // Given that this is maintained during removal of items,
170        // realloc, and during clear. We can get a valid reference
171        // to an option for any idx < self.cap.
172        (&*self.data.as_ptr().add(idx)).is_some()
173    }
174
175    unsafe fn insert_at(&mut self, idx: usize, elem: T) {
176        debug_assert!(idx < self.cap());
177        debug_assert!(self.has_element_at(idx) == false);
178
179        // We use `ptr::write` instead of a simple assignment here for
180        // performance reason. An assignment would try to drop the value on the
181        // left hand side. Since we know from our preconditions that this value
182        // is in fact `None` and we thus never need to drop it, `ptr::write` is
183        // faster.
184        ptr::write(self.data.as_mut_ptr().add(idx), Some(elem));
185    }
186
187    unsafe fn remove_at(&mut self, idx: usize) -> T {
188        debug_assert!(idx < self.cap());
189        debug_assert!(self.has_element_at(idx));
190
191        match self.data.get_unchecked_mut(idx).take() {
192            // The precondition guarantees us that the slot is not empty, thus
193            // we use this unsafe `unreachable_unchecked` to omit the branch.
194            None => unreachable_unchecked(),
195            Some(elem) => elem,
196        }
197    }
198
199    unsafe fn get_unchecked(&self, idx: usize) -> &T {
200        debug_assert!(idx < self.cap());
201        debug_assert!(self.has_element_at(idx));
202
203        match self.data.get_unchecked(idx) {
204            // The precondition guarantees us that the slot is not empty, thus
205            // we use this unsafe `unreachable_unchecked` to omit the branch.
206            None => unreachable_unchecked(),
207            Some(elem) => elem,
208        }
209    }
210
211    unsafe fn get_unchecked_mut(&mut self, idx: usize) -> &mut T {
212        debug_assert!(idx < self.cap());
213        debug_assert!(self.has_element_at(idx));
214
215        match self.data.get_unchecked_mut(idx) {
216            // The precondition guarantees us that the slot is not empty, thus
217            // we use this unsafe `unreachable_unchecked` to omit the branch.
218            None => unreachable_unchecked(),
219            Some(elem) => elem,
220        }
221    }
222
223    fn clear(&mut self) {
224        // We can assume that all existing elements have an index lower than
225        // `len` (this is one of the invariants of the `Core` interface).
226        // Calling `clear` on the `Vec` would drop all remaining elements and
227        // sets the length to 0. However those values would subsequently become
228        // uninitialized. Thus we call take for each item to leave a
229        // `None` value in it's place and set the length to zero.
230        for item in self.data.iter_mut() {
231            drop(item.take());
232        }
233        unsafe {
234            self.set_len(0);
235        }
236    }
237
238    unsafe fn swap(&mut self, a: usize, b: usize) {
239        // We can't just have two mutable references, so we use `ptr::swap`
240        // instead of `mem::swap`. We do not use the slice's `swap` method as
241        // that performs bound checks.
242        let p = self.data.as_mut_ptr();
243        let pa: *mut _ = p.add(a);
244        let pb: *mut _ = p.add(b);
245        ptr::swap(pa, pb);
246    }
247}
248
249impl<T: Clone> Clone for OptionCore<T> {
250    fn clone(&self) -> Self {
251        // Cloning the vector is safe: the `Vec` implementation won't access
252        // uninitialized memory. However, simply cloning it would be wrong for
253        // two reasons:
254        //
255        // - `Vec` might not retain the same capacity when cloning it. But this
256        //   is important for us.
257        // - The memory after its length is probably uninitialized.
258        //
259        // To fix both issues, we create a new vec with the appopriate capacity
260        // and extend it with the values of the other. Placing None objects in
261        // the extra capacity and then set the length.
262        let mut data = Vec::with_capacity(self.data.capacity());
263        data.extend(
264            self.data
265                .iter()
266                .cloned()
267                .chain(std::iter::repeat(None).take(self.data.capacity() - self.data.len())),
268        );
269        debug_assert_eq!(data.len(), self.data.capacity());
270        debug_assert_eq!(data.capacity(), self.data.capacity());
271        unsafe {
272            data.set_len(self.data.len());
273        }
274        Self { data }
275    }
276}
277
278impl<T> Drop for OptionCore<T> {
279    fn drop(&mut self) {
280        // We don't need to anything! The `Vec` will be dropped which is
281        // correct: that will drop all remaining elements but won't touch
282        // non-existing elements. This manual `Drop` impl still exists to
283        // explain this fact and to make sure the automatic `Drop` impl won't
284        // lead to unsafety in the future.
285    }
286}
287
288// This impl is usually not used. `StableVec` has its own impl which doesn't
289// use this one.
290impl<T: fmt::Debug> fmt::Debug for OptionCore<T> {
291    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
292        f.debug_tuple("OptionCore")
293            .field(&self.data)
294            .finish()
295    }
296}