// stable_vec/core/option.rs

1use ::core::{
2    fmt,
3    hint::unreachable_unchecked,
4    ptr,
5};
6
7use alloc::vec::Vec;
8
9use super::Core;
10
/// A `Core` implementation that is essentially a `Vec<Option<T>>`.
///
/// This implementation is quite different from the `BitVecCore`. This one only
/// manages one allocation, meaning it does not suffer from the same
/// disadvantages as `BitVecCore`. This can sometimes lead to better memory
/// access times due to caching effects. However, this implementation has the
/// major disadvantage of wasting memory in most cases.
///
/// Usually, `size_of::<Option<T>>() > size_of::<T>()`. The difference can be
/// as high as 4 bytes due to alignment. But as only one bit is needed to store
/// the `None`/`Some` information, most of that extra memory is wasted. A worst
/// case scenario is something like `Option<u32>`: this is 8 bytes large (on
/// most platforms), meaning that a stable vector with `OptionCore` would use
/// twice as much memory as a `Vec<u32>`. This is not only wasteful, but has a
/// negative effect on speed as the information is not very densely packed.
///
/// In general, only use this implementation in one of these cases:
///
/// - Your `T` has a niche (e.g. it is `NonNull`, a reference, or another
///   non-nullable type), meaning that `Option<T>` has the same size as `T`.
///   This is then even more memory efficient than the default core
///   implementation. Iterating over indices of a stable vector is still slower
///   in this case as the relevant information is further apart.
/// - Your `T` is very large, meaning that the amount of wasted memory is small
///   in comparison.
///
/// In both cases, switching the implementation from the default to this one
/// only makes sense after you have measured that you actually gain performance
/// from it. The interface of both implementations is exactly the same.
pub struct OptionCore<T> {
    /// The data and the deleted-slot information in one.
    ///
    /// The `len` and `capacity` properties of the vector directly correspond
    /// to the `len` and `cap` properties of the `Core` trait. However, as a
    /// `Vec` assumes that everything beyond `len` is uninitialized, we have to
    /// make sure to only interact with it in a particular way.
    ///
    /// This vector is in a correct state at all times. This means that the
    /// vector can simply be dropped and it wouldn't access uninitialized
    /// values or leak memory.
    ///
    /// This implementation has one potentially problematic assumption. When we
    /// allocate new memory, we initialize all slots to `None`. That way we can
    /// access all slots with indices < cap. However, the `Vec` docs state:
    ///
    /// > Its uninitialized memory is scratch space that it may use however it
    /// > wants. It will generally just do whatever is most efficient or
    /// > otherwise easy to implement. [...] There is one case which we will
    /// > not break, however: using `unsafe` code to write to the excess
    /// > capacity, and then increasing the length to match, is always valid.
    ///
    /// This probably says that we cannot rely on the content of the excess
    /// capacity memory. However, we are careful how we touch the vector and we
    /// do not use any methods that would benefit in any way from touching that
    /// memory. Therefore we assume that all slots with indices > len stay
    /// initialized to `None`. A couple of methods rely on that assumption.
    data: Vec<Option<T>>,
}
68
// Invariant relied on by every `unsafe` block below: all slots with index
// `< self.data.capacity()` are initialized `Option<T>` values (`None` for
// empty slots), even beyond the `Vec`'s own `len`.
impl<T> Core<T> for OptionCore<T> {
    fn new() -> Self {
        Self {
            data: Vec::new(),
        }
    }

    fn len(&self) -> usize {
        self.data.len()
    }

    fn cap(&self) -> usize {
        self.data.capacity()
    }

    unsafe fn set_len(&mut self, new_len: usize) {
        debug_assert!(new_len <= self.cap());
        // Other precondition is too expensive to test, even in debug:
        // ∀ i in `new_len..self.cap()` ⇒ `self.has_element_at(i) == false`

        // We can just call `set_len` on the vector as both of that method's
        // preconditions are held:
        // - "new_len must be less than or equal to capacity()": this is also a
        //   direct precondition of this method.
        // - "The elements at old_len..new_len must be initialized": all slots
        //   of the vector are always initialized. On allocation, everything is
        //   initialized to `None`. All slots in `old_len..new_len` are always
        //   `None` as stated by the `Core` invariant "`len ≤ i < cap`: slots
        //   with index `i` are always empty".
        self.data.set_len(new_len)
    }

    // Reallocation is the rare, expensive path: keep it out of line and mark
    // it cold so callers' hot paths stay small.
    #[inline(never)]
    #[cold]
    unsafe fn realloc(&mut self, new_cap: usize) {
        debug_assert!(new_cap >= self.len());
        debug_assert!(new_cap <= isize::max_value() as usize);

        // Do different things depending on whether we shrink or grow. In all
        // three cases, `initialized_end` is the index up to which all slots
        // are guaranteed to still be initialized afterwards.
        let old_cap = self.cap();
        let initialized_end = if new_cap > old_cap {
            // ----- We will grow the vector -----

            // We use `reserve_exact` here instead of creating a new vector,
            // because the former can use `realloc` which is significantly faster
            // in many cases. See https://stackoverflow.com/a/39562813/2408867
            let additional = new_cap - self.data.len();
            self.data.reserve_exact(additional);

            // `Vec` preserves all elements up to its length. Beyond that, the
            // slots might have become uninitialized by `reserve_exact`. Thus
            // we need to initialize them again.
            self.data.len()
        } else if new_cap < old_cap {
            // We will shrink the vector. The only tool we have for this is
            // `shrink_to_fit`. In order to use this, we temporarily have to
            // set the length of the vector to the new capacity. This is fine:
            //
            // - If `new_cap < old_len`, we temporarily remove elements from
            //   the vector. But these are all `None`s as guaranteed by the
            //   preconditions.
            // - If `new_cap > old_len`, we temporarily add elements to the
            //   vector. But these have all been initialized to `None`.
            let old_len = self.data.len();
            self.data.set_len(new_cap);
            self.data.shrink_to_fit();
            self.data.set_len(old_len);

            // When calling `shrink_to_fit`, the `Vec` cannot do anything funky
            // with the elements up to its size (which at that time was
            // `new_cap`). However, all memory that might exist beyond that
            // (i.e. if `shrink_to_fit` does not manage to perfectly fit) might
            // be uninitialized now.
            new_cap
        } else {
            // If the requested capacity is exactly the current one, we do
            // nothing. We return the current capacity from this expression to
            // say that all elements are indeed initialized.
            self.data.capacity()
        };

        // We now need to potentially initialize some elements to `None`. The
        // index `initialized_end` tells us the end of the range where all
        // elements are guaranteed to be initialized. Thus we need to
        // initialize `initialized_end..self.data.capacity()`. Note that we use
        // the *actual* capacity here, which may exceed `new_cap` (e.g.
        // `reserve_exact` / `shrink_to_fit` only give lower/upper bounds).
        let actual_capacity = self.data.capacity();
        let mut ptr = self.data.as_mut_ptr().add(initialized_end);
        let end = self.data.as_mut_ptr().add(actual_capacity);
        while ptr != end {
            // `ptr::write` does not drop the old (possibly uninitialized)
            // value, which is exactly what we need here.
            ptr::write(ptr, None);
            ptr = ptr.add(1);
        }
    }

    /// Assumes that `idx < capacity`
    unsafe fn has_element_at(&self, idx: usize) -> bool {
        debug_assert!(idx < self.cap());
        // Under the precondition that we maintain `None` values,
        // for all items removed from the array and
        // all extra capacity not containing items.
        //
        // Given that this is maintained during removal of items,
        // realloc, and during clear. We can get a valid reference
        // to an option for any idx < self.cap.
        (&*self.data.as_ptr().add(idx)).is_some()
    }

    unsafe fn insert_at(&mut self, idx: usize, elem: T) {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx) == false);

        // We use `ptr::write` instead of a simple assignment here for
        // performance reason. An assignment would try to drop the value on the
        // left hand side. Since we know from our preconditions that this value
        // is in fact `None` and we thus never need to drop it, `ptr::write` is
        // faster.
        ptr::write(self.data.as_mut_ptr().add(idx), Some(elem));
    }

    unsafe fn remove_at(&mut self, idx: usize) -> T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // `take` leaves a `None` behind, which maintains the "all slots are
        // initialized" invariant.
        match self.data.get_unchecked_mut(idx).take() {
            // The precondition guarantees us that the slot is not empty, thus
            // we use this unsafe `unreachable_unchecked` to omit the branch.
            None => unreachable_unchecked(),
            Some(elem) => elem,
        }
    }

    unsafe fn get_unchecked(&self, idx: usize) -> &T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        match self.data.get_unchecked(idx) {
            // The precondition guarantees us that the slot is not empty, thus
            // we use this unsafe `unreachable_unchecked` to omit the branch.
            None => unreachable_unchecked(),
            Some(elem) => elem,
        }
    }

    unsafe fn get_unchecked_mut(&mut self, idx: usize) -> &mut T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        match self.data.get_unchecked_mut(idx) {
            // The precondition guarantees us that the slot is not empty, thus
            // we use this unsafe `unreachable_unchecked` to omit the branch.
            None => unreachable_unchecked(),
            Some(elem) => elem,
        }
    }

    fn clear(&mut self) {
        // We can assume that all existing elements have an index lower than
        // `len` (this is one of the invariants of the `Core` interface).
        // Calling `clear` on the `Vec` would drop all remaining elements and
        // set the length to 0. However those values would subsequently become
        // uninitialized. Thus we call `take` for each item to leave a `None`
        // value in its place and set the length to zero ourselves.
        for item in self.data.iter_mut() {
            drop(item.take());
        }
        unsafe {
            self.set_len(0);
        }
    }

    unsafe fn swap(&mut self, a: usize, b: usize) {
        // NOTE(review): no debug asserts here, unlike the other unsafe
        // methods — presumably the `Core` contract guarantees
        // `a, b < self.cap()`; confirm against the trait documentation.
        //
        // We can't just have two mutable references, so we use `ptr::swap`
        // instead of `mem::swap`. We do not use the slice's `swap` method as
        // that performs bound checks.
        let p = self.data.as_mut_ptr();
        let pa: *mut _ = p.add(a);
        let pb: *mut _ = p.add(b);
        ptr::swap(pa, pb);
    }
}
249
250impl<T: Clone> Clone for OptionCore<T> {
251    fn clone(&self) -> Self {
252        // Cloning the vector is safe: the `Vec` implementation won't access
253        // uninitialized memory. However, simply cloning it would be wrong for
254        // two reasons:
255        //
256        // - `Vec` might not retain the same capacity when cloning it. But this
257        //   is important for us.
258        // - The memory after its length is probably uninitialized.
259        //
260        // To fix both issues, we create a new vec with the appopriate capacity
261        // and extend it with the values of the other. Placing None objects in
262        // the extra capacity and then set the length.
263        let mut data = Vec::with_capacity(self.data.capacity());
264        data.extend(
265            self.data
266                .iter()
267                .cloned()
268                .chain(core::iter::repeat(None).take(self.data.capacity() - self.data.len())),
269        );
270        debug_assert_eq!(data.len(), self.data.capacity());
271        debug_assert_eq!(data.capacity(), self.data.capacity());
272        unsafe {
273            data.set_len(self.data.len());
274        }
275        Self { data }
276    }
277}
278
impl<T> Drop for OptionCore<T> {
    fn drop(&mut self) {
        // We don't need to do anything! The `Vec` will be dropped, which is
        // correct: that will drop all remaining elements (those within its
        // `len`) but won't touch non-existing elements in the excess capacity.
        // This manual `Drop` impl still exists to explain this fact and to
        // make sure the automatic `Drop` impl won't lead to unsafety in the
        // future.
    }
}
288
289// This impl is usually not used. `StableVec` has its own impl which doesn't
290// use this one.
291impl<T: fmt::Debug> fmt::Debug for OptionCore<T> {
292    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293        f.debug_tuple("OptionCore")
294            .field(&self.data)
295            .finish()
296    }
297}