// stable_vec/core/option.rs
1use ::core::{
2 fmt,
3 hint::unreachable_unchecked,
4 ptr,
5};
6
7use alloc::vec::Vec;
8
9use super::Core;
10
/// A `Core` implementation that is essentially a `Vec<Option<T>>`.
///
/// This implementation is quite different from the `BitVecCore`. This one only
/// manages one allocation, meaning it does not suffer from the same
/// disadvantages as `BitVecCore`. This can sometimes lead to better memory
/// access times due to caching effects. However, this implementation has the
/// major disadvantage of wasting memory in most cases.
///
/// Usually, `size_of::<Option<T>>() > size_of::<T>()`. The difference can be
/// as high as 4 bytes due to alignment. But as only one bit is used to store
/// the `None`/`Some` information, all that memory is wasted. A worst case
/// scenario is something like `Option<u32>`: this is 8 bytes large (on most
/// platforms), meaning that a stable vector with `OptionCore` would use twice
/// as much memory as a `Vec<u32>`. This is not only wasteful, but has a
/// negative effect on speed as the information is not very densely packed.
///
/// In general, only use this implementation in one of these cases:
///
/// - Your `T` has a niche (e.g. `NonNull`, references), meaning that
///   `Option<T>` has the same size as `T`. This is then even more memory
///   efficient than the default core implementation. Iterating over indices
///   of a stable vector is still slower in this case as the relevant
///   information is further apart.
/// - Your `T` is very large, meaning that the amount of wasted memory is
///   small in comparison.
///
/// In both cases, switching the implementation from default to this only makes
/// sense after you measured that you actually gain performance from it. The
/// interface of both implementations is exactly the same.
pub struct OptionCore<T> {
    /// The data and deleted information in one.
    ///
    /// The `len` and `capacity` properties of the vector directly correspond
    /// to `len` and `cap` properties of the `Core` trait. However, as a `Vec`
    /// assumes that everything beyond `len` is uninitialized, we have to make
    /// sure to only interact with it in a particular way.
    ///
    /// This vector is in a correct state at all times. This means that the
    /// vector can simply be dropped and it wouldn't access uninitialized
    /// values or leak memory.
    ///
    /// This implementation has one potentially problematic assumption. When we
    /// allocate new memory, we initialize all slots to `None`. That way we can
    /// access all slots with indices < cap. However, the `Vec` docs state:
    ///
    /// > Its uninitialized memory is scratch space that it may use however it
    /// > wants. It will generally just do whatever is most efficient or
    /// > otherwise easy to implement. [...] There is one case which we will
    /// > not break, however: using `unsafe` code to write to the excess
    /// > capacity, and then increasing the length to match, is always valid.
    ///
    /// This probably says that we cannot rely on the content of the excess
    /// capacity memory. However, we are careful how we touch the vector and we
    /// do not use any methods that would benefit in any way from touching that
    /// memory. Therefore we assume that all slots with indices > len stay
    /// initialized to `None`. A couple of methods rely on that assumption.
    data: Vec<Option<T>>,
}
68
impl<T> Core<T> for OptionCore<T> {
    /// Creates an empty core: `len == 0`, `cap == 0`, no allocation yet.
    fn new() -> Self {
        Self {
            data: Vec::new(),
        }
    }

    /// The `Core` length maps directly onto the inner vector's length.
    fn len(&self) -> usize {
        self.data.len()
    }

    /// The `Core` capacity maps directly onto the inner vector's capacity.
    fn cap(&self) -> usize {
        self.data.capacity()
    }

    unsafe fn set_len(&mut self, new_len: usize) {
        debug_assert!(new_len <= self.cap());
        // Other precondition is too expensive to test, even in debug:
        // ∀ i in `new_len..self.cap()` ⇒ `self.has_element_at(i) == false`

        // We can just call `set_len` on the vector as both of that method's
        // preconditions are held:
        // - "new_len must be less than or equal to capacity()": this is also a
        //   direct precondition of this method.
        // - "The elements at old_len..new_len must be initialized": all slots
        //   of the vector are always initialized. On allocation, everything is
        //   initialized to `None`. All slots in `old_len..new_len` are always
        //   `None` as stated by the `Core` invariant "`len ≤ i < cap`: slots
        //   with index `i` are always empty".
        self.data.set_len(new_len)
    }

    // `realloc` is expected to be called rarely, so keep it out of the hot
    // path (`#[cold]`) and don't inline its fairly large body.
    #[inline(never)]
    #[cold]
    unsafe fn realloc(&mut self, new_cap: usize) {
        debug_assert!(new_cap >= self.len());
        debug_assert!(new_cap <= isize::max_value() as usize);

        // Do different things depending on whether we shrink or grow. In both
        // cases, `initialized_end` is set to the index from which on the
        // slots might be uninitialized and thus need to be reset to `None`
        // by the loop at the bottom.
        let old_cap = self.cap();
        let initialized_end = if new_cap > old_cap {
            // ----- We will grow the vector -----

            // We use `reserve_exact` here instead of creating a new vector,
            // because the former can use `realloc` which is significantly faster
            // in many cases. See https://stackoverflow.com/a/39562813/2408867
            //
            // `reserve_exact(additional)` guarantees a capacity of at least
            // `len + additional`, i.e. at least `new_cap`.
            let additional = new_cap - self.data.len();
            self.data.reserve_exact(additional);

            // `Vec` preserves all elements up to its length. Beyond that, the
            // slots might have become uninitialized by `reserve_exact` (a
            // reallocation only copies `len` elements). Thus we need to
            // initialize them again.
            self.data.len()
        } else if new_cap < old_cap {
            // We will shrink the vector. The only tool we have for this is
            // `shrink_to_fit`. In order to use this, we temporarily have to
            // set the length of the vector to the new capacity. This is fine:
            // due to the precondition `new_cap >= self.len()`, we only ever
            // temporarily *add* elements to the vector here (the slots in
            // `old_len..new_cap`), and those have all been initialized to
            // `None`.
            let old_len = self.data.len();
            self.data.set_len(new_cap);
            self.data.shrink_to_fit();
            self.data.set_len(old_len);

            // When calling `shrink_to_fit`, the `Vec` cannot do anything funky
            // with the elements up to its size (which at that time was
            // `new_cap`). However, all memory that might exist beyond that
            // (i.e. if `shrink_to_fit` does not manage to perfectly fit) might
            // be uninitialized now.
            new_cap
        } else {
            // If the requested capacity is exactly the current one, we do
            // nothing. We return the current capacity from this expression to
            // say that all elements are indeed initialized.
            self.data.capacity()
        };

        // We now need to potentially initialize some elements to `None`. The
        // index `initialized_end` tells us the end of the range where all
        // elements are guaranteed to be initialized. Thus we need to
        // initialize `initialized_end..self.data.capacity()`. We use
        // `ptr::write` as the targets may be uninitialized and must not be
        // dropped.
        let actual_capacity = self.data.capacity();
        let mut ptr = self.data.as_mut_ptr().add(initialized_end);
        let end = self.data.as_mut_ptr().add(actual_capacity);
        while ptr != end {
            ptr::write(ptr, None);
            ptr = ptr.add(1);
        }
    }

    /// Assumes that `idx < capacity`
    unsafe fn has_element_at(&self, idx: usize) -> bool {
        debug_assert!(idx < self.cap());
        // Under the precondition that we maintain `None` values,
        // for all items removed from the array and
        // all extra capacity not containing items.
        //
        // Given that this is maintained during removal of items,
        // realloc, and during clear. We can get a valid reference
        // to an option for any idx < self.cap.
        (&*self.data.as_ptr().add(idx)).is_some()
    }

    unsafe fn insert_at(&mut self, idx: usize, elem: T) {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx) == false);

        // We use `ptr::write` instead of a simple assignment here for
        // performance reason. An assignment would try to drop the value on the
        // left hand side. Since we know from our preconditions that this value
        // is in fact `None` and we thus never need to drop it, `ptr::write` is
        // faster.
        ptr::write(self.data.as_mut_ptr().add(idx), Some(elem));
    }

    unsafe fn remove_at(&mut self, idx: usize) -> T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // `take()` leaves a `None` behind, which keeps the slot initialized
        // as our invariant requires.
        match self.data.get_unchecked_mut(idx).take() {
            // The precondition guarantees us that the slot is not empty, thus
            // we use this unsafe `unreachable_unchecked` to omit the branch.
            None => unreachable_unchecked(),
            Some(elem) => elem,
        }
    }

    unsafe fn get_unchecked(&self, idx: usize) -> &T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        match self.data.get_unchecked(idx) {
            // The precondition guarantees us that the slot is not empty, thus
            // we use this unsafe `unreachable_unchecked` to omit the branch.
            None => unreachable_unchecked(),
            Some(elem) => elem,
        }
    }

    unsafe fn get_unchecked_mut(&mut self, idx: usize) -> &mut T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        match self.data.get_unchecked_mut(idx) {
            // The precondition guarantees us that the slot is not empty, thus
            // we use this unsafe `unreachable_unchecked` to omit the branch.
            None => unreachable_unchecked(),
            Some(elem) => elem,
        }
    }

    fn clear(&mut self) {
        // We can assume that all existing elements have an index lower than
        // `len` (this is one of the invariants of the `Core` interface).
        // Calling `clear` on the `Vec` would drop all remaining elements and
        // sets the length to 0. However those values would subsequently become
        // uninitialized. Thus we call take for each item to leave a
        // `None` value in it's place and set the length to zero.
        // (`iter_mut` only visits slots `0..len`, which is exactly the range
        // that may contain elements.)
        for item in self.data.iter_mut() {
            drop(item.take());
        }
        unsafe {
            self.set_len(0);
        }
    }

    unsafe fn swap(&mut self, a: usize, b: usize) {
        // NOTE(review): unlike the other unsafe methods, this one has no
        // `debug_assert!`s — the caller must still guarantee `a < cap` and
        // `b < cap`.
        //
        // We can't just have two mutable references, so we use `ptr::swap`
        // instead of `mem::swap`. We do not use the slice's `swap` method as
        // that performs bound checks.
        let p = self.data.as_mut_ptr();
        let pa: *mut _ = p.add(a);
        let pb: *mut _ = p.add(b);
        ptr::swap(pa, pb);
    }
}
249
250impl<T: Clone> Clone for OptionCore<T> {
251 fn clone(&self) -> Self {
252 // Cloning the vector is safe: the `Vec` implementation won't access
253 // uninitialized memory. However, simply cloning it would be wrong for
254 // two reasons:
255 //
256 // - `Vec` might not retain the same capacity when cloning it. But this
257 // is important for us.
258 // - The memory after its length is probably uninitialized.
259 //
260 // To fix both issues, we create a new vec with the appopriate capacity
261 // and extend it with the values of the other. Placing None objects in
262 // the extra capacity and then set the length.
263 let mut data = Vec::with_capacity(self.data.capacity());
264 data.extend(
265 self.data
266 .iter()
267 .cloned()
268 .chain(core::iter::repeat(None).take(self.data.capacity() - self.data.len())),
269 );
270 debug_assert_eq!(data.len(), self.data.capacity());
271 debug_assert_eq!(data.capacity(), self.data.capacity());
272 unsafe {
273 data.set_len(self.data.len());
274 }
275 Self { data }
276 }
277}
278
279impl<T> Drop for OptionCore<T> {
280 fn drop(&mut self) {
281 // We don't need to anything! The `Vec` will be dropped which is
282 // correct: that will drop all remaining elements but won't touch
283 // non-existing elements. This manual `Drop` impl still exists to
284 // explain this fact and to make sure the automatic `Drop` impl won't
285 // lead to unsafety in the future.
286 }
287}
288
289// This impl is usually not used. `StableVec` has its own impl which doesn't
290// use this one.
291impl<T: fmt::Debug> fmt::Debug for OptionCore<T> {
292 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293 f.debug_tuple("OptionCore")
294 .field(&self.data)
295 .finish()
296 }
297}