stable_vec/core/option.rs
use std::{
    prelude::v1::*,
    fmt,
    hint::unreachable_unchecked,
    ptr,
};

use super::Core;

/// A `Core` implementation that is essentially a `Vec<Option<T>>`.
///
/// This implementation is quite different from the `BitVecCore`. This one only
/// manages one allocation, meaning it does not suffer from the same
/// disadvantages as `BitVecCore`. This can sometimes lead to better memory
/// access times due to caching effects. However, this implementation has the
/// major disadvantage of wasting memory in most cases.
///
/// Usually, `size_of::<Option<T>>() > size_of::<T>()`. Due to alignment, the
/// difference can be as large as the alignment of `T` (4 bytes for `u32`, for
/// example). But since only one bit is needed to store the `None`/`Some`
/// information, almost all of that memory is wasted. A worst case scenario is
/// something like `Option<u32>`: this is 8 bytes large (on most platforms),
/// meaning that a stable vector with `OptionCore` would use twice as much
/// memory as a `Vec<u32>`. This is not only wasteful, but also has a negative
/// effect on speed as the information is not very densely packed.
///
/// In general, only use this implementation in one of these cases:
///
/// - Your `T` is a non-nullable type like `NonNull<_>` or a reference, meaning
///   that `Option<T>` has the same size as `T`. This is then even more memory
///   efficient than the default core implementation. Iterating over the
///   indices of a stable vector is still slower in this case, as the relevant
///   information is spread further apart.
/// - Your `T` is very large, meaning that the amount of wasted memory is small
///   in comparison.
///
/// In both cases, switching from the default implementation to this one only
/// makes sense after you have measured that you actually gain performance from
/// it. The interface of both implementations is exactly the same.
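///
/// A rough illustration of the sizes involved (a sketch; the exact numbers are
/// platform dependent, which is why this example is not run as a doc test):
///
/// ```ignore
/// use std::mem::size_of;
///
/// // `u32` has no niche, so the `None`/`Some` tag costs a full alignment unit.
/// assert_eq!(size_of::<u32>(), 4);
/// assert_eq!(size_of::<Option<u32>>(), 8);
///
/// // `Box<u32>` is never null, so `Option` can reuse that niche and stays
/// // pointer sized.
/// assert_eq!(size_of::<Option<Box<u32>>>(), size_of::<Box<u32>>());
/// ```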
pub struct OptionCore<T> {
    /// The element data and the deleted-slot information in one.
    ///
    /// The `len` and `capacity` properties of the vector directly correspond
    /// to the `len` and `cap` properties of the `Core` trait. However, as a
    /// `Vec` assumes that everything beyond `len` is uninitialized, we have to
    /// make sure to only interact with it in a particular way.
    ///
    /// This vector is in a correct state at all times. This means that the
    /// vector can simply be dropped and it wouldn't access uninitialized
    /// values or leak memory.
    ///
    /// This implementation makes one potentially problematic assumption. When
    /// we allocate new memory, we initialize all slots to `None`. That way we
    /// can access all slots with indices < cap. However, the `Vec` docs state:
    ///
    /// > Its uninitialized memory is scratch space that it may use however it
    /// > wants. It will generally just do whatever is most efficient or
    /// > otherwise easy to implement. [...] There is one case which we will
    /// > not break, however: using `unsafe` code to write to the excess
    /// > capacity, and then increasing the length to match, is always valid.
    ///
    /// This probably means that we cannot rely on the contents of the excess
    /// capacity memory. However, we are careful about how we touch the vector
    /// and we do not use any methods that would benefit in any way from
    /// touching that memory. Therefore we assume that all slots with indices
    /// in `len..cap` stay initialized to `None`. A couple of methods rely on
    /// that assumption.
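    ///
    /// For example (a sketch of the buffer contents, not real syntax): with
    /// `len = 3` and `cap = 6`, the buffer could look like
    /// `[Some(a), None, Some(c), None, None, None]`. Slots below `len` may be
    /// `Some` or `None`, while every slot in `len..cap` is kept at `None`.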
    data: Vec<Option<T>>,
}

impl<T> Core<T> for OptionCore<T> {
    fn new() -> Self {
        Self {
            data: Vec::new(),
        }
    }

    fn len(&self) -> usize {
        self.data.len()
    }

    fn cap(&self) -> usize {
        self.data.capacity()
    }

    unsafe fn set_len(&mut self, new_len: usize) {
        debug_assert!(new_len <= self.cap());
        // The other precondition is too expensive to check, even in debug mode:
        // ∀ i in `new_len..self.cap()` ⇒ `self.has_element_at(i) == false`
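        //
        // Example (a sketch): with `cap = 8` and elements only at indices 0
        // and 2, `set_len(3)` satisfies that precondition, while `set_len(2)`
        // would not, because slot 2 still holds an element.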

        // We can just call `set_len` on the vector as both of that method's
        // preconditions are upheld:
        // - "new_len must be less than or equal to capacity()": this is also a
        //   direct precondition of this method.
        // - "The elements at old_len..new_len must be initialized": all slots
        //   of the vector are always initialized. On allocation, everything is
        //   initialized to `None`. All slots in `old_len..new_len` are always
        //   `None`, as stated by the `Core` invariant "`len ≤ i < cap`: slots
        //   with index `i` are always empty".
        self.data.set_len(new_len)
    }

    #[inline(never)]
    #[cold]
    unsafe fn realloc(&mut self, new_cap: usize) {
        debug_assert!(new_cap >= self.len());
        debug_assert!(new_cap <= isize::max_value() as usize);

        // Do different things depending on whether we shrink or grow.
        let old_cap = self.cap();
        let initialized_end = if new_cap > old_cap {
            // ----- We will grow the vector -----

            // We use `reserve_exact` here instead of creating a new vector,
            // because the former can use `realloc`, which is significantly
            // faster in many cases. See https://stackoverflow.com/a/39562813/2408867
            let additional = new_cap - self.data.len();
            self.data.reserve_exact(additional);

            // `Vec` preserves all elements up to its length. Beyond that, the
            // slots might have become uninitialized by `reserve_exact`. Thus
            // we need to initialize them again.
            self.data.len()
        } else if new_cap < old_cap {
            // We will shrink the vector. The only tool we have for this is
            // `shrink_to_fit`. In order to use it, we temporarily have to set
            // the length of the vector to the new capacity. This is fine:
            //
            // - If `new_cap < old_len`, we temporarily remove elements from
            //   the vector. But these are all `None`s, as guaranteed by the
            //   preconditions.
            // - If `new_cap > old_len`, we temporarily add elements to the
            //   vector. But these have all been initialized to `None`.
            let old_len = self.data.len();
            self.data.set_len(new_cap);
            self.data.shrink_to_fit();
            self.data.set_len(old_len);

            // When calling `shrink_to_fit`, the `Vec` cannot do anything funky
            // with the elements up to its length (which at that time was
            // `new_cap`). However, all memory that might exist beyond that
            // (i.e. if `shrink_to_fit` does not manage to fit perfectly) might
            // be uninitialized now.
            new_cap
        } else {
            // If the requested capacity is exactly the current one, we do
            // nothing. We return the current capacity from this expression to
            // say that all elements are indeed initialized.
            self.data.capacity()
        };
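
        // Worked example (a sketch): growing with `len = 2`, `old_cap = 4` and
        // `new_cap = 8`. We call `reserve_exact(6)`, so the capacity becomes
        // at least 8, `initialized_end` is 2 and the loop below writes `None`
        // into every slot from index 2 up to the actual new capacity.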

        // We now need to potentially initialize some elements to `None`. The
        // index `initialized_end` tells us the end of the range where all
        // elements are guaranteed to be initialized. Thus we need to
        // initialize `initialized_end..self.data.capacity()`.
        let actual_capacity = self.data.capacity();
        let mut ptr = self.data.as_mut_ptr().add(initialized_end);
        let end = self.data.as_mut_ptr().add(actual_capacity);
        while ptr != end {
            ptr::write(ptr, None);
            ptr = ptr.add(1);
        }
    }

    /// Assumes that `idx < capacity`.
    unsafe fn has_element_at(&self, idx: usize) -> bool {
        debug_assert!(idx < self.cap());
        // We maintain the invariant that every slot not holding an element,
        // be it a removed item or extra capacity, contains a `None`. This is
        // upheld when removing items, in `realloc` and in `clear`. Therefore
        // we can create a valid reference to an `Option<T>` for any
        // `idx < self.cap()`.
        (&*self.data.as_ptr().add(idx)).is_some()
    }

    unsafe fn insert_at(&mut self, idx: usize, elem: T) {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx) == false);

        // We use `ptr::write` instead of a simple assignment here for
        // performance reasons. An assignment would try to drop the value on
        // the left hand side. Since we know from our preconditions that this
        // value is in fact `None` and we thus never need to drop it,
        // `ptr::write` is faster.
        ptr::write(self.data.as_mut_ptr().add(idx), Some(elem));
    }

    unsafe fn remove_at(&mut self, idx: usize) -> T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        match self.data.get_unchecked_mut(idx).take() {
            // The precondition guarantees us that the slot is not empty, thus
            // we use this unsafe `unreachable_unchecked` to omit the branch.
            None => unreachable_unchecked(),
            Some(elem) => elem,
        }
    }

    unsafe fn get_unchecked(&self, idx: usize) -> &T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        match self.data.get_unchecked(idx) {
            // The precondition guarantees us that the slot is not empty, thus
            // we use this unsafe `unreachable_unchecked` to omit the branch.
            None => unreachable_unchecked(),
            Some(elem) => elem,
        }
    }

    unsafe fn get_unchecked_mut(&mut self, idx: usize) -> &mut T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        match self.data.get_unchecked_mut(idx) {
            // The precondition guarantees us that the slot is not empty, thus
            // we use this unsafe `unreachable_unchecked` to omit the branch.
            None => unreachable_unchecked(),
            Some(elem) => elem,
        }
    }

    fn clear(&mut self) {
        // We can assume that all existing elements have an index lower than
        // `len` (this is one of the invariants of the `Core` interface).
        // Calling `clear` on the `Vec` would drop all remaining elements and
        // set the length to 0, but those slots would then count as
        // uninitialized again. Thus we instead call `take` on each item to
        // leave a `None` in its place and set the length to zero ourselves.
        for item in self.data.iter_mut() {
            drop(item.take());
        }
        unsafe {
            self.set_len(0);
        }
    }

    unsafe fn swap(&mut self, a: usize, b: usize) {
        // Both indices have to be smaller than the capacity. We can't hold
        // two mutable references into the vector at once, so we use
        // `ptr::swap` instead of `mem::swap`. We do not use the slice's `swap`
        // method as that performs bounds checks.
        let p = self.data.as_mut_ptr();
        let pa: *mut _ = p.add(a);
        let pb: *mut _ = p.add(b);
        ptr::swap(pa, pb);
    }
}

impl<T: Clone> Clone for OptionCore<T> {
    fn clone(&self) -> Self {
        // Cloning the vector is safe: the `Vec` implementation won't access
        // uninitialized memory. However, simply cloning it would be wrong for
        // two reasons:
        //
        // - `Vec` might not retain the same capacity when cloning it. But this
        //   is important for us.
        // - The memory after its length is probably uninitialized.
        //
        // To fix both issues, we create a new vector with the appropriate
        // capacity and extend it with clones of the other's values, placing
        // `None` values in the extra capacity, and then set the length.
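        //
        // Small example (a sketch): if `self.data` has length 2 with elements
        // `[a, b]` and a capacity of 4, the new vector is filled with
        // `[a.clone(), b.clone(), None, None]` and its length is then set back
        // to 2.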
        let mut data = Vec::with_capacity(self.data.capacity());
        data.extend(
            self.data
                .iter()
                .cloned()
                .chain(std::iter::repeat(None).take(self.data.capacity() - self.data.len())),
        );
        debug_assert_eq!(data.len(), self.data.capacity());
        debug_assert_eq!(data.capacity(), self.data.capacity());
        unsafe {
            data.set_len(self.data.len());
        }
        Self { data }
    }
}

impl<T> Drop for OptionCore<T> {
    fn drop(&mut self) {
        // We don't need to do anything! The `Vec` will be dropped, which is
        // correct: that will drop all remaining elements but won't touch
        // non-existing elements. This manual `Drop` impl still exists to
        // explain this fact and to make sure the automatic `Drop` impl won't
        // lead to unsafety in the future.
    }
}

// This impl is usually not used. `StableVec` has its own impl which doesn't
// use this one.
impl<T: fmt::Debug> fmt::Debug for OptionCore<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_tuple("OptionCore")
            .field(&self.data)
            .finish()
    }
}