stable_vec/core/bitvec.rs
use std::{
    alloc::{alloc, alloc_zeroed, dealloc, handle_alloc_error, realloc, Layout},
    fmt,
    mem::{align_of, size_of},
    ptr::{self, NonNull},
};

use super::Core;

/// A `Core` implementation that is conceptually a `BitVec` and a `Vec<T>`.
///
/// This is the default core as it has quite a few advantages. For one, it
/// does not waste memory due to padding. The information about whether a slot
/// is filled or not is stored in a `BitVec` and thus only takes one bit per
/// slot.
///
/// Using a `BitVec` has another important advantage: iterating over the
/// indices of a stable vector (i.e. without touching the actual data) is very
/// cache-friendly. This is due to the dense packing of information. It's also
/// possible to very quickly scan the bit vector in order to find filled/empty
/// slots.
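///
/// For illustration, a conceptual picture of the two buffers for a stable
/// vector holding elements at indices 0 and 2 (`a` and `c` stand for
/// arbitrary stored values):
///
/// ```text
/// elem_ptr: [ a | <undefined> | c | <undefined> ]    (cap = 4)
/// bit_ptr:    1        0        1        0           (one bit per slot)
/// ```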
///
/// However, this core implementation also has disadvantages. It manages two
/// allocations, which means that growing or shrinking has to perform two
/// calls to the underlying memory allocator. Potentially more important is
/// the decreased cache-friendliness of random element accesses: in the worst
/// case, each access results in two cache misses instead of one.
///
/// For most use cases, this is a good choice. That's why it's the default.
pub struct BitVecCore<T> {
    /// This is the memory that stores the actual slots/elements. If a slot is
    /// empty, the memory at that index is undefined.
    elem_ptr: NonNull<T>,

    /// Stores whether slots are filled (1) or empty (0), one bit per slot.
    /// The bits are stored as `usize`s instead of `u8`s to potentially
    /// improve performance when finding a hole or counting the elements.
    bit_ptr: NonNull<usize>,

    /// The capacity: the length of the `elem_ptr` buffer. Corresponds to the
    /// `cap` of the `Core` definition.
    cap: usize,

    /// The `len`: corresponds to the `len` of the `Core` definition.
    len: usize,
}

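// The number of bits in a `usize`: 64 on 64-bit targets, 32 on 32-bit ones.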
const BITS_PER_USIZE: usize = size_of::<usize>() * 8;

impl<T> BitVecCore<T> {
    /// Deallocates both buffers and sets `cap` to 0, leaving `self` in the
    /// same logical state as `new()`. The pointers are stale afterwards and
    /// must not be dereferenced while `cap == 0`.
    ///
    /// # Formal
    ///
    /// **Preconditions**:
    /// - `self.len == 0`
    /// - All slots are empty
    unsafe fn dealloc(&mut self) {
        if self.cap != 0 {
            if size_of::<T>() != 0 {
                dealloc(self.elem_ptr.as_ptr() as *mut _, self.old_elem_layout());
            }

            dealloc(self.bit_ptr.as_ptr() as *mut _, self.old_bit_layout());
            self.cap = 0;
        }
    }

    /// Returns the layout that was used for the last allocation of
    /// `elem_ptr`. `self.cap` must not be 0 and `T` must not be a ZST, or
    /// else this method's behavior is undefined.
    unsafe fn old_elem_layout(&self) -> Layout {
        Layout::from_size_align_unchecked(
            // This can't overflow due to being previously allocated.
            self.cap * size_of::<T>(),
            align_of::<T>(),
        )
    }

    /// Returns the layout that was used for the last allocation of `bit_ptr`.
    /// `self.cap` must not be 0 or else this method's behavior is undefined.
    unsafe fn old_bit_layout(&self) -> Layout {
        Layout::from_size_align_unchecked(
            size_of::<usize>() * num_usizes_for(self.cap),
            align_of::<usize>(),
        )
    }
}

impl<T> Core<T> for BitVecCore<T> {
    fn new() -> Self {
        Self {
            elem_ptr: NonNull::dangling(),
            bit_ptr: NonNull::dangling(),
            cap: 0,
            len: 0,
        }
    }

    fn len(&self) -> usize {
        self.len
    }

    unsafe fn set_len(&mut self, new_len: usize) {
        debug_assert!(new_len <= self.cap());
        // The other precondition is too expensive to check, even in debug
        // builds: ∀ i in `new_len..self.cap()` ⇒ `self.has_element_at(i) == false`

        self.len = new_len;

        // The formal requirements of this method hold:
        //
        // **Invariants**:
        // - *slot data* -> trivially holds, we do not touch that
        // - `len ≤ cap` -> that's a precondition
        //
        // **Postconditions**:
        // - `self.len() == new_len`: trivially holds
    }

    fn cap(&self) -> usize {
        self.cap
    }

    #[inline(never)]
    #[cold]
    unsafe fn realloc(&mut self, new_cap: usize) {
        debug_assert!(new_cap >= self.len());
        debug_assert!(new_cap <= isize::max_value() as usize);

        #[inline(never)]
        #[cold]
        fn capacity_overflow() -> ! {
            panic!("capacity overflow in `stable_vec::BitVecCore::realloc` (attempt \
                to allocate more than `usize::MAX` bytes)");
        }

        // Handle the special case
        if new_cap == 0 {
            // Due to the preconditions, we know that `self.len == 0` and that
            // in turn tells us that there aren't any filled slots. So we can
            // just deallocate the memory.
            self.dealloc();
            return;
        }


        // ----- (Re)allocate element memory ---------------------------------

        // We only have to allocate if `T` is not a zero-sized type.
        // Otherwise, we just don't do anything.
        if size_of::<T>() != 0 {
            // Get the new number of bytes for the allocation and create the
            // memory layout.
            let size = new_cap.checked_mul(size_of::<T>())
                .unwrap_or_else(|| capacity_overflow());
            let new_elem_layout = Layout::from_size_align_unchecked(size, align_of::<T>());

            // (Re)allocate memory.
            let ptr = if self.cap == 0 {
                alloc(new_elem_layout)
            } else {
                realloc(self.elem_ptr.as_ptr() as *mut _, self.old_elem_layout(), size)
            };

            // If the element allocation failed, we quit the program with an
            // OOM error.
            if ptr.is_null() {
                handle_alloc_error(new_elem_layout);
            }

            // We already overwrite the pointer here. It is not read or
            // changed anywhere else in this function.
            self.elem_ptr = NonNull::new_unchecked(ptr as *mut _);
        }


        // ----- (Re)allocate bitvec memory ----------------------------------
        {
            // Get the new number of required bytes for the allocation and
            // create the memory layout.
            let size = size_of::<usize>() * num_usizes_for(new_cap);
            let new_bit_layout = Layout::from_size_align_unchecked(size, align_of::<usize>());

            // (Re)allocate memory.
            let ptr = if self.cap == 0 {
                alloc_zeroed(new_bit_layout)
            } else {
                realloc(self.bit_ptr.as_ptr() as *mut _, self.old_bit_layout(), size)
            };
            let ptr = ptr as *mut usize;

            // If the bit vector allocation failed, we quit the program with
            // an OOM error.
            if ptr.is_null() {
                handle_alloc_error(new_bit_layout);
            }

            // If we reallocated, the new memory is not necessarily zeroed, so
            // we need to do that ourselves. TODO: if `alloc` offers a
            // `realloc_zeroed` in the future, we should use that.
            if self.cap != 0 {
                let initialized_usizes = num_usizes_for(self.cap);
                let new_usizes = num_usizes_for(new_cap);
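                // E.g. growing `cap` from 64 to 80 on a 64-bit target:
                // `initialized_usizes == 1` and `new_usizes == 2`, so exactly
                // the newly added second `usize` block is zeroed below.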
                if new_usizes > initialized_usizes {
                    ptr::write_bytes(
                        ptr.add(initialized_usizes),
                        0,
                        new_usizes - initialized_usizes,
                    );
                }
            }

            self.bit_ptr = NonNull::new_unchecked(ptr as *mut _);
        }

        self.cap = new_cap;

        // All formal requirements are met now:
        //
        // **Invariants**:
        // - *slot data*: by using `realloc` if `self.cap != 0`, the slot data
        //   (including the deleted-flag) was correctly copied.
        // - `self.len()`: indeed didn't change
        //
        // **Postconditions**:
        // - `self.cap() == new_cap`: trivially holds due to the last line.
    }

    unsafe fn has_element_at(&self, idx: usize) -> bool {
        debug_assert!(idx < self.cap());

        // The divisions will be turned into shift and `and` instructions.
        let usize_pos = idx / BITS_PER_USIZE;
        let bit_pos = idx % BITS_PER_USIZE;
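
        // E.g. on a 64-bit target, `idx == 70` yields `usize_pos == 1` and
        // `bit_pos == 6`: the flag for slot 70 lives in bit 6 of the second
        // `usize` block.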

        let block = *self.bit_ptr.as_ptr().add(usize_pos);
        ((block >> bit_pos) & 0b1) != 0
    }

    unsafe fn insert_at(&mut self, idx: usize, elem: T) {
        debug_assert!(idx < self.cap());
        debug_assert!(!self.has_element_at(idx));

        // We first write the value and then update the bit vector to avoid a
        // potential double drop if an unexpected panic occurs.
        ptr::write(self.elem_ptr.as_ptr().add(idx), elem);

        let usize_pos = idx / BITS_PER_USIZE;
        let bit_pos = idx % BITS_PER_USIZE;

        let mask = 1 << bit_pos;
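        // E.g. `bit_pos == 5` gives `mask == 0b10_0000`; or-ing it in sets
        // exactly the bit for this slot.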
        *self.bit_ptr.as_ptr().add(usize_pos) |= mask;
    }

    unsafe fn remove_at(&mut self, idx: usize) -> T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // We first mark the slot as empty and then read the value. Otherwise,
        // an unexpected panic could lead to a double drop.
        let usize_pos = idx / BITS_PER_USIZE;
        let bit_pos = idx % BITS_PER_USIZE;

        let mask = !(1 << bit_pos);
        *self.bit_ptr.as_ptr().add(usize_pos) &= mask;

        ptr::read(self.elem_ptr.as_ptr().add(idx))
    }

    unsafe fn get_unchecked(&self, idx: usize) -> &T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // The preconditions of this function guarantee that all preconditions
        // for `add` are met and that we can safely dereference the pointer.
        &*self.elem_ptr.as_ptr().add(idx)
    }

    unsafe fn get_unchecked_mut(&mut self, idx: usize) -> &mut T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // The preconditions of this function guarantee that all preconditions
        // for `add` are met and that we can safely dereference the pointer.
        &mut *self.elem_ptr.as_ptr().add(idx)
    }

    fn clear(&mut self) {
        unsafe {
            // We can assume that all existing elements have an index lower
            // than `len` (this is one of the invariants of the `Core`
            // interface).
            for idx in 0..self.len {
                if self.has_element_at(idx) {
                    ptr::drop_in_place(self.get_unchecked_mut(idx));
                }
            }
            for block_idx in 0..num_usizes_for(self.len) {
                *self.bit_ptr.as_ptr().add(block_idx) = 0;
            }
            self.len = 0;
        }
    }

    // TODO: maybe override `{next|prev}_{hole|index}_from` for performance?
    // In principle we could scan the bit vector very quickly with specialized
    // instructions. Needs benchmarking.

    unsafe fn swap(&mut self, a: usize, b: usize) {
        // Swapping the bits is a bit annoying. To avoid branches, we first
        // xor both previous bits.
        let a_existed = self.has_element_at(a);
        let b_existed = self.has_element_at(b);

        // `swap_bit` is 0 if both slots were empty or both were filled, and
        // 1 if exactly one of the two slots was filled. That also means the
        // mask below is 0 if both were empty/filled before; otherwise the
        // mask has one bit set. We xor with this mask, meaning that we flip
        // the corresponding bit.
        let swap_bit = (a_existed ^ b_existed) as usize;
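
        // E.g. if `a` was filled and `b` empty, `swap_bit == 1` and both
        // bits get flipped below, so the "filled" flag moves from `a` to
        // `b`; if both slots agreed, `swap_bit == 0` and the xors are no-ops.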

        // For a
        let usize_pos = a / BITS_PER_USIZE;
        let bit_pos = a % BITS_PER_USIZE;
        let mask = swap_bit << bit_pos;
        *self.bit_ptr.as_ptr().add(usize_pos) ^= mask;

        // For b
        let usize_pos = b / BITS_PER_USIZE;
        let bit_pos = b % BITS_PER_USIZE;
        let mask = swap_bit << bit_pos;
        *self.bit_ptr.as_ptr().add(usize_pos) ^= mask;

        // Finally, swap the actual elements
        ptr::swap(
            self.elem_ptr.as_ptr().add(a),
            self.elem_ptr.as_ptr().add(b),
        );
    }
}

impl<T> Drop for BitVecCore<T> {
    fn drop(&mut self) {
        // Drop all elements
        self.clear();

        unsafe {
            // Deallocate the memory. `clear()` sets the length to 0 and drops
            // all existing elements, so it's fine to call `dealloc`.
            self.dealloc();
        }
    }
}

impl<T: Clone> Clone for BitVecCore<T> {
    fn clone(&self) -> Self {
        let mut out = Self::new();

        if self.cap != 0 {
            // All of this is scary
            unsafe {
                out.realloc(self.cap);

                // Copy element data over
                if size_of::<T>() != 0 {
                    let mut idx = 0;
                    while let Some(next) = self.first_filled_slot_from(idx) {
                        let clone = self.get_unchecked(next).clone();
                        ptr::write(out.elem_ptr.as_ptr().add(next), clone);

                        idx = next + 1;
                    }
                }

                // Copy bitvec data over
                ptr::copy_nonoverlapping(
                    self.bit_ptr.as_ptr(),
                    out.bit_ptr.as_ptr(),
                    num_usizes_for(self.cap),
                );

                out.set_len(self.len);
            }
        }

        out
    }
}

// This impl is usually not used. `StableVec` has its own impl which doesn't
// use this one.
impl<T> fmt::Debug for BitVecCore<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("BitVecCore")
            .field("len", &self.len())
            .field("cap", &self.cap())
            .finish()
    }
}

// Implement `Send` and `Sync`. These are not automatically implemented as we
// use raw pointers. But they are safe to implement (given that `T` implements
// them). We do not have interior mutability, thus we can implement `Sync`. We
// also do not share any data with other instances of this type, meaning that
// `Send` can be implemented.
unsafe impl<T: Send> Send for BitVecCore<T> {}
unsafe impl<T: Sync> Sync for BitVecCore<T> {}

#[inline(always)]
fn num_usizes_for(cap: usize) -> usize {
    // We need ⌈cap / BITS_PER_USIZE⌉ many `usize`s to store all required
    // bits. We round up by first adding (BITS_PER_USIZE - 1).
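    // E.g. with 64 bits per `usize`: num_usizes_for(65) = (65 + 63) / 64 = 2.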
    (cap + (BITS_PER_USIZE - 1)) / BITS_PER_USIZE
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn num_usizes() {
        assert_eq!(num_usizes_for(0), 0);
        assert_eq!(num_usizes_for(1), 1);
        assert_eq!(num_usizes_for(2), 1);
        assert_eq!(num_usizes_for(3), 1);

        #[cfg(target_pointer_width = "64")]
        {
            assert_eq!(num_usizes_for(63), 1);
            assert_eq!(num_usizes_for(64), 1);
            assert_eq!(num_usizes_for(65), 2);
            assert_eq!(num_usizes_for(66), 2);
            assert_eq!(num_usizes_for(67), 2);

            assert_eq!(num_usizes_for(255), 4);
            assert_eq!(num_usizes_for(256), 4);
            assert_eq!(num_usizes_for(257), 5);
            assert_eq!(num_usizes_for(258), 5);
            assert_eq!(num_usizes_for(259), 5);
        }

        #[cfg(target_pointer_width = "32")]
        {
            assert_eq!(num_usizes_for(31), 1);
            assert_eq!(num_usizes_for(32), 1);
            assert_eq!(num_usizes_for(33), 2);
            assert_eq!(num_usizes_for(34), 2);
            assert_eq!(num_usizes_for(35), 2);

            assert_eq!(num_usizes_for(127), 4);
            assert_eq!(num_usizes_for(128), 4);
            assert_eq!(num_usizes_for(129), 5);
            assert_eq!(num_usizes_for(130), 5);
            assert_eq!(num_usizes_for(131), 5);
        }
    }
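
    // A hand-written smoke test for the `Core` interface as implemented by
    // `BitVecCore`. This is only a sketch: it relies solely on methods that
    // appear in this file and upholds their documented preconditions
    // manually, which is why it needs `unsafe`.
    #[test]
    fn insert_remove_swap_roundtrip() {
        unsafe {
            let mut core = BitVecCore::<u32>::new();
            core.realloc(8);
            assert_eq!(core.cap(), 8);

            // All slots in `len..cap` are empty, so `set_len` is fine.
            core.set_len(3);

            core.insert_at(0, 27);
            core.insert_at(2, 3);
            assert!(core.has_element_at(0));
            assert!(!core.has_element_at(1));
            assert!(core.has_element_at(2));
            assert_eq!(*core.get_unchecked(0), 27);

            // Swap a filled slot with an empty one: the element and its
            // "filled" bit both move.
            core.swap(0, 1);
            assert!(!core.has_element_at(0));
            assert!(core.has_element_at(1));
            assert_eq!(*core.get_unchecked(1), 27);

            // Removing returns the stored value and empties the slot.
            assert_eq!(core.remove_at(2), 3);
            assert!(!core.has_element_at(2));

            // `clear` drops the remaining element and resets `len`.
            core.clear();
            assert_eq!(core.len(), 0);
        }
    }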
}