stable_vec/core/bitvec.rs
1use ::core::{
2 fmt,
3 mem::{align_of, size_of},
4 ptr::{self, NonNull},
5};
6
7use alloc::alloc::{alloc, alloc_zeroed, dealloc, handle_alloc_error, realloc, Layout};
8
9use super::Core;
10
11
/// A `Core` implementation that is conceptually a `BitVec` and a `Vec<T>`.
///
/// This is the default core as it has quite a few advantages. For one, it does
/// not waste memory due to padding. The information about whether a slot is
/// filled or not is stored in a `BitVec` and thus only takes one bit per slot.
///
/// Using a `BitVec` has another important advantage: iterating over the
/// indices of a stable vector (i.e. without touching the actual data) is very
/// cache-friendly. This is due to the dense packing of information. It's also
/// possible to very quickly scan the bit vector in order to find filled/empty
/// slots.
///
/// However, this core implementation has disadvantages, too. It manages two
/// allocations which means that reallocating (growing or shrinking) has to
/// perform two allocations with the underlying memory allocator. Potentially
/// more important is the decrease of cache-friendliness when accessing
/// elements at random. Because in the worst case, this means that each element
/// access results in two cache-misses instead of only one.
///
/// For most use cases, this is a good choice. That's why it's default.
pub struct BitVecCore<T> {
    /// This is the memory that stores the actual slots/elements. If a slot is
    /// empty, the memory at that index is undefined. Dangling while
    /// `cap == 0` (see `new()`), so it must never be dereferenced then.
    elem_ptr: NonNull<T>,

    /// Stores whether or not slots are filled (1) or empty (0). Stores one bit
    /// per slot. Is stored as `usize` instead of `u8` to potentially improve
    /// performance when finding a hole or counting the elements. Dangling
    /// while `cap == 0`.
    bit_ptr: NonNull<usize>,

    /// The capacity: the length of the `elem_ptr` buffer. Corresponds to the
    /// `cap` of the `Core` definition.
    cap: usize,

    /// The `len`: corresponds to the `len` of the `Core` definition.
    len: usize,
}
49
/// Number of bits in a single `usize` block of the bit vector.
const BITS_PER_USIZE: usize = 8 * size_of::<usize>();
51
52impl<T> BitVecCore<T> {
53 /// Deallocates both pointers, sets them to the same value as `new()` does
54 /// and sets `cap` to 0.
55 ///
56 /// # Formal
57 ///
58 /// **Preconditions**:
59 /// - `self.len == 0`
60 /// - All slots are empty
61 unsafe fn dealloc(&mut self) {
62 if self.cap != 0 {
63 if size_of::<T>() != 0 {
64 dealloc(self.elem_ptr.as_ptr() as *mut _, self.old_elem_layout());
65 }
66
67 dealloc(self.bit_ptr.as_ptr() as *mut _, self.old_bit_layout());
68 self.cap = 0;
69 }
70 }
71
72 /// Returns the layout that was used for the last allocation of `elem_ptr`.
73 /// `self.cap` must not be 0 and `T` must not be a ZST, or else this
74 /// method's behavior is undefined.
75 unsafe fn old_elem_layout(&self) -> Layout {
76 Layout::from_size_align_unchecked(
77 // This can't overflow due to being previously allocated.
78 self.cap * size_of::<T>(),
79 align_of::<T>(),
80 )
81 }
82
83 /// Returns the layout that was used for the last allocation of `bit_ptr`.
84 /// `self.cap` must not be 0 or else this method's behavior is undefined.
85 unsafe fn old_bit_layout(&self) -> Layout {
86 Layout::from_size_align_unchecked(
87 size_of::<usize>() * num_usizes_for(self.cap),
88 align_of::<usize>(),
89 )
90 }
91}
92
impl<T> Core<T> for BitVecCore<T> {
    fn new() -> Self {
        // No allocation happens here: dangling pointers are fine as long as
        // `cap == 0`, because nothing is dereferenced until `realloc` ran.
        Self {
            elem_ptr: NonNull::dangling(),
            bit_ptr: NonNull::dangling(),
            cap: 0,
            len: 0,
        }
    }

    fn len(&self) -> usize {
        self.len
    }

    unsafe fn set_len(&mut self, new_len: usize) {
        debug_assert!(new_len <= self.cap());
        // Other precondition is too expensive to test, even in debug:
        // ∀ i in `new_len..self.cap()` ⇒ `self.has_element_at(i) == false`

        self.len = new_len;

        // The formal requirements of this method hold:
        //
        // **Invariants**:
        // - *slot data* -> trivially holds, we do not touch that
        // - `len ≤ cap` -> that's a precondition
        //
        // **Postconditions**:
        // - `self.len() == new_len`: trivially holds
    }

    fn cap(&self) -> usize {
        self.cap
    }

    #[inline(never)]
    #[cold]
    unsafe fn realloc(&mut self, new_cap: usize) {
        debug_assert!(new_cap >= self.len());
        debug_assert!(new_cap <= isize::max_value() as usize);

        // Outlined cold panic path; keeps the happy path of `realloc` small.
        #[inline(never)]
        #[cold]
        fn capacity_overflow() -> ! {
            panic!("capacity overflow in `stable_vec::BitVecCore::realloc` (attempt \
                to allocate more than `usize::MAX` bytes");
        }

        // Handle special case: shrinking to capacity 0 is a full deallocation.
        if new_cap == 0 {
            // Due to preconditions, we know that `self.len == 0` and that in
            // turn tells us that there aren't any filled slots. So we can just
            // deallocate the memory.
            self.dealloc();
            return;
        }


        // ----- (Re)allocate element memory ---------------------------------

        // We only have to allocate if our elements are not zero-sized. Else,
        // we just don't do anything (the pointer stays dangling).
        if size_of::<T>() != 0 {
            // Get the new number of bytes for the allocation and create the
            // memory layout. The multiplication is checked because `new_cap`
            // is caller-controlled.
            let size = new_cap.checked_mul(size_of::<T>())
                .unwrap_or_else(|| capacity_overflow());
            let new_elem_layout = Layout::from_size_align_unchecked(size, align_of::<T>());

            // (Re)allocate memory. `realloc` copies the old slot contents
            // over; with `cap == 0` there is nothing to copy, so we use a
            // fresh `alloc`.
            let ptr = if self.cap == 0 {
                alloc(new_elem_layout)
            } else {
                realloc(self.elem_ptr.as_ptr() as *mut _, self.old_elem_layout(), size)
            };

            // If the element allocation failed, we quit the program with an
            // OOM error.
            if ptr.is_null() {
                handle_alloc_error(new_elem_layout);
            }

            // We already overwrite the pointer here. It is not read/changed
            // anywhere else in this function.
            self.elem_ptr = NonNull::new_unchecked(ptr as *mut _);
        };


        // ----- (Re)allocate bitvec memory ----------------------------------
        {
            // Get the new number of required bytes for the allocation and
            // create the memory layout. This cannot overflow: it is roughly
            // `new_cap / 8`, and `new_cap <= isize::MAX` (asserted above).
            let size = size_of::<usize>() * num_usizes_for(new_cap);
            let new_bit_layout = Layout::from_size_align_unchecked(size, align_of::<usize>());

            // (Re)allocate memory. The very first allocation is zeroed so
            // that all slots start out "empty".
            let ptr = if self.cap == 0 {
                alloc_zeroed(new_bit_layout)
            } else {
                realloc(self.bit_ptr.as_ptr() as *mut _, self.old_bit_layout(), size)
            };
            let ptr = ptr as *mut usize;

            // If the bit vector allocation failed, we quit the program with
            // an OOM error.
            if ptr.is_null() {
                handle_alloc_error(new_bit_layout);
            }

            // If we reallocated, the new memory is not necessarily zeroed, so
            // we need to do it. TODO: if `alloc` offers a `realloc_zeroed`
            // in the future, we should use that.
            //
            // Only the newly gained `usize` blocks need zeroing: bits past
            // the old `cap` inside the last old block are already 0 (they
            // start zeroed and are only set for filled slots `< cap`).
            if self.cap != 0 {
                let initialized_usizes = num_usizes_for(self.cap);
                let new_usizes = num_usizes_for(new_cap);
                if new_usizes > initialized_usizes {
                    ptr::write_bytes(
                        ptr.add(initialized_usizes),
                        0,
                        new_usizes - initialized_usizes,
                    );
                }
            }

            self.bit_ptr = NonNull::new_unchecked(ptr as *mut _);
        }

        self.cap = new_cap;

        // All formal requirements are met now:
        //
        // **Invariants**:
        // - *slot data*: by using `realloc` if `self.cap != 0`, the slot data
        //   (including deleted-flag) was correctly copied.
        // - `self.len()`: indeed didn't change
        //
        // **Postconditions**:
        // - `self.cap() == new_cap`: trivially holds due to last line.
    }

    unsafe fn has_element_at(&self, idx: usize) -> bool {
        debug_assert!(idx < self.cap());

        // The divisions will be turned into shift and 'and'-instructions
        // since `BITS_PER_USIZE` is a power of two.
        let usize_pos = idx / BITS_PER_USIZE;
        let bit_pos = idx % BITS_PER_USIZE;

        let block = *self.bit_ptr.as_ptr().add(usize_pos);
        ((block >> bit_pos) & 0b1) != 0
    }

    unsafe fn insert_at(&mut self, idx: usize, elem: T) {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx) == false);

        // We first write the value and then update the bitvector to avoid
        // potential double drops if a random panic appears. (If the bit were
        // set first and writing panicked, drop code would read garbage.)
        ptr::write(self.elem_ptr.as_ptr().add(idx), elem);

        let usize_pos = idx / BITS_PER_USIZE;
        let bit_pos = idx % BITS_PER_USIZE;

        // Set the slot's bit to 1 ("filled").
        let mask = 1 << bit_pos;
        *self.bit_ptr.as_ptr().add(usize_pos) |= mask;
    }

    unsafe fn remove_at(&mut self, idx: usize) -> T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // We first mark the value as deleted and then read the value.
        // Otherwise, a random panic could lead to a double drop.
        let usize_pos = idx / BITS_PER_USIZE;
        let bit_pos = idx % BITS_PER_USIZE;

        // Clear the slot's bit ("empty").
        let mask = !(1 << bit_pos);
        *self.bit_ptr.as_ptr().add(usize_pos) &= mask;

        // Move the value out; the slot's memory is now logically undefined.
        ptr::read(self.elem_ptr.as_ptr().add(idx))
    }

    unsafe fn get_unchecked(&self, idx: usize) -> &T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // The preconditions of this function guarantee us that all
        // preconditions for `add` are met and that we can safely dereference
        // the pointer.
        &*self.elem_ptr.as_ptr().add(idx)
    }

    unsafe fn get_unchecked_mut(&mut self, idx: usize) -> &mut T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // The preconditions of this function guarantee us that all
        // preconditions for `add` are met and that we can safely dereference
        // the pointer.
        &mut *self.elem_ptr.as_ptr().add(idx)
    }

    fn clear(&mut self) {
        unsafe {
            // We can assume that all existing elements have an index lower
            // than `len` (this is one of the invariants of the `Core`
            // interface), so dropping `0..len` drops every element.
            for idx in 0..self.len {
                if self.has_element_at(idx) {
                    ptr::drop_in_place(self.get_unchecked_mut(idx));
                }
            }
            // Mark all slots up to `len` as empty. Blocks past that are
            // untouched: their bits are already 0 by the invariant above.
            for bit_idx in 0..num_usizes_for(self.len) {
                *self.bit_ptr.as_ptr().add(bit_idx) = 0;
            }
            self.len = 0;
        }
    }

    // TODO: maybe override `{next|prev}_{hole|index}_from` for performance? In
    // principle we could scan the bitvector very quickly with specialized
    // instructions. Needs benchmarking.

    unsafe fn swap(&mut self, a: usize, b: usize) {
        // Swapping the bits is a bit annoying. To avoid branches we first xor
        // both previous bits.
        let a_existed = self.has_element_at(a);
        let b_existed = self.has_element_at(b);

        // `swap_bit` is 0 if both slots were empty or filled, and 1 if only
        // one slot was filled. That also means the mask is 0 if both were
        // empty/filled before, otherwise the mask has one bit set. We xor with
        // this mask, meaning that we will flip the corresponding bit (a
        // no-op xor when both bits were equal).
        let swap_bit = (a_existed ^ b_existed) as usize;

        // For a
        let usize_pos = a / BITS_PER_USIZE;
        let bit_pos = a % BITS_PER_USIZE;
        let mask = swap_bit << bit_pos;
        *self.bit_ptr.as_ptr().add(usize_pos) ^= mask;

        // For b
        let usize_pos = b / BITS_PER_USIZE;
        let bit_pos = b % BITS_PER_USIZE;
        let mask = swap_bit << bit_pos;
        *self.bit_ptr.as_ptr().add(usize_pos) ^= mask;

        // Finally swap the actual elements. `ptr::swap` allows overlapping
        // pointers, so `a == b` is fine.
        ptr::swap(
            self.elem_ptr.as_ptr().add(a),
            self.elem_ptr.as_ptr().add(b),
        );
    }
}
345
346impl<T> Drop for BitVecCore<T> {
347 fn drop(&mut self) {
348 // Drop all elements
349 self.clear();
350
351 unsafe {
352 // Deallocate the memory. `clear()` sets the length to 0 and drops
353 // all existing elements, so it's fine to call `dealloc`.
354 self.dealloc();
355 }
356 }
357}
358
impl<T: Clone> Clone for BitVecCore<T> {
    fn clone(&self) -> Self {
        let mut out = Self::new();

        if self.cap != 0 {
            // All of this is scary
            unsafe {
                // Allocate buffers of the same capacity. Since `out.cap` is
                // still 0, this also zeroes the fresh bit vector memory.
                out.realloc(self.cap);

                // Copy element data over (ZSTs carry no data). Panic safety:
                // if a `clone()` call panics here, `out` is dropped with
                // `len == 0` and an all-zero bit vector, so already-cloned
                // elements leak but nothing is dropped twice.
                if size_of::<T>() != 0 {
                    let mut idx = 0;
                    // NOTE(review): `first_filled_slot_from` is not defined
                    // in this file — presumably a provided method of `Core`
                    // that scans the bit vector; confirm in `super`.
                    while let Some(next) = self.first_filled_slot_from(idx) {
                        let clone = self.get_unchecked(next).clone();
                        ptr::write(out.elem_ptr.as_ptr().add(next), clone);

                        idx = next + 1;
                    }
                }

                // Copy bitvec data over, marking the same slots as filled.
                ptr::copy_nonoverlapping(
                    self.bit_ptr.as_ptr(),
                    out.bit_ptr.as_ptr(),
                    num_usizes_for(self.cap)
                );

                // Only now, with all data in place, adopt the source's `len`.
                out.set_len(self.len);
            }
        }

        out
    }
}
393
394// This impl is usually not used. `StableVec` has its own impl which doesn't
395// use this one.
396impl<T> fmt::Debug for BitVecCore<T> {
397 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
398 f.debug_struct("BitVecCore")
399 .field("len", &self.len())
400 .field("cap", &self.cap())
401 .finish()
402 }
403}
404
// SAFETY: `Send` and `Sync` are not derived automatically because this type
// stores raw pointers. Both are sound to implement manually (given that `T`
// implements them): each instance exclusively owns the memory behind its two
// pointers (no data is shared between instances), so moving a value to
// another thread is fine, and there is no interior mutability, so sharing
// `&BitVecCore<T>` across threads is fine as well.
unsafe impl<T: Send> Send for BitVecCore<T> {}
unsafe impl<T: Sync> Sync for BitVecCore<T> {}
412
413#[inline(always)]
414fn num_usizes_for(cap: usize) -> usize {
415 // We need ⌈new_cap / BITS_PER_USIZE⌉ many usizes to store all required
416 // bits. We do rounding up by first adding the (BITS_PER_USIZE - 1).
417 (cap + (BITS_PER_USIZE - 1)) / BITS_PER_USIZE
418}
419
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `num_usizes_for` around the word-size boundaries for the
    /// current target pointer width.
    #[test]
    fn num_usizes() {
        assert_eq!(num_usizes_for(0), 0);
        assert_eq!(num_usizes_for(1), 1);
        assert_eq!(num_usizes_for(2), 1);
        assert_eq!(num_usizes_for(3), 1);

        #[cfg(target_pointer_width = "64")]
        {
            assert_eq!(num_usizes_for(63), 1);
            assert_eq!(num_usizes_for(64), 1);
            assert_eq!(num_usizes_for(65), 2);
            assert_eq!(num_usizes_for(66), 2);
            // Was an accidental duplicate of the `66` check; `67` completes
            // the five-value pattern used by the other groups below.
            assert_eq!(num_usizes_for(67), 2);

            assert_eq!(num_usizes_for(255), 4);
            assert_eq!(num_usizes_for(256), 4);
            assert_eq!(num_usizes_for(257), 5);
            assert_eq!(num_usizes_for(258), 5);
            assert_eq!(num_usizes_for(259), 5);
        }

        #[cfg(target_pointer_width = "32")]
        {
            assert_eq!(num_usizes_for(31), 1);
            assert_eq!(num_usizes_for(32), 1);
            assert_eq!(num_usizes_for(33), 2);
            assert_eq!(num_usizes_for(34), 2);
            assert_eq!(num_usizes_for(35), 2);

            assert_eq!(num_usizes_for(127), 4);
            assert_eq!(num_usizes_for(128), 4);
            assert_eq!(num_usizes_for(129), 5);
            assert_eq!(num_usizes_for(130), 5);
            assert_eq!(num_usizes_for(131), 5);
        }
    }
}
461}