Skip to main content

stable_vec/core/
bitvec.rs

1use ::core::{
2    fmt,
3    mem::{align_of, size_of},
4    ptr::{self, NonNull},
5};
6
7use alloc::alloc::{alloc, alloc_zeroed, dealloc, handle_alloc_error, realloc, Layout};
8
9use super::Core;
10
11
/// A `Core` implementation that is conceptually a `BitVec` and a `Vec<T>`.
///
/// This is the default core as it has quite a few advantages. For one, it does
/// not waste memory due to padding. The information about whether a slot is
/// filled or not is stored in a `BitVec` and thus only takes one bit per slot.
///
/// Using a `BitVec` has another important advantage: iterating over the
/// indices of a stable vector (i.e. without touching the actual data) is very
/// cache-friendly. This is due to the dense packing of information. It's also
/// possible to very quickly scan the bit vector in order to find filled/empty
/// slots.
///
/// However, this core implementation has disadvantages, too. It manages two
/// allocations which means that reallocating (growing or shrinking) has to
/// perform two allocations with the underlying memory allocator. Potentially
/// more important is the decrease of cache-friendliness when accessing
/// elements at random. Because in the worst case, this means that each element
/// access results in two cache-misses instead of only one.
///
/// For most use cases, this is a good choice. That's why it's default.
pub struct BitVecCore<T> {
    /// This is the memory that stores the actual slots/elements. If a slot is
    /// empty, the memory at that index is undefined.
    ///
    /// Dangling (not a real allocation) while `cap == 0`, and also for
    /// zero-sized `T` (no element buffer is ever allocated then).
    elem_ptr: NonNull<T>,

    /// Stores whether or not slots are filled (1) or empty (0). Stores one bit
    /// per slot. Is stored as `usize` instead of `u8` to potentially improve
    /// performance when finding a hole or counting the elements.
    ///
    /// Dangling while `cap == 0`; otherwise points to an allocation of
    /// `num_usizes_for(cap)` usizes.
    bit_ptr: NonNull<usize>,

    /// The capacity: the length of the `elem_ptr` buffer. Corresponds to the
    /// `cap` of the `Core` definition.
    cap: usize,

    /// The `len`: corresponds to the `len` of the `Core` definition.
    len: usize,
}
49
50const BITS_PER_USIZE: usize = size_of::<usize>() * 8;
51
impl<T> BitVecCore<T> {
    /// Deallocates both pointers, sets them to the same value as `new()` does
    /// and sets `cap` to 0.
    ///
    /// # Formal
    ///
    /// **Preconditions**:
    /// - `self.len == 0`
    /// - All slots are empty
    unsafe fn dealloc(&mut self) {
        // If `cap == 0`, nothing was ever allocated and both pointers are
        // dangling — there is nothing to free.
        if self.cap != 0 {
            // For zero-sized `T` no element buffer was allocated, so only
            // deallocate the element memory for non-ZSTs.
            if size_of::<T>() != 0 {
                dealloc(self.elem_ptr.as_ptr() as *mut _, self.old_elem_layout());
            }

            // The bit vector is allocated whenever `cap != 0`, regardless of
            // `size_of::<T>()`. Note: `cap` must still be non-zero when the
            // `old_*_layout` calls above/below run, so it is reset last.
            dealloc(self.bit_ptr.as_ptr() as *mut _, self.old_bit_layout());
            self.cap = 0;
        }
    }

    /// Returns the layout that was used for the last allocation of `elem_ptr`.
    /// `self.cap` must not be 0 and `T` must not be a ZST, or else this
    /// method's behavior is undefined.
    unsafe fn old_elem_layout(&self) -> Layout {
        Layout::from_size_align_unchecked(
            // This can't overflow due to being previously allocated.
            self.cap * size_of::<T>(),
            align_of::<T>(),
        )
    }

    /// Returns the layout that was used for the last allocation of `bit_ptr`.
    /// `self.cap` must not be 0 or else this method's behavior is undefined.
    unsafe fn old_bit_layout(&self) -> Layout {
        Layout::from_size_align_unchecked(
            size_of::<usize>() * num_usizes_for(self.cap),
            align_of::<usize>(),
        )
    }
}
92
93impl<T> Core<T> for BitVecCore<T> {
94    fn new() -> Self {
95        Self {
96            elem_ptr: NonNull::dangling(),
97            bit_ptr: NonNull::dangling(),
98            cap: 0,
99            len: 0,
100        }
101    }
102
103    fn len(&self) -> usize {
104        self.len
105    }
106
107    unsafe fn set_len(&mut self, new_len: usize) {
108        debug_assert!(new_len <= self.cap());
109        // Other precondition is too expensive to test, even in debug:
110        // ∀ i in `new_len..self.cap()` ⇒ `self.has_element_at(i) == false`
111
112        self.len = new_len;
113
114        // The formal requirements of this method hold:
115        //
116        // **Invariants**:
117        // - *slot data* -> trivially holds, we do not touch that
118        // - `len ≤ cap` -> that's a precondition
119        //
120        // **Postconditons**:
121        // - `self.len() == new_len`: trivially holds
122    }
123
124    fn cap(&self) -> usize {
125        self.cap
126    }
127
128    #[inline(never)]
129    #[cold]
130    unsafe fn realloc(&mut self, new_cap: usize) {
131        debug_assert!(new_cap >= self.len());
132        debug_assert!(new_cap <= isize::max_value() as usize);
133
134        #[inline(never)]
135        #[cold]
136        fn capacity_overflow() -> ! {
137            panic!("capacity overflow in `stable_vec::BitVecCore::realloc` (attempt \
138                to allocate more than `usize::MAX` bytes");
139        }
140
141        // Handle special case
142        if new_cap == 0 {
143            // Due to preconditions, we know that `self.len == 0` and that in
144            // turn tells us that there aren't any filled slots. So we can just
145            // deallocate the memory.
146            self.dealloc();
147            return;
148        }
149
150
151        // ----- (Re)allocate element memory ---------------------------------
152
153        // We only have to allocate if our size are not zero-sized. Else, we
154        // just don't do anything.
155        if size_of::<T>() != 0 {
156            // Get the new number of bytes for the allocation and create the
157            // memory layout.
158            let size = new_cap.checked_mul(size_of::<T>())
159                .unwrap_or_else(|| capacity_overflow());
160            let new_elem_layout = Layout::from_size_align_unchecked(size, align_of::<T>());
161
162            // (Re)allocate memory.
163            let ptr = if self.cap == 0 {
164                alloc(new_elem_layout)
165            } else {
166                realloc(self.elem_ptr.as_ptr() as *mut _, self.old_elem_layout(), size)
167            };
168
169            // If the element allocation failed, we quit the program with an
170            // OOM error.
171            if ptr.is_null() {
172                 handle_alloc_error(new_elem_layout);
173            }
174
175            // We already overwrite the pointer here. It is not read/changed
176            // anywhere else in this function.
177            self.elem_ptr = NonNull::new_unchecked(ptr as *mut _);
178        };
179
180
181        // ----- (Re)allocate bitvec memory ----------------------------------
182        {
183            // Get the new number of required bytes for the allocation and
184            // create the memory layout.
185            let size = size_of::<usize>() * num_usizes_for(new_cap);
186            let new_bit_layout = Layout::from_size_align_unchecked(size, align_of::<usize>());
187
188            // (Re)allocate memory.
189            let ptr = if self.cap == 0 {
190                alloc_zeroed(new_bit_layout)
191            } else {
192                realloc(self.bit_ptr.as_ptr() as *mut _, self.old_bit_layout(), size)
193            };
194            let ptr = ptr as *mut usize;
195
196            // If the element allocation failed, we quit the program with an
197            // OOM error.
198            if ptr.is_null() {
199                 handle_alloc_error(new_bit_layout);
200            }
201
202            // If we reallocated, the new memory is not necessarily zeroed, so
203            // we need to do it. TODO: if `alloc` offers a `realloc_zeroed`
204            // in the future, we should use that.
205            if self.cap != 0 {
206                let initialized_usizes = num_usizes_for(self.cap);
207                let new_usizes = num_usizes_for(new_cap);
208                if new_usizes > initialized_usizes {
209                    ptr::write_bytes(
210                        ptr.add(initialized_usizes),
211                        0,
212                        new_usizes - initialized_usizes,
213                    );
214                }
215            }
216
217            self.bit_ptr = NonNull::new_unchecked(ptr as *mut _);
218        }
219
220        self.cap = new_cap;
221
222        // All formal requirements are met now:
223        //
224        // **Invariants**:
225        // - *slot data*: by using `realloc` if `self.cap != 0`, the slot data
226        //   (including deleted-flag) was correctly copied.
227        // - `self.len()`: indeed didn't change
228        //
229        // **Postconditons**:
230        // - `self.cap() == new_cap`: trivially holds due to last line.
231    }
232
233    unsafe fn has_element_at(&self, idx: usize) -> bool {
234        debug_assert!(idx < self.cap());
235
236        // The divisions will be turned into shift and 'and'-instructions.
237        let usize_pos = idx / BITS_PER_USIZE;
238        let bit_pos = idx % BITS_PER_USIZE;
239
240        let block = *self.bit_ptr.as_ptr().add(usize_pos);
241        ((block >> bit_pos) & 0b1) != 0
242    }
243
244    unsafe fn insert_at(&mut self, idx: usize, elem: T) {
245        debug_assert!(idx < self.cap());
246        debug_assert!(self.has_element_at(idx) == false);
247
248        // We first write the value and then update the bitvector to avoid
249        // potential double drops if a random panic appears.
250        ptr::write(self.elem_ptr.as_ptr().add(idx), elem);
251
252        let usize_pos = idx / BITS_PER_USIZE;
253        let bit_pos = idx % BITS_PER_USIZE;
254
255        let mask = 1 << bit_pos;
256        *self.bit_ptr.as_ptr().add(usize_pos) |= mask;
257    }
258
259    unsafe fn remove_at(&mut self, idx: usize) -> T {
260        debug_assert!(idx < self.cap());
261        debug_assert!(self.has_element_at(idx));
262
263        // We first mark the value as deleted and then read the value.
264        // Otherwise, a random panic could lead to a double drop.
265        let usize_pos = idx / BITS_PER_USIZE;
266        let bit_pos = idx % BITS_PER_USIZE;
267
268        let mask = !(1 << bit_pos);
269        *self.bit_ptr.as_ptr().add(usize_pos) &= mask;
270
271        ptr::read(self.elem_ptr.as_ptr().add(idx))
272    }
273
274    unsafe fn get_unchecked(&self, idx: usize) -> &T {
275        debug_assert!(idx < self.cap());
276        debug_assert!(self.has_element_at(idx));
277
278        // The preconditions of this function guarantees us that all
279        // preconditions for `add` are met and that we can safely dereference
280        // the pointer.
281        &*self.elem_ptr.as_ptr().add(idx)
282    }
283
284    unsafe fn get_unchecked_mut(&mut self, idx: usize) -> &mut T {
285        debug_assert!(idx < self.cap());
286        debug_assert!(self.has_element_at(idx));
287
288        // The preconditions of this function guarantees us that all
289        // preconditions for `add` are met and that we can safely dereference
290        // the pointer.
291        &mut *self.elem_ptr.as_ptr().add(idx)
292    }
293
294    fn clear(&mut self) {
295        unsafe {
296            // We can assume that all existing elements have an index lower than
297            // `len` (this is one of the invariants of the `Core` interface).
298            for idx in 0..self.len {
299                if self.has_element_at(idx) {
300                    ptr::drop_in_place(self.get_unchecked_mut(idx));
301                }
302            }
303            for bit_idx in 0..num_usizes_for(self.len) {
304                *self.bit_ptr.as_ptr().add(bit_idx) = 0;
305            }
306            self.len = 0;
307        }
308    }
309
310    // TODO: maybe override `{next|prev}_{hole|index}_from` for performance? In
311    // principle we could scan the bitvector very quickly with specialized
312    // instructions. Needs benchmarking.
313
314    unsafe fn swap(&mut self, a: usize, b: usize) {
315        // Swapping the bits is a bit annoying. To avoid branches we first xor
316        // both previous bits.
317        let a_existed = self.has_element_at(a);
318        let b_existed = self.has_element_at(b);
319
320        // `swap_bit` is 0 if both slots were empty of filled, and 1 if only
321        // only one slot was empty. That also means the mask is 0 if both were
322        // empty/filled before, otherwise the mask has one bit set. We xor with
323        // this mask, meaning that we will flip the corresponding bit.
324        let swap_bit = (a_existed ^ b_existed) as usize;
325
326        // For a
327        let usize_pos = a / BITS_PER_USIZE;
328        let bit_pos = a % BITS_PER_USIZE;
329        let mask = swap_bit << bit_pos;
330        *self.bit_ptr.as_ptr().add(usize_pos) ^= mask;
331
332        // For b
333        let usize_pos = b / BITS_PER_USIZE;
334        let bit_pos = b % BITS_PER_USIZE;
335        let mask = swap_bit << bit_pos;
336        *self.bit_ptr.as_ptr().add(usize_pos) ^= mask;
337
338        // Finally swap the actual elements
339        ptr::swap(
340            self.elem_ptr.as_ptr().add(a),
341            self.elem_ptr.as_ptr().add(b),
342        );
343    }
344}
345
impl<T> Drop for BitVecCore<T> {
    fn drop(&mut self) {
        // Drop all elements first. `clear()` also resets the bit vector and
        // sets `len` to 0, which is exactly the precondition of `dealloc`.
        self.clear();

        unsafe {
            // Deallocate the memory. `clear()` sets the length to 0 and drops
            // all existing elements, so it's fine to call `dealloc`.
            self.dealloc();
        }
    }
}
358
impl<T: Clone> Clone for BitVecCore<T> {
    fn clone(&self) -> Self {
        let mut out = Self::new();

        if self.cap != 0 {
            // All of this is scary. Note on panic safety: `out` starts with a
            // zeroed bit vector (`realloc` from cap 0 uses `alloc_zeroed`) and
            // `len == 0`, and the bits are only copied over at the very end.
            // So if a `clone()` call below panics, dropping `out` cannot
            // double-drop; already-cloned elements are merely leaked.
            unsafe {
                out.realloc(self.cap);

                // Copy element data over (skipped for ZSTs, which have no
                // element buffer).
                if size_of::<T>() != 0 {
                    let mut idx = 0;
                    // Walk only the filled slots; empty slots hold
                    // uninitialized memory that must not be read.
                    while let Some(next) = self.first_filled_slot_from(idx) {
                        let clone = self.get_unchecked(next).clone();
                        ptr::write(out.elem_ptr.as_ptr().add(next), clone);

                        idx = next + 1;
                    }
                }

                // Copy bitvec data over
                ptr::copy_nonoverlapping(
                    self.bit_ptr.as_ptr(),
                    out.bit_ptr.as_ptr(),
                    num_usizes_for(self.cap)
                );

                out.set_len(self.len);
            }
        }

        out
    }
}
393
394// This impl is usually not used. `StableVec` has its own impl which doesn't
395// use this one.
396impl<T> fmt::Debug for BitVecCore<T> {
397    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
398        f.debug_struct("BitVecCore")
399            .field("len", &self.len())
400            .field("cap", &self.cap())
401            .finish()
402    }
403}
404
// Implement `Send` and `Sync`. These are not automatically implemented as we
// use raw pointers. But they are safe to implement (given that `T` implements
// them). We do not have interior mutability, thus we can implement `Sync`. We
// also do not share any data with other instances of this type (both
// allocations are exclusively owned), meaning that `Send` can be implemented.
unsafe impl<T: Send> Send for BitVecCore<T> {}
unsafe impl<T: Sync> Sync for BitVecCore<T> {}
412
413#[inline(always)]
414fn num_usizes_for(cap: usize) -> usize {
415    // We need ⌈new_cap / BITS_PER_USIZE⌉ many usizes to store all required
416    // bits. We do rounding up by first adding the (BITS_PER_USIZE - 1).
417    (cap + (BITS_PER_USIZE - 1)) / BITS_PER_USIZE
418}
419
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the rounding-up behavior of `num_usizes_for` around word-size
    /// multiples on both 64 bit and 32 bit targets.
    #[test]
    fn num_usizes() {
        assert_eq!(num_usizes_for(0), 0);
        assert_eq!(num_usizes_for(1), 1);
        assert_eq!(num_usizes_for(2), 1);
        assert_eq!(num_usizes_for(3), 1);

        #[cfg(target_pointer_width = "64")]
        {
            assert_eq!(num_usizes_for(63), 1);
            assert_eq!(num_usizes_for(64), 1);
            assert_eq!(num_usizes_for(65), 2);
            assert_eq!(num_usizes_for(66), 2);
            // Was a duplicated `66` check; `67` completes the run.
            assert_eq!(num_usizes_for(67), 2);

            assert_eq!(num_usizes_for(255), 4);
            assert_eq!(num_usizes_for(256), 4);
            assert_eq!(num_usizes_for(257), 5);
            assert_eq!(num_usizes_for(258), 5);
            assert_eq!(num_usizes_for(259), 5);
        }

        #[cfg(target_pointer_width = "32")]
        {
            assert_eq!(num_usizes_for(31), 1);
            assert_eq!(num_usizes_for(32), 1);
            assert_eq!(num_usizes_for(33), 2);
            assert_eq!(num_usizes_for(34), 2);
            assert_eq!(num_usizes_for(35), 2);

            assert_eq!(num_usizes_for(127), 4);
            assert_eq!(num_usizes_for(128), 4);
            assert_eq!(num_usizes_for(129), 5);
            assert_eq!(num_usizes_for(130), 5);
            assert_eq!(num_usizes_for(131), 5);
        }
    }
}
461}