stable_vec/core/bitvec.rs

use std::{
    alloc::{alloc, alloc_zeroed, dealloc, handle_alloc_error, realloc, Layout},
    fmt,
    mem::{align_of, size_of},
    ptr::{self, NonNull},
};

use super::Core;


/// A `Core` implementation that is conceptually a `BitVec` and a `Vec<T>`.
///
/// This is the default core as it has quite a few advantages. For one, it
/// does not waste memory due to padding. The information about whether a slot
/// is filled or not is stored in a `BitVec` and thus only takes one bit per
/// slot.
///
/// Using a `BitVec` has another important advantage: iterating over the
/// indices of a stable vector (i.e. without touching the actual data) is very
/// cache-friendly. This is due to the dense packing of information. It's also
/// possible to very quickly scan the bit vector in order to find filled/empty
/// slots.
///
/// However, this core implementation has disadvantages, too. It manages two
/// allocations, which means that growing or shrinking has to perform two
/// reallocations with the underlying memory allocator. Potentially more
/// important is the decreased cache-friendliness when accessing elements at
/// random: in the worst case, each element access results in two cache misses
/// instead of only one.
///
/// For most use cases, this is a good choice. That's why it's the default.
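///
/// As an illustration (this example is not from the original docs): with
/// `cap == 5` and elements in slots 0, 2 and 3, the two buffers conceptually
/// look like this (bit `i` of `bit_ptr` corresponds to slot `i`):
///
/// ```text
/// elem_ptr: [ e0 | ?? | e2 | e3 | ?? ]    (?? = undefined memory)
/// bit_ptr:  0b01101, padded with zeros to a whole usize
/// ```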
pub struct BitVecCore<T> {
    /// This is the memory that stores the actual slots/elements. If a slot is
    /// empty, the memory at that index is undefined.
    elem_ptr: NonNull<T>,

    /// Stores whether slots are filled (1) or empty (0). Stores one bit per
    /// slot. Is stored as `usize` instead of `u8` to potentially improve
    /// performance when finding a hole or counting the elements.
    bit_ptr: NonNull<usize>,

    /// The capacity: the length of the `elem_ptr` buffer. Corresponds to the
    /// `cap` of the `Core` definition.
    cap: usize,

    /// The `len`: corresponds to the `len` of the `Core` definition.
    len: usize,
}

const BITS_PER_USIZE: usize = size_of::<usize>() * 8;

impl<T> BitVecCore<T> {
    /// Deallocates both pointers, sets them to the same value as `new()` does
    /// and sets `cap` to 0.
    ///
    /// # Formal
    ///
    /// **Preconditions**:
    /// - `self.len == 0`
    /// - All slots are empty
    unsafe fn dealloc(&mut self) {
        if self.cap != 0 {
            if size_of::<T>() != 0 {
                dealloc(self.elem_ptr.as_ptr() as *mut _, self.old_elem_layout());
            }

            dealloc(self.bit_ptr.as_ptr() as *mut _, self.old_bit_layout());
            self.cap = 0;
        }
    }

    /// Returns the layout that was used for the last allocation of `elem_ptr`.
    /// `self.cap` must not be 0 and `T` must not be a ZST, or else this
    /// method's behavior is undefined.
    unsafe fn old_elem_layout(&self) -> Layout {
        Layout::from_size_align_unchecked(
            // This can't overflow due to being previously allocated.
            self.cap * size_of::<T>(),
            align_of::<T>(),
        )
    }

    /// Returns the layout that was used for the last allocation of `bit_ptr`.
    /// `self.cap` must not be 0 or else this method's behavior is undefined.
    unsafe fn old_bit_layout(&self) -> Layout {
        Layout::from_size_align_unchecked(
            size_of::<usize>() * num_usizes_for(self.cap),
            align_of::<usize>(),
        )
    }
}

impl<T> Core<T> for BitVecCore<T> {
    fn new() -> Self {
        Self {
            elem_ptr: NonNull::dangling(),
            bit_ptr: NonNull::dangling(),
            cap: 0,
            len: 0,
        }
    }

    fn len(&self) -> usize {
        self.len
    }

    unsafe fn set_len(&mut self, new_len: usize) {
        debug_assert!(new_len <= self.cap());
        // The other precondition is too expensive to test, even in debug:
        // ∀ i in `new_len..self.cap()` ⇒ `self.has_element_at(i) == false`

        self.len = new_len;

        // The formal requirements of this method hold:
        //
        // **Invariants**:
        // - *slot data* -> trivially holds, we do not touch that
        // - `len ≤ cap` -> that's a precondition
        //
        // **Postconditions**:
        // - `self.len() == new_len`: trivially holds
    }

    fn cap(&self) -> usize {
        self.cap
    }

    #[inline(never)]
    #[cold]
    unsafe fn realloc(&mut self, new_cap: usize) {
        debug_assert!(new_cap >= self.len());
        debug_assert!(new_cap <= isize::max_value() as usize);

        #[inline(never)]
        #[cold]
        fn capacity_overflow() -> ! {
            panic!("capacity overflow in `stable_vec::BitVecCore::realloc` (attempt \
                to allocate more than `usize::MAX` bytes)");
        }

        // Handle special case
        if new_cap == 0 {
            // Due to the preconditions, we know that `self.len == 0`, which
            // in turn tells us that there aren't any filled slots. So we can
            // just deallocate the memory.
            self.dealloc();
            return;
        }


        // ----- (Re)allocate element memory ---------------------------------

        // We only have to allocate if `T` is not zero-sized. Otherwise, we
        // just don't do anything.
        if size_of::<T>() != 0 {
            // Get the new number of bytes for the allocation and create the
            // memory layout.
            let size = new_cap.checked_mul(size_of::<T>())
                .unwrap_or_else(|| capacity_overflow());
            let new_elem_layout = Layout::from_size_align_unchecked(size, align_of::<T>());

            // (Re)allocate memory.
            let ptr = if self.cap == 0 {
                alloc(new_elem_layout)
            } else {
                realloc(self.elem_ptr.as_ptr() as *mut _, self.old_elem_layout(), size)
            };

            // If the element allocation failed, we quit the program with an
            // OOM error.
            if ptr.is_null() {
                handle_alloc_error(new_elem_layout);
            }

            // We already overwrite the pointer here. It is not read or
            // changed anywhere else in this function.
            self.elem_ptr = NonNull::new_unchecked(ptr as *mut _);
        }


        // ----- (Re)allocate bitvec memory ----------------------------------
        {
            // Get the new number of required bytes for the allocation and
            // create the memory layout.
            let size = size_of::<usize>() * num_usizes_for(new_cap);
            let new_bit_layout = Layout::from_size_align_unchecked(size, align_of::<usize>());

            // (Re)allocate memory.
            let ptr = if self.cap == 0 {
                alloc_zeroed(new_bit_layout)
            } else {
                realloc(self.bit_ptr.as_ptr() as *mut _, self.old_bit_layout(), size)
            };
            let ptr = ptr as *mut usize;

            // If the bit vector allocation failed, we quit the program with
            // an OOM error.
            if ptr.is_null() {
                handle_alloc_error(new_bit_layout);
            }

            // If we reallocated, the new memory is not necessarily zeroed, so
            // we need to do it. TODO: if `alloc` offers a `realloc_zeroed`
            // in the future, we should use that.
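            //
            // Illustrative example (64-bit target; the numbers are not from
            // the original comments): growing from `cap == 64` (1 `usize` of
            // bits) to `cap == 100` (2 `usize`s) zeroes exactly the one newly
            // added word at offset 1.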
            if self.cap != 0 {
                let initialized_usizes = num_usizes_for(self.cap);
                let new_usizes = num_usizes_for(new_cap);
                if new_usizes > initialized_usizes {
                    ptr::write_bytes(
                        ptr.add(initialized_usizes),
                        0,
                        new_usizes - initialized_usizes,
                    );
                }
            }

            self.bit_ptr = NonNull::new_unchecked(ptr as *mut _);
        }

        self.cap = new_cap;

        // All formal requirements are met now:
        //
        // **Invariants**:
        // - *slot data*: by using `realloc` if `self.cap != 0`, the slot data
        //   (including the deleted-flag) was correctly copied.
        // - `self.len()`: indeed didn't change
        //
        // **Postconditions**:
        // - `self.cap() == new_cap`: trivially holds due to the last line.
    }

    unsafe fn has_element_at(&self, idx: usize) -> bool {
        debug_assert!(idx < self.cap());

        // The divisions will be turned into shift and 'and' instructions.
        let usize_pos = idx / BITS_PER_USIZE;
        let bit_pos = idx % BITS_PER_USIZE;
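        // Worked example (64-bit target; the numbers are illustrative, not
        // from the original): for `idx == 70`, `usize_pos == 1` and
        // `bit_pos == 6`, i.e. we test bit 6 of the second `usize` word.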

        let block = *self.bit_ptr.as_ptr().add(usize_pos);
        ((block >> bit_pos) & 0b1) != 0
    }

    unsafe fn insert_at(&mut self, idx: usize, elem: T) {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx) == false);

        // We first write the value and then update the bit vector to avoid
        // potential double drops if a random panic occurs.
        ptr::write(self.elem_ptr.as_ptr().add(idx), elem);

        let usize_pos = idx / BITS_PER_USIZE;
        let bit_pos = idx % BITS_PER_USIZE;

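        // E.g. for `bit_pos == 3` (an illustrative value, not from the
        // original), `mask == 0b1000`; OR-ing it into the word sets exactly
        // the bit for this slot.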
        let mask = 1 << bit_pos;
        *self.bit_ptr.as_ptr().add(usize_pos) |= mask;
    }

    unsafe fn remove_at(&mut self, idx: usize) -> T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // We first mark the value as deleted and then read the value.
        // Otherwise, a random panic could lead to a double drop.
        let usize_pos = idx / BITS_PER_USIZE;
        let bit_pos = idx % BITS_PER_USIZE;

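        // E.g. for `bit_pos == 3` (an illustrative value), `mask == !0b1000`;
        // AND-ing it clears exactly the bit for this slot while leaving all
        // other bits untouched.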
        let mask = !(1 << bit_pos);
        *self.bit_ptr.as_ptr().add(usize_pos) &= mask;

        ptr::read(self.elem_ptr.as_ptr().add(idx))
    }

    unsafe fn get_unchecked(&self, idx: usize) -> &T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // The preconditions of this function guarantee that all preconditions
        // for `add` are met and that we can safely dereference the pointer.
        &*self.elem_ptr.as_ptr().add(idx)
    }

    unsafe fn get_unchecked_mut(&mut self, idx: usize) -> &mut T {
        debug_assert!(idx < self.cap());
        debug_assert!(self.has_element_at(idx));

        // The preconditions of this function guarantee that all preconditions
        // for `add` are met and that we can safely dereference the pointer.
        &mut *self.elem_ptr.as_ptr().add(idx)
    }

    fn clear(&mut self) {
        unsafe {
            // We can assume that all existing elements have an index lower
            // than `len` (this is one of the invariants of the `Core`
            // interface).
            for idx in 0..self.len {
                if self.has_element_at(idx) {
                    ptr::drop_in_place(self.get_unchecked_mut(idx));
                }
            }
            for word_idx in 0..num_usizes_for(self.len) {
                *self.bit_ptr.as_ptr().add(word_idx) = 0;
            }
            self.len = 0;
        }
    }

    // TODO: maybe override `{next|prev}_{hole|index}_from` for performance?
    // In principle we could scan the bit vector very quickly with specialized
    // instructions. Needs benchmarking.

    unsafe fn swap(&mut self, a: usize, b: usize) {
        // Swapping the bits is a bit annoying. To avoid branches, we first
        // xor both previous bits.
        let a_existed = self.has_element_at(a);
        let b_existed = self.has_element_at(b);

        // `swap_bit` is 0 if both slots were empty or both were filled, and
        // 1 if exactly one slot was empty. That also means the mask below is
        // 0 if both were empty/filled before; otherwise the mask has one bit
        // set. We xor with this mask, meaning that we flip the corresponding
        // bit.
        let swap_bit = (a_existed ^ b_existed) as usize;
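
        // Truth table (illustrative, not from the original comments):
        //
        //   a_existed | b_existed | swap_bit | effect of the xors below
        //   ----------+-----------+----------+-------------------------
        //       0     |     0     |    0     | no change
        //       1     |     1     |    0     | no change
        //       0     |     1     |    1     | both bits flip
        //       1     |     0     |    1     | both bits flip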

        // For a
        let usize_pos = a / BITS_PER_USIZE;
        let bit_pos = a % BITS_PER_USIZE;
        let mask = swap_bit << bit_pos;
        *self.bit_ptr.as_ptr().add(usize_pos) ^= mask;

        // For b
        let usize_pos = b / BITS_PER_USIZE;
        let bit_pos = b % BITS_PER_USIZE;
        let mask = swap_bit << bit_pos;
        *self.bit_ptr.as_ptr().add(usize_pos) ^= mask;

        // Finally, swap the actual elements.
        ptr::swap(
            self.elem_ptr.as_ptr().add(a),
            self.elem_ptr.as_ptr().add(b),
        );
    }
}

impl<T> Drop for BitVecCore<T> {
    fn drop(&mut self) {
        // Drop all elements
        self.clear();

        unsafe {
            // Deallocate the memory. `clear()` sets the length to 0 and drops
            // all existing elements, so it's fine to call `dealloc`.
            self.dealloc();
        }
    }
}

impl<T: Clone> Clone for BitVecCore<T> {
    fn clone(&self) -> Self {
        let mut out = Self::new();

        if self.cap != 0 {
            // All of this is scary
            unsafe {
                out.realloc(self.cap);

                // Copy element data over
                if size_of::<T>() != 0 {
                    let mut idx = 0;
                    while let Some(next) = self.first_filled_slot_from(idx) {
                        let clone = self.get_unchecked(next).clone();
                        ptr::write(out.elem_ptr.as_ptr().add(next), clone);

                        idx = next + 1;
                    }
                }

                // Copy bitvec data over
                ptr::copy_nonoverlapping(
                    self.bit_ptr.as_ptr(),
                    out.bit_ptr.as_ptr(),
                    num_usizes_for(self.cap),
                );

                out.set_len(self.len);
            }
        }

        out
    }
}

// This impl is usually not used. `StableVec` has its own impl which doesn't
// use this one.
impl<T> fmt::Debug for BitVecCore<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("BitVecCore")
            .field("len", &self.len())
            .field("cap", &self.cap())
            .finish()
    }
}

// Implement `Send` and `Sync`. These are not automatically implemented as we
// use raw pointers. But they are safe to implement (given that `T` implements
// them). We do not have interior mutability, thus we can implement `Sync`. We
// also do not share any data with other instances of this type, meaning that
// `Send` can be implemented.
unsafe impl<T: Send> Send for BitVecCore<T> {}
unsafe impl<T: Sync> Sync for BitVecCore<T> {}

#[inline(always)]
fn num_usizes_for(cap: usize) -> usize {
    // We need ⌈cap / BITS_PER_USIZE⌉ many usizes to store all required bits.
    // We round up by first adding (BITS_PER_USIZE - 1).
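    //
    // E.g. on a 64-bit target (an illustrative case): `num_usizes_for(65)`
    // is `(65 + 63) / 64 == 2`.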
    (cap + (BITS_PER_USIZE - 1)) / BITS_PER_USIZE
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn num_usizes() {
        assert_eq!(num_usizes_for(0), 0);
        assert_eq!(num_usizes_for(1), 1);
        assert_eq!(num_usizes_for(2), 1);
        assert_eq!(num_usizes_for(3), 1);

        #[cfg(target_pointer_width = "64")]
        {
            assert_eq!(num_usizes_for(63), 1);
            assert_eq!(num_usizes_for(64), 1);
            assert_eq!(num_usizes_for(65), 2);
            assert_eq!(num_usizes_for(66), 2);
            assert_eq!(num_usizes_for(67), 2);

            assert_eq!(num_usizes_for(255), 4);
            assert_eq!(num_usizes_for(256), 4);
            assert_eq!(num_usizes_for(257), 5);
            assert_eq!(num_usizes_for(258), 5);
            assert_eq!(num_usizes_for(259), 5);
        }

        #[cfg(target_pointer_width = "32")]
        {
            assert_eq!(num_usizes_for(31), 1);
            assert_eq!(num_usizes_for(32), 1);
            assert_eq!(num_usizes_for(33), 2);
            assert_eq!(num_usizes_for(34), 2);
            assert_eq!(num_usizes_for(35), 2);

            assert_eq!(num_usizes_for(127), 4);
            assert_eq!(num_usizes_for(128), 4);
            assert_eq!(num_usizes_for(129), 5);
            assert_eq!(num_usizes_for(130), 5);
            assert_eq!(num_usizes_for(131), 5);
        }
    }
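
    // A small smoke test added as an illustrative sketch; it is not part of
    // the original test suite. It only uses `Core` methods that appear in
    // this file (`realloc`, `insert_at`, `has_element_at`, `remove_at`,
    // `set_len`) and respects their documented preconditions.
    #[test]
    fn insert_query_remove_roundtrip() {
        unsafe {
            let mut core = BitVecCore::<u32>::new();

            // Allocate room for 8 slots; `len` stays 0.
            core.realloc(8);
            assert_eq!(core.cap(), 8);

            // Fill slot 3 and mark everything below index 4 as "in use".
            core.insert_at(3, 42);
            core.set_len(4);
            assert!(core.has_element_at(3));
            assert!(!core.has_element_at(2));

            // Removing returns the element and clears the bit.
            assert_eq!(core.remove_at(3), 42);
            assert!(!core.has_element_at(3));

            // All slots are empty again, so resetting `len` is allowed.
            core.set_len(0);
        }
    }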
}