dense_heap/
dheap.rs

1// dheap.rs --- dense heap implementation.
2
3// Copyright (c) 2023 Sam Belliveau. All rights reserved.
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23use std::{
24    cell::{Cell, UnsafeCell},
25    hint::unreachable_unchecked,
26    marker::PhantomData,
27    mem::{replace, ManuallyDrop},
28    ops::{Deref, DerefMut, Drop},
29};
30
31/// The DHeapNode contains all the metadata required to keep the DHeap organized.
32enum DHeapNode<T: Sized> {
33    /// Edge is always the last element of the vector. When the
34    /// head points to the edge, new memory must be allocated.
35    Edge(),
36
37    /// Empty represents a previously occupied slot that has
38    /// been freed. It points to the previous head when
39    /// it was freed, creating a chain of free blocks for
40    /// future allocations.
41    Empty(usize),
42
43    /// Holding represents a memory slot in use by a DBox<_>.
44    /// The memory is owned by the DBox<_> pointing to it,
45    /// which is why it is wrapped in a ManuallyDrop<_>. The DBox<_>
46    /// is guaranteed to drop before the DHeap<_>.
47    Holding(ManuallyDrop<T>),
48
49    /// When calling DBox.into_inner(), memory is moved out of the
50    /// DHeap<_> before the DBox<_> has dropped. This serves as an indicator
51    /// for the DBox<_> not to panic when it finds its memory moved during
52    /// the dropping process.
53    Moved(),
54}
55
56use DHeapNode::*;
57
58/// A DHeap is a dense heap data structure that efficiently manages memory allocation and deallocation.
59///
60/// The heap has an overhead of 24 bytes per element, and it will never use more memory than what is allocated
61/// at any given point in time, no matter which elements are freed and in which order. The linking nature of the
62/// indices will always backfill optimally, ensuring that the memory usage is as efficient as possible.
63pub struct DHeap<T: Sized> {
64    buffer: UnsafeCell<Vec<DHeapNode<T>>>,
65    head: Cell<usize>,
66}
67
68impl<T> DHeap<T> {
69    /// Creates a new `DHeap` with a specified initial capacity.
70    ///
71    /// Allocates a buffer with the requested capacity, plus one additional element to account for the `Edge`.
72    /// The `Edge` is a sentinel element used to facilitate certain heap operations.
73    ///
74    /// # Arguments
75    ///
76    /// * `capacity` - The desired initial capacity for the heap.
77    ///
78    /// # Panics
79    ///
80    /// Panics if `capacity` is less than or equal to 1, as the heap requires at least 2 elements to function properly.
81    pub fn with_capacity(capacity: usize) -> Self {
82        assert!(capacity > 1);
83
84        DHeap {
85            buffer: {
86                // We add one more element than requested to account for the Edge.
87                let mut memory = Vec::with_capacity(capacity + 1);
88                memory.push(Edge());
89                memory.into()
90            },
91            head: Cell::new(0),
92        }
93    }
94
95    // internally used to make life easy
96    fn memory(&self) -> &mut Vec<DHeapNode<T>> {
97        unsafe { &mut *self.buffer.get() }
98    }
99
100    /// Allocates memory for the given value `v` in the `DHeap` and returns a `DBox` pointing to it.
101    ///
102    /// This function is marked `unsafe` because it may potentially invalidate existing references
103    /// if the underlying vector needs to be resized. However, `DBox` instances will still function correctly.
104    ///
105    /// When the end of the free block list is reached, a new element is pushed during allocation. If this
106    /// new element requires the vector to grow, any existing references to elements within the dense heap
107    /// might become invalid. This risk should be carefully considered when using this heap.
108    ///
109    /// One approach to mitigate this risk is to use safe_new().
110    ///
111    /// # Safety
112    ///
113    /// Users must ensure that no references to elements within the dense heap are held when calling this function.
114    /// If references are held, they may become invalid after the function call.
115    pub unsafe fn unsafe_new(&self, v: T) -> DBox<T> {
116        let index = self.head.get();
117
118        match self.memory()[index] {
119            Edge() => {
120                // The implementation's weak point lies in this push operation, which is unavoidable.
121                // When the end of the free block list is reached, a new element must be pushed
122                // during allocation. If the new element causes the vector to grow, it leads to a problem:
123                // any references to elements within the dense heap become invalid.
124                // It's crucial to carefully consider this risk when using this heap.
125                self.head.set(self.size());
126                self.memory().push(Edge());
127            }
128
129            Empty(next) => self.head.set(next),
130            _ => panic!("invalid head pointer! [corrupted memory]"),
131        }
132
133        self.memory()[index] = Holding(ManuallyDrop::new(v));
134
135        DBox {
136            heap: self,
137            index,
138            _marker: PhantomData,
139        }
140    }
141
142    /// Provides a safe alternative to `DHeap::new()` by attempting to allocate
143    /// memory without resizing the underlying vector.
144    ///
145    /// This function ensures that no existing references will be invalidated during
146    /// the allocation process, as it only allocates memory when there is available
147    /// capacity within the reserved memory. However, if the reserved memory is
148    /// exhausted, an error is returned.
149    ///
150    /// # Returns
151    ///
152    /// - `Ok(DBox<T>)` if the allocation was successful.
153    /// - `Err(&'static str)` if there is no available capacity within the reserved memory.
154    pub fn safe_new(&self, v: T) -> Result<DBox<T>, &'static str> {
155        if self.memory().len() == self.memory().capacity() {
156            Err("out of reserved memory!")
157        } else {
158            // SAFETY: The vector is not resized, so no existing references are invalidated.
159            unsafe { Ok(self.unsafe_new(v)) }
160        }
161    }
162
163    /// Retrieves the current memory usage of the `DHeap`.
164    ///
165    /// This function returns the number of elements in the underlying vector,
166    /// which represents the total memory occupied by the `DHeap`.
167    ///
168    /// # Returns
169    ///
170    /// - A `usize` representing the memory usage of the `DHeap`.
171    pub fn size(&self) -> usize {
172        self.memory().len()
173    }
174}
175
176/// DBox is a smart pointer designed to work with the DHeap allocator.
177///
178/// It provides similar functionality to Box in the Rust standard library but is specifically tailored
179/// for use with the dense heap implementation (DHeap).
180pub struct DBox<'a, T> {
181    heap: &'a DHeap<T>,
182    index: usize,
183    _marker: PhantomData<T>,
184}
185
186impl<'a, T> DBox<'a, T> {
187    fn data(&self) -> &'a DHeapNode<T> {
188        &self.heap.memory()[self.index]
189    }
190
191    fn mut_data(&mut self) -> &'a mut DHeapNode<T> {
192        &mut self.heap.memory()[self.index]
193    }
194
195    /// Consumes the `DBox` and retrieves the inner value `T`.
196    ///
197    /// This function replaces the `DBox`'s memory cell with a `Moved` state, indicating
198    /// that the memory has been moved out of the `DHeap` before the `DBox` is dropped.
199    /// After replacing the cell, it returns the inner value of the `DBox`.
200    ///
201    /// # Returns
202    ///
203    /// - The inner value `T` contained within the `DBox`.
204    pub fn into_inner(mut self) -> T {
205        match replace(self.mut_data(), Moved()) {
206            Holding(value) => ManuallyDrop::into_inner(value),
207            _ => panic!("use after free! [corrupted memory]"),
208        }
209    }
210}
211
212impl<'a, T> Drop for DBox<'a, T> {
213    fn drop(&mut self) {
214        match self.mut_data() {
215            Holding(value) => {
216                // SAFETY: The memory cell is immediately replaced with an empty cell after dropping.
217                unsafe { ManuallyDrop::drop(value) }
218            }
219            Moved() => {}
220            _ => panic!("double free! [corrupted memory]"),
221        }
222
223        *self.mut_data() = Empty(self.heap.head.replace(self.index));
224    }
225}
226
227impl<'a, T> Deref for DBox<'a, T> {
228    type Target = T;
229
230    fn deref(&self) -> &Self::Target {
231        if let Holding(value) = self.data() {
232            value.deref()
233        } else {
234            // SAFETY:
235            // This code is frequently executed, so we use unsafe code to bypass the match.
236            // This should never be reached unless memory corruption occurs, but the
237            // compiler isn't aware of this guarantee.
238            unsafe { unreachable_unchecked() }
239        }
240    }
241}
242
243impl<'a, T> DerefMut for DBox<'a, T> {
244    fn deref_mut(&mut self) -> &mut Self::Target {
245        if let Holding(value) = self.mut_data() {
246            value.deref_mut()
247        } else {
248            // SAFETY:
249            // This code is frequently executed, so we use unsafe code to bypass the match.
250            // This should never be reached unless memory corruption occurs, but the
251            // compiler isn't aware of this guarantee.
252            unsafe { unreachable_unchecked() }
253        }
254    }
255}
256
257impl<'a, T> AsRef<T> for DBox<'a, T> {
258    fn as_ref(&self) -> &T {
259        self.deref()
260    }
261}
262
263impl<'a, T> AsMut<T> for DBox<'a, T> {
264    fn as_mut(&mut self) -> &mut T {
265        self.deref_mut()
266    }
267}