// deallocate_zeroed/zero_aware_allocator.rs

//! The zero-aware memory allocator.
//!
//! Zeroed memory is organized into a 2-layer freelist to facilitate fast and
//! easy allocation:
//!
//! 1. By alignment, in a fixed-size array.
//! 2. By size, in a dynamically-sized splay tree.
//!
//! When allocating a block, we use a hybrid best- and first-fit search:
//!
//! * First, we prioritize memory blocks with the minimum-satisfying alignment
//!   before considering blocks with alignment greater than requested. This
//!   helps us avoid "wasting" a rare, very-aligned block on an allocation that
//!   doesn't have any particular alignment requirements.
//!
//!   This prioritization is implemented via a loop over each entry in the
//!   fixed-size array (or "align-class") starting with the align-class that
//!   exactly matches the requested allocation's alignment and moving towards
//!   larger and larger alignments until we find a block that satisfies the
//!   allocation constraints or we fail to allocate.
//!
//! * Second, we search for memory blocks that will fit the requested allocation
//!   size, without introducing too much internal fragmentation. We care about
//!   fragmentation because we do not split blocks. Splitting blocks would
//!   require either that the underlying allocator support block splitting
//!   (which it might not, and the `Allocator` trait doesn't have methods for
//!   deallocating a split-off chunk of a previously allocated block anyway) or
//!   else additional bookkeeping on our part to track whether a block has been
//!   split, whether it can be merged again, and when it can be returned to the
//!   underlying allocator.
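//!
//! # Example
//!
//! A minimal usage sketch, not taken from this crate's tests. It assumes a
//! nightly `allocator_api` `Global` as the inner allocator, and it assumes
//! `SingleThreadedLockingMechanism` can be constructed via `Default`:
//!
//! ```ignore
//! use core::alloc::{Allocator, Layout};
//! use std::alloc::Global;
//!
//! let alloc = ZeroAwareAllocator::new(Global, SingleThreadedLockingMechanism::default());
//!
//! // Fast path: reuse an already-zeroed block if one fits, otherwise ask the
//! // inner allocator for freshly zeroed memory.
//! let layout = Layout::from_size_align(4096, 64).unwrap();
//! let block = alloc.allocate_zeroed(layout).unwrap();
//!
//! // ... use the block, bulk-zero it in the background, then hand it back.
//! // `deallocate_zeroed` records it in the freelists so that a future
//! // `allocate_zeroed` of a similar layout can skip zeroing entirely.
//! unsafe { alloc.deallocate_zeroed(block.cast(), layout) };
//! ```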

use core::ptr;

use super::*;
use metadata::{BlockInfo, FreeList};

mod mutex;
pub use mutex::{LockingMechanism, Mutex, MutexGuard, SingleThreadedLockingMechanism};

/// The maximum alignment for which we have an align-class.
const MAX_ALIGN_WITH_CLASS: usize = 4096;

/// The number of align-classes we have.
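///
/// With `MAX_ALIGN_WITH_CLASS == 4096`, this is 13 classes: one for each
/// power-of-two alignment from 1 through 4096 (2^0 through 2^12).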
const NUM_ALIGN_CLASSES: usize = MAX_ALIGN_WITH_CLASS.ilog2() as usize + 1;

/// We allow satisfying an allocation with a block that is at most
///
/// ```ignore
/// size / ACCEPTABLE_WASTE_DIVISOR
/// ```
///
/// bytes larger than the requested allocation's `size`.
///
/// This balances fragmentation (because we do not split blocks) with allocation
/// efficiency and how deep in a freelist we will keep searching despite having
/// seen a block that technically could satisfy the allocation (i.e. best- vs
/// first-fit). Making this value larger will decrease fragmentation but
/// increase time spent searching freelists and the probability that we will
/// ask the underlying allocator for a new zeroed block, rather than reusing an
/// already-zeroed block; making it smaller has the opposite effects.
///
/// NB: keep this a power of two so that the compiler can strength-reduce the
/// division into a shift.
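///
/// For example, with the current value of `8`, a request for 96 bytes
/// tolerates up to `96 / 8 == 12` bytes of waste, so any already-zeroed block
/// of `96..=108` bytes (with suitable alignment) may satisfy it.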
const ACCEPTABLE_WASTE_DIVISOR: usize = 8;

/// A memory allocator that keeps track of already-zeroed memory blocks.
///
/// This lets applications move zeroing off of the `allocate_zeroed` critical
/// path. Instead they can, for example, bulk-zero memory blocks in the
/// background before returning them to the allocator.
///
/// This allocator wraps an underlying, inner allocator of type `A`, layering
/// the bookkeeping of already-zeroed blocks on top of it.
///
/// Because this crate is `no_std` and does not assume the presence of an
/// operating system, you must provide your own locking mechanism via the `L`
/// type parameter. See the [`LockingMechanism`] trait for details.
#[derive(Default)]
pub struct ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
    /// The underlying allocator.
    inner: A,

    /// Metadata keeping track of already-zeroed memory.
    zeroed: Mutex<Zeroed, L>,
}

#[derive(Default)]
struct Zeroed {
    /// Already-zeroed memory blocks, organized into align-classes.
    align_classes: [FreeList; NUM_ALIGN_CLASSES],

    /// A fallback freelist for already-zeroed memory blocks that have alignment
    /// larger than we have an align-class for.
    very_large_aligns: FreeList,
}

impl<A, L> ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
    /// Create a new `ZeroAwareAllocator` that wraps the given `inner`
    /// allocator.
    #[inline]
    pub const fn new(inner: A, lock: L) -> Self {
        let zeroed = Zeroed {
            align_classes: [const { FreeList::new() }; NUM_ALIGN_CLASSES],
            very_large_aligns: FreeList::new(),
        };
        let zeroed = Mutex::new(zeroed, lock);
        ZeroAwareAllocator { inner, zeroed }
    }

    /// Get a shared reference to the inner allocator.
    #[inline]
    pub fn inner(&self) -> &A {
        &self.inner
    }

    /// Get an exclusive reference to the inner allocator.
    #[inline]
    pub fn inner_mut(&mut self) -> &mut A {
        &mut self.inner
    }

    /// Return all of the known-zeroed memory blocks to the underlying
    /// allocator.
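    ///
    /// (This also happens automatically when the allocator is dropped.)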
    pub fn return_zeroed_memory_to_inner(&mut self) {
        let mut zeroed = self.zeroed.lock();
        let zeroed = &mut *zeroed;

        for freelist in zeroed
            .align_classes
            .iter_mut()
            .chain(Some(&mut zeroed.very_large_aligns))
        {
            while let Some(node) = freelist.pop_root() {
                let block_ptr = node.ptr();
                let block_layout = node.layout();

                // Safety: the node has been removed from the freelist, was
                // allocated with `self.inner`, and is currently allocated.
                unsafe {
                    self.deallocate_block_info(node);
                }

                // Safety: the block is currently allocated (from the inner
                // allocator's point of view), has the associated layout, and
                // nothing else is referencing this block anymore now that its
                // node has been deallocated.
                unsafe { self.inner.deallocate(block_ptr, block_layout) };
            }
        }
    }

    /// Allocate a new `BlockInfo` for our internal bookkeeping for the
    /// given already-zeroed block described by `ptr` and `layout`.
    fn allocate_block_info(
        &self,
        ptr: NonNull<u8>,
        layout: Layout,
    ) -> Result<&'static BlockInfo<'static>, AllocError> {
        let node_ptr = self.inner.allocate(Layout::new::<BlockInfo<'_>>())?;
        let node_ptr = node_ptr.cast::<BlockInfo<'_>>();
        // Safety: `node_ptr` is valid for writes, is properly aligned, and is
        // valid for conversion to a reference.
        unsafe {
            node_ptr.write(BlockInfo::new(ptr, layout));
            Ok(node_ptr.as_ref())
        }
    }

    /// Deallocate a `BlockInfo` freelist node.
    ///
    /// ### Safety
    ///
    /// * The node must not be in a freelist or otherwise referenced by anything
    ///   else.
    ///
    /// * The node must have been allocated with `self.inner`.
    ///
    /// * The node must be currently allocated.
    unsafe fn deallocate_block_info(&self, node: &'static BlockInfo<'static>) {
        self.inner
            .deallocate(NonNull::from(node).cast(), Layout::new::<BlockInfo<'_>>());
    }

    /// Attempt to allocate an already-zeroed block of the given layout.
    ///
    /// Does not fall back to the inner allocator upon allocation failure.
    #[inline]
    fn allocate_already_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        if layout.size() == 0 {
            // Zero-sized allocations need no backing memory, but the returned
            // pointer must still be non-null and aligned for the requested
            // layout, so hand out a well-aligned dangling pointer.
            let dangling = unsafe { NonNull::new_unchecked(layout.align() as *mut u8) };
            return Ok(NonNull::slice_from_raw_parts(dangling, 0));
        }

        // The index of this layout's align-class, if we have one. Clamp to
        // `NUM_ALIGN_CLASSES` so that alignments larger than we have an
        // align-class for skip straight to `very_large_aligns` below (and so
        // that the slice index below cannot go out of bounds).
        let align_class = (layout.align().ilog2() as usize).min(NUM_ALIGN_CLASSES);

        {
            let mut zeroed = self.zeroed.lock();
            let zeroed = &mut *zeroed;

            // Start at the layout's align-class and try to find a suitably-sized
            // block for the layout, moving to larger and larger align-classes as
            // necessary.
            let freelists = zeroed.align_classes[align_class..NUM_ALIGN_CLASSES]
                .iter_mut()
                .chain(Some(&mut zeroed.very_large_aligns));

            for freelist in freelists {
                // Look for a block that satisfies this allocation layout. If we
                // find one, then deallocate our metadata with the underlying
                // allocator and return the block.
                if let Some(node) = freelist.remove(&layout) {
                    let ret = node.non_null_slice_ptr();

                    // Safety: the node was allocated from `self.inner`, is
                    // currently allocated, and was removed from its freelist.
                    unsafe {
                        self.deallocate_block_info(node);
                    }

                    return Ok(ret);
                }
            }
        }

        // Failed to find an already-zeroed block that satisfied the requested
        // layout.
        Err(AllocError)
    }

    /// Record the given already-zeroed block in our freelists so that it can
    /// service future `allocate_zeroed` requests.
    ///
    /// ### Safety
    ///
    /// The block must be currently allocated from `self.inner` with the given
    /// `layout`, its contents must already be zeroed, and nothing else may be
    /// referencing it.
    #[inline]
    unsafe fn deallocate_already_zeroed(&self, ptr: NonNull<u8>, layout: Layout) {
        if layout.size() == 0 {
            return;
        }

        match self.allocate_block_info(ptr, layout) {
            // If the inner allocator fails to allocate the node that we need
            // for bookkeeping, then we can't keep track of this already-zeroed
            // block, so simply eagerly return it to the inner allocator.
            Err(_) => self.inner.deallocate(ptr, layout),

            Ok(node) => {
                let mut zeroed = self.zeroed.lock();
                let zeroed = &mut *zeroed;

                // Get the appropriate freelist for this block's align-class.
                let align_class = layout.align().ilog2() as usize;
                let freelist = zeroed
                    .align_classes
                    .get_mut(align_class)
                    .unwrap_or_else(|| &mut zeroed.very_large_aligns);

                // Insert the block into its freelist.
                freelist.insert(node);
            }
        }
    }
}

impl<A, L> Drop for ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
    fn drop(&mut self) {
        self.return_zeroed_memory_to_inner();
    }
}

unsafe impl<A, L> Allocator for ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
    #[inline]
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
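        // Prefer the inner allocator for plain (non-zeroed) allocations; only
        // dip into the already-zeroed freelists if it fails, which keeps
        // already-zeroed blocks available for `allocate_zeroed` callers.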
        self.inner
            .allocate(layout)
            .or_else(|_| self.allocate_already_zeroed(layout))
    }

    #[inline]
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        self.inner.deallocate(ptr, layout);
    }

    #[inline]
    fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        self.allocate_already_zeroed(layout)
            .or_else(|_| self.inner.allocate_zeroed(layout))
    }

    #[inline]
    unsafe fn grow(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        self.inner.grow(ptr, old_layout, new_layout).or_else(|_| {
            let new = self.allocate_already_zeroed(new_layout)?;
            ptr::copy_nonoverlapping(
                ptr.as_ptr().cast_const(),
                new.cast().as_ptr(),
                old_layout.size(),
            );
            // On success, ownership of the old block transfers to us per the
            // `Allocator::grow` contract, so return it to the inner allocator.
            self.inner.deallocate(ptr, old_layout);
            Ok(new)
        })
    }

    #[inline]
    unsafe fn grow_zeroed(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        let bytes_to_zero = new_layout.size() - old_layout.size();
        let bytes_to_copy = new_layout.size() - bytes_to_zero;

        // We can't split or grow blocks in place, so if we don't want to use
        // the underlying allocator, we have to do a new zeroed allocation and
        // copy over the old data. As a heuristic, if that would end up copying
        // way more bytes than the new allocation would need to zero (assuming
        // the old allocation could be grown in place), then just defer to the
        // underlying allocator.
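        //
        // For example (hypothetical sizes): growing a 64 KiB block to 65 KiB
        // would copy 64 KiB just to avoid zeroing 1 KiB, so we would rather
        // let the inner allocator try to grow the block in place.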
        if bytes_to_copy > 2usize.saturating_mul(bytes_to_zero) {
            if let Ok(p) = self.inner.grow_zeroed(ptr, old_layout, new_layout) {
                return Ok(p);
            }
        }

        let new = self.allocate_zeroed(new_layout)?;
        ptr::copy_nonoverlapping(
            ptr.as_ptr().cast_const(),
            new.cast().as_ptr(),
            old_layout.size(),
        );
        // On success, ownership of the old block transfers to us per the
        // `Allocator::grow_zeroed` contract, so return it to the inner
        // allocator.
        self.inner.deallocate(ptr, old_layout);
        Ok(new)
    }

    #[inline]
    unsafe fn shrink(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        // We can't split allocations ourselves, so just defer to the inner
        // allocator.
        self.inner.shrink(ptr, old_layout, new_layout)
    }
}

impl<A, L> DeallocateZeroed for ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
    unsafe fn deallocate_zeroed(&self, pointer: NonNull<u8>, layout: Layout) {
        self.deallocate_already_zeroed(pointer, layout);
    }
}

mod metadata {
    use super::*;
    use core::cmp::Ordering;
    use intrusive_splay_tree::{Node, SplayTree, TreeOrd};

    /// Metadata about an already-zeroed block of memory, allocated from our
    /// underlying allocator.
    ///
    /// Note: the `'a` lifetime is used internally to this module to keep
    /// things free of `unsafe` where possible, but outside this module it is
    /// always erased to `'static` and the lifetimes are manually managed.
    #[derive(Debug)]
    pub(super) struct BlockInfo<'a> {
        ptr: NonNull<u8>,
        layout: Layout,
        node: Node<'a>,
    }

    impl<'a> BlockInfo<'a> {
        /// ### Safety
        ///
        /// `ptr` must be valid for reads of `layout.size()` bytes (in debug
        /// builds those bytes are asserted to be zero).
        pub(super) unsafe fn new(ptr: NonNull<u8>, layout: Layout) -> Self {
            debug_assert!(
                core::slice::from_raw_parts(ptr.as_ptr(), layout.size())
                    .iter()
                    .all(|b| *b == 0),
                "supposedly already-zeroed block contains non-zero memory"
            );
            BlockInfo {
                ptr,
                layout,
                node: Node::default(),
            }
        }

        fn size(&self) -> usize {
            self.layout.size()
        }

        fn align(&self) -> usize {
            1 << (self.ptr.as_ptr() as usize).trailing_zeros()
        }

        pub(super) fn ptr(&self) -> NonNull<u8> {
            self.ptr
        }

        pub(super) fn non_null_slice_ptr(&self) -> NonNull<[u8]> {
            NonNull::slice_from_raw_parts(self.ptr, self.size())
        }

        pub(super) fn layout(&self) -> Layout {
            self.layout
        }
    }

    /// Comparison between two `BlockInfo`s.
    ///
    /// This is used for ordering blocks within a tree.
    impl<'a> TreeOrd<'a, BlockInfo<'a>> for BlockInfo<'a> {
        fn tree_cmp(&self, other: &'a BlockInfo<'a>) -> Ordering {
            // Compare first by size, since that is the first hard constraint we
            // must satisfy and property we query for.
            self.size()
                .cmp(&other.size())
                // Then by alignment, since that is the other hard constraint.
                .then_with(|| {
                    let self_align = (self.ptr.as_ptr() as usize).trailing_zeros();
                    let other_align = (other.ptr.as_ptr() as usize).trailing_zeros();
                    self_align.cmp(&other_align)
                })
                // And finally by address. This final tie breaker should
                // generally improve spatial locality between a sequence of
                // allocations.
                .then_with(|| {
                    let self_addr = self.ptr.as_ptr() as usize;
                    let other_addr = other.ptr.as_ptr() as usize;
                    self_addr.cmp(&other_addr)
                })
        }
    }

    /// Comparison between a `Layout` and a `BlockInfo`.
    ///
    /// This is used when searching for a block to satisfy a requested
    /// allocation.
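    ///
    /// For example, a request for 96 bytes with 8-byte alignment compares
    /// `Equal` to (i.e. can be satisfied by) a 104-byte block whose address
    /// is 16-byte aligned: the `104 - 96 == 8` bytes of waste are within the
    /// acceptable `96 / 8 == 12`, and the block's alignment meets the
    /// requested 8.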
    impl<'a> TreeOrd<'a, BlockInfo<'a>> for Layout {
        fn tree_cmp(&self, block: &'a BlockInfo<'a>) -> Ordering {
            // Compare sizes. Allow for some fragmentation, but not too much,
            // because we can't split blocks ourselves. See the doc comment for
            // `ACCEPTABLE_WASTE_DIVISOR` for more info.
            let by_size = match self.size().cmp(&block.size()) {
                Ordering::Greater => Ordering::Greater,
                Ordering::Equal => Ordering::Equal,
                Ordering::Less => {
                    let acceptable_waste = self.size() / ACCEPTABLE_WASTE_DIVISOR;
                    let potential_waste = block.size() - self.size();
                    if potential_waste <= acceptable_waste {
                        Ordering::Equal
                    } else {
                        Ordering::Less
                    }
                }
            };
            by_size
                // Compare alignments. If the block matches the requested
                // alignment at all, even if it is actually over-aligned, then
                // return `Ordering::Equal` to signal that it can satisfy the
                // allocation.
                .then_with(|| match self.align().cmp(&block.align()) {
                    Ordering::Less | Ordering::Equal => Ordering::Equal,
                    Ordering::Greater => Ordering::Greater,
                })
        }
    }

    intrusive_splay_tree::impl_intrusive_node! {
        impl<'a> IntrusiveNode<'a> for BlockInfo<'a>
        where
            type Elem = BlockInfo<'a>,
            node = node;
    }

    pub(super) type FreeList = SplayTree<'static, BlockInfo<'static>>;
}