deallocate_zeroed/zero_aware_allocator.rs
//! The zero-aware memory allocator.
//!
//! Zeroed memory is organized into a 2-layer freelist to facilitate fast and
//! easy allocation:
//!
//! 1. By alignment, in a fixed-size array.
//! 2. By size, in a dynamically-sized splay tree.
//!
//! When allocating a block, we use a hybrid best- and first-fit search:
//!
//! * First, we prioritize memory blocks with the minimum-satisfying alignment
//!   before considering blocks with alignment greater than requested. This
//!   helps us avoid "wasting" a rare, very-aligned block on an allocation that
//!   doesn't have any particular alignment requirements.
//!
//!   This prioritization is implemented via a loop over each entry in the
//!   fixed-size array (or "align-class") starting with the align-class that
//!   exactly matches the requested allocation's alignment and moving towards
//!   larger and larger alignments until we find a block that satisfies the
//!   allocation constraints or we fail to allocate.
//!
//! * Second, we search for memory blocks that will fit the requested
//!   allocation size without introducing too much internal fragmentation. We
//!   care about fragmentation because we do not split blocks. Splitting blocks
//!   would require either that the underlying allocator support block
//!   splitting (which it might not, and the `Allocator` trait doesn't have
//!   methods for deallocating a split-off chunk of a previously allocated
//!   block anyways) or else additional bookkeeping on our part to track
//!   whether a block has been split and whether it can be merged again and
//!   then returned to the underlying allocator.
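//!
//! A rough usage sketch (the inner allocator, the lock value, and the
//! surrounding setup here are hypothetical stand-ins, not a prescribed API):
//!
//! ```ignore
//! // Wrap an inner `Allocator` implementation.
//! let allocator = ZeroAwareAllocator::new(MyInnerAllocator::default(), my_lock);
//!
//! // `allocate_zeroed` first tries to reuse an already-zeroed block, and only
//! // falls back to asking the inner allocator for fresh zeroed memory.
//! let layout = Layout::from_size_align(4096, 8).unwrap();
//! let block = allocator.allocate_zeroed(layout)?;
//!
//! // When the application has zeroed a block itself (e.g. in the background),
//! // it can return the block as known-zeroed for future reuse.
//! unsafe { allocator.deallocate_zeroed(block.cast(), layout) };
//! ```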

use core::ptr;

use super::*;
use metadata::{BlockInfo, FreeList};

mod mutex;
pub use mutex::{LockingMechanism, Mutex, MutexGuard, SingleThreadedLockingMechanism};

/// The maximum alignment for which we have an align-class.
const MAX_ALIGN_WITH_CLASS: usize = 4096;

/// The number of align-classes we have.
const NUM_ALIGN_CLASSES: usize = MAX_ALIGN_WITH_CLASS.ilog2() as usize + 1;
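
// Compile-time sanity checks of the align-class indexing: an alignment's
// align-class is `align.ilog2()`, so alignment 1 maps to the first class and
// `MAX_ALIGN_WITH_CLASS` maps to the last.
const _: () = assert!(1usize.ilog2() as usize == 0);
const _: () = assert!(MAX_ALIGN_WITH_CLASS.ilog2() as usize == NUM_ALIGN_CLASSES - 1);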

/// We allow satisfying an allocation with a block that is larger than the
/// requested size by at most
///
/// ```ignore
/// requested_size / ACCEPTABLE_WASTE_DIVISOR
/// ```
///
/// bytes.
///
/// This balances fragmentation (because we do not split blocks) against
/// allocation efficiency and how deep in a freelist we will keep searching
/// despite having seen a block that technically could satisfy the allocation
/// (i.e. best- vs first-fit). Making this value larger will decrease
/// fragmentation but increase time spent searching freelists and the
/// probability that we will ask the underlying allocator for a new zeroed
/// block rather than reusing an already-zeroed block; making it smaller has
/// the opposite effects.
///
/// NB: keep this a power of two so that the compiler can strength-reduce the
/// division into a shift.
const ACCEPTABLE_WASTE_DIVISOR: usize = 8;
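
// For example, with a divisor of 8, a request for 1024 bytes tolerates up to
// 1024 / 8 = 128 bytes of waste: already-zeroed blocks of 1024 through 1152
// bytes may satisfy it, but a 1153-byte block may not.
const _: () = assert!(1024 / ACCEPTABLE_WASTE_DIVISOR == 128);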

/// A memory allocator that keeps track of already-zeroed memory blocks.
///
/// This lets applications move zeroing off of the `allocate_zeroed` critical
/// path. Instead they can, for example, bulk-zero memory blocks in the
/// background before returning them to the allocator.
///
/// This allocator wraps an underlying, inner allocator of type `A`, layering
/// the bookkeeping of already-zeroed blocks on top of it.
///
/// Because this crate is `no_std` and does not assume the presence of an
/// operating system, you must provide your own locking mechanism via the `L`
/// type parameter. See the [`LockingMechanism`] trait for details.
#[derive(Default)]
pub struct ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
    /// The underlying allocator.
    inner: A,

    /// Metadata keeping track of already-zeroed memory.
    zeroed: Mutex<Zeroed, L>,
}

#[derive(Default)]
struct Zeroed {
    /// Already-zeroed memory blocks, organized into align-classes.
    align_classes: [FreeList; NUM_ALIGN_CLASSES],

    /// A fallback freelist for already-zeroed memory blocks that have
    /// alignment larger than we have an align-class for.
    very_large_aligns: FreeList,
}

impl<A, L> ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
    /// Create a new `ZeroAwareAllocator` that wraps the given `inner`
    /// allocator.
    #[inline]
    pub const fn new(inner: A, lock: L) -> Self {
        let zeroed = Zeroed {
            align_classes: [const { FreeList::new() }; NUM_ALIGN_CLASSES],
            very_large_aligns: FreeList::new(),
        };
        let zeroed = Mutex::new(zeroed, lock);
        ZeroAwareAllocator { inner, zeroed }
    }

    /// Get a shared reference to the inner allocator.
    #[inline]
    pub fn inner(&self) -> &A {
        &self.inner
    }

    /// Get an exclusive reference to the inner allocator.
    #[inline]
    pub fn inner_mut(&mut self) -> &mut A {
        &mut self.inner
    }

    /// Return all of the known-zeroed memory blocks to the underlying
    /// allocator.
    pub fn return_zeroed_memory_to_inner(&mut self) {
        let mut zeroed = self.zeroed.lock();
        let zeroed = &mut *zeroed;

        for freelist in zeroed
            .align_classes
            .iter_mut()
            .chain(Some(&mut zeroed.very_large_aligns))
        {
            while let Some(node) = freelist.pop_root() {
                let block_ptr = node.ptr();
                let block_layout = node.layout();

                // Safety: the node has been removed from the freelist, was
                // allocated with `self.inner`, and is currently allocated.
                unsafe {
                    self.deallocate_block_info(node);
                }

                // Safety: the block is currently allocated (from the inner
                // allocator's point of view), has the associated layout, and
                // nothing else is referencing this block anymore now that its
                // node has been deallocated.
                unsafe { self.inner.deallocate(block_ptr, block_layout) };
            }
        }
    }

    /// Allocate a new `BlockInfo` for our internal bookkeeping of the given
    /// already-zeroed block described by `ptr` and `layout`.
    fn allocate_block_info(
        &self,
        ptr: NonNull<u8>,
        layout: Layout,
    ) -> Result<&'static BlockInfo<'static>, AllocError> {
        let node_ptr = self.inner.allocate(Layout::new::<BlockInfo<'_>>())?;
        let node_ptr = node_ptr.cast::<BlockInfo<'_>>();
        // Safety: `node_ptr` is valid for writes, is properly aligned, and is
        // valid for conversion to a reference.
        unsafe {
            node_ptr.write(BlockInfo::new(ptr, layout));
            Ok(node_ptr.as_ref())
        }
    }

    /// Deallocate a `BlockInfo` freelist node.
    ///
    /// ### Safety
    ///
    /// * The node must not be in a freelist or otherwise referenced by
    ///   anything else.
    ///
    /// * The node must have been allocated with `self.inner`.
    ///
    /// * The node must be currently allocated.
    unsafe fn deallocate_block_info(&self, node: &'static BlockInfo<'static>) {
        self.inner
            .deallocate(NonNull::from(node).cast(), Layout::new::<BlockInfo<'_>>());
    }

    /// Attempt to allocate an already-zeroed block of the given layout.
    ///
    /// Does not fall back to the inner allocator upon allocation failure.
    #[inline]
    fn allocate_already_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        if layout.size() == 0 {
            // Zero-sized allocations need no backing memory, but the returned
            // pointer must still be non-null and aligned for `layout`, so
            // manufacture a suitably-aligned dangling pointer.
            let dangling = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
            return Ok(NonNull::slice_from_raw_parts(dangling, 0));
        }

        // The index of this layout's align-class, if we have one; clamped so
        // that over-large alignments fall through to `very_large_aligns`
        // instead of indexing out of bounds.
        let align_class = (layout.align().ilog2() as usize).min(NUM_ALIGN_CLASSES);

        {
            let mut zeroed = self.zeroed.lock();
            let zeroed = &mut *zeroed;

            // Start at the layout's align-class and try to find a
            // suitably-sized block for the layout, moving to larger and larger
            // align-classes as necessary.
            let freelists = zeroed.align_classes[align_class..NUM_ALIGN_CLASSES]
                .iter_mut()
                .chain(Some(&mut zeroed.very_large_aligns));

            for freelist in freelists {
                // Look for a block that satisfies this allocation layout. If
                // we find one, then deallocate our metadata with the
                // underlying allocator and return the block.
                if let Some(node) = freelist.remove(&layout) {
                    let ret = node.non_null_slice_ptr();

                    // Safety: the node was allocated from `self.inner`, is
                    // currently allocated, and was removed from its freelist.
                    unsafe {
                        self.deallocate_block_info(node);
                    }

                    return Ok(ret);
                }
            }
        }

        // Failed to find an already-zeroed block that satisfied the requested
        // layout.
        Err(AllocError)
    }

    /// Record the given already-zeroed block so that future `allocate_zeroed`
    /// calls may reuse it.
    ///
    /// ### Safety
    ///
    /// The block must be currently allocated from this allocator's inner
    /// allocator, must fit the given `layout`, and must be fully zeroed.
    #[inline]
    unsafe fn deallocate_already_zeroed(&self, ptr: NonNull<u8>, layout: Layout) {
        if layout.size() == 0 {
            return;
        }

        match self.allocate_block_info(ptr, layout) {
            // If the inner allocator fails to allocate the node that we need
            // for bookkeeping, then we can't keep track of this already-zeroed
            // block, so simply eagerly return it to the inner allocator.
            Err(_) => self.inner.deallocate(ptr, layout),

            Ok(node) => {
                let mut zeroed = self.zeroed.lock();
                let zeroed = &mut *zeroed;

                // Get the appropriate freelist for this block's align-class,
                // falling back to the freelist for very large alignments.
                let align_class = layout.align().ilog2() as usize;
                let freelist = zeroed
                    .align_classes
                    .get_mut(align_class)
                    .unwrap_or_else(|| &mut zeroed.very_large_aligns);

                // Insert the block into its freelist.
                freelist.insert(node);
            }
        }
    }
}

impl<A, L> Drop for ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
    fn drop(&mut self) {
        self.return_zeroed_memory_to_inner();
    }
}

unsafe impl<A, L> Allocator for ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
    #[inline]
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
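        // Prefer fresh memory from the inner allocator for plain allocations
        // and dip into our stash of already-zeroed blocks only as a fallback;
        // this keeps zeroed blocks available for `allocate_zeroed` calls,
        // where reusing them actually saves the zeroing work.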
        self.inner
            .allocate(layout)
            .or_else(|_| self.allocate_already_zeroed(layout))
    }

    #[inline]
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        self.inner.deallocate(ptr, layout);
    }

    #[inline]
    fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
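        // The mirror image of `allocate`: reusing an already-zeroed block
        // skips the zeroing cost entirely, so search our freelists first and
        // only then ask the inner allocator for fresh zeroed memory.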
        self.allocate_already_zeroed(layout)
            .or_else(|_| self.inner.allocate_zeroed(layout))
    }

    #[inline]
    unsafe fn grow(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        self.inner.grow(ptr, old_layout, new_layout).or_else(|_| {
            let new = self.allocate_already_zeroed(new_layout)?;
            ptr::copy_nonoverlapping(
                ptr.as_ptr().cast_const(),
                new.cast().as_ptr(),
                old_layout.size(),
            );
            // Ownership of the old block transfers to us on success, and its
            // contents are no longer known-zeroed, so return it to the inner
            // allocator per the `Allocator::grow` contract.
            self.inner.deallocate(ptr, old_layout);
            Ok(new)
        })
    }

    #[inline]
    unsafe fn grow_zeroed(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        let bytes_to_zero = new_layout.size() - old_layout.size();
        let bytes_to_copy = new_layout.size() - bytes_to_zero;

        // We can't split or grow blocks in place, so if we don't want to use
        // the underlying allocator, we have to do a new zeroed allocation and
        // copy over the old data. As a heuristic, if that would end up copying
        // way more bytes than the new allocation would need to zero (assuming
        // the old allocation could be grown in place) then just defer to the
        // underlying allocator.
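        //
        // For example, growing a 4096-byte block to 4160 bytes would copy 4096
        // bytes to avoid zeroing only 64, so we first try the inner
        // allocator's `grow_zeroed`; growing a 64-byte block to 4096 bytes
        // copies little relative to the zeroing it avoids, so we skip straight
        // to `allocate_zeroed`.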
        if bytes_to_copy > 2usize.saturating_mul(bytes_to_zero) {
            if let Ok(p) = self.inner.grow_zeroed(ptr, old_layout, new_layout) {
                return Ok(p);
            }
        }

        let new = self.allocate_zeroed(new_layout)?;
        ptr::copy_nonoverlapping(
            ptr.as_ptr().cast_const(),
            new.cast().as_ptr(),
            old_layout.size(),
        );
        // As in `grow`, the old block now belongs to us and is no longer
        // known-zeroed, so hand it back to the inner allocator.
        self.inner.deallocate(ptr, old_layout);
        Ok(new)
    }

    #[inline]
    unsafe fn shrink(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        // We can't split allocations ourselves, so just defer to the inner
        // allocator.
        self.inner.shrink(ptr, old_layout, new_layout)
    }
}

impl<A, L> DeallocateZeroed for ZeroAwareAllocator<A, L>
where
    A: Allocator,
    L: LockingMechanism,
{
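    // The caller promises that this block is fully zeroed (see the
    // corresponding `debug_assert!` in `BlockInfo::new`), so instead of
    // returning it to the inner allocator we stash it for reuse by future
    // `allocate_zeroed` calls.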
    unsafe fn deallocate_zeroed(&self, pointer: NonNull<u8>, layout: Layout) {
        self.deallocate_already_zeroed(pointer, layout);
    }
}

mod metadata {
    use super::*;
    use core::cmp::Ordering;
    use intrusive_splay_tree::{Node, SplayTree, TreeOrd};

    /// Metadata about an already-zeroed block of memory, allocated from our
    /// underlying allocator.
    ///
    /// Note: the `'a` lifetime is used internally to this module as much as
    /// possible to keep things free of `unsafe`, but outside this module it is
    /// always erased to `'static` and the lifetimes are manually managed.
    #[derive(Debug)]
    pub(super) struct BlockInfo<'a> {
        ptr: NonNull<u8>,
        layout: Layout,
        node: Node<'a>,
    }

    impl<'a> BlockInfo<'a> {
        /// ### Safety
        ///
        /// `ptr` must point to a currently allocated block that fits `layout`
        /// and whose contents are valid for reads.
        pub(super) unsafe fn new(ptr: NonNull<u8>, layout: Layout) -> Self {
            debug_assert!(
                core::slice::from_raw_parts(ptr.as_ptr(), layout.size())
                    .iter()
                    .all(|b| *b == 0),
                "supposedly already-zeroed block contains non-zero memory"
            );
            BlockInfo {
                ptr,
                layout,
                node: Node::default(),
            }
        }

        fn size(&self) -> usize {
            self.layout.size()
        }

        /// The effective alignment of this block: the largest power of two
        /// that divides its address. For example, an address with four
        /// trailing zero bits has an effective alignment of 16.
        fn align(&self) -> usize {
            1 << (self.ptr.as_ptr() as usize).trailing_zeros()
        }

        pub(super) fn ptr(&self) -> NonNull<u8> {
            self.ptr
        }

        pub(super) fn non_null_slice_ptr(&self) -> NonNull<[u8]> {
            NonNull::slice_from_raw_parts(self.ptr, self.size())
        }

        pub(super) fn layout(&self) -> Layout {
            self.layout
        }
    }

    /// Comparison between two `BlockInfo`s.
    ///
    /// This is used for ordering blocks within a tree.
    impl<'a> TreeOrd<'a, BlockInfo<'a>> for BlockInfo<'a> {
        fn tree_cmp(&self, other: &'a BlockInfo<'a>) -> Ordering {
            // Compare first by size, since that is the first hard constraint
            // we must satisfy and the property we query for.
            self.size()
                .cmp(&other.size())
                // Then by alignment, since that is the other hard constraint.
                .then_with(|| {
                    let self_align = (self.ptr.as_ptr() as usize).trailing_zeros();
                    let other_align = (other.ptr.as_ptr() as usize).trailing_zeros();
                    self_align.cmp(&other_align)
                })
                // And finally by address. This final tie breaker should
                // generally improve spatial locality between a sequence of
                // allocations.
                .then_with(|| {
                    let self_addr = self.ptr.as_ptr() as usize;
                    let other_addr = other.ptr.as_ptr() as usize;
                    self_addr.cmp(&other_addr)
                })
        }
    }

    /// Comparison between a `Layout` and a `BlockInfo`.
    ///
    /// This is used when searching for a block to satisfy a requested
    /// allocation.
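    ///
    /// For example, with `ACCEPTABLE_WASTE_DIVISOR == 8`, a request for 1000
    /// bytes compares `Equal` to a suitably-aligned 1100-byte block (100 bytes
    /// of waste, within the 125-byte budget) but `Less` than a 1200-byte block
    /// (200 bytes of waste, too much).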
    impl<'a> TreeOrd<'a, BlockInfo<'a>> for Layout {
        fn tree_cmp(&self, block: &'a BlockInfo<'a>) -> Ordering {
            // Compare sizes. Allow for some fragmentation, but not too much,
            // because we can't split blocks ourselves. See the doc comment for
            // `ACCEPTABLE_WASTE_DIVISOR` for more info.
            let by_size = match self.size().cmp(&block.size()) {
                Ordering::Greater => Ordering::Greater,
                Ordering::Equal => Ordering::Equal,
                Ordering::Less => {
                    let acceptable_waste = self.size() / ACCEPTABLE_WASTE_DIVISOR;
                    let potential_waste = block.size() - self.size();
                    if potential_waste <= acceptable_waste {
                        Ordering::Equal
                    } else {
                        Ordering::Less
                    }
                }
            };
            by_size
                // Compare alignments. If the block matches the requested
                // alignment at all, even if it is actually over-aligned, then
                // return `Ordering::Equal` to signal that it can satisfy the
                // allocation.
                .then_with(|| match self.align().cmp(&block.align()) {
                    Ordering::Less | Ordering::Equal => Ordering::Equal,
                    Ordering::Greater => Ordering::Greater,
                })
        }
    }

    intrusive_splay_tree::impl_intrusive_node! {
        impl<'a> IntrusiveNode<'a> for BlockInfo<'a>
        where
            type Elem = BlockInfo<'a>,
            node = node;
    }

    pub(super) type FreeList = SplayTree<'static, BlockInfo<'static>>;
}