zeroizing_alloc/lib.rs
#![no_std]

//! An example crate showing how to safely and performantly zero out all heap allocations in a process.
//!
//! This crate makes the following changes from common zeroizing alloc implementations:
//!
//! - Introduce a faster zeroization implementation (the original is kept behind the "reference_impl" feature for perf testing)
//! - Fix a potential casting bug
//! - Remove unit tests: although they pass locally, they trigger UAF and UB, leading to inconsistent results, which we don't want.
//! - Used `MIRIFLAGS="-Zmiri-ignore-leaks" cargo +nightly miri test -p op-alloc` to check for UB under Miri
//!
//! <https://rust.godbolt.org> was used to partially verify that the zeroization is NOT optimized out at `-Copt-level=3` (a rough probe sketch is included at the bottom of this file).
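//!
//! A minimal usage sketch (the crate path and inner allocator below are illustrative assumptions,
//! not taken from this file): wrap an existing allocator and register the wrapper as the global
//! allocator so every heap allocation is wiped when it is freed.
//!
//! ```rust,ignore
//! use zeroizing_alloc::ZeroAlloc; // hypothetical crate name
//!
//! #[global_allocator]
//! static ALLOC: ZeroAlloc<std::alloc::System> = ZeroAlloc(std::alloc::System);
//!
//! fn main() {
//!     let secret = String::from("hunter2");
//!     drop(secret); // the backing buffer is zeroed in `dealloc` before being released
//! }
//! ```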

use core::alloc::{GlobalAlloc, Layout};

/// Allocator wrapper that zeros on free
pub struct ZeroAlloc<Alloc: GlobalAlloc>(pub Alloc);

// Reference implementation. Performance-wise, this is the same as using the `zeroize` crate,
// because it uses the same logic:
//
// ```rust
// unsafe fn zero(ptr: *mut u8, size: usize) {
//     use zeroize::Zeroize;
//     core::slice::from_raw_parts_mut(ptr, size).zeroize();
// }
// ```
//
// SAFETY: exactly one callsite (below), always passes the correct size
#[cfg(feature = "reference_impl")]
#[inline]
unsafe fn zero(ptr: *mut u8, len: usize) {
    for i in 0..len {
        core::ptr::write_volatile(ptr.add(i), 0);
    }
    core::sync::atomic::compiler_fence(core::sync::atomic::Ordering::SeqCst);
}

#[cfg(not(feature = "reference_impl"))]
unsafe fn clear_bytes(ptr: *mut u8, len: usize) {
    // We expect this to optimize into a `memset` for performance. Because this function is only
    // ever reached through a function pointer loaded with `read_volatile`, the compiler cannot
    // see that the memory it is wiping is about to be freed anyway.
    //
    // SAFETY: The caller must only pass a valid allocated object.
    ptr.write_bytes(0x0, len);
}

// This is meant to avoid compiler optimizations while still retaining performance.
//
// By storing a function pointer to a performant `memset(dest, 0, len)`-style call, we can zero out
// bytes quickly without the compiler realizing that the values being cleared will never be read
// again, since it knows neither the source of the bytes nor the source of our clearing function.
//
// - Loading this function pointer volatilely ensures the compiler cannot reason about where the
//   pointer came from, and so cannot assume anything about what it does.
// - `#[used]` presents an extra optimization barrier, since it forces the compiler to keep the
//   static around (it won't take part in codegen optimization) until it reaches the linker. Even
//   if the linker then removes it, that's still fine, because the linker can't optimize away the
//   code that depends on it.
#[cfg(not(feature = "reference_impl"))]
#[used]
static WIPER: unsafe fn(*mut u8, usize) = clear_bytes;

// SAFETY: exactly one callsite (below), always passes the correct size
#[cfg(not(feature = "reference_impl"))]
#[inline]
unsafe fn zero(ptr: *mut u8, len: usize) {
    // Because of the `read_volatile`, the compiler cannot assume anything about the clearing
    // function we load: it must always load it from the static's address instead of calling
    // `clear_bytes` directly (which might allow the clearing to be optimized away).
    //
    // SAFETY: This static is always initialized to the correct value.
    let wipe = unsafe { core::ptr::addr_of!(WIPER).read_volatile() };
    wipe(ptr, len);
}

// SAFETY: wrapper around an inner allocator; zeroizes on free but otherwise re-uses the inner allocator's logic
unsafe impl<T> GlobalAlloc for ZeroAlloc<T>
where
    T: GlobalAlloc,
{
    #[inline]
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        self.0.alloc(layout)
    }

    #[inline]
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        zero(ptr, layout.size());
        self.0.dealloc(ptr, layout);
    }

    #[inline]
    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
        self.0.alloc_zeroed(layout)
    }
}
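
// A rough sketch (not part of the original file) of the kind of probe that can be pasted into
// <https://rust.godbolt.org> alongside this crate to check at `-Copt-level=3` that the wipe is
// still present: the zeroing stores (typically a `memset`) should appear in the assembly before
// the call into the inner allocator's `dealloc`.
//
// ```rust
// pub unsafe fn probe(alloc: &ZeroAlloc<std::alloc::System>, ptr: *mut u8, layout: Layout) {
//     // Inspect the generated assembly: the zeroing of `layout.size()` bytes must not be elided.
//     alloc.dealloc(ptr, layout);
// }
// ```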