forge_alloc/hardening/
cache_jitter.rs

1//! `CacheJitter<I>` — randomized per-allocation displacement to spread
2//! metadata across cache associativity sets.
3//!
4//! Allocators that always return pointers at fixed alignments concentrate
5//! metadata into the same cache-set indices. On an 8-way L1 with 64-byte
6//! lines, every page-aligned pointer hashes to the same set, so an attacker
7//! probing a free list can deterministically evict victim data with O(8)
8//! probes. `CacheJitter` shifts each allocation by a multiple of one cache
//! line within the associativity window — `k * line_size` bytes, with `k`
//! drawn from `0..assoc` by the RNG — so different allocations land in
//! different sets.
11//!
12//! Overhead: one xorshift64 per `allocate`, plus `line_size + max_disp`
13//! bytes of prefix per allocation (used for displacement-header storage so
14//! `deallocate` can recover the inner pointer).
15//!
16//! See `docs/ARCHITECTURE.md` for the composable-wrapper design.
17
18use core::cell::Cell;
19use core::ptr::NonNull;
20
21use forge_alloc_core::{AllocError, Allocator, Deallocator, FixedRange, NonZeroLayout};
22
23/// CacheJitter wrapper.
24///
25/// `cache_line_size` and `associativity` are fixed at construction. The
26/// xorshift64 state lives in an interior-mutable `Cell` so `allocate` can
27/// take `&self`; this also makes the type `!Sync`.
28///
29/// # Cross-thread use
30///
/// `CacheJitter<I>` is **not** thread-safe by itself — the `Cell<u64>`
/// holding the rng state prohibits `&CacheJitter` from being shared
/// across threads.
34/// Wrapping the *inner* allocator with `Statistics` or similar does
35/// **not** help; the cell is inside `CacheJitter` and is what blocks
36/// `Sync`.
37///
38/// For cross-thread use, pick one of:
39/// - **Per-thread instance** — give each thread its own
40///   `CacheJitter<I>`. Each instance has its own rng + MAC key, which
41///   actually improves the wrapper's threat-model (the MAC key is
42///   thread-private).
43/// - **External `Mutex<CacheJitter<I>>`** — serializes all access
44///   through the lock. Use only if a single shared instance is
45///   architecturally required; per-thread is faster.
46///
47/// # Randomness model
48///
49/// CacheJitter uses **xorshift64**, a fast non-cryptographic PRNG. The
50/// goal of the wrapper is to *diversify cache-set occupancy* across
51/// allocations so an attacker who controls allocation timing can't
52/// deterministically evict a victim line. It is **not** designed to
/// resist an attacker who can observe several user pointers and solve
/// for the RNG state — xorshift64 returns its state word as its output,
/// so a single full 64-bit output (or enough partial outputs to solve
/// for the state) determines every later displacement.
56///
57/// If your threat model includes that adversary, swap the RNG for a
58/// CSPRNG (e.g. ChaCha20) at the cost of ~10× per-allocation overhead.
59/// For the typical anti-spray use case, xorshift64 is appropriate.
60///
61/// # Composition
62///
63/// Layout requests with `align > cache_line_size` are forwarded to the
64/// inner allocator *without* jitter — the jitter granularity is one cache
65/// line, which can't preserve a larger alignment. The vast majority of
66/// requests have `align <= 16`, so jitter applies in the common case.
67///
68/// # Inner-allocator alignment requirement
69///
70/// **The inner allocator MUST be able to satisfy alignment requests up
71/// to `cache_line_size` (64 bytes on x86/ARM, 128 on Apple Silicon).**
72/// For jittered requests, this wrapper inflates the inner's alignment
73/// requirement up to `cache_line_size` so the user pointer (placed at
74/// `inner_ptr + cache_line_size + displacement`) inherits the caller's
75/// requested alignment. Backings that cap alignment below
76/// `cache_line_size` — notably [`InlineBacked`](crate::backing::InlineBacked),
77/// whose `MAX_ALIGN` is 16 — will reject the inflated request and the
78/// wrapped allocation will fail.
79///
80/// Practical implication: `CacheJitter<MmapBacked>` and
81/// `CacheJitter<BumpArena<MmapBacked>>` work; `CacheJitter<BumpArena<
82/// InlineBacked<N>>>` compiles but cannot actually allocate jittered
83/// blocks. The pattern is mainly useful for production heaps over the
84/// OS allocator, not for stack-buffer arenas.
pub struct CacheJitter<I> {
    /// The wrapped allocator that actually provides the memory.
    inner: I,
    /// Cache-line size in bytes; validated at construction to be a power
    /// of two and at least 8.
    cache_line_size: usize,
    /// Number of distinct cache-line displacements the wrapper may pick
    /// from (the jitter range, in lines).
    associativity: usize,
    /// `trailing_zeros(cache_line_size)`. Cached to encode the displacement
    /// in cache-line units (compact, fits the 16-bit header field).
    line_shift: u32,
    /// Per-instance secret used to MAC the displacement header stored in
    /// the prefix bytes of each jittered allocation.
    /// Initialized once from the caller-supplied seed (or OS entropy on
    /// `new`) and never exposed. Compromising this defeats the
    /// header-integrity check; protecting it is therefore as important as
    /// the SipHashMAC key in `Slab`.
    mac_key: u64,
    /// xorshift64 state, stepped once per jittered allocation. Held in a
    /// `Cell` so it can be advanced through `&self`.
    rng: Cell<u64>,
}
100
/// Size in bytes of the header that prefixes every jittered allocation.
///
/// The single 8-byte slot carries two fields: the applied displacement in
/// the low 16 bits (counted in cache lines, not bytes) and a 48-bit keyed
/// MAC in the high 48 bits. The MAC covers
/// `(user_ptr_addr, displacement_in_lines)` under a per-instance key, so an
/// attacker who controls only the prefix bytes (linear underflow from an
/// adjacent allocation, or a UAF write into a freed slot's prefix) cannot
/// forge a header that survives `deallocate`'s verification — see
/// `CacheJitter::unpack_header`.
const JITTER_HEADER_SIZE: usize = 8;

/// Width, in bits, of the displacement field inside the packed header.
///
/// Sixteen bits can express up to `(2^16 - 1) * cache_line_size` bytes of
/// displacement — 4 MiB with 64-byte lines, 8 MiB with 128-byte lines —
/// far beyond any realistic associativity window. Construction rejects
/// configurations that would not fit.
const HEADER_DISP_BITS: u32 = 16;

/// Mask selecting the displacement field (the low `HEADER_DISP_BITS` bits)
/// of a packed header.
const HEADER_DISP_MASK: u64 = u64::MAX >> (u64::BITS - HEADER_DISP_BITS);

/// Largest associativity the header encoding admits.
///
/// The displacement-in-lines ranges over `0..associativity`, so
/// `associativity` itself is required to fit in the 16-bit field.
const MAX_ASSOCIATIVITY: usize = HEADER_DISP_MASK as usize;
122
123impl<I> CacheJitter<I> {
124    /// Construct with explicit cache parameters and a caller-supplied seed.
125    /// Required for `no_std` builds.
126    ///
127    /// `cache_line_size` must be a power of two and `>= 8` so a `u64`
128    /// header fits within one line. `associativity` must be `>= 1`.
129    /// Returns `None` if either constraint is violated, or if
130    /// `cache_line_size * associativity` would overflow.
131    pub fn with_params(
132        inner: I,
133        cache_line_size: usize,
134        associativity: usize,
135        seed: u64,
136    ) -> Option<Self> {
137        if !cache_line_size.is_power_of_two() || cache_line_size < 8 {
138            return None;
139        }
140        if associativity == 0 || associativity > MAX_ASSOCIATIVITY {
141            return None;
142        }
143        cache_line_size.checked_mul(associativity)?;
144        // Avoid trivially-zero seed — xorshift64 would output zero
145        // forever. The first transformation in next_rng() would then
146        // produce d=0 every call, which defeats the wrapper. Substitute
147        // a fixed nonzero seed if the caller passes 0.
148        let seed = if seed == 0 {
149            0x9E37_79B9_7F4A_7C15
150        } else {
151            seed
152        };
153        // Derive a MAC key distinct from the RNG state so that observing
154        // displacements doesn't directly leak the MAC key. Two independent
155        // splitmix64 steps from the seed give us a key that's
156        // statistically uncorrelated with the RNG sequence the caller
157        // might observe.
158        let mac_key = {
159            let mut x = seed.wrapping_add(0x9E37_79B9_7F4A_7C15);
160            x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
161            x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
162            x ^ (x >> 31)
163        };
164        Some(Self {
165            inner,
166            cache_line_size,
167            associativity,
168            line_shift: cache_line_size.trailing_zeros(),
169            mac_key,
170            rng: Cell::new(seed),
171        })
172    }
173
174    /// Construct with OS-derived entropy. Available on `std` builds.
175    ///
176    /// Uses the same entropy strategy as `Canary::new` — a single
177    /// `RandomState`-derived 64-bit value seeded by the OS RNG.
178    #[cfg(feature = "std")]
179    pub fn new(inner: I, cache_line_size: usize, associativity: usize) -> Option<Self> {
180        use std::collections::hash_map::RandomState;
181        use std::hash::BuildHasher;
182        let seed = RandomState::new().hash_one(0u64);
183        Self::with_params(inner, cache_line_size, associativity, seed)
184    }
185
186    /// Borrow the inner allocator.
187    #[inline]
188    pub fn inner(&self) -> &I {
189        &self.inner
190    }
191
192    /// Cache-line size in bytes (e.g. 64 on x86/ARM, 128 on Apple
193    /// Silicon).
194    #[inline]
195    pub fn cache_line_size(&self) -> usize {
196        self.cache_line_size
197    }
198
199    /// Associativity window in cache lines — the jitter range.
200    #[inline]
201    pub fn associativity(&self) -> usize {
202        self.associativity
203    }
204
205    /// Maximum displacement applied by this wrapper, in bytes.
206    /// Equals `(associativity - 1) * cache_line_size`.
207    #[inline]
208    fn max_displacement(&self) -> usize {
209        (self.associativity - 1) * self.cache_line_size
210    }
211
212    /// Total prefix added to each jittered allocation: one cache line for
213    /// the displacement header + room for the maximum displacement.
214    /// `user_ptr = inner_ptr + cache_line_size + actual_displacement`.
215    #[inline]
216    fn jitter_prefix(&self) -> usize {
217        self.cache_line_size + self.max_displacement()
218    }
219
220    /// Step the xorshift64 generator and return its next output.
221    #[inline]
222    fn next_rng(&self) -> u64 {
223        let mut x = self.rng.get();
224        x ^= x << 13;
225        x ^= x >> 7;
226        x ^= x << 17;
227        self.rng.set(x);
228        x
229    }
230
231    /// Compute the displacement for the next allocation: a non-negative
232    /// multiple of `cache_line_size` in `[0, associativity * cache_line_size)`,
233    /// i.e. `0, cache_line_size, …, (associativity - 1) * cache_line_size`.
234    ///
235    /// Uses the unbiased "multiply-shift" reduction
236    /// `(rng() * assoc) >> 64` rather than `rng() % assoc` — the modulo
237    /// form has a small bias when `assoc` is not a power of two (e.g. the
238    /// 12-way L2 on certain Intel parts, 6-way on some older AMD), which
239    /// would weaken the cache-set spreading the wrapper relies on. The
240    /// multiply-shift form is unbiased and cheaper on x86_64 (single
241    /// `mul` instead of `div`).
242    #[inline]
243    fn next_displacement(&self) -> usize {
244        let assoc = self.associativity as u128;
245        let r = ((self.next_rng() as u128 * assoc) >> 64) as usize;
246        r * self.cache_line_size
247    }
248
249    /// Compute a 64-bit mixed value over `(user_ptr_addr, disp_lines)`
250    /// keyed by `self.mac_key`. The high 48 bits are used as the MAC.
251    ///
252    /// This is a SplitMix-style avalanche — not a cryptographic MAC, but
253    /// the construction is non-linear and key-dependent. An attacker who
254    /// can only blind-write the 8-byte header (linear underflow from an
255    /// adjacent allocation, UAF write into a freed slot's prefix) faces a
256    /// 2^-48 forgery probability per attempt. The MAC binds the
257    /// displacement to `user_ptr_addr` so a header copied from one
258    /// allocation cannot be replayed against a different one.
259    ///
260    /// Honest threat-model caveats:
261    ///
262    /// - **Direct key disclosure.** An attacker who can read the
263    ///   `mac_key` field itself (arbitrary heap or stack read primitive
264    ///   that reaches inside the `CacheJitter` struct) can forge
265    ///   arbitrary headers. No keyed-MAC construction — cryptographic
266    ///   or otherwise — survives direct key disclosure.
267    /// - **Observed-pair key recovery.** An attacker who can read many
268    ///   `(user_ptr_addr, header)` pairs from the live process *but
269    ///   cannot read `mac_key` directly* faces a weaker barrier with
270    ///   our SplitMix-style mixer than they would with a cryptographic
271    ///   MAC like SipHash: the mixer is a small algebraic circuit and
272    ///   an offline SAT / symbolic-execution attack on a few thousand
273    ///   observations is plausible for a well-resourced adversary. A
274    ///   cryptographic MAC remains key-recovery-hard under the same
275    ///   read access. If your threat model includes a heap-disclosure
276    ///   attacker who cannot read `mac_key` directly but can observe
277    ///   many pairs, swap this mixer for a SipHash MAC at the cost of
278    ///   roughly 5-10x per-allocate work.
279    ///
280    /// CacheJitter is one layer in a defense-in-depth stack, not a
281    /// standalone barrier against arbitrary read+write primitives.
282    #[inline]
283    fn header_mix(&self, user_ptr_addr: usize, disp_lines: u64) -> u64 {
284        let mut x = self.mac_key ^ (user_ptr_addr as u64);
285        x = x.wrapping_mul(0x9E37_79B9_7F4A_7C15);
286        x ^= x.rotate_left(31);
287        x ^= disp_lines;
288        x = x.wrapping_mul(0xBF58_476D_1CE4_E5B9);
289        x ^= x >> 27;
290        x = x.wrapping_mul(0x94D0_49BB_1331_11EB);
291        x ^ (x >> 31)
292    }
293
294    /// Pack the displacement and its MAC into the 8-byte header value.
295    #[inline]
296    fn pack_header(&self, user_ptr_addr: usize, disp_bytes: usize) -> u64 {
297        let disp_lines = (disp_bytes >> self.line_shift) as u64;
298        debug_assert!(
299            disp_lines & !HEADER_DISP_MASK == 0,
300            "displacement-in-lines exceeds 16-bit header field — \
301             construction should have rejected this associativity"
302        );
303        let mac48 = self.header_mix(user_ptr_addr, disp_lines) >> HEADER_DISP_BITS;
304        (mac48 << HEADER_DISP_BITS) | (disp_lines & HEADER_DISP_MASK)
305    }
306
307    /// Verify the header and recover the displacement in bytes. Returns
308    /// `Err(())` if the MAC fails or the recovered displacement is out
309    /// of range — both indicate corruption.
310    #[inline]
311    fn unpack_header(&self, user_ptr_addr: usize, header: u64) -> Result<usize, ()> {
312        let disp_lines = header & HEADER_DISP_MASK;
313        let expected_mac48 = self.header_mix(user_ptr_addr, disp_lines) >> HEADER_DISP_BITS;
314        let stored_mac48 = header >> HEADER_DISP_BITS;
315        // Constant-time compare via `subtle`. For a 48-bit scalar this is
316        // essentially one CMP on modern CPUs, but documenting intent and
317        // surviving future refactor regressions matters more here than
318        // the cycle. Same rationale as `SipHashMAC::verify`.
319        use subtle::ConstantTimeEq;
320        if !bool::from(stored_mac48.ct_eq(&expected_mac48)) {
321            return Err(());
322        }
323        let disp_bytes = (disp_lines as usize) << self.line_shift;
324        // Defense-in-depth: even if the MAC verified, the recovered
325        // displacement must lie in the legitimate range. A MAC collision
326        // outside the range is rejected before we touch `inner.deallocate`.
327        // Bound against `max_displacement()` — the single source of truth for
328        // the legal max `(associativity - 1) * cache_line_size` — rather than
329        // recomputing `associativity * cache_line_size` here, so the check
330        // can't drift from the encoding if either changes.
331        if disp_bytes > self.max_displacement() {
332            return Err(());
333        }
334        Ok(disp_bytes)
335    }
336}
337
338impl<I> Drop for CacheJitter<I> {
339    fn drop(&mut self) {
340        // Zeroize the MAC key (and RNG state, which would let an attacker
341        // predict future displacements if leaked) on drop so the values
342        // don't linger in deallocated stack frames or freed allocator
343        // headers. Same rationale as `Canary::drop` — a per-process
344        // secret leaving the wrapper's storage when the wrapper drops
345        // would let an attacker forge headers on still-live wrappers
346        // that share a derivation source, and would let an attacker
347        // who later examines the freed region read the secret.
348        // Volatile write + compiler fence keeps the clear from being
349        // optimized away.
350        // SAFETY: `&mut self.mac_key` / `self.rng` are valid pointers
351        // to our own fields, and `&mut self` gives exclusive access.
352        unsafe {
353            core::ptr::write_volatile(&mut self.mac_key, 0);
354            // `Cell::as_ptr` returns *mut T into the cell's storage;
355            // safe to write through it via volatile because we have
356            // exclusive access via &mut self.
357            core::ptr::write_volatile(self.rng.as_ptr(), 0);
358        }
359        core::sync::atomic::compiler_fence(core::sync::atomic::Ordering::SeqCst);
360    }
361}
362
363/// Returns the inner layout to request for a user-facing layout, or
364/// `None` if jitter cannot be applied (caller forwards untouched).
365///
366/// `None` means the user's `align > cache_line_size` — jitter granularity
367/// is one cache line, so we can't preserve the larger alignment.
368fn inner_layout_for(
369    layout: NonZeroLayout,
370    cache_line_size: usize,
371    jitter_prefix: usize,
372) -> Option<Result<NonZeroLayout, AllocError>> {
373    if layout.align().get() > cache_line_size {
374        return None;
375    }
376    let total = match layout.size().get().checked_add(jitter_prefix) {
377        Some(t) => t,
378        None => return Some(Err(AllocError)),
379    };
380    // Inner alignment must be at least `cache_line_size` so that
381    // `inner_ptr + cache_line_size + k*cache_line_size` preserves the
382    // caller's requested alignment (which is `<= cache_line_size`).
383    let inner_align = cache_line_size;
384    Some(NonZeroLayout::from_size_align(total, inner_align).map_err(|_| AllocError))
385}
386
387unsafe impl<I: Allocator> Deallocator for CacheJitter<I> {
388    #[inline]
389    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: NonZeroLayout) {
390        // If jitter wasn't applied (oversized align), the request was
391        // forwarded straight through. The same condition holds on
392        // dealloc, so forward unchanged.
393        let Some(inner_layout) =
394            inner_layout_for(layout, self.cache_line_size, self.jitter_prefix())
395        else {
396            // SAFETY: forwarded; caller upholds Deallocator contract.
397            unsafe { self.inner.deallocate(ptr, layout) };
398            return;
399        };
400        let inner_layout = match inner_layout {
401            Ok(l) => l,
402            // `inner_layout_for` is a pure function of `layout`,
403            // `cache_line_size`, and `jitter_prefix`. The latter two are
404            // immutable post-construction; the former is supplied by the
405            // caller. If the original `allocate(layout)` succeeded, this
406            // arm cannot be reached for the SAME layout — reaching it
407            // means the caller passed a different layout to deallocate
408            // than to allocate, which is itself a Deallocator-contract
409            // violation. Match `Canary::deallocate`'s policy: panic
410            // rather than forward `(user_ptr, user_layout)` to inner
411            // (which would be a wrong-ptr / wrong-layout free).
412            Err(_) => panic!(
413                "CacheJitter::deallocate: inner_layout_for(layout) failed for a \
414                 layout that succeeded at allocate-time — caller passed a \
415                 different layout than the one used to allocate"
416            ),
417        };
418        // Read the stored 8-byte header from immediately before user_ptr.
419        // Verify the MAC before trusting the embedded displacement —
420        // without this, an attacker who can write the prefix (linear
421        // underflow from adjacent alloc, or UAF prefix write) gets an
422        // arbitrary-free primitive against inner.
423        // SAFETY: allocate placed this header in the prefix bytes we
424        // own; caller's contract gives us a ptr we previously issued.
425        let header = unsafe {
426            core::ptr::read_unaligned(ptr.as_ptr().sub(JITTER_HEADER_SIZE).cast::<u64>())
427        };
428        let displacement = match self.unpack_header(ptr.as_ptr() as usize, header) {
429            Ok(d) => d,
430            Err(()) => {
431                // Header MAC failure or out-of-range displacement —
432                // memory corruption detected. We cannot safely recover
433                // inner_ptr (that's what the header told us, and the
434                // header is the value under attack), and we cannot
435                // forward the user_ptr (inner doesn't own it). The
436                // standard hardened-allocator response to detected
437                // corruption is to abort with a diagnostic; matches
438                // Canary's policy and the Quarantine corruption response.
439                //
440                // Diagnostic strategy: the observed header is logged so
441                // crash-reporter / core-dump scrape can correlate the
442                // corruption with the surrounding allocation context.
443                // The MAC key itself is NEVER printed (would let an
444                // attacker forge headers elsewhere — same threat model
445                // as the canary-seed-redaction rationale).
446                #[cfg(debug_assertions)]
447                panic!(
448                    "CacheJitter::deallocate: prefix header MAC failed at ptr {:p} \
449                     (observed header: {:#018x}) — heap corruption \
450                     (linear underflow into prefix, or UAF prefix write)",
451                    ptr.as_ptr(),
452                    header,
453                );
454                #[cfg(not(debug_assertions))]
455                panic!(
456                    "CacheJitter::deallocate: prefix header MAC failed — \
457                     heap corruption (linear underflow into prefix, or UAF prefix write)"
458                );
459            }
460        };
461        // Recover inner pointer: walk back past the displacement and the
462        // cache-line prefix.
463        // SAFETY: user_ptr - (cache_line_size + displacement) lies at
464        // the start of the inner allocation we received.
465        let inner_ptr = unsafe { ptr.as_ptr().sub(self.cache_line_size + displacement) };
466        // SAFETY: inner_ptr came from inner.allocate(inner_layout) at
467        // construction of this allocation.
468        unsafe {
469            self.inner
470                .deallocate(NonNull::new_unchecked(inner_ptr), inner_layout)
471        }
472    }
473}
474
475unsafe impl<I: Allocator> Allocator for CacheJitter<I> {
476    #[inline]
477    fn allocate(&self, layout: NonZeroLayout) -> Result<NonNull<[u8]>, AllocError> {
478        let prefix = self.jitter_prefix();
479        let Some(inner_layout) = inner_layout_for(layout, self.cache_line_size, prefix) else {
480            // align too large for jitter — pass through unchanged.
481            return self.inner.allocate(layout);
482        };
483        let inner_layout = inner_layout?;
484        let block = self.inner.allocate(inner_layout)?;
485        let inner_ptr = block.cast::<u8>().as_ptr();
486        let displacement = self.next_displacement();
487        // user_ptr = inner_ptr + cache_line_size + displacement.
488        // SAFETY: inner_layout reserves prefix + layout.size() bytes
489        // starting at inner_ptr; cache_line_size + displacement <= prefix
490        // by construction (displacement < associativity*cache_line_size,
491        // prefix = cache_line_size + (assoc-1)*cache_line_size).
492        let user_ptr = unsafe { inner_ptr.add(self.cache_line_size + displacement) };
493        // Store the MAC-protected header at user_ptr - JITTER_HEADER_SIZE
494        // so the deallocator can recover inner_ptr after verifying that
495        // the header has not been tampered with.
496        // SAFETY: user_ptr - JITTER_HEADER_SIZE = inner_ptr + cache_line_size
497        // + displacement - JITTER_HEADER_SIZE. With cache_line_size >= 8 =
498        // JITTER_HEADER_SIZE and displacement >= 0, this is >= inner_ptr.
499        // And user_ptr - JITTER_HEADER_SIZE < user_ptr <= inner_ptr +
500        // jitter_prefix <= inner_ptr + inner_layout.size(), so the full
501        // 8-byte write stays inside the inner allocation we own.
502        let header = self.pack_header(user_ptr as usize, displacement);
503        unsafe {
504            core::ptr::write_unaligned(user_ptr.sub(JITTER_HEADER_SIZE).cast::<u64>(), header);
505        }
506        // SAFETY: user_ptr derives from a valid &self; non-null.
507        Ok(NonNull::slice_from_raw_parts(
508            unsafe { NonNull::new_unchecked(user_ptr) },
509            layout.size().get(),
510        ))
511    }
512
513    #[inline]
514    fn capacity_bytes(&self) -> Option<usize> {
515        // Approximate: the inner capacity is consumed faster by our
516        // prefix overhead. Report inner capacity as an over-approximation
517        // so Watermark callers treat it as best-effort.
518        self.inner.capacity_bytes()
519    }
520
521    #[inline]
522    fn corruption_events(&self) -> u64 {
523        // CacheJitter's MAC verify failure path panics on detection
524        // (the header is the only state under attack; a wrong MAC means
525        // either a linear underflow into the prefix or a UAF prefix
526        // write — both unrecoverable). Same rationale as Canary:
527        // forward to inner so silent-disarm counts from underneath
528        // still surface.
529        self.inner.corruption_events()
530    }
531}
532
533/// `FixedRange` passthrough so this wrapper composes over a `lazy_commit`
534/// `MmapBacked` and similar backings.
535///
536/// **Footgun:** the displacement header is written and MAC-verified only in
537/// this wrapper's `allocate`/`deallocate`. If you nest it *as a backing under*
538/// an arena — `BumpArena<CacheJitter<..>>` — the arena carves directly from
539/// `base()`/`size()` and never calls `CacheJitter::allocate`/`deallocate`, so
540/// **no displacement is applied and no header is ever checked** while the type
541/// name still advertises the jitter. Keep the hardening wrapper **outermost**
542/// (wrapping the allocator), never as the `FixedRange` an arena consumes.
543impl<I: FixedRange> FixedRange for CacheJitter<I> {
544    #[inline]
545    fn base(&self) -> NonNull<u8> {
546        self.inner.base()
547    }
548
549    #[inline]
550    fn size(&self) -> usize {
551        self.inner.size()
552    }
553
554    /// Pass-through forward so a `commit`-aware consumer reaches the inner
555    /// backing when this wrapper sits over a `lazy_commit` `MmapBacked`.
556    #[inline]
557    fn commit(&self, offset: usize, len: usize) -> Result<(), AllocError> {
558        self.inner.commit(offset, len)
559    }
560}
561
562#[cfg(test)]
563mod tests {
564    use super::*;
565    use crate::backing::InlineBacked;
566    use crate::layout::BumpArena;
567
568    #[cfg(feature = "std")]
569    use crate::backing::MmapBacked;
570
571    /// CacheJitter over MmapBacked — supports up to page alignment, so
572    /// jitter applies for all alignments up to cache_line_size (64).
573    #[cfg(feature = "std")]
574    fn build_mmap() -> CacheJitter<BumpArena<MmapBacked>> {
575        CacheJitter::with_params(
576            BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap(),
577            64,
578            8,
579            0x1234_5678_9ABC_DEF0,
580        )
581        .expect("valid params")
582    }
583
584    #[test]
585    fn rejects_non_power_of_two_line() {
586        let inner = BumpArena::new(InlineBacked::<256>::new()).unwrap();
587        assert!(CacheJitter::with_params(inner, 24, 8, 1).is_none());
588    }
589
590    #[test]
591    fn rejects_zero_associativity() {
592        let inner = BumpArena::new(InlineBacked::<256>::new()).unwrap();
593        assert!(CacheJitter::with_params(inner, 64, 0, 1).is_none());
594    }
595
596    #[test]
597    fn rejects_too_small_line() {
598        let inner = BumpArena::new(InlineBacked::<256>::new()).unwrap();
599        // Line of 4 can't hold the 8-byte displacement header.
600        assert!(CacheJitter::with_params(inner, 4, 8, 1).is_none());
601    }
602
603    #[cfg(feature = "std")]
604    #[test]
605    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
606    fn alloc_then_dealloc_round_trips() {
607        let cj = build_mmap();
608        let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
609        let block = cj.allocate(layout).unwrap();
610        let ptr = block.cast::<u8>();
611        unsafe {
612            core::ptr::write_bytes(ptr.as_ptr(), 0x42, 32);
613            cj.deallocate(ptr, layout);
614        }
615    }
616
617    #[cfg(feature = "std")]
618    #[test]
619    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
620    fn user_ptr_aligned_for_layout() {
621        let cj = build_mmap();
622        let layout = NonZeroLayout::from_size_align(16, 16).unwrap();
623        for _ in 0..32 {
624            let block = cj.allocate(layout).unwrap();
625            let addr = block.cast::<u8>().as_ptr() as usize;
626            assert_eq!(addr % 16, 0, "user ptr must respect requested align");
627        }
628    }
629
630    #[cfg(feature = "std")]
631    #[test]
632    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
633    fn displacement_distribution_hits_multiple_sets() {
634        // With 8-way associativity and a fixed seed, repeated allocations
635        // should be assigned multiple distinct displacements. We read the
636        // *displacement itself* back out of each allocation's header rather
637        // than bucketing the user pointer: the inner BumpArena's cursor
638        // advances every allocation, so `user_ptr % 512` would vary even if
639        // the displacement were stuck at 0 — making a pointer-based test pass
640        // vacuously. Unpacking the header isolates the jitter contribution.
641        let cj = build_mmap();
642        let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
643        let mut displacements = alloc::collections::BTreeSet::new();
644        for _ in 0..64 {
645            let block = cj.allocate(layout).unwrap();
646            let user_ptr = block.cast::<u8>().as_ptr();
647            // SAFETY: a jittered allocation stores its packed header in the
648            // 8 bytes at `user_ptr - JITTER_HEADER_SIZE`.
649            let disp = unsafe {
650                let header =
651                    core::ptr::read_unaligned(user_ptr.sub(JITTER_HEADER_SIZE).cast::<u64>());
652                cj.unpack_header(user_ptr as usize, header)
653                    .expect("freshly written header must verify")
654            };
655            displacements.insert(disp);
656        }
657        assert!(
658            displacements.len() >= 4,
659            "expected diverse displacements (jitter stuck?), got {}",
660            displacements.len(),
661        );
662    }
663
664    #[cfg(feature = "std")]
665    #[test]
666    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
667    fn oversized_align_passes_through_without_jitter() {
668        // align (128) > cache_line_size (64): jitter granularity is one cache
669        // line, so the wrapper cannot preserve the larger alignment and MUST
670        // forward untouched — no prefix, no displacement header. Use
671        // MmapBacked (page-aligned base, satisfies align=128) so the success
672        // path is deterministic instead of error-path-vacuous.
673        let cj = build_mmap();
674        let layout = NonZeroLayout::from_size_align(8, 128).unwrap();
675        let base = cj.inner().base().as_ptr() as usize;
676        // NOTE: this must be the FIRST allocation against the fresh arena — the
677        // `< 64` discriminator below assumes the bump cursor is at 0 so a
678        // pass-through alloc lands at `base + 0`. Do not add a warm-up alloc
679        // before this point, or the offset will exceed one cache line and the
680        // assertion will false-fail.
681        let block = cj.allocate(layout).expect("mmap base satisfies align=128");
682        let user_ptr = block.cast::<u8>().as_ptr();
683        let addr = user_ptr as usize;
684        assert_eq!(addr % 128, 0, "pass-through must honor the requested align");
685        // A *jittered* allocation always prepends at least one cache line
686        // (header + displacement >= cache_line_size = 64). Pass-through adds
687        // none, so the user pointer sits within alignment padding of the base.
688        // This would fail if the wrapper wrongly inflated/displaced the request.
689        assert!(
690            addr - base < 64,
691            "pass-through must not add a jitter prefix (offset {} >= one cache line)",
692            addr - base,
693        );
694        unsafe {
695            core::ptr::write_bytes(user_ptr, 0xAA, 8);
696            cj.deallocate(block.cast(), layout);
697        }
698    }
699
700    /// The resize path is inherited from the `Allocator` trait default
701    /// (allocate-copy-free), which routes through `CacheJitter::allocate`
702    /// (fresh displacement header on the new block) and `deallocate` (MAC-
703    /// verifies the old block). Guards that grow/shrink preserve user bytes
704    /// and keep the header MAC valid end-to-end — previously untested.
705    #[cfg(feature = "std")]
706    #[test]
707    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
708    fn grow_then_shrink_preserves_data_and_header() {
709        let cj = build_mmap();
710        let old = NonZeroLayout::from_size_align(16, 8).unwrap();
711        let mid = NonZeroLayout::from_size_align(64, 8).unwrap();
712        let new = NonZeroLayout::from_size_align(24, 8).unwrap();
713        unsafe {
714            let block = cj.allocate(old).unwrap();
715            let ptr = block.cast::<u8>();
716            core::ptr::write_bytes(ptr.as_ptr(), 0x77, 16);
717
718            let grown = cj.grow(ptr, old, mid).unwrap();
719            let gptr = grown.cast::<u8>();
720            for i in 0..16 {
721                assert_eq!(*gptr.as_ptr().add(i), 0x77, "grow lost byte {i}");
722            }
723
724            let shrunk = cj.shrink(gptr, mid, new).unwrap();
725            let sptr = shrunk.cast::<u8>();
726            for i in 0..16 {
727                assert_eq!(*sptr.as_ptr().add(i), 0x77, "shrink lost byte {i}");
728            }
729            // Deallocate MAC-verifies the final block's header — panics if
730            // grow/shrink failed to re-establish it.
731            cj.deallocate(sptr, new);
732        }
733    }
734
735    #[cfg(feature = "std")]
736    #[test]
737    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
738    fn deterministic_with_same_seed() {
739        // Two independent CacheJitter instances with the same seed must
740        // produce the same sequence of displacements. Verify by checking
741        // the offset-from-inner-base of the first allocation in each.
742        let cj_a = CacheJitter::with_params(
743            BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap(),
744            64,
745            8,
746            0xDEAD_BEEF_CAFE_BABE,
747        )
748        .unwrap();
749        let cj_b = CacheJitter::with_params(
750            BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap(),
751            64,
752            8,
753            0xDEAD_BEEF_CAFE_BABE,
754        )
755        .unwrap();
756        let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
757        let a = cj_a.allocate(layout).unwrap().cast::<u8>().as_ptr() as usize;
758        let b = cj_b.allocate(layout).unwrap().cast::<u8>().as_ptr() as usize;
759        let base_a = cj_a.inner().base().as_ptr() as usize;
760        let base_b = cj_b.inner().base().as_ptr() as usize;
761        assert_eq!(
762            a - base_a,
763            b - base_b,
764            "same seed must give same displacement"
765        );
766    }
767
768    #[cfg(feature = "std")]
769    #[test]
770    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
771    fn os_seeded_constructor() {
772        let inner = BumpArena::new(MmapBacked::new(16 * 1024).unwrap()).unwrap();
773        let cj = CacheJitter::new(inner, 64, 8).expect("valid params");
774        let layout = NonZeroLayout::from_size_align(16, 8).unwrap();
775        let _ = cj.allocate(layout).unwrap();
776    }
777
778    /// Regression: a corrupted header (linear-underflow / UAF-prefix
779    /// write into `user_ptr - 8`) must trip the MAC check in
780    /// `deallocate` rather than steering `inner.deallocate` at the
781    /// attacker-chosen address (arbitrary-free primitive).
782    #[cfg(feature = "std")]
783    #[test]
784    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
785    #[should_panic(expected = "prefix header MAC failed")]
786    fn corrupted_header_panics_on_dealloc() {
787        let cj = build_mmap();
788        let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
789        let block = cj.allocate(layout).unwrap();
790        let user_ptr = block.cast::<u8>();
791        // SAFETY: overwrite the 8-byte prefix that holds the packed
792        // (displacement | MAC) — mirrors what a linear-underflow write
793        // from an adjacent allocation would do.
794        unsafe {
795            core::ptr::write_unaligned(
796                user_ptr.as_ptr().sub(JITTER_HEADER_SIZE).cast::<u64>(),
797                0xDEAD_BEEF_CAFE_BABEu64, // arbitrary attacker-chosen value
798            );
799            // Must panic with the documented MAC-failure message.
800            cj.deallocate(user_ptr, layout);
801        }
802    }
803
804    /// Regression: rejecting the all-zero header would let an attacker
805    /// who can only zero the prefix region (memset(prefix, 0, ...))
806    /// keep a forged displacement of 0 surviving the check. Verify
807    /// that header = 0 fails the MAC (since 0 is not a legitimate MAC
808    /// output for any displacement, given the per-instance secret).
809    #[cfg(feature = "std")]
810    #[test]
811    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
812    #[should_panic(expected = "prefix header MAC failed")]
813    fn zeroed_header_panics_on_dealloc() {
814        let cj = build_mmap();
815        let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
816        let block = cj.allocate(layout).unwrap();
817        let user_ptr = block.cast::<u8>();
818        unsafe {
819            core::ptr::write_unaligned(
820                user_ptr.as_ptr().sub(JITTER_HEADER_SIZE).cast::<u64>(),
821                0u64,
822            );
823            cj.deallocate(user_ptr, layout);
824        }
825    }
826
827    /// Boundary: maximum associativity admitted by the 16-bit displacement
828    /// field is `MAX_ASSOCIATIVITY = (1<<16) - 1 = 65535`. Construction must
829    /// succeed at exactly that value and reject one past.
830    #[cfg(feature = "std")]
831    #[test]
832    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
833    fn associativity_at_and_past_encoding_limit() {
834        let inner = || BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap();
835        // At the limit — admitted.
836        assert!(
837            CacheJitter::with_params(inner(), 64, MAX_ASSOCIATIVITY, 1).is_some(),
838            "MAX_ASSOCIATIVITY = {MAX_ASSOCIATIVITY} must be admitted",
839        );
840        // One past — rejected.
841        assert!(
842            CacheJitter::with_params(inner(), 64, MAX_ASSOCIATIVITY + 1, 1).is_none(),
843            "MAX_ASSOCIATIVITY + 1 must be rejected",
844        );
845    }
846
847    /// Boundary: `cache_line_size * associativity` must not overflow at
848    /// construction. A 2^31 cache_line_size with associativity 2 overflows
849    /// on 32-bit; on 64-bit it doesn't, but the construction-time
850    /// `checked_mul` guard is the gate either way. Try a value that's
851    /// guaranteed to overflow (which forces the guard to fire on every
852    /// target).
853    #[test]
854    fn rejects_cache_line_assoc_overflow() {
855        let inner = || BumpArena::new(InlineBacked::<256>::new()).unwrap();
856        // Pick `cache_line_size` so that `line * assoc` overflows usize on
857        // every target. `1 << (USIZE_BITS - 1)` * 4 overflows.
858        let line = 1usize << (usize::BITS - 1);
859        // Verify line is a power of two and large enough; if not (32-bit
860        // builds), substitute a smaller pow2 that still overflows the
861        // checked_mul.
862        if line.is_power_of_two() && line >= 8 {
863            assert!(
864                CacheJitter::with_params(inner(), line, 4, 1).is_none(),
865                "line * assoc overflow must be rejected",
866            );
867        }
868    }
869
870    /// `cache_line_size = 8` (the minimum admissible) combined with
871    /// `associativity = MAX_ASSOCIATIVITY` is the largest jitter window
872    /// the encoding allows. Construction must succeed and a first
873    /// allocate must round-trip (i.e. the prefix size of
874    /// `cache_line_size + (assoc - 1)*cache_line_size = 8 * 65535`
875    /// fits the backing budget).
876    #[cfg(feature = "std")]
877    #[test]
878    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
879    fn smallest_line_largest_assoc_round_trips() {
880        let inner = BumpArena::new(MmapBacked::new(8 * (MAX_ASSOCIATIVITY + 16)).unwrap()).unwrap();
881        let cj = CacheJitter::with_params(inner, 8, MAX_ASSOCIATIVITY, 1).unwrap();
882        let layout = NonZeroLayout::from_size_align(8, 8).unwrap();
883        let block = cj.allocate(layout).unwrap();
884        let ptr = block.cast::<u8>();
885        unsafe { cj.deallocate(ptr, layout) };
886    }
887
888    /// `cache_line_size = 0` is rejected (not power of two, also `< 8`).
889    #[test]
890    fn rejects_zero_cache_line() {
891        let inner = BumpArena::new(InlineBacked::<256>::new()).unwrap();
892        assert!(CacheJitter::with_params(inner, 0, 8, 1).is_none());
893    }
894
895    /// `seed = 0` is substituted with the golden ratio constant. Verify
896    /// the substituted seed produces a working RNG (the wrapper is
897    /// usable and produces non-zero displacements at least once across
898    /// many allocations) and is distinguishable from the all-zero state
899    /// xorshift64 would otherwise be stuck in.
900    #[cfg(feature = "std")]
901    #[test]
902    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
903    fn zero_seed_substitution_yields_working_rng() {
904        let inner = BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap();
905        let cj = CacheJitter::with_params(inner, 64, 8, 0).expect("zero seed must work");
906        let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
907        let base = cj.inner().base().as_ptr() as usize;
908        // Track the displacement across many allocations — if the RNG
909        // had been stuck at zero (the unmitigated xorshift64 zero
910        // state), every offset would be cache_line_size with no
911        // variation across the 8-way window. With the golden-ratio
912        // substitution we expect to see multiple distinct sets.
913        let mut sets = alloc::collections::BTreeSet::new();
914        for _ in 0..32 {
915            let p = cj.allocate(layout).unwrap().cast::<u8>().as_ptr() as usize;
916            sets.insert((p - base) % (8 * 64));
917        }
918        assert!(
919            sets.len() >= 2,
920            "zero-seed substitution must produce a non-stuck RNG; only saw {} \
921             distinct cache-set offsets",
922            sets.len(),
923        );
924    }
925
926    /// With `associativity = 1`, `disp_lines` is always 0 and the MAC
927    /// is over `(user_ptr_addr, 0)` with a per-instance key. The MAC
928    /// over the zero displacement must still differ from an attacker
929    /// who simply zeros the whole 8-byte header (which has both
930    /// disp_lines = 0 AND mac = 0). Verifies the MAC contributes
931    /// non-zero high bits for `disp = 0`.
932    #[cfg(feature = "std")]
933    #[test]
934    #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
935    fn assoc_one_disp_zero_mac_differs_from_zero_header() {
936        let cj = CacheJitter::with_params(
937            BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap(),
938            64,
939            1,
940            0xDEAD_BEEF_CAFE_BABE,
941        )
942        .unwrap();
943        let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
944        let block = cj.allocate(layout).unwrap();
945        let user_ptr = block.cast::<u8>();
946        // Read the header that allocate just wrote.
947        let stored = unsafe {
948            core::ptr::read_unaligned(user_ptr.as_ptr().sub(JITTER_HEADER_SIZE).cast::<u64>())
949        };
950        assert_ne!(
951            stored, 0,
952            "MAC over (user_ptr, disp=0) must produce non-zero high bits — \
953             otherwise a zeroed-prefix forge would survive the check"
954        );
955        unsafe { cj.deallocate(user_ptr, layout) };
956    }
957}
forge_alloc/hardening/cache_jitter.rs

forge_alloc/hardening/
cache_jitter.rs