forge_alloc/hardening/cache_jitter.rs
1//! `CacheJitter<I>` — randomized per-allocation displacement to spread
2//! metadata across cache associativity sets.
3//!
4//! Allocators that always return pointers at fixed alignments concentrate
5//! metadata into the same cache-set indices. On an 8-way L1 with 64-byte
6//! lines, every page-aligned pointer hashes to the same set, so an attacker
7//! probing a free list can deterministically evict victim data with O(8)
//! probes. `CacheJitter` shifts each allocation by a multiple of one cache
//! line within the associativity window — `k * line_size` bytes, with `k`
//! drawn from `0..assoc` by a multiply-shift reduction of the RNG output —
//! so different allocations land in different sets.
11//!
12//! Overhead: one xorshift64 per `allocate`, plus `line_size + max_disp`
13//! bytes of prefix per allocation (used for displacement-header storage so
14//! `deallocate` can recover the inner pointer).
15//!
16//! See `docs/ARCHITECTURE.md` for the composable-wrapper design.
17
18use core::cell::Cell;
19use core::ptr::NonNull;
20
21use forge_alloc_core::{AllocError, Allocator, Deallocator, FixedRange, NonZeroLayout};
22
23/// CacheJitter wrapper.
24///
25/// `cache_line_size` and `associativity` are fixed at construction. The
26/// xorshift64 state lives in an interior-mutable `Cell` so `allocate` can
27/// take `&self`; this also makes the type `!Sync`.
28///
29/// # Cross-thread use
30///
/// `CacheJitter<I>` is **not** thread-safe by itself — the `Cell<u64>`
/// rng state prohibits `&CacheJitter` from being shared across threads
/// (the MAC key is a plain `u64` and is not what blocks `Sync`).
/// Wrapping the *inner* allocator with `Statistics` or similar does
/// **not** help; the cell is inside `CacheJitter` and is what blocks
/// `Sync`.
37///
38/// For cross-thread use, pick one of:
39/// - **Per-thread instance** — give each thread its own
40/// `CacheJitter<I>`. Each instance has its own rng + MAC key, which
41/// actually improves the wrapper's threat-model (the MAC key is
42/// thread-private).
43/// - **External `Mutex<CacheJitter<I>>`** — serializes all access
44/// through the lock. Use only if a single shared instance is
45/// architecturally required; per-thread is faster.
46///
47/// # Randomness model
48///
49/// CacheJitter uses **xorshift64**, a fast non-cryptographic PRNG. The
50/// goal of the wrapper is to *diversify cache-set occupancy* across
51/// allocations so an attacker who controls allocation timing can't
52/// deterministically evict a victim line. It is **not** designed to
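/// resist an attacker who can observe several user pointers and solve
/// for the RNG state — xorshift64's output *is* its 64-bit state, the
/// update is linear over GF(2), and every displacement leaks the top
/// bits of one output, so enough observed pointers determine the state.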
56///
57/// If your threat model includes that adversary, swap the RNG for a
58/// CSPRNG (e.g. ChaCha20) at the cost of ~10× per-allocation overhead.
59/// For the typical anti-spray use case, xorshift64 is appropriate.
60///
61/// # Composition
62///
63/// Layout requests with `align > cache_line_size` are forwarded to the
64/// inner allocator *without* jitter — the jitter granularity is one cache
65/// line, which can't preserve a larger alignment. The vast majority of
66/// requests have `align <= 16`, so jitter applies in the common case.
67///
68/// # Inner-allocator alignment requirement
69///
70/// **The inner allocator MUST be able to satisfy alignment requests up
71/// to `cache_line_size` (64 bytes on x86/ARM, 128 on Apple Silicon).**
72/// For jittered requests, this wrapper inflates the inner's alignment
73/// requirement up to `cache_line_size` so the user pointer (placed at
74/// `inner_ptr + cache_line_size + displacement`) inherits the caller's
75/// requested alignment. Backings that cap alignment below
76/// `cache_line_size` — notably [`InlineBacked`](crate::backing::InlineBacked),
77/// whose `MAX_ALIGN` is 16 — will reject the inflated request and the
78/// wrapped allocation will fail.
79///
80/// Practical implication: `CacheJitter<MmapBacked>` and
81/// `CacheJitter<BumpArena<MmapBacked>>` work; `CacheJitter<BumpArena<
82/// InlineBacked<N>>>` compiles but cannot actually allocate jittered
83/// blocks. The pattern is mainly useful for production heaps over the
84/// OS allocator, not for stack-buffer arenas.
pub struct CacheJitter<I> {
    /// Wrapped allocator that supplies the backing memory for every request.
    inner: I,
    /// Cache-line size in bytes. `with_params` only accepts a power of two
    /// that is `>= 8`, so the 8-byte header always fits in one line.
    cache_line_size: usize,
    /// Width of the jitter window in cache lines; displacements are
    /// `k * cache_line_size` for `k` in `0..associativity`.
    associativity: usize,
    /// `trailing_zeros(cache_line_size)`. Cached to encode the displacement
    /// in cache-line units (compact, fits the 16-bit header field).
    line_shift: u32,
    /// Per-instance secret used to MAC the in-band displacement header
    /// (the 8 bytes stored in memory immediately before each user pointer).
    /// Initialized once from the caller-supplied seed (or OS entropy on
    /// `new`) and never exposed. Compromising this defeats the
    /// header-integrity check; protecting it is therefore as important as
    /// the SipHashMAC key in `Slab`.
    mac_key: u64,
    /// xorshift64 state. Lives in a `Cell` so `allocate(&self)` can advance
    /// it; this is the field that makes the type `!Sync`.
    rng: Cell<u64>,
}
100
/// Size in bytes of the header that prefixes every jittered allocation:
/// exactly one `u64`. The low 16 bits carry the applied displacement as a
/// count of cache lines; the high 48 bits carry a keyed MAC computed over
/// `(user_ptr_addr, displacement_in_lines)` with a per-instance key. An
/// attacker who controls only the prefix bytes (linear underflow from an
/// adjacent allocation, or a UAF write into a freed slot's prefix) cannot
/// forge a header that survives `deallocate`'s verification — see
/// `CacheJitter::unpack_header`.
const JITTER_HEADER_SIZE: usize = (u64::BITS / 8) as usize;

/// Width of the displacement field in the packed header. 16 bits stores a
/// displacement of up to `(2^16 - 1) * cache_line_size` bytes — just under
/// 4 MiB for 64-byte lines, just under 8 MiB for 128-byte lines — far above
/// any realistic associativity window. Construction rejects configurations
/// that would overflow.
const HEADER_DISP_BITS: u32 = 16;

/// Mask selecting the low `HEADER_DISP_BITS` bits of a packed header.
const HEADER_DISP_MASK: u64 = !(u64::MAX << HEADER_DISP_BITS);

/// Maximum associativity the encoding admits. The displacement-in-lines
/// ranges over `0..associativity`, so `associativity` itself is capped at
/// the largest value the 16-bit field can represent.
const MAX_ASSOCIATIVITY: usize = HEADER_DISP_MASK as usize;
122
123impl<I> CacheJitter<I> {
124 /// Construct with explicit cache parameters and a caller-supplied seed.
125 /// Required for `no_std` builds.
126 ///
127 /// `cache_line_size` must be a power of two and `>= 8` so a `u64`
128 /// header fits within one line. `associativity` must be `>= 1`.
129 /// Returns `None` if either constraint is violated, or if
130 /// `cache_line_size * associativity` would overflow.
131 pub fn with_params(
132 inner: I,
133 cache_line_size: usize,
134 associativity: usize,
135 seed: u64,
136 ) -> Option<Self> {
137 if !cache_line_size.is_power_of_two() || cache_line_size < 8 {
138 return None;
139 }
140 if associativity == 0 || associativity > MAX_ASSOCIATIVITY {
141 return None;
142 }
143 cache_line_size.checked_mul(associativity)?;
144 // Avoid trivially-zero seed — xorshift64 would output zero
145 // forever. The first transformation in next_rng() would then
146 // produce d=0 every call, which defeats the wrapper. Substitute
147 // a fixed nonzero seed if the caller passes 0.
148 let seed = if seed == 0 {
149 0x9E37_79B9_7F4A_7C15
150 } else {
151 seed
152 };
153 // Derive a MAC key distinct from the RNG state so that observing
154 // displacements doesn't directly leak the MAC key. Two independent
155 // splitmix64 steps from the seed give us a key that's
156 // statistically uncorrelated with the RNG sequence the caller
157 // might observe.
158 let mac_key = {
159 let mut x = seed.wrapping_add(0x9E37_79B9_7F4A_7C15);
160 x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
161 x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
162 x ^ (x >> 31)
163 };
164 Some(Self {
165 inner,
166 cache_line_size,
167 associativity,
168 line_shift: cache_line_size.trailing_zeros(),
169 mac_key,
170 rng: Cell::new(seed),
171 })
172 }
173
174 /// Construct with OS-derived entropy. Available on `std` builds.
175 ///
176 /// Uses the same entropy strategy as `Canary::new` — a single
177 /// `RandomState`-derived 64-bit value seeded by the OS RNG.
178 #[cfg(feature = "std")]
179 pub fn new(inner: I, cache_line_size: usize, associativity: usize) -> Option<Self> {
180 use std::collections::hash_map::RandomState;
181 use std::hash::BuildHasher;
182 let seed = RandomState::new().hash_one(0u64);
183 Self::with_params(inner, cache_line_size, associativity, seed)
184 }
185
186 /// Borrow the inner allocator.
187 #[inline]
188 pub fn inner(&self) -> &I {
189 &self.inner
190 }
191
192 /// Cache-line size in bytes (e.g. 64 on x86/ARM, 128 on Apple
193 /// Silicon).
194 #[inline]
195 pub fn cache_line_size(&self) -> usize {
196 self.cache_line_size
197 }
198
199 /// Associativity window in cache lines — the jitter range.
200 #[inline]
201 pub fn associativity(&self) -> usize {
202 self.associativity
203 }
204
205 /// Maximum displacement applied by this wrapper, in bytes.
206 /// Equals `(associativity - 1) * cache_line_size`.
207 #[inline]
208 fn max_displacement(&self) -> usize {
209 (self.associativity - 1) * self.cache_line_size
210 }
211
212 /// Total prefix added to each jittered allocation: one cache line for
213 /// the displacement header + room for the maximum displacement.
214 /// `user_ptr = inner_ptr + cache_line_size + actual_displacement`.
215 #[inline]
216 fn jitter_prefix(&self) -> usize {
217 self.cache_line_size + self.max_displacement()
218 }
219
220 /// Step the xorshift64 generator and return its next output.
221 #[inline]
222 fn next_rng(&self) -> u64 {
223 let mut x = self.rng.get();
224 x ^= x << 13;
225 x ^= x >> 7;
226 x ^= x << 17;
227 self.rng.set(x);
228 x
229 }
230
231 /// Compute the displacement for the next allocation: a non-negative
232 /// multiple of `cache_line_size` in `[0, associativity * cache_line_size)`,
233 /// i.e. `0, cache_line_size, …, (associativity - 1) * cache_line_size`.
234 ///
235 /// Uses the unbiased "multiply-shift" reduction
236 /// `(rng() * assoc) >> 64` rather than `rng() % assoc` — the modulo
237 /// form has a small bias when `assoc` is not a power of two (e.g. the
238 /// 12-way L2 on certain Intel parts, 6-way on some older AMD), which
239 /// would weaken the cache-set spreading the wrapper relies on. The
240 /// multiply-shift form is unbiased and cheaper on x86_64 (single
241 /// `mul` instead of `div`).
242 #[inline]
243 fn next_displacement(&self) -> usize {
244 let assoc = self.associativity as u128;
245 let r = ((self.next_rng() as u128 * assoc) >> 64) as usize;
246 r * self.cache_line_size
247 }
248
249 /// Compute a 64-bit mixed value over `(user_ptr_addr, disp_lines)`
250 /// keyed by `self.mac_key`. The high 48 bits are used as the MAC.
251 ///
252 /// This is a SplitMix-style avalanche — not a cryptographic MAC, but
253 /// the construction is non-linear and key-dependent. An attacker who
254 /// can only blind-write the 8-byte header (linear underflow from an
255 /// adjacent allocation, UAF write into a freed slot's prefix) faces a
256 /// 2^-48 forgery probability per attempt. The MAC binds the
257 /// displacement to `user_ptr_addr` so a header copied from one
258 /// allocation cannot be replayed against a different one.
259 ///
260 /// Honest threat-model caveats:
261 ///
262 /// - **Direct key disclosure.** An attacker who can read the
263 /// `mac_key` field itself (arbitrary heap or stack read primitive
264 /// that reaches inside the `CacheJitter` struct) can forge
265 /// arbitrary headers. No keyed-MAC construction — cryptographic
266 /// or otherwise — survives direct key disclosure.
267 /// - **Observed-pair key recovery.** An attacker who can read many
268 /// `(user_ptr_addr, header)` pairs from the live process *but
269 /// cannot read `mac_key` directly* faces a weaker barrier with
270 /// our SplitMix-style mixer than they would with a cryptographic
271 /// MAC like SipHash: the mixer is a small algebraic circuit and
272 /// an offline SAT / symbolic-execution attack on a few thousand
273 /// observations is plausible for a well-resourced adversary. A
274 /// cryptographic MAC remains key-recovery-hard under the same
275 /// read access. If your threat model includes a heap-disclosure
276 /// attacker who cannot read `mac_key` directly but can observe
277 /// many pairs, swap this mixer for a SipHash MAC at the cost of
278 /// roughly 5-10x per-allocate work.
279 ///
280 /// CacheJitter is one layer in a defense-in-depth stack, not a
281 /// standalone barrier against arbitrary read+write primitives.
282 #[inline]
283 fn header_mix(&self, user_ptr_addr: usize, disp_lines: u64) -> u64 {
284 let mut x = self.mac_key ^ (user_ptr_addr as u64);
285 x = x.wrapping_mul(0x9E37_79B9_7F4A_7C15);
286 x ^= x.rotate_left(31);
287 x ^= disp_lines;
288 x = x.wrapping_mul(0xBF58_476D_1CE4_E5B9);
289 x ^= x >> 27;
290 x = x.wrapping_mul(0x94D0_49BB_1331_11EB);
291 x ^ (x >> 31)
292 }
293
294 /// Pack the displacement and its MAC into the 8-byte header value.
295 #[inline]
296 fn pack_header(&self, user_ptr_addr: usize, disp_bytes: usize) -> u64 {
297 let disp_lines = (disp_bytes >> self.line_shift) as u64;
298 debug_assert!(
299 disp_lines & !HEADER_DISP_MASK == 0,
300 "displacement-in-lines exceeds 16-bit header field — \
301 construction should have rejected this associativity"
302 );
303 let mac48 = self.header_mix(user_ptr_addr, disp_lines) >> HEADER_DISP_BITS;
304 (mac48 << HEADER_DISP_BITS) | (disp_lines & HEADER_DISP_MASK)
305 }
306
307 /// Verify the header and recover the displacement in bytes. Returns
308 /// `Err(())` if the MAC fails or the recovered displacement is out
309 /// of range — both indicate corruption.
310 #[inline]
311 fn unpack_header(&self, user_ptr_addr: usize, header: u64) -> Result<usize, ()> {
312 let disp_lines = header & HEADER_DISP_MASK;
313 let expected_mac48 = self.header_mix(user_ptr_addr, disp_lines) >> HEADER_DISP_BITS;
314 let stored_mac48 = header >> HEADER_DISP_BITS;
315 // Constant-time compare via `subtle`. For a 48-bit scalar this is
316 // essentially one CMP on modern CPUs, but documenting intent and
317 // surviving future refactor regressions matters more here than
318 // the cycle. Same rationale as `SipHashMAC::verify`.
319 use subtle::ConstantTimeEq;
320 if !bool::from(stored_mac48.ct_eq(&expected_mac48)) {
321 return Err(());
322 }
323 let disp_bytes = (disp_lines as usize) << self.line_shift;
324 // Defense-in-depth: even if the MAC verified, the recovered
325 // displacement must lie in the legitimate range. A MAC collision
326 // outside the range is rejected before we touch `inner.deallocate`.
327 // Bound against `max_displacement()` — the single source of truth for
328 // the legal max `(associativity - 1) * cache_line_size` — rather than
329 // recomputing `associativity * cache_line_size` here, so the check
330 // can't drift from the encoding if either changes.
331 if disp_bytes > self.max_displacement() {
332 return Err(());
333 }
334 Ok(disp_bytes)
335 }
336}
337
338impl<I> Drop for CacheJitter<I> {
339 fn drop(&mut self) {
340 // Zeroize the MAC key (and RNG state, which would let an attacker
341 // predict future displacements if leaked) on drop so the values
342 // don't linger in deallocated stack frames or freed allocator
343 // headers. Same rationale as `Canary::drop` — a per-process
344 // secret leaving the wrapper's storage when the wrapper drops
345 // would let an attacker forge headers on still-live wrappers
346 // that share a derivation source, and would let an attacker
347 // who later examines the freed region read the secret.
348 // Volatile write + compiler fence keeps the clear from being
349 // optimized away.
350 // SAFETY: `&mut self.mac_key` / `self.rng` are valid pointers
351 // to our own fields, and `&mut self` gives exclusive access.
352 unsafe {
353 core::ptr::write_volatile(&mut self.mac_key, 0);
354 // `Cell::as_ptr` returns *mut T into the cell's storage;
355 // safe to write through it via volatile because we have
356 // exclusive access via &mut self.
357 core::ptr::write_volatile(self.rng.as_ptr(), 0);
358 }
359 core::sync::atomic::compiler_fence(core::sync::atomic::Ordering::SeqCst);
360 }
361}
362
363/// Returns the inner layout to request for a user-facing layout, or
364/// `None` if jitter cannot be applied (caller forwards untouched).
365///
366/// `None` means the user's `align > cache_line_size` — jitter granularity
367/// is one cache line, so we can't preserve the larger alignment.
368fn inner_layout_for(
369 layout: NonZeroLayout,
370 cache_line_size: usize,
371 jitter_prefix: usize,
372) -> Option<Result<NonZeroLayout, AllocError>> {
373 if layout.align().get() > cache_line_size {
374 return None;
375 }
376 let total = match layout.size().get().checked_add(jitter_prefix) {
377 Some(t) => t,
378 None => return Some(Err(AllocError)),
379 };
380 // Inner alignment must be at least `cache_line_size` so that
381 // `inner_ptr + cache_line_size + k*cache_line_size` preserves the
382 // caller's requested alignment (which is `<= cache_line_size`).
383 let inner_align = cache_line_size;
384 Some(NonZeroLayout::from_size_align(total, inner_align).map_err(|_| AllocError))
385}
386
387unsafe impl<I: Allocator> Deallocator for CacheJitter<I> {
388 #[inline]
389 unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: NonZeroLayout) {
390 // If jitter wasn't applied (oversized align), the request was
391 // forwarded straight through. The same condition holds on
392 // dealloc, so forward unchanged.
393 let Some(inner_layout) =
394 inner_layout_for(layout, self.cache_line_size, self.jitter_prefix())
395 else {
396 // SAFETY: forwarded; caller upholds Deallocator contract.
397 unsafe { self.inner.deallocate(ptr, layout) };
398 return;
399 };
400 let inner_layout = match inner_layout {
401 Ok(l) => l,
402 // `inner_layout_for` is a pure function of `layout`,
403 // `cache_line_size`, and `jitter_prefix`. The latter two are
404 // immutable post-construction; the former is supplied by the
405 // caller. If the original `allocate(layout)` succeeded, this
406 // arm cannot be reached for the SAME layout — reaching it
407 // means the caller passed a different layout to deallocate
408 // than to allocate, which is itself a Deallocator-contract
409 // violation. Match `Canary::deallocate`'s policy: panic
410 // rather than forward `(user_ptr, user_layout)` to inner
411 // (which would be a wrong-ptr / wrong-layout free).
412 Err(_) => panic!(
413 "CacheJitter::deallocate: inner_layout_for(layout) failed for a \
414 layout that succeeded at allocate-time — caller passed a \
415 different layout than the one used to allocate"
416 ),
417 };
418 // Read the stored 8-byte header from immediately before user_ptr.
419 // Verify the MAC before trusting the embedded displacement —
420 // without this, an attacker who can write the prefix (linear
421 // underflow from adjacent alloc, or UAF prefix write) gets an
422 // arbitrary-free primitive against inner.
423 // SAFETY: allocate placed this header in the prefix bytes we
424 // own; caller's contract gives us a ptr we previously issued.
425 let header = unsafe {
426 core::ptr::read_unaligned(ptr.as_ptr().sub(JITTER_HEADER_SIZE).cast::<u64>())
427 };
428 let displacement = match self.unpack_header(ptr.as_ptr() as usize, header) {
429 Ok(d) => d,
430 Err(()) => {
431 // Header MAC failure or out-of-range displacement —
432 // memory corruption detected. We cannot safely recover
433 // inner_ptr (that's what the header told us, and the
434 // header is the value under attack), and we cannot
435 // forward the user_ptr (inner doesn't own it). The
436 // standard hardened-allocator response to detected
437 // corruption is to abort with a diagnostic; matches
438 // Canary's policy and the Quarantine corruption response.
439 //
440 // Diagnostic strategy: the observed header is logged so
441 // crash-reporter / core-dump scrape can correlate the
442 // corruption with the surrounding allocation context.
443 // The MAC key itself is NEVER printed (would let an
444 // attacker forge headers elsewhere — same threat model
445 // as the canary-seed-redaction rationale).
446 #[cfg(debug_assertions)]
447 panic!(
448 "CacheJitter::deallocate: prefix header MAC failed at ptr {:p} \
449 (observed header: {:#018x}) — heap corruption \
450 (linear underflow into prefix, or UAF prefix write)",
451 ptr.as_ptr(),
452 header,
453 );
454 #[cfg(not(debug_assertions))]
455 panic!(
456 "CacheJitter::deallocate: prefix header MAC failed — \
457 heap corruption (linear underflow into prefix, or UAF prefix write)"
458 );
459 }
460 };
461 // Recover inner pointer: walk back past the displacement and the
462 // cache-line prefix.
463 // SAFETY: user_ptr - (cache_line_size + displacement) lies at
464 // the start of the inner allocation we received.
465 let inner_ptr = unsafe { ptr.as_ptr().sub(self.cache_line_size + displacement) };
466 // SAFETY: inner_ptr came from inner.allocate(inner_layout) at
467 // construction of this allocation.
468 unsafe {
469 self.inner
470 .deallocate(NonNull::new_unchecked(inner_ptr), inner_layout)
471 }
472 }
473}
474
475unsafe impl<I: Allocator> Allocator for CacheJitter<I> {
476 #[inline]
477 fn allocate(&self, layout: NonZeroLayout) -> Result<NonNull<[u8]>, AllocError> {
478 let prefix = self.jitter_prefix();
479 let Some(inner_layout) = inner_layout_for(layout, self.cache_line_size, prefix) else {
480 // align too large for jitter — pass through unchanged.
481 return self.inner.allocate(layout);
482 };
483 let inner_layout = inner_layout?;
484 let block = self.inner.allocate(inner_layout)?;
485 let inner_ptr = block.cast::<u8>().as_ptr();
486 let displacement = self.next_displacement();
487 // user_ptr = inner_ptr + cache_line_size + displacement.
488 // SAFETY: inner_layout reserves prefix + layout.size() bytes
489 // starting at inner_ptr; cache_line_size + displacement <= prefix
490 // by construction (displacement < associativity*cache_line_size,
491 // prefix = cache_line_size + (assoc-1)*cache_line_size).
492 let user_ptr = unsafe { inner_ptr.add(self.cache_line_size + displacement) };
493 // Store the MAC-protected header at user_ptr - JITTER_HEADER_SIZE
494 // so the deallocator can recover inner_ptr after verifying that
495 // the header has not been tampered with.
496 // SAFETY: user_ptr - JITTER_HEADER_SIZE = inner_ptr + cache_line_size
497 // + displacement - JITTER_HEADER_SIZE. With cache_line_size >= 8 =
498 // JITTER_HEADER_SIZE and displacement >= 0, this is >= inner_ptr.
499 // And user_ptr - JITTER_HEADER_SIZE < user_ptr <= inner_ptr +
500 // jitter_prefix <= inner_ptr + inner_layout.size(), so the full
501 // 8-byte write stays inside the inner allocation we own.
502 let header = self.pack_header(user_ptr as usize, displacement);
503 unsafe {
504 core::ptr::write_unaligned(user_ptr.sub(JITTER_HEADER_SIZE).cast::<u64>(), header);
505 }
506 // SAFETY: user_ptr derives from a valid &self; non-null.
507 Ok(NonNull::slice_from_raw_parts(
508 unsafe { NonNull::new_unchecked(user_ptr) },
509 layout.size().get(),
510 ))
511 }
512
513 #[inline]
514 fn capacity_bytes(&self) -> Option<usize> {
515 // Approximate: the inner capacity is consumed faster by our
516 // prefix overhead. Report inner capacity as an over-approximation
517 // so Watermark callers treat it as best-effort.
518 self.inner.capacity_bytes()
519 }
520
521 #[inline]
522 fn corruption_events(&self) -> u64 {
523 // CacheJitter's MAC verify failure path panics on detection
524 // (the header is the only state under attack; a wrong MAC means
525 // either a linear underflow into the prefix or a UAF prefix
526 // write — both unrecoverable). Same rationale as Canary:
527 // forward to inner so silent-disarm counts from underneath
528 // still surface.
529 self.inner.corruption_events()
530 }
531}
532
533/// `FixedRange` passthrough so this wrapper composes over a `lazy_commit`
534/// `MmapBacked` and similar backings.
535///
536/// **Footgun:** the displacement header is written and MAC-verified only in
537/// this wrapper's `allocate`/`deallocate`. If you nest it *as a backing under*
538/// an arena — `BumpArena<CacheJitter<..>>` — the arena carves directly from
539/// `base()`/`size()` and never calls `CacheJitter::allocate`/`deallocate`, so
540/// **no displacement is applied and no header is ever checked** while the type
541/// name still advertises the jitter. Keep the hardening wrapper **outermost**
542/// (wrapping the allocator), never as the `FixedRange` an arena consumes.
543impl<I: FixedRange> FixedRange for CacheJitter<I> {
544 #[inline]
545 fn base(&self) -> NonNull<u8> {
546 self.inner.base()
547 }
548
549 #[inline]
550 fn size(&self) -> usize {
551 self.inner.size()
552 }
553
554 /// Pass-through forward so a `commit`-aware consumer reaches the inner
555 /// backing when this wrapper sits over a `lazy_commit` `MmapBacked`.
556 #[inline]
557 fn commit(&self, offset: usize, len: usize) -> Result<(), AllocError> {
558 self.inner.commit(offset, len)
559 }
560}
561
562#[cfg(test)]
563mod tests {
564 use super::*;
565 use crate::backing::InlineBacked;
566 use crate::layout::BumpArena;
567
568 #[cfg(feature = "std")]
569 use crate::backing::MmapBacked;
570
571 /// CacheJitter over MmapBacked — supports up to page alignment, so
572 /// jitter applies for all alignments up to cache_line_size (64).
573 #[cfg(feature = "std")]
574 fn build_mmap() -> CacheJitter<BumpArena<MmapBacked>> {
575 CacheJitter::with_params(
576 BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap(),
577 64,
578 8,
579 0x1234_5678_9ABC_DEF0,
580 )
581 .expect("valid params")
582 }
583
584 #[test]
585 fn rejects_non_power_of_two_line() {
586 let inner = BumpArena::new(InlineBacked::<256>::new()).unwrap();
587 assert!(CacheJitter::with_params(inner, 24, 8, 1).is_none());
588 }
589
590 #[test]
591 fn rejects_zero_associativity() {
592 let inner = BumpArena::new(InlineBacked::<256>::new()).unwrap();
593 assert!(CacheJitter::with_params(inner, 64, 0, 1).is_none());
594 }
595
596 #[test]
597 fn rejects_too_small_line() {
598 let inner = BumpArena::new(InlineBacked::<256>::new()).unwrap();
599 // Line of 4 can't hold the 8-byte displacement header.
600 assert!(CacheJitter::with_params(inner, 4, 8, 1).is_none());
601 }
602
603 #[cfg(feature = "std")]
604 #[test]
605 #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
606 fn alloc_then_dealloc_round_trips() {
607 let cj = build_mmap();
608 let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
609 let block = cj.allocate(layout).unwrap();
610 let ptr = block.cast::<u8>();
611 unsafe {
612 core::ptr::write_bytes(ptr.as_ptr(), 0x42, 32);
613 cj.deallocate(ptr, layout);
614 }
615 }
616
617 #[cfg(feature = "std")]
618 #[test]
619 #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
620 fn user_ptr_aligned_for_layout() {
621 let cj = build_mmap();
622 let layout = NonZeroLayout::from_size_align(16, 16).unwrap();
623 for _ in 0..32 {
624 let block = cj.allocate(layout).unwrap();
625 let addr = block.cast::<u8>().as_ptr() as usize;
626 assert_eq!(addr % 16, 0, "user ptr must respect requested align");
627 }
628 }
629
630 #[cfg(feature = "std")]
631 #[test]
632 #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
633 fn displacement_distribution_hits_multiple_sets() {
634 // With 8-way associativity and a fixed seed, repeated allocations
635 // should be assigned multiple distinct displacements. We read the
636 // *displacement itself* back out of each allocation's header rather
637 // than bucketing the user pointer: the inner BumpArena's cursor
638 // advances every allocation, so `user_ptr % 512` would vary even if
639 // the displacement were stuck at 0 — making a pointer-based test pass
640 // vacuously. Unpacking the header isolates the jitter contribution.
641 let cj = build_mmap();
642 let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
643 let mut displacements = alloc::collections::BTreeSet::new();
644 for _ in 0..64 {
645 let block = cj.allocate(layout).unwrap();
646 let user_ptr = block.cast::<u8>().as_ptr();
647 // SAFETY: a jittered allocation stores its packed header in the
648 // 8 bytes at `user_ptr - JITTER_HEADER_SIZE`.
649 let disp = unsafe {
650 let header =
651 core::ptr::read_unaligned(user_ptr.sub(JITTER_HEADER_SIZE).cast::<u64>());
652 cj.unpack_header(user_ptr as usize, header)
653 .expect("freshly written header must verify")
654 };
655 displacements.insert(disp);
656 }
657 assert!(
658 displacements.len() >= 4,
659 "expected diverse displacements (jitter stuck?), got {}",
660 displacements.len(),
661 );
662 }
663
664 #[cfg(feature = "std")]
665 #[test]
666 #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
667 fn oversized_align_passes_through_without_jitter() {
668 // align (128) > cache_line_size (64): jitter granularity is one cache
669 // line, so the wrapper cannot preserve the larger alignment and MUST
670 // forward untouched — no prefix, no displacement header. Use
671 // MmapBacked (page-aligned base, satisfies align=128) so the success
672 // path is deterministic instead of error-path-vacuous.
673 let cj = build_mmap();
674 let layout = NonZeroLayout::from_size_align(8, 128).unwrap();
675 let base = cj.inner().base().as_ptr() as usize;
676 // NOTE: this must be the FIRST allocation against the fresh arena — the
677 // `< 64` discriminator below assumes the bump cursor is at 0 so a
678 // pass-through alloc lands at `base + 0`. Do not add a warm-up alloc
679 // before this point, or the offset will exceed one cache line and the
680 // assertion will false-fail.
681 let block = cj.allocate(layout).expect("mmap base satisfies align=128");
682 let user_ptr = block.cast::<u8>().as_ptr();
683 let addr = user_ptr as usize;
684 assert_eq!(addr % 128, 0, "pass-through must honor the requested align");
685 // A *jittered* allocation always prepends at least one cache line
686 // (header + displacement >= cache_line_size = 64). Pass-through adds
687 // none, so the user pointer sits within alignment padding of the base.
688 // This would fail if the wrapper wrongly inflated/displaced the request.
689 assert!(
690 addr - base < 64,
691 "pass-through must not add a jitter prefix (offset {} >= one cache line)",
692 addr - base,
693 );
694 unsafe {
695 core::ptr::write_bytes(user_ptr, 0xAA, 8);
696 cj.deallocate(block.cast(), layout);
697 }
698 }
699
700 /// The resize path is inherited from the `Allocator` trait default
701 /// (allocate-copy-free), which routes through `CacheJitter::allocate`
702 /// (fresh displacement header on the new block) and `deallocate` (MAC-
703 /// verifies the old block). Guards that grow/shrink preserve user bytes
704 /// and keep the header MAC valid end-to-end — previously untested.
705 #[cfg(feature = "std")]
706 #[test]
707 #[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
708 fn grow_then_shrink_preserves_data_and_header() {
709 let cj = build_mmap();
710 let old = NonZeroLayout::from_size_align(16, 8).unwrap();
711 let mid = NonZeroLayout::from_size_align(64, 8).unwrap();
712 let new = NonZeroLayout::from_size_align(24, 8).unwrap();
713 unsafe {
714 let block = cj.allocate(old).unwrap();
715 let ptr = block.cast::<u8>();
716 core::ptr::write_bytes(ptr.as_ptr(), 0x77, 16);
717
718 let grown = cj.grow(ptr, old, mid).unwrap();
719 let gptr = grown.cast::<u8>();
720 for i in 0..16 {
721 assert_eq!(*gptr.as_ptr().add(i), 0x77, "grow lost byte {i}");
722 }
723
724 let shrunk = cj.shrink(gptr, mid, new).unwrap();
725 let sptr = shrunk.cast::<u8>();
726 for i in 0..16 {
727 assert_eq!(*sptr.as_ptr().add(i), 0x77, "shrink lost byte {i}");
728 }
729 // Deallocate MAC-verifies the final block's header — panics if
730 // grow/shrink failed to re-establish it.
731 cj.deallocate(sptr, new);
732 }
733 }
734
/// Two independent `CacheJitter` instances built with the same seed must
/// produce the same *sequence* of displacements.
///
/// Each instance sits on its own mmap-backed arena, so absolute addresses
/// differ; the comparison uses the offset of each user pointer from its
/// arena base instead. Several successive allocations are compared (not
/// just the first) so that an RNG whose state diverges after the first
/// step is caught as well.
#[cfg(feature = "std")]
#[test]
#[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
fn deterministic_with_same_seed() {
    // Number of successive allocations compared. The zero-seed test in
    // this module performs 32 allocations of the same layout with the
    // same parameters in a 64 KiB arena, so 16 fits comfortably.
    const ROUNDS: usize = 16;

    let build = || {
        CacheJitter::with_params(
            BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap(),
            64,
            8,
            0xDEAD_BEEF_CAFE_BABE,
        )
        .unwrap()
    };
    let cj_a = build();
    let cj_b = build();

    let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
    let base_a = cj_a.inner().base().as_ptr() as usize;
    let base_b = cj_b.inner().base().as_ptr() as usize;

    for round in 0..ROUNDS {
        let a = cj_a.allocate(layout).unwrap().cast::<u8>().as_ptr() as usize;
        let b = cj_b.allocate(layout).unwrap().cast::<u8>().as_ptr() as usize;
        assert_eq!(
            a - base_a,
            b - base_b,
            "same seed must give same displacement (allocation {round})"
        );
    }
}
767
/// Smoke test for the seedless `CacheJitter::new` constructor (per the
/// test name, the seed comes from the OS): construction with valid
/// parameters must succeed and the resulting wrapper must be able to
/// serve an allocation.
#[cfg(feature = "std")]
#[test]
#[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
fn os_seeded_constructor() {
    let layout = NonZeroLayout::from_size_align(16, 8).unwrap();
    let backing = MmapBacked::new(16 * 1024).unwrap();
    let arena = BumpArena::new(backing).unwrap();
    let cj = CacheJitter::new(arena, 64, 8).expect("valid params");
    // Only success matters here; the block is intentionally not used.
    let _ = cj.allocate(layout).unwrap();
}
777
/// Regression: if the prefix header in front of the user pointer is
/// overwritten (e.g. by a linear underflow or a use-after-free write
/// into `user_ptr - 8`), `deallocate` must fail its MAC check and panic
/// instead of forwarding an attacker-influenced address to
/// `inner.deallocate` (which would be an arbitrary-free primitive).
#[cfg(feature = "std")]
#[test]
#[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
#[should_panic(expected = "prefix header MAC failed")]
fn corrupted_header_panics_on_dealloc() {
    // Arbitrary attacker-chosen replacement for the packed
    // (displacement | MAC) header word.
    const FORGED_HEADER: u64 = 0xDEAD_BEEF_CAFE_BABE;

    let cj = build_mmap();
    let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
    let user_ptr = cj.allocate(layout).unwrap().cast::<u8>();

    // SAFETY: the `JITTER_HEADER_SIZE` bytes directly before `user_ptr`
    // belong to the block `allocate` just returned (they hold the
    // header), so the unaligned write stays inside that allocation. It
    // mimics what a linear-underflow write from a neighbouring
    // allocation would do.
    unsafe {
        let header = user_ptr.as_ptr().sub(JITTER_HEADER_SIZE).cast::<u64>();
        header.write_unaligned(FORGED_HEADER);
        // Expected to panic with the documented MAC-failure message.
        cj.deallocate(user_ptr, layout);
    }
}
803
/// Regression: an all-zero header must be rejected. If it were accepted,
/// an attacker who can only zero the prefix region
/// (`memset(prefix, 0, ...)`) could forge a header with displacement 0
/// that survives the check. The test zeroes the header word and expects
/// `deallocate` to panic with the MAC-failure message, i.e. it relies on
/// the all-zero word not being the valid header for this allocation
/// under the instance's secret.
#[cfg(feature = "std")]
#[test]
#[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
#[should_panic(expected = "prefix header MAC failed")]
fn zeroed_header_panics_on_dealloc() {
    let cj = build_mmap();
    let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
    let user_ptr = cj.allocate(layout).unwrap().cast::<u8>();

    // SAFETY: the `JITTER_HEADER_SIZE` bytes directly before `user_ptr`
    // are the header slot of the block `allocate` just returned, so the
    // unaligned write stays inside that allocation.
    unsafe {
        let header = user_ptr.as_ptr().sub(JITTER_HEADER_SIZE).cast::<u64>();
        header.write_unaligned(0u64);
        // Expected to panic: a zeroed header must not pass verification.
        cj.deallocate(user_ptr, layout);
    }
}
826
/// Boundary: the 16-bit displacement field caps associativity at
/// `MAX_ASSOCIATIVITY = (1<<16) - 1 = 65535`. `with_params` must accept
/// exactly that value and refuse the next one up.
#[cfg(feature = "std")]
#[test]
#[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
fn associativity_at_and_past_encoding_limit() {
    // Builds a fresh arena per attempt; `with_params` consumes it.
    let try_build = |assoc| {
        let arena = BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap();
        CacheJitter::with_params(arena, 64, assoc, 1)
    };

    // Exactly at the limit: construction succeeds.
    let at_limit = try_build(MAX_ASSOCIATIVITY);
    assert!(
        at_limit.is_some(),
        "MAX_ASSOCIATIVITY = {MAX_ASSOCIATIVITY} must be admitted",
    );

    // One past the limit: construction is refused.
    let past_limit = try_build(MAX_ASSOCIATIVITY + 1);
    assert!(
        past_limit.is_none(),
        "MAX_ASSOCIATIVITY + 1 must be rejected",
    );
}
846
/// Boundary: `cache_line_size * associativity` must not overflow at
/// construction; the construction-time `checked_mul` guard is the gate.
///
/// The test uses `cache_line_size = 1 << (usize::BITS - 1)` (the top bit
/// of `usize`) with associativity 4. That line size is a power of two on
/// every target, and the product exceeds `usize::MAX` regardless of
/// pointer width, so no per-target special-casing is needed.
#[test]
fn rejects_cache_line_assoc_overflow() {
    let line = 1usize << (usize::BITS - 1);

    // Premises of the test, checked explicitly rather than used as a
    // silent skip condition: the line size must be otherwise admissible
    // (power of two, at least 8) and the product must really overflow.
    assert!(
        line.is_power_of_two() && line >= 8,
        "test premise: line size must be an admissible power of two",
    );
    assert!(
        line.checked_mul(4).is_none(),
        "test premise: line * assoc must overflow usize",
    );

    let inner = BumpArena::new(InlineBacked::<256>::new()).unwrap();
    assert!(
        CacheJitter::with_params(inner, line, 4, 1).is_none(),
        "line * assoc overflow must be rejected",
    );
}
869
/// Largest jitter window the encoding allows: the minimum admissible
/// `cache_line_size` (8) together with `associativity =
/// MAX_ASSOCIATIVITY`. Construction must succeed and one
/// allocate/deallocate pair must round-trip, i.e. the prefix of
/// `cache_line_size + (assoc - 1)*cache_line_size = 8 * 65535` bytes
/// must fit the backing, which is sized at `8 * (MAX_ASSOCIATIVITY + 16)`
/// bytes.
#[cfg(feature = "std")]
#[test]
#[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
fn smallest_line_largest_assoc_round_trips() {
    let backing_bytes = 8 * (MAX_ASSOCIATIVITY + 16);
    let backing = MmapBacked::new(backing_bytes).unwrap();
    let arena = BumpArena::new(backing).unwrap();
    let cj = CacheJitter::with_params(arena, 8, MAX_ASSOCIATIVITY, 1).unwrap();

    let layout = NonZeroLayout::from_size_align(8, 8).unwrap();
    let ptr = cj.allocate(layout).unwrap().cast::<u8>();
    // SAFETY: `ptr` was just returned by `cj.allocate(layout)` and is
    // released exactly once with that same layout.
    unsafe { cj.deallocate(ptr, layout) };
}
887
/// A `cache_line_size` of 0 must be refused by `with_params`: zero is
/// not a power of two and is also below the minimum of 8.
#[test]
fn rejects_zero_cache_line() {
    let backing = InlineBacked::<256>::new();
    let inner = BumpArena::new(backing).unwrap();
    assert!(CacheJitter::with_params(inner, 0, 8, 1).is_none());
}
894
/// `seed = 0` is substituted with the golden ratio constant, because an
/// all-zero xorshift64 state never leaves zero. Verify that a wrapper
/// built with seed 0 is usable and that its displacement actually varies
/// across many allocations.
///
/// Variation is detected through the distance between consecutive user
/// pointers rather than through `(p - base) % window`: the latter also
/// changes whenever the inner block stride is not a multiple of the
/// window, independent of the RNG, so it cannot tell a stuck RNG from a
/// working one on its own. With a constant displacement every
/// consecutive distance is identical; any change in displacement between
/// two neighbouring allocations produces a different distance.
#[cfg(feature = "std")]
#[test]
#[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
fn zero_seed_substitution_yields_working_rng() {
    let inner = BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap();
    let cj = CacheJitter::with_params(inner, 64, 8, 0).expect("zero seed must work");
    let layout = NonZeroLayout::from_size_align(32, 8).unwrap();

    // NOTE(review): assumes the inner `BumpArena` serves identical
    // layouts at a constant stride, so that stride differences can only
    // come from the jitter displacement — confirm against `BumpArena`.
    let next_addr = || cj.allocate(layout).unwrap().cast::<u8>().as_ptr() as usize;

    let mut strides = alloc::collections::BTreeSet::new();
    let mut prev = next_addr();
    // 32 allocations in total, i.e. 31 consecutive distances.
    for _ in 1..32 {
        let cur = next_addr();
        // `wrapping_sub` keeps this independent of the direction in
        // which the arena hands out addresses; only distinctness matters.
        strides.insert(cur.wrapping_sub(prev));
        prev = cur;
    }
    assert!(
        strides.len() >= 2,
        "zero-seed substitution must produce a non-stuck RNG; only saw {} \
         distinct allocation strides",
        strides.len(),
    );
}
925
/// With `associativity = 1` the displacement is always 0, so the stored
/// header consists of the MAC over `(user_ptr_addr, 0)` under the
/// per-instance key plus a zero displacement field. That header must
/// not be all-zero, otherwise an attacker who simply zeroes the whole
/// 8-byte header (displacement 0 and MAC 0) would produce a valid one.
/// The test reads back the header `allocate` wrote and checks that it
/// is non-zero.
#[cfg(feature = "std")]
#[test]
#[cfg_attr(miri, ignore = "miri-incompatible: mmap / threads")]
fn assoc_one_disp_zero_mac_differs_from_zero_header() {
    let arena = BumpArena::new(MmapBacked::new(64 * 1024).unwrap()).unwrap();
    let cj = CacheJitter::with_params(arena, 64, 1, 0xDEAD_BEEF_CAFE_BABE).unwrap();
    let layout = NonZeroLayout::from_size_align(32, 8).unwrap();
    let user_ptr = cj.allocate(layout).unwrap().cast::<u8>();

    // SAFETY: the `JITTER_HEADER_SIZE` bytes directly before `user_ptr`
    // are the header slot `allocate` just wrote, so the unaligned read
    // stays inside the live allocation.
    let stored = unsafe {
        user_ptr
            .as_ptr()
            .sub(JITTER_HEADER_SIZE)
            .cast::<u64>()
            .read_unaligned()
    };
    assert_ne!(
        stored, 0,
        "MAC over (user_ptr, disp=0) must produce non-zero high bits — \
         otherwise a zeroed-prefix forge would survive the check"
    );

    // SAFETY: `user_ptr` came from `cj.allocate(layout)` above, its
    // header is untouched, and it is released exactly once.
    unsafe { cj.deallocate(user_ptr, layout) };
}
957}