forge_alloc/backing/mmap.rs
1//! `MmapBacked` — OS-managed anonymous memory region.
2//!
3//! Linux/macOS: `mmap(MAP_ANONYMOUS | MAP_PRIVATE)`.
//! Windows: `VirtualAlloc(MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE)`, or
//! `MEM_RESERVE` alone when the mapping is created with `lazy_commit`.
5//!
6//! On drop, the region is returned to the OS (`munmap` / `VirtualFree`). The
7//! region is laid out bump-style — each `allocate` advances a cursor through
8//! the mapping; `deallocate` is a no-op. Higher layers (`BumpArena`, `Slab`)
9//! impose their own structure on what they receive.
10//!
11//! # See also
12//!
13//! [`HeapBytes`](crate::HeapBytes) is the heap twin of `MmapBacked`'s
14//! region-ownership half: a `FixedRange`-only owner of a single
15//! global-allocator block, no syscalls. Use `HeapBytes` (under a
16//! `BumpArena`) when you need a contiguous bounded region but the
17//! mmap-level isolation (separate VM area, guard-page potential) isn't
18//! worth the ~10-50 µs `mmap` / `VirtualAlloc` cost.
19
20use core::cell::UnsafeCell;
21use core::ptr::NonNull;
22
23use forge_alloc_core::{
24 AllocError, Allocator, Deallocator, FixedRange, NonZeroLayout, OsBacked, ProtectFlags,
25};
26
std::thread_local! {
    /// Last OS error captured from a failing mmap-layer syscall on this thread.
    /// Set immediately after a failure; read via [`mmap_last_os_error`].
    ///
    /// Stored as a raw error code (errno on Unix, `GetLastError` value on
    /// Windows). `None` means no failure has been recorded on this thread yet,
    /// or the slot was explicitly cleared.
    ///
    /// The slot is also written without a failing syscall: by
    /// `capture_synthetic_einval` (pre-syscall argument rejection) and by
    /// `mmap_set_last_os_error` (a caller-supplied code).
    // A plain `Cell` suffices: the value is `Copy` and the slot is only ever
    // touched from its owning thread.
    static LAST_OS_ERROR: core::cell::Cell<Option<i32>> = const { core::cell::Cell::new(None) };
}
36
37/// Record the most recent failing-syscall error from the platform's
38/// thread-local errno / GetLastError into this module's slot. Must be
39/// called *immediately* after the failing syscall, before any other libc /
40/// Win32 call can clobber the underlying thread-local.
41#[inline]
42fn capture_os_error() {
43 let raw = std::io::Error::last_os_error().raw_os_error();
44 LAST_OS_ERROR.with(|c| c.set(raw));
45}
46
47/// Return the most recent failing-syscall error captured into this
48/// module's slot on the current thread, or `None` if none has been
49/// recorded since thread start or [`mmap_clear_last_os_error`] was last
50/// called.
51///
52/// Code is platform-specific (errno on Unix, `GetLastError` on Windows).
53/// Read this *immediately* after a `MmapBacked` constructor or OS-call
54/// returns an error — subsequent libc/Win32 calls in other crates may
55/// overwrite the platform's underlying thread-local. The snapshot in
56/// THIS module is only updated when (a) a syscall inside `MmapBacked`
57/// fails, (b) a pre-syscall validation path in `MmapBacked::with_flags`
58/// rejects its argument (synthetic `EINVAL`), or (c) a composing crate
59/// pushes its own errno via [`mmap_record_os_error`].
60#[must_use]
61pub fn mmap_last_os_error() -> Option<std::io::Error> {
62 LAST_OS_ERROR
63 .with(|c| c.get())
64 .map(std::io::Error::from_raw_os_error)
65}
66
67/// Clear the per-thread last-error slot. Mainly useful in tests; callers
68/// in production typically just read [`mmap_last_os_error`] after a
69/// failure.
70pub fn mmap_clear_last_os_error() {
71 LAST_OS_ERROR.with(|c| c.set(None));
72}
73
/// Record the *current* platform errno / GetLastError into the per-thread
/// last-error slot from an external crate. Use this immediately after a
/// failing syscall (e.g. `mbind`, `madvise`, `pthread_*`) in a crate that
/// composes with `MmapBacked` so callers can read a single
/// [`mmap_last_os_error`] regardless of which layer's syscall failed.
///
/// This is the public entry point to the same capture routine the mmap
/// layer uses for its own syscall failures.
///
/// # Ordering contract
///
/// Must be called **immediately after the failing syscall returns** and
/// **before any other libc / Win32 call**. The platform thread-local
/// that backs `std::io::Error::last_os_error()` (errno on Unix,
/// `GetLastError` on Windows) is volatile — any subsequent call (even a
/// no-failure one — e.g. an allocator's bookkeeping `free` or a logging
/// `write`) may clobber it before this function gets a chance to read
/// the failing code.
///
/// # Thread safety
///
/// The slot is thread-local. Concurrent calls from different threads
/// touch disjoint storage and cannot race; concurrent calls within the
/// same thread are impossible (single-threaded execution within a
/// thread). Each thread sees the most recent error captured *on that
/// thread*, regardless of which crate captured it.
#[inline]
pub fn mmap_record_os_error() {
    // Delegates so internal and external captures share one code path.
    capture_os_error();
}
101
102/// Record a synthetic `EINVAL` into the per-thread last-error slot. Used
103/// when a [`MmapBacked`] constructor rejects its argument (size==0, page-
104/// rounding overflow) without invoking the kernel — otherwise callers
105/// reading [`mmap_last_os_error`] would see whatever stale value a prior
106/// syscall failure left on this thread, or `None`, both of which are
107/// misleading. EINVAL is the universal "validation failed" signal on
108/// Unix (errno 22) and Windows (`ERROR_INVALID_PARAMETER`, 87).
109///
110/// `pub(super)` so the sibling `huge_page_backed` module can reuse the
111/// same per-thread slot for its own pre-syscall rejections without
112/// round-tripping through the platform's `errno` / `GetLastError`
113/// thread-local (which any allocator call in between could clobber).
114#[inline]
115pub(super) fn capture_synthetic_einval() {
116 #[cfg(unix)]
117 let code: i32 = libc::EINVAL;
118 #[cfg(windows)]
119 // `ERROR_INVALID_PARAMETER` is defined as `u32` in `windows-sys`; the
120 // crate-wide slot stores `i32` to match `std::io::Error::raw_os_error`,
121 // so cast at the boundary. The value (87) is within `i32::MAX` and
122 // round-trips through `Error::from_raw_os_error` without loss.
123 let code: i32 = windows_sys::Win32::Foundation::ERROR_INVALID_PARAMETER as i32;
124 LAST_OS_ERROR.with(|c| c.set(Some(code)));
125}
126
127/// Write a raw OS error code into the per-thread last-error slot. Used by
128/// composing crates that need to preserve a previously captured error across
129/// a Drop (e.g. `LockedMmapBacked` saving the lock errno before the inner
130/// `MmapBacked` drops and potentially overwrites it with a munmap error).
131///
132/// Mirrors [`capture_synthetic_einval`]: both write directly to `LAST_OS_ERROR`
133/// with a caller-supplied `i32` value.
134#[inline]
135pub(crate) fn mmap_set_last_os_error(code: i32) {
136 LAST_OS_ERROR.with(|c| c.set(Some(code)));
137}
138
/// Optional flags for [`MmapBacked::with_flags`].
///
/// Several fields are hints consumed by wrapper layers rather than by
/// `MmapBacked` itself: `huge_pages` (`HugePageAligned`), `numa_node`
/// (`NumaLocal`) and `guard_at_end` (`GuardPage`) are accepted for forward
/// compatibility but are no-ops at this layer. The two flags this layer acts
/// on are platform-dependent: `populate` is honored on Linux only, and
/// `lazy_commit` on Windows only; elsewhere they are inert.
///
/// `#[non_exhaustive]` so future bits (`MAP_NORESERVE`, MTE enable) can be
/// added without an API break. As a consequence, code outside this crate
/// cannot build the struct with literal syntax; start from [`MmapFlags::NONE`]
/// (or `Default::default()`) and assign the public fields.
///
/// **Note on `MAP_LOCKED` / `mlock`:** page-locking for cryptographic secrets
/// is available as a separate backing type — [`LockedMmapBacked`] — rather
/// than as a flag here. Using a distinct type enforces the fail-closed
/// guarantee (no silent unlocked fallback) at the type level and makes the
/// security intent visible in caller code.
///
/// [`LockedMmapBacked`]: crate::LockedMmapBacked
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct MmapFlags {
    /// Request transparent / explicit huge pages. Implemented via
    /// `HugePageAligned`; ignored at this layer.
    pub huge_pages: bool,
    /// Fault all pages at allocation time so subsequent accesses don't take
    /// page-fault latency.
    ///
    /// **Platform support is asymmetric** — setting this to `true` does
    /// not guarantee eager paging on every platform:
    ///
    /// - **Linux**: maps to `MAP_POPULATE`. Kernel walks the page tables
    ///   at `mmap` time so subsequent accesses don't fault.
    /// - **macOS / BSD**: **silently ignored**. There is no portable
    ///   equivalent that operates at `mmap` time; eager paging on Darwin
    ///   requires `madvise(MADV_WILLNEED)` over the region after mapping,
    ///   which `MmapBacked` does not currently perform.
    /// - **Windows**: **silently ignored** — but *not* because eager
    ///   paging already happened. `VirtualAlloc(MEM_RESERVE|MEM_COMMIT)`
    ///   charges the full region against the system commit limit at
    ///   construction, yet does **not** make pages resident: the working
    ///   set is still populated lazily by demand-zero faults on first
    ///   access, exactly as on Unix. So the per-page fault latency this
    ///   flag targets is unchanged on Windows; there is nothing for it to
    ///   populate eagerly. The real Windows-specific divergence is a cost,
    ///   not a benefit: because Windows does not overcommit, that commit
    ///   charge is reserved whether or not the pages are ever touched, so a
    ///   large `MmapBacked` whose consumer writes only a fraction of it
    ///   (e.g. a bump arena sized for headroom) still consumes its full
    ///   size of commit budget — and the mapping fails at construction once
    ///   the cumulative charge exceeds the limit, even with physical RAM
    ///   free. Opt out of the up-front charge with [`lazy_commit`](Self::lazy_commit)
    ///   (reserve-only + incremental `MEM_COMMIT` driven by
    ///   [`FixedRange::commit`] as a [`BumpArena`](crate::BumpArena) cursor
    ///   advances).
    ///
    /// Use [`Self::populate_supported`] to test at runtime whether
    /// setting this flag will have any effect, or branch on `cfg!` in
    /// caller code.
    pub populate: bool,
    /// Bind to a specific NUMA node. Implemented via `NumaLocal`; ignored
    /// at this layer.
    pub numa_node: Option<u32>,
    /// Append one unmapped guard page after the region. Implemented via
    /// `GuardPage`; ignored at this layer.
    pub guard_at_end: bool,
    /// Reserve the address range without committing it up front, leaving
    /// per-page commit to [`FixedRange::commit`] as a consumer's cursor
    /// advances.
    ///
    /// **Windows-only effect.** On Windows, `VirtualAlloc(MEM_RESERVE)`
    /// reserves address space without charging the system commit limit;
    /// pages are committed lazily via `commit`. On Unix this flag is
    /// inert — `mmap(MAP_ANONYMOUS|MAP_PRIVATE)` is already demand-paged,
    /// so the region is created exactly as without the flag and `commit`
    /// is a no-op.
    ///
    /// # Safety / usage contract
    ///
    /// A `lazy_commit` mapping hands back **reserved-but-uncommitted**
    /// pages on Windows; writing one before [`FixedRange::commit`] has
    /// committed it faults (access violation). Supported consumers commit
    /// before any write reaches the page:
    ///
    /// - [`BumpArena`] / [`StackAlloc`](crate::StackAlloc) commit each block
    ///   as the cursor advances — true demand-commit, the intended use.
    /// - A pass-through `FixedRange` wrapper interposed between the arena and
    ///   the mapping (`Statistics`, `PoisonOnFree`, `Quarantine`,
    ///   `Watermark`, `Canary`, `CacheJitter`, `Faulty`, `HugePageAligned`,
    ///   `NumaLocal`, `SplitMetadata`) forwards `commit`, so it stays safe.
    /// - `Slab`, `SizeClassed`, and direct [`Allocator::allocate`] carve via
    ///   `allocate`, which commits the block up front — safe, but commits
    ///   *eagerly* (no demand-paging benefit on this path).
    ///
    /// Two consumers still fault and are **unsupported** over a lazy mapping
    /// — use [`MmapBacked::new`] (eager) for them:
    ///
    /// - `SharedBumpArena` — it is `Sync` and would race the `!Sync` commit
    ///   watermark, so it deliberately does not call `commit`.
    /// - `GuardPage` — its usable range starts past a guard page and its
    ///   inner bound is only `OsBacked`, so it has no `commit` to forward.
    ///
    /// [`BumpArena`]: crate::BumpArena
    /// [`FixedRange::commit`]: forge_alloc_core::FixedRange::commit
    pub lazy_commit: bool,
}
242
243impl MmapFlags {
244 /// Empty flag set — equivalent to [`MmapBacked::new`].
245 pub const NONE: Self = Self {
246 huge_pages: false,
247 populate: false,
248 numa_node: None,
249 guard_at_end: false,
250 lazy_commit: false,
251 };
252
253 /// Returns `true` if `populate: true` will actually be honored on the
254 /// current platform — `true` on Linux, `false` on macOS / BSD /
255 /// Windows. Allows callers to branch on whether the eager-paging
256 /// performance hint is meaningful without resorting to `cfg!` checks
257 /// scattered through application code.
258 #[inline]
259 pub const fn populate_supported() -> bool {
260 cfg!(target_os = "linux")
261 }
262}
263
impl Default for MmapFlags {
    /// The default flag set is [`MmapFlags::NONE`]: every hint disabled.
    fn default() -> Self {
        Self::NONE
    }
}
269
/// OS-mapped anonymous region.
///
/// `len` is rounded up to a multiple of the page size at construction. The
/// `Allocator` impl serves requests bump-style from the mapping.
///
/// # Thread safety
///
/// `Send`: yes — the mapping is identified by `(ptr, len)`, both `Send`-safe
/// values; we restore `Send` via an `unsafe impl` since `NonNull<u8>` is
/// `!Send` by default.
/// `Sync`: NO. The cursor uses `UnsafeCell` for `&self` allocation; concurrent
/// `&self` allocators would race. `UnsafeCell` is `!Sync`, which gives us the
/// right behavior without any extra marker field. Cross-thread allocation
/// belongs to higher layers (`SharedBumpArena`, `SlabRemote`).
pub struct MmapBacked {
    /// Base address of the mapping, as returned by the platform map call at
    /// construction. Handed back to the platform unmap call on drop.
    ptr: NonNull<u8>,
    /// Size of the mapping in bytes: the requested size rounded up to a
    /// whole number of pages.
    len: usize,
    /// Bump cursor: byte offset from `ptr` just past the most recent
    /// allocation (starts at `0`). Mutated through `&self`, hence the
    /// `UnsafeCell`.
    cursor: UnsafeCell<usize>,
    /// Page-aligned high-water mark of committed bytes from `ptr`, in
    /// `[0, len]`. Everything in `[ptr, ptr + committed)` is committed and
    /// writable. Only consulted on Windows: eager mappings initialise it
    /// to `len` (whole region committed at construction) so `commit` is a
    /// cheap watermark hit; `lazy_commit` mappings start at `0` and grow it
    /// one `VirtualAlloc(MEM_COMMIT)` per page-crossing as [`commit`] runs.
    /// `UnsafeCell` (not atomic) because `MmapBacked` is `!Sync` — the
    /// commit-aware single-writer consumers ([`BumpArena`] / `StackAlloc`,
    /// and `MmapBacked::allocate` itself) hold exclusive access.
    ///
    /// [`commit`]: forge_alloc_core::FixedRange::commit
    /// [`BumpArena`]: crate::BumpArena
    committed: UnsafeCell<usize>,
}
302
303impl MmapBacked {
304 /// Allocate an anonymous OS-mapped region of at least `size` bytes (rounded
305 /// up to the page size).
306 pub fn new(size: usize) -> Result<Self, AllocError> {
307 Self::with_flags(size, MmapFlags::NONE)
308 }
309
310 /// Allocate with huge-pages requested. This layer ignores the hint;
311 /// `HugePageAligned` enforces 2 MiB / 32 MiB alignment.
312 pub fn with_huge_pages(size: usize) -> Result<Self, AllocError> {
313 Self::with_flags(
314 size,
315 MmapFlags {
316 huge_pages: true,
317 ..MmapFlags::NONE
318 },
319 )
320 }
321
322 /// Reserve an anonymous OS-mapped region of at least `size` bytes
323 /// without committing it up front (see [`MmapFlags::lazy_commit`]).
324 ///
325 /// **Windows-only effect.** On Windows the region is `MEM_RESERVE`-only
326 /// and consumes no commit charge until [`FixedRange::commit`] commits
327 /// pages on demand; on Unix this is identical to [`new`](Self::new)
328 /// because `mmap` is already demand-paged.
329 ///
330 /// # Safety / usage contract
331 ///
332 /// The returned region hands back reserved-but-uncommitted pages on
333 /// Windows. It is safe under [`BumpArena`] / [`StackAlloc`](crate::StackAlloc)
334 /// (true demand-commit), under any pass-through `FixedRange` wrapper over
335 /// those, and under `Slab` / `SizeClassed` / direct
336 /// [`Allocator::allocate`] (safe, but committed eagerly). It faults under
337 /// `SharedBumpArena` and `GuardPage`. See [`MmapFlags::lazy_commit`] for
338 /// the full contract.
339 ///
340 /// [`FixedRange::commit`]: forge_alloc_core::FixedRange::commit
341 /// [`BumpArena`]: crate::BumpArena
342 pub fn new_lazy(size: usize) -> Result<Self, AllocError> {
343 Self::with_flags(
344 size,
345 MmapFlags {
346 lazy_commit: true,
347 ..MmapFlags::NONE
348 },
349 )
350 }
351
352 /// Allocate with the supplied [`MmapFlags`].
353 pub fn with_flags(size: usize, flags: MmapFlags) -> Result<Self, AllocError> {
354 if size == 0 {
355 // Pre-syscall rejection: record a synthetic EINVAL so
356 // `mmap_last_os_error()` callers see an honest diagnostic
357 // rather than whatever stale value lingers from a prior
358 // failure on this thread.
359 capture_synthetic_einval();
360 return Err(AllocError);
361 }
362 let page = page_size();
363 let len = match size.checked_add(page - 1).map(|s| s & !(page - 1)) {
364 Some(l) => l,
365 None => {
366 capture_synthetic_einval();
367 return Err(AllocError);
368 }
369 };
370 // SAFETY: platform-specific os_map enforces its own invariants and
371 // returns a non-null pointer to `len` writable bytes on success.
372 let ptr = unsafe { os_map(len, &flags)? };
373 // Eager mappings have the whole region committed at construction, so
374 // the watermark starts at `len` and `commit` always hits it. Lazy
375 // mappings (Windows MEM_RESERVE) start uncommitted at `0`. On Unix
376 // the watermark is never consulted (mmap is demand-paged; `commit`
377 // is a no-op), so `len` is a harmless default there regardless.
378 let committed = if cfg!(windows) && flags.lazy_commit {
379 0
380 } else {
381 len
382 };
383 Ok(Self {
384 ptr,
385 len,
386 cursor: UnsafeCell::new(0),
387 committed: UnsafeCell::new(committed),
388 })
389 }
390
391 /// Bytes already allocated from this backing.
392 #[inline]
393 pub fn allocated(&self) -> usize {
394 // SAFETY: !Sync — no concurrent access to cursor.
395 unsafe { *self.cursor.get() }
396 }
397
398 /// Total size of the OS-mapped region (page-aligned).
399 #[inline]
400 pub const fn capacity(&self) -> usize {
401 self.len
402 }
403
404 /// Bytes remaining for allocation.
405 #[inline]
406 pub fn remaining(&self) -> usize {
407 self.len - self.allocated()
408 }
409}
410
411impl Drop for MmapBacked {
412 fn drop(&mut self) {
413 // SAFETY: ptr/len pair came from os_map on construction; no copies of
414 // either escape this struct (no Clone impl). Caller of MmapBacked has
415 // by contract guaranteed no outstanding pointers into the region at
416 // drop time (the same caller-discipline that BumpArena::reset requires).
417 unsafe { os_unmap(self.ptr, self.len) };
418 }
419}
420
unsafe impl Deallocator for MmapBacked {
    /// Does nothing: the backing is bump-style, so individual blocks are never
    /// returned. Memory is reclaimed only when the whole `MmapBacked` is
    /// dropped and the mapping is released.
    #[inline]
    unsafe fn deallocate(&self, _ptr: NonNull<u8>, _layout: NonZeroLayout) {
        // No-op. Bump-style; reclaim via drop.
    }
}
427
428unsafe impl Allocator for MmapBacked {
429 fn allocate(&self, layout: NonZeroLayout) -> Result<NonNull<[u8]>, AllocError> {
430 let align = layout.align().get();
431 let size = layout.size().get();
432 // SAFETY: !Sync — no concurrent access to cursor.
433 unsafe {
434 let cursor_ptr = self.cursor.get();
435 let cur = *cursor_ptr;
436 let base = self.ptr.as_ptr() as usize;
437 let next = base
438 .checked_add(cur)
439 .and_then(|v| v.checked_add(align - 1))
440 .ok_or(AllocError)?
441 & !(align - 1);
442 let aligned_off = next - base;
443 let end_off = aligned_off.checked_add(size).ok_or(AllocError)?;
444 if end_off > self.len {
445 return Err(AllocError);
446 }
447 // Commit the block before handing it out, so consumers that
448 // carve a region via `allocate` and then write it by raw offset
449 // (`Slab`, `SizeClassed`, direct callers) are safe on a
450 // `lazy_commit` mapping instead of faulting — they degrade to
451 // commit-at-allocate (effectively eager) rather than demand-
452 // paged. No-op for an eager mapping (watermark starts at `len`).
453 // Commit before publishing the cursor so a declined commit
454 // leaves the backing unchanged. `BumpArena` bypasses this path
455 // (it writes via `base()+offset` and drives `commit` itself), so
456 // there is no double-commit.
457 os_commit(self.ptr, self.len, &self.committed, aligned_off, size)?;
458 *cursor_ptr = end_off;
459 let p = self.ptr.as_ptr().add(aligned_off);
460 // SAFETY: aligned_off <= len, and p derives from self.ptr which
461 // is non-null; the result is non-null.
462 Ok(NonNull::slice_from_raw_parts(
463 NonNull::new_unchecked(p),
464 size,
465 ))
466 }
467 }
468
469 #[inline]
470 fn capacity_bytes(&self) -> Option<usize> {
471 Some(self.len)
472 }
473}
474
475impl FixedRange for MmapBacked {
476 #[inline]
477 fn base(&self) -> NonNull<u8> {
478 self.ptr
479 }
480
481 #[inline]
482 fn size(&self) -> usize {
483 self.len
484 }
485
486 #[inline]
487 fn commit(&self, offset: usize, len: usize) -> Result<(), AllocError> {
488 // SAFETY: !Sync — the single commit-aware consumer (BumpArena) has
489 // exclusive access to the `committed` watermark; no concurrent
490 // commit can race the UnsafeCell. This relies on the invariant that
491 // only the owning BumpArena calls `commit` (and `&self` allocators
492 // can't run concurrently on a `!Sync` type). Calling `commit`
493 // directly on a shared `&MmapBacked` while an allocator advances the
494 // watermark would violate that and race the cell — don't.
495 unsafe { os_commit(self.ptr, self.len, &self.committed, offset, len) }
496 }
497}
498
499unsafe impl OsBacked for MmapBacked {
500 #[inline]
501 fn base_ptr(&self) -> NonNull<u8> {
502 self.ptr
503 }
504
505 #[inline]
506 fn region_size(&self) -> usize {
507 self.len
508 }
509
510 /// # Caveat (shared with [`commit`](Self::commit))
511 ///
512 /// On Windows this reads the `committed` high-water mark through `&self`
513 /// under the `!Sync` single-writer contract. Do NOT call `release_pages`
514 /// on a `&MmapBacked` that is shared while an allocator (or `commit`)
515 /// advances the watermark — that races the `UnsafeCell`, exactly as
516 /// [`commit`](Self::commit)'s own caveat warns.
517 #[inline]
518 unsafe fn release_pages(&self, ptr: NonNull<u8>, size: usize) {
519 // The commit watermark is a Windows-only construct: `os_commit` is a
520 // no-op on Unix and never advances it. So the clamp below is gated to
521 // Windows rather than applied cross-platform — on Unix `madvise`
522 // tolerates untouched pages, so the full range is always safe and a
523 // clamp would be dead code that could silently under-release if a
524 // future Unix lazy path ever set `committed < len`.
525 #[cfg(windows)]
526 {
527 // On a `lazy_commit` mapping, pages past the high-water mark are
528 // reserved-but-uncommitted, and `VirtualAlloc(MEM_RESET)` rejects
529 // uncommitted pages with ERROR_INVALID_PARAMETER — a silent no-op
530 // that also leaves a stale error on the `mmap_last_os_error` probe.
531 // Clamp the release range to the committed prefix so the reset only
532 // ever touches committed pages. For eager mappings `committed ==
533 // len`, so this is a no-op.
534 //
535 // SAFETY: `!Sync` single-writer — exclusive access to the watermark
536 // (see the method caveat above), the same contract `commit` relies on.
537 let committed = unsafe { *self.committed.get() };
538 let off = (ptr.as_ptr() as usize).saturating_sub(self.ptr.as_ptr() as usize);
539 let clamped = off.saturating_add(size).min(committed).saturating_sub(off);
540 if clamped == 0 {
541 return;
542 }
543 // SAFETY: caller has promised [ptr, ptr+size) lies wholly inside our
544 // region and has no live allocations; clamping only shrinks the range.
545 unsafe { os_release_pages(ptr, clamped) };
546 }
547 #[cfg(not(windows))]
548 {
549 // SAFETY: caller has promised [ptr, ptr+size) lies wholly inside our
550 // region and has no live allocations.
551 unsafe { os_release_pages(ptr, size) };
552 }
553 }
554
555 #[inline]
556 unsafe fn protect(&self, ptr: NonNull<u8>, size: usize, flags: ProtectFlags) {
557 // SAFETY: caller has promised [ptr, ptr+size) lies inside our region.
558 unsafe { os_protect(ptr, size, flags) };
559 }
560}
561
// MmapBacked owns a `NonNull<u8>` to a non-shared OS mapping. Send is fine
// (the mapping outlives the move because munmap is keyed on ptr+len, not on
// thread identity). !Sync is inherited from the `UnsafeCell<usize>` fields
// (`cursor` and `committed`) — no extra marker field is needed.
//
// SAFETY: see the rationale above; no aliasing reference into the mapping
// escapes the struct (callers receive raw `NonNull<u8>` pointers — Rust's
// aliasing model treats those as inert, in the same way `Box<T>: Send`).
unsafe impl Send for MmapBacked {}
571
572// ============================================================================
573// Platform glue
574// ============================================================================
575
576/// The OS memory page size in bytes — 4 KiB on most x86-64, 16 KiB on
577/// Apple Silicon. Pass this where a primitive needs a page-size argument
578/// (such as `GuardPage`) rather than hard-coding a value that is wrong
579/// on 16 KiB-page platforms.
580#[cfg(unix)]
581pub fn page_size() -> usize {
582 use core::sync::atomic::{AtomicUsize, Ordering};
583
584 // Cached for symmetry with the Windows path. `sysconf` is typically cheap
585 // on glibc/musl (resolved from the auxv at startup), but the cache makes
586 // the cost unconditionally a single relaxed load after the first call.
587 // `0` is the "not yet computed" sentinel (a real page size is always > 0);
588 // the race is benign because every first caller computes the same value.
589 static CACHED: AtomicUsize = AtomicUsize::new(0);
590 let cached = CACHED.load(Ordering::Relaxed);
591 if cached != 0 {
592 return cached;
593 }
594 // SAFETY: sysconf is async-signal-safe and always returns >= 0 for
595 // _SC_PAGESIZE on conforming Unix; we still fall back defensively when
596 // the call reports -1 (errno) so `with_flags` cannot hit `page - 1`
597 // underflow on a pathological kernel.
598 //
599 // NOTE: the 4096 fallback may undersize on 16K-page systems (Apple
600 // Silicon, some ARMv8) if sysconf ever fails — we'd round to 4K instead
601 // of 16K, then mmap would still align internally to 16K. The behavioral
602 // consequence is over-reservation at the round-up step, not unsoundness.
603 let p = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
604 // The returned value must be a power of two: the round-up masks downstream
605 // (`with_flags`, `os_commit`, `allocate`) all use `& !(page - 1)`, which is
606 // only a correct "round up to page" mask when `page` is a power of two. A
607 // pathological positive-but-non-pow2 value would pass a bare `> 0` guard yet
608 // silently round *down*, undersizing the mapping. Reject it here at the one
609 // chokepoint so every downstream mask has an enforced precondition.
610 let ps = match usize::try_from(p) {
611 Ok(p) if p > 0 && p.is_power_of_two() => p,
612 _ => 4096,
613 };
614 CACHED.store(ps, Ordering::Relaxed);
615 ps
616}
617
/// Size of an OS memory page in bytes — 4 KiB on most x86-64 systems,
/// 16 KiB on Apple Silicon.
///
/// Use this wherever a primitive takes a page-size argument (for example
/// `GuardPage`) instead of hard-coding a value that is wrong on
/// 16 KiB-page platforms. The result is always a non-zero power of two.
#[cfg(windows)]
pub fn page_size() -> usize {
    use core::sync::atomic::{AtomicUsize, Ordering};
    use windows_sys::Win32::System::SystemInformation::{GetSystemInfo, SYSTEM_INFO};

    /// Used if the OS ever reports a zero or non-power-of-two page size.
    const FALLBACK: usize = 4096;

    // The page size never changes at runtime, yet `GetSystemInfo` is a real
    // syscall-like call and `os_commit` asks for the page size on every
    // demand-commit — i.e. per allocation on the hot path. So the first
    // answer is cached. `0` marks "not computed yet" (no real page size is
    // 0). Racing first callers are harmless: each runs `GetSystemInfo` and
    // stores the *same* value, so any interleaving leaves the right page
    // size in the cache. `Relaxed` is enough because the cached integer has
    // no other state ordered against it.
    static PAGE: AtomicUsize = AtomicUsize::new(0);
    match PAGE.load(Ordering::Relaxed) {
        0 => {}
        known => return known,
    }

    // SAFETY: SYSTEM_INFO is plain data for which the all-zero bit pattern is
    // a valid value.
    let mut info: SYSTEM_INFO = unsafe { core::mem::zeroed() };
    // SAFETY: GetSystemInfo writes a fully-initialized SYSTEM_INFO into its
    // out-pointer; we provide a stack slot of the correct size.
    unsafe { GetSystemInfo(&mut info) };

    // `dwPageSize` is documented to be non-zero on every supported Windows
    // edition; the fallback is purely defensive so a degenerate value can
    // never cause a `page - 1` underflow in `with_flags`. It must also be a
    // power of two, because every round-up mask downstream (`with_flags`,
    // `os_commit`, `allocate`) is `& !(page - 1)`, which only rounds up
    // correctly for powers of two. Rejecting anything else at this single
    // chokepoint avoids assuming it everywhere. `is_power_of_two` is false
    // for zero, so one test covers both conditions.
    let resolved = Some(info.dwPageSize as usize)
        .filter(|bytes| bytes.is_power_of_two())
        .unwrap_or(FALLBACK);
    PAGE.store(resolved, Ordering::Relaxed);
    resolved
}
660
661#[cfg(unix)]
662unsafe fn os_map(len: usize, flags: &MmapFlags) -> Result<NonNull<u8>, AllocError> {
663 // `mut` is needed only on Linux, where the `MAP_POPULATE` branch below
664 // reassigns it; on macOS / other Unix the binding is never mutated.
665 #[cfg_attr(not(target_os = "linux"), allow(unused_mut))]
666 let mut mmap_flags = libc::MAP_ANONYMOUS | libc::MAP_PRIVATE;
667 if flags.populate {
668 // MAP_POPULATE exists on Linux; on macOS the call still succeeds but
669 // the flag is silently ignored.
670 #[cfg(target_os = "linux")]
671 {
672 mmap_flags |= libc::MAP_POPULATE;
673 }
674 }
675 // SAFETY: mmap with MAP_ANONYMOUS+MAP_PRIVATE, non-zero len, non-conflicting
676 // flags. Returns MAP_FAILED on error which we check.
677 let ptr = unsafe {
678 libc::mmap(
679 core::ptr::null_mut(),
680 len,
681 libc::PROT_READ | libc::PROT_WRITE,
682 mmap_flags,
683 -1,
684 0,
685 )
686 };
687 if ptr == libc::MAP_FAILED {
688 capture_os_error();
689 return Err(AllocError);
690 }
691 // SAFETY: mmap returned non-MAP_FAILED, so ptr is a valid non-null mapping.
692 Ok(unsafe { NonNull::new_unchecked(ptr as *mut u8) })
693}
694
/// Reserve — and, unless `flags.lazy_commit` is set, also commit — `len`
/// bytes of read/write memory with `VirtualAlloc`.
///
/// # Errors
///
/// Returns [`AllocError`] when `VirtualAlloc` returns NULL; the
/// `GetLastError` value is captured into the per-thread last-error slot
/// before returning.
///
/// # Safety
///
/// `len` must be non-zero (the caller, `with_flags`, rejects zero first).
#[cfg(windows)]
unsafe fn os_map(len: usize, flags: &MmapFlags) -> Result<NonNull<u8>, AllocError> {
    use windows_sys::Win32::System::Memory::{
        VirtualAlloc, MEM_COMMIT, MEM_RESERVE, PAGE_READWRITE,
    };

    // Default (eager): MEM_RESERVE|MEM_COMMIT charges all of `len` against
    // the system commit limit immediately — Windows does not overcommit, in
    // contrast to the demand-paged Unix mmap. With `lazy_commit` only address
    // space is reserved (MEM_RESERVE, no commit charge) and pages are
    // committed later through `FixedRange::commit` as a BumpArena cursor
    // advances. PAGE_READWRITE is passed either way: on a reserve-only range
    // the protection is inert until commit, and the later MEM_COMMIT supplies
    // the effective PAGE_READWRITE. Touching a reserved-but-uncommitted page
    // faults regardless, which is the intended trap until `commit` has run.
    let alloc_type = match flags.lazy_commit {
        true => MEM_RESERVE,
        false => MEM_RESERVE | MEM_COMMIT,
    };

    // SAFETY: VirtualAlloc with a NULL base and a valid MEM_* type + PAGE_*
    // protection is the standard anonymous-mapping pattern. NULL is returned
    // on error and handled below.
    let raw = unsafe { VirtualAlloc(core::ptr::null_mut(), len, alloc_type, PAGE_READWRITE) };

    match NonNull::new(raw as *mut u8) {
        Some(base) => Ok(base),
        None => {
            // Snapshot GetLastError before anything else can overwrite it.
            capture_os_error();
            Err(AllocError)
        }
    }
}
724
/// Unix commit hook: always succeeds without doing anything.
///
/// All parameters are ignored; they exist only so the signature matches the
/// Windows implementation, which performs the real per-page commit.
#[cfg(unix)]
unsafe fn os_commit(
    _base: NonNull<u8>,
    _region_len: usize,
    _committed: &UnsafeCell<usize>,
    _offset: usize,
    _len: usize,
) -> Result<(), AllocError> {
    // mmap(MAP_ANONYMOUS|MAP_PRIVATE) is demand-paged: pages are committed
    // (and commit-charged, under the kernel's overcommit policy) lazily on
    // first touch with no per-page action required here. The watermark is
    // never consulted on Unix, so `lazy_commit` is inert.
    Ok(())
}
739
/// Windows implementation of the commit hook: make sure every page covering
/// `[offset, offset + len)` of a reserved region is committed.
///
/// `committed` is a high-water mark (in bytes from `base`) below which the
/// region is known to be committed. The function:
///
/// 1. page-rounds `offset + len` upwards and clamps it to `region_len`;
/// 2. returns `Ok(())` straight away when that end is already at or below
///    the watermark (always the case for an eager mapping, whose watermark
///    starts at `region_len`);
/// 3. otherwise commits from the *watermark* (not from `offset`) up to the
///    rounded end and advances the watermark.
///
/// Committing from the watermark keeps the committed prefix contiguous in
/// whatever order offsets arrive, which is why a single `usize` is enough to
/// describe committedness. The price is that a far offset requested first
/// also commits the gap below it; the in-tree callers (BumpArena, StackAlloc,
/// `allocate`) advance monotonically, so no gap arises in practice.
///
/// # Errors
///
/// `AllocError` if `offset + len` or the page rounding overflows, or if
/// `VirtualAlloc(MEM_COMMIT)` fails. In the latter case the Win32 error is
/// recorded via `capture_os_error` and the watermark is left untouched.
#[cfg(windows)]
unsafe fn os_commit(
    base: NonNull<u8>,
    region_len: usize,
    committed: &UnsafeCell<usize>,
    offset: usize,
    len: usize,
) -> Result<(), AllocError> {
    use windows_sys::Win32::System::Memory::{VirtualAlloc, MEM_COMMIT, PAGE_READWRITE};

    let requested_end = offset.checked_add(len).ok_or(AllocError)?;
    let page_mask = page_size() - 1;
    // Round the end up to a page boundary, then clamp to the region. The
    // clamp is purely defensive: the caller (BumpArena) has already checked
    // `offset + len <= region_len`, and `region_len` is page-aligned, so a
    // valid request cannot overshoot after rounding.
    let target = match requested_end.checked_add(page_mask) {
        Some(bumped) => (bumped & !page_mask).min(region_len),
        None => return Err(AllocError),
    };

    let watermark = committed.get();
    // SAFETY: !Sync — exclusive access to the watermark.
    let committed_so_far = unsafe { *watermark };
    if committed_so_far >= target {
        // Everything requested is already backed; no syscall required.
        return Ok(());
    }

    // SAFETY: `committed_so_far <= region_len` (watermark invariant), so the
    // resulting pointer stays inside the reserved region.
    let grow_from = unsafe { base.as_ptr().add(committed_so_far) };
    let grow_by = target - committed_so_far;
    // SAFETY: [base + committed_so_far, base + target) lies inside the
    // reserved region; MEM_COMMIT over an already-reserved range is the
    // documented demand-commit pattern and is idempotent for any pages that
    // happen to be committed already.
    let outcome = unsafe { VirtualAlloc(grow_from as *mut _, grow_by, MEM_COMMIT, PAGE_READWRITE) };
    if outcome.is_null() {
        // The OS refused (commit limit). Keep the watermark where it was so
        // the range remains officially uncommitted, and report a clean
        // allocation failure instead of letting the caller write to a page
        // the OS never backed.
        capture_os_error();
        return Err(AllocError);
    }

    // SAFETY: !Sync — exclusive access to the watermark.
    unsafe { *watermark = target };
    Ok(())
}
806
807#[cfg(unix)]
808unsafe fn os_unmap(ptr: NonNull<u8>, len: usize) {
809 // SAFETY: ptr/len pair came from os_map; munmap of an active mapping is
810 // the only safe way to release it.
811 let rc = unsafe { libc::munmap(ptr.as_ptr() as *mut libc::c_void, len) };
812 if rc != 0 {
813 // Drop path can't propagate Err; record errno so callers can detect
814 // a previous unmap failure via `mmap_last_os_error()` if they choose.
815 capture_os_error();
816 }
817}
818
/// Windows implementation of region teardown: release the whole
/// `VirtualAlloc` region starting at `ptr`.
///
/// `_len` is unused because `MEM_RELEASE` requires a size of zero and frees
/// the reservation together with any committed pages. The drop path cannot
/// propagate failure, so an error is only recorded in the thread-local slot.
#[cfg(windows)]
unsafe fn os_unmap(ptr: NonNull<u8>, _len: usize) {
    use windows_sys::Win32::System::Memory::{VirtualFree, MEM_RELEASE};
    // SAFETY: MEM_RELEASE expects exactly the base pointer returned by
    // VirtualAlloc together with size 0; that drops both the reservation and
    // the commit.
    let released = unsafe { VirtualFree(ptr.as_ptr() as *mut _, 0, MEM_RELEASE) };
    if released != 0 {
        return;
    }
    // Capture the Win32 error before anything else can overwrite it.
    capture_os_error();
}
830
831#[cfg(unix)]
832unsafe fn os_release_pages(ptr: NonNull<u8>, size: usize) {
833 // Choose advice by platform:
834 //
835 // Linux: MADV_DONTNEED on a private anonymous mapping immediately
836 // releases the physical pages; subsequent reads see zero-filled
837 // pages. This is the canonical "release-but-keep-vma" path.
838 //
839 // macOS: MADV_DONTNEED on a private mapping is only a hint — the
840 // kernel may ignore it. MADV_FREE (added 10.12 / macOS Sierra) is
841 // the documented path: the kernel may reclaim the pages under
842 // memory pressure, and a subsequent read sees either old data or
843 // zeros (the new contents are undefined). For "I really don't
844 // need this anymore" semantics, MADV_FREE is the right choice on
845 // macOS.
846 //
847 // Other Unix (BSD): MADV_FREE has the BSD semantics — same as
848 // macOS.
849 #[cfg(target_os = "linux")]
850 let advice = libc::MADV_DONTNEED;
851 #[cfg(not(target_os = "linux"))]
852 let advice = libc::MADV_FREE;
853 // SAFETY: ptr/size lie wholly inside our own mapping (per the
854 // OsBacked::release_pages caller contract); advice is a valid flag.
855 let rc = unsafe { libc::madvise(ptr.as_ptr() as *mut libc::c_void, size, advice) };
856 if rc != 0 {
857 capture_os_error();
858 }
859}
860
/// Windows implementation of page release: mark `[ptr, ptr + size)` as
/// discardable with `VirtualAlloc(MEM_RESET)` so the OS may reclaim the
/// physical pages.
///
/// `MEM_RESET` works on page-granular ranges. A misaligned `ptr` or `size`
/// makes the call return NULL with `ERROR_INVALID_PARAMETER`, which is
/// recorded via `capture_os_error`. Debug builds additionally assert the
/// alignment up front so misuse is caught during development instead of
/// surfacing only through the error slot in production.
#[cfg(windows)]
unsafe fn os_release_pages(ptr: NonNull<u8>, size: usize) {
    use windows_sys::Win32::System::Memory::{VirtualAlloc, MEM_RESET, PAGE_READWRITE};

    let granule = page_size();
    let start = ptr.as_ptr();
    debug_assert_eq!(
        (start as usize) % granule,
        0,
        "os_release_pages: ptr must be page-aligned on Windows MEM_RESET",
    );
    debug_assert_eq!(
        size % granule,
        0,
        "os_release_pages: size must be page-aligned on Windows MEM_RESET",
    );
    // SAFETY: MEM_RESET on an existing region only declares its contents
    // discardable. The protection argument is ignored for MEM_RESET but must
    // still be a valid constant.
    let outcome = unsafe { VirtualAlloc(start as *mut _, size, MEM_RESET, PAGE_READWRITE) };
    if !outcome.is_null() {
        return;
    }
    capture_os_error();
}
888
889/// Map forge-alloc-core's `ProtectFlags` to a Unix `mprotect` `prot` argument.
890///
891/// Unlike Windows, the Unix ABI exposes each protection bit independently
892/// (`PROT_READ`, `PROT_WRITE`, `PROT_EXEC`), so every one of the eight
893/// `(read, write, exec)` combinations maps bit-exactly to a `mprotect`
894/// argument with no over-grant or down-grade at this layer.
895///
896/// | `(read, write, exec)` | `prot` | Notes |
897/// |-----------------------|-------------------------------------|-------|
898/// | `(F, F, F)` | `PROT_NONE` (== 0) | exact |
899/// | `(T, F, F)` | `PROT_READ` | exact |
900/// | `(F, T, F)` | `PROT_WRITE` | exact at the syscall ABI; some archs (older x86_64) implicitly grant read when write is set, but that's below this layer |
901/// | `(F, F, T)` | `PROT_EXEC` | exact on NX-capable HW; pre-NX implicit read |
902/// | `(T, T, F)` | `PROT_READ \| PROT_WRITE` | exact |
903/// | `(T, F, T)` | `PROT_READ \| PROT_EXEC` | exact |
904/// | `(F, T, T)` | `PROT_WRITE \| PROT_EXEC` | exact (some kernels enforce W^X via seccomp/LSM; that's surfaced as `EINVAL` on the syscall, not silently masked here) |
905/// | `(T, T, T)` | `PROT_READ \| PROT_WRITE \| PROT_EXEC` | exact; some hardened kernels reject and surface `EACCES`/`EINVAL` — propagated unchanged |
906///
907/// Extracted so unit tests can verify the mapping table without invoking
908/// `mprotect` on the host (the test runs cross-platform; only the table
909/// math is platform-neutral). This is the Unix structural parallel to
910/// [`win32_prot_from_flags`] — each Unix arm maps bit-exactly, unlike
911/// Win32 which cannot express write-without-read combinations natively.
912#[cfg(unix)]
913pub(super) fn unix_prot_from_flags(flags: ProtectFlags) -> i32 {
914 // PROT_NONE is 0 on every Unix; the explicit assignment in the
915 // "all-false" branch below documents intent without changing bits.
916 let mut prot = 0i32;
917 if flags.read {
918 prot |= libc::PROT_READ;
919 }
920 if flags.write {
921 prot |= libc::PROT_WRITE;
922 }
923 if flags.exec {
924 prot |= libc::PROT_EXEC;
925 }
926 if !flags.read && !flags.write && !flags.exec {
927 prot = libc::PROT_NONE;
928 }
929 prot
930}
931
932#[cfg(unix)]
933unsafe fn os_protect(ptr: NonNull<u8>, size: usize, flags: ProtectFlags) {
934 let prot = unix_prot_from_flags(flags);
935 // SAFETY: mprotect on a region we own with valid flag bits.
936 let rc = unsafe { libc::mprotect(ptr.as_ptr() as *mut libc::c_void, size, prot) };
937 if rc != 0 {
938 capture_os_error();
939 }
940}
941
/// Translate forge-alloc-core's `ProtectFlags` into a Windows `PAGE_*`
/// protection constant.
///
/// Win32 offers a fixed menu of protections rather than independent bits.
/// Where the menu contains the requested combination it is used as-is;
/// otherwise the **smallest superset** is chosen so that no requested
/// permission is ever lost.
///
/// | `(read, write, exec)` | Win32 constant            | Notes                  |
/// |-----------------------|---------------------------|------------------------|
/// | `(F, F, F)`           | `PAGE_NOACCESS`           | exact                  |
/// | `(T, F, F)`           | `PAGE_READONLY`           | exact                  |
/// | `(T, T, F)`           | `PAGE_READWRITE`          | exact                  |
/// | `(F, T, F)`           | `PAGE_READWRITE`          | over-grants read       |
/// | `(F, F, T)`           | `PAGE_EXECUTE`            | exact on NX-capable HW |
/// | `(T, F, T)`           | `PAGE_EXECUTE_READ`       | exact                  |
/// | `(T, T, T)`           | `PAGE_EXECUTE_READWRITE`  | exact                  |
/// | `(F, T, T)`           | `PAGE_EXECUTE_READWRITE`  | over-grants read       |
///
/// Two rows deserve a word:
///
/// * Exec-only maps to `PAGE_EXECUTE`, which Windows supports natively. On
///   NX-capable hardware it really is exec-only; on legacy CPUs without NX
///   read access is implicitly granted, below this layer. Using
///   `PAGE_EXECUTE_READ` instead would add read on *every* machine.
/// * Windows has no "write but not read" protection, so both
///   write-without-read rows are widened with read. `(F, T, T)` must widen to
///   `PAGE_EXECUTE_READWRITE`; collapsing it to `PAGE_READWRITE` would
///   silently drop the exec bit.
///
/// The function is separate from [`os_protect`] so unit tests can verify the
/// table without tripping that function's debug assertion on
/// write-without-read requests.
#[cfg(windows)]
pub(super) fn win32_prot_from_flags(flags: ProtectFlags) -> u32 {
    use windows_sys::Win32::System::Memory::{
        PAGE_EXECUTE, PAGE_EXECUTE_READ, PAGE_EXECUTE_READWRITE, PAGE_NOACCESS, PAGE_READONLY,
        PAGE_READWRITE,
    };
    // Split on the exec bit first; each half is then a three-way choice on
    // (read, write). A set write bit always selects the *READWRITE constant
    // of its half, whether or not read was requested — that is the
    // smallest-superset widening for write-without-read.
    if flags.exec {
        match (flags.read, flags.write) {
            (false, false) => PAGE_EXECUTE,
            (true, false) => PAGE_EXECUTE_READ,
            (_, true) => PAGE_EXECUTE_READWRITE,
        }
    } else {
        match (flags.read, flags.write) {
            (false, false) => PAGE_NOACCESS,
            (true, false) => PAGE_READONLY,
            (_, true) => PAGE_READWRITE,
        }
    }
}
999
/// Windows implementation of the protection change: apply `flags` to
/// `[ptr, ptr + size)` with `VirtualProtect`.
///
/// The `PAGE_*` constant comes from `win32_prot_from_flags`. Of the eight
/// `(read, write, exec)` combinations, only the two write-without-read ones —
/// `(F, T, F)` and `(F, T, T)` — cannot be expressed exactly on Windows and
/// are widened to `PAGE_READWRITE` / `PAGE_EXECUTE_READWRITE`. Debug builds
/// assert on such a request so the unavoidable widening shows up in tests;
/// release builds apply the widened protection silently.
///
/// A failing `VirtualProtect` is not returned to the caller; the Win32 error
/// is stored via `capture_os_error`.
#[cfg(windows)]
unsafe fn os_protect(ptr: NonNull<u8>, size: usize, flags: ProtectFlags) {
    use windows_sys::Win32::System::Memory::VirtualProtect;
    debug_assert!(
        flags.read || !flags.write,
        "os_protect: write-without-read upgrades to RW/RWX on Windows — \
         caller relying on no-read semantics will not get them. Set flags.read=true \
         explicitly to silence this assertion.",
    );
    let new_protection = win32_prot_from_flags(flags);
    // VirtualProtect reports the previous protection through an out
    // parameter; the value is not used here.
    let mut previous_protection = 0u32;
    // SAFETY: VirtualProtect on a region returned by VirtualAlloc with valid
    // PAGE_* protection constants.
    let changed = unsafe {
        VirtualProtect(
            ptr.as_ptr() as *mut _,
            size,
            new_protection,
            &mut previous_protection,
        )
    };
    if changed != 0 {
        return;
    }
    capture_os_error();
}
1026
1027#[cfg(test)]
1028mod tests {
1029 use super::*;
1030
    // Almost every test in this module exercises real OS mmap / VirtualAlloc
    // paths. Miri cannot model those syscalls, so each such test is
    // individually marked `#[cfg_attr(miri, ignore = "...")]` (the module
    // itself is not gated). The invariants these tests protect (page
    // rounding, alignment, capacity, OS-error capture) do not depend on
    // Miri's interpretation model — Miri's job here is to detect UB in the
    // *consumers* of MmapBacked (Slab / Bump / etc.) when they're driven
    // over InlineBacked.
1038
1039 #[test]
1040 #[cfg_attr(miri, ignore = "miri can't shim Win32 GetSystemInfo / sysconf")]
1041 fn page_size_is_reasonable() {
1042 let p = page_size();
1043 assert!(p >= 4096, "page size suspiciously small: {p}");
1044 assert!(p.is_power_of_two());
1045 }
1046
1047 #[test]
1048 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1049 fn alloc_then_write_then_read_back() {
1050 let m = MmapBacked::new(16 * 1024).expect("mmap should succeed for 16 KiB");
1051 let layout = NonZeroLayout::from_size_align(256, 8).unwrap();
1052 let block = m.allocate(layout).unwrap();
1053 let p = block.cast::<u8>();
1054 unsafe {
1055 core::ptr::write_bytes(p.as_ptr(), 0xCD, 256);
1056 for i in 0..256 {
1057 assert_eq!(*p.as_ptr().add(i), 0xCD);
1058 }
1059 }
1060 }
1061
1062 #[test]
1063 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1064 fn alloc_returns_aligned_pointer() {
1065 let m = MmapBacked::new(64 * 1024).unwrap();
1066 // First, push the cursor off zero with an odd-size allocation.
1067 let _ = m
1068 .allocate(NonZeroLayout::from_size_align(3, 1).unwrap())
1069 .unwrap();
1070 let layout = NonZeroLayout::from_size_align(64, 64).unwrap();
1071 let block = m.allocate(layout).unwrap();
1072 assert_eq!(block.cast::<u8>().as_ptr() as usize % 64, 0);
1073 }
1074
1075 #[test]
1076 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1077 fn alloc_fails_when_exhausted() {
1078 let m = MmapBacked::new(8 * 1024).unwrap();
1079 let cap = m.capacity();
1080 let layout = NonZeroLayout::from_size_align(cap, 8).unwrap();
1081 let _ = m.allocate(layout).unwrap();
1082 assert!(m
1083 .allocate(NonZeroLayout::from_size_align(1, 1).unwrap())
1084 .is_err());
1085 }
1086
1087 #[test]
1088 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1089 fn fixed_range_contains_allocations() {
1090 let m = MmapBacked::new(8 * 1024).unwrap();
1091 let layout = NonZeroLayout::from_size_align(64, 8).unwrap();
1092 let block = m.allocate(layout).unwrap();
1093 assert!(m.contains(block.cast::<u8>()));
1094 }
1095
1096 #[test]
1097 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1098 fn capacity_is_page_rounded() {
1099 let m = MmapBacked::new(1).unwrap();
1100 let cap = m.capacity();
1101 let page = page_size();
1102 assert_eq!(cap % page, 0);
1103 assert!(cap >= page);
1104 }
1105
1106 #[test]
1107 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1108 fn zero_size_request_errors() {
1109 assert!(MmapBacked::new(0).is_err());
1110 }
1111
1112 #[test]
1113 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1114 fn pre_syscall_rejection_sets_synthetic_einval() {
1115 // Pre-syscall failure paths (size==0, page-rounding overflow) must
1116 // populate the thread-local last-error slot with EINVAL rather
1117 // than leaving stale data from prior failures. Without this,
1118 // mmap_last_os_error() would silently lie about what just failed.
1119 mmap_clear_last_os_error();
1120 assert!(MmapBacked::new(0).is_err());
1121 let e = mmap_last_os_error().expect("synthetic EINVAL captured");
1122 #[cfg(unix)]
1123 assert_eq!(e.raw_os_error(), Some(libc::EINVAL));
1124 #[cfg(windows)]
1125 assert_eq!(
1126 e.raw_os_error(),
1127 Some(windows_sys::Win32::Foundation::ERROR_INVALID_PARAMETER as i32),
1128 );
1129
1130 // Overflow path: size + (page-1) wraps.
1131 mmap_clear_last_os_error();
1132 assert!(MmapBacked::new(usize::MAX).is_err());
1133 let e = mmap_last_os_error().expect("synthetic EINVAL on overflow");
1134 #[cfg(unix)]
1135 assert_eq!(e.raw_os_error(), Some(libc::EINVAL));
1136 #[cfg(windows)]
1137 assert_eq!(
1138 e.raw_os_error(),
1139 Some(windows_sys::Win32::Foundation::ERROR_INVALID_PARAMETER as i32),
1140 );
1141 }
1142
1143 #[test]
1144 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1145 fn last_os_error_captured_on_failure() {
1146 // Request an impossibly large mapping — both unix and Windows should
1147 // reject and set their thread-local error. We can't predict the exact
1148 // code (ENOMEM, EINVAL, EOVERFLOW, ERROR_NOT_ENOUGH_MEMORY, …) so we
1149 // only assert that *something* was captured.
1150 mmap_clear_last_os_error();
1151 assert!(mmap_last_os_error().is_none());
1152 // usize::MAX/2 rounds to usize::MAX-(page-1) which exceeds any
1153 // realistic address space, forcing a syscall failure.
1154 let huge = usize::MAX / 2;
1155 assert!(MmapBacked::new(huge).is_err());
1156 assert!(
1157 mmap_last_os_error().is_some(),
1158 "expected captured OS error after impossibly large mmap request",
1159 );
1160 }
1161
1162 #[test]
1163 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1164 fn os_backed_release_pages_after_alloc() {
1165 let m = MmapBacked::new(64 * 1024).unwrap();
1166 let p = m.base_ptr();
1167 // Write something, release, write again — must not crash.
1168 unsafe {
1169 core::ptr::write_bytes(p.as_ptr(), 0xEE, page_size());
1170 m.release_pages(p, page_size());
1171 core::ptr::write_bytes(p.as_ptr(), 0x11, page_size());
1172 }
1173 }
1174
1175 #[test]
1176 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1177 fn lazy_commit_then_write_round_trips() {
1178 // `new_lazy` reserves on Windows (no commit charge) and is identical
1179 // to `new` on Unix. After `commit`, the range must be writable on
1180 // every platform.
1181 let m = MmapBacked::new_lazy(64 * 1024).expect("lazy reserve should succeed");
1182 let len = page_size();
1183 m.commit(0, len)
1184 .expect("commit of a reserved range should succeed");
1185 let base = m.base().as_ptr();
1186 unsafe {
1187 core::ptr::write_bytes(base, 0xAB, len);
1188 assert_eq!(*base, 0xAB);
1189 assert_eq!(*base.add(len - 1), 0xAB);
1190 }
1191 }
1192
1193 #[test]
1194 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1195 fn commit_is_idempotent_and_monotonic() {
1196 let m = MmapBacked::new_lazy(64 * 1024).unwrap();
1197 let page = page_size();
1198 // Re-committing the same range and a sub-range is a no-op success.
1199 m.commit(0, page).unwrap();
1200 m.commit(0, page).unwrap();
1201 m.commit(0, 1).unwrap();
1202 // Extend the watermark forward by one page.
1203 m.commit(page, page).unwrap();
1204 // A range already below the watermark stays Ok without a syscall.
1205 m.commit(0, 2 * page).unwrap();
1206 }
1207
1208 #[test]
1209 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1210 fn eager_mapping_commit_is_noop() {
1211 // A default (eager) mapping has the whole region committed at
1212 // construction; the watermark starts at `len`, so `commit` succeeds
1213 // for any in-region range as a pure watermark hit.
1214 let m = MmapBacked::new(16 * 1024).unwrap();
1215 m.commit(0, 16 * 1024).unwrap();
1216 m.commit(page_size(), page_size()).unwrap();
1217 }
1218
1219 #[test]
1220 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1221 fn bump_arena_over_lazy_mmap_commits_on_alloc() {
1222 use crate::BumpArena;
1223 // BumpArena is the commit-aware consumer: each allocate commits the
1224 // block before returning it, so writing through every returned
1225 // pointer is sound even though the backing was only reserved.
1226 let arena = BumpArena::new(MmapBacked::new_lazy(256 * 1024).unwrap()).unwrap();
1227 let layout = NonZeroLayout::from_size_align(page_size(), 8).unwrap();
1228 for _ in 0..16 {
1229 let block = arena.allocate(layout).unwrap();
1230 let p = block.cast::<u8>().as_ptr();
1231 unsafe {
1232 core::ptr::write_bytes(p, 0xCD, page_size());
1233 assert_eq!(*p, 0xCD);
1234 assert_eq!(*p.add(page_size() - 1), 0xCD);
1235 }
1236 }
1237 }
1238
1239 #[test]
1240 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1241 fn bump_over_passthrough_wrapper_over_lazy_mmap_commits() {
1242 use crate::BumpArena;
1243 use crate::Statistics;
1244 // A pass-through FixedRange wrapper (Statistics) interposed between
1245 // BumpArena and a lazy mapping must forward `commit`, so writes are
1246 // still committed and don't fault.
1247 let arena =
1248 BumpArena::new(Statistics::new(MmapBacked::new_lazy(256 * 1024).unwrap())).unwrap();
1249 let layout = NonZeroLayout::from_size_align(page_size(), 8).unwrap();
1250 for _ in 0..8 {
1251 let block = arena.allocate(layout).unwrap();
1252 let p = block.cast::<u8>().as_ptr();
1253 unsafe {
1254 core::ptr::write_bytes(p, 0x5A, page_size());
1255 assert_eq!(*p.add(page_size() - 1), 0x5A);
1256 }
1257 }
1258 }
1259
1260 #[test]
1261 #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1262 fn slab_over_lazy_mmap_commits_via_allocate() {
1263 use crate::Slab;
1264 // Slab carves its region via `backing.allocate()`, which commits the
1265 // block up front on a lazy mapping (fix #2), so the slot writes that
1266 // follow don't fault.
1267 let s: Slab<u64, MmapBacked> =
1268 Slab::new(1024, MmapBacked::new_lazy(1 << 20).unwrap()).unwrap();
1269 let layout = NonZeroLayout::for_type::<u64>().unwrap();
1270 let p = s.allocate(layout).unwrap();
1271 unsafe {
1272 p.cast::<u64>().as_ptr().write(0xDEAD_BEEF);
1273 assert_eq!(p.cast::<u64>().as_ptr().read(), 0xDEAD_BEEF);
1274 s.deallocate(p.cast(), layout);
1275 }
1276 }
1277
1278 /// Structural parallel to the Windows
1279 /// `win32_prot_from_flags_preserves_every_requested_bit` regression
1280 /// test: confirm that every `(read, write, exec)` combination on Unix
1281 /// produces the corresponding bit-exact `PROT_*` mask with no over-
1282 /// grant (no spurious read added to exec-only) and no down-grade
1283 /// (W+X must not collapse to W). Unlike Win32 — which lacks primitives
1284 /// for write-without-read and exec-only — Unix `mprotect` exposes each
1285 /// bit independently, so the table is exact across all eight rows.
1286 ///
1287 /// Running this test on a non-Unix host (Windows) verifies the table
1288 /// math at compile time only when this `#[cfg(unix)]` gate is active;
1289 /// CI on Linux/macOS exercises the assertions at runtime.
1290 #[cfg(unix)]
1291 #[test]
1292 fn unix_prot_from_flags_preserves_every_requested_bit() {
1293 // ProtectFlags is #[non_exhaustive] — build via base + field assigns.
1294 let mut none = ProtectFlags::NONE;
1295 let mut r = ProtectFlags::NONE;
1296 r.read = true;
1297 let mut w = ProtectFlags::NONE;
1298 w.write = true;
1299 let mut x = ProtectFlags::NONE;
1300 x.exec = true;
1301 let mut rw = ProtectFlags::NONE;
1302 rw.read = true;
1303 rw.write = true;
1304 let mut rx = ProtectFlags::NONE;
1305 rx.read = true;
1306 rx.exec = true;
1307 let mut wx = ProtectFlags::NONE;
1308 wx.write = true;
1309 wx.exec = true;
1310 let mut rwx = ProtectFlags::NONE;
1311 rwx.read = true;
1312 rwx.write = true;
1313 rwx.exec = true;
1314 // Suppress unused_mut on `none` — clippy/rustc otherwise gripe.
1315 let _ = &mut none;
1316
1317 assert_eq!(unix_prot_from_flags(none), libc::PROT_NONE);
1318 assert_eq!(unix_prot_from_flags(r), libc::PROT_READ);
1319 assert_eq!(
1320 unix_prot_from_flags(w),
1321 libc::PROT_WRITE,
1322 "W must be PROT_WRITE only — Unix allows write-without-read at the syscall ABI; \
1323 any kernel-side implicit read-grant lives below this layer and is not our concern",
1324 );
1325 assert_eq!(
1326 unix_prot_from_flags(x),
1327 libc::PROT_EXEC,
1328 "X must be PROT_EXEC only — over-granting (e.g. adding PROT_READ) \
1329 must not appear on Unix",
1330 );
1331 assert_eq!(unix_prot_from_flags(rw), libc::PROT_READ | libc::PROT_WRITE);
1332 assert_eq!(unix_prot_from_flags(rx), libc::PROT_READ | libc::PROT_EXEC);
1333 assert_eq!(
1334 unix_prot_from_flags(wx),
1335 libc::PROT_WRITE | libc::PROT_EXEC,
1336 "W+X must be PROT_WRITE|PROT_EXEC — silently dropping the exec bit \
1337 must not appear on Unix. Hardened kernels that enforce W^X surface \
1338 EINVAL/EACCES at the mprotect syscall, not by silently masking bits here.",
1339 );
1340 assert_eq!(
1341 unix_prot_from_flags(rwx),
1342 libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC,
1343 );
1344 }
1345
1346 /// Regression: `win32_prot_from_flags` used to map `(read=false,
1347 /// write=true, exec=true)` to `PAGE_READWRITE`, silently dropping the
1348 /// caller's exec bit. Hardening wrappers that ask for W+X (uncommon but
1349 /// valid for JIT-like flows that don't need read) would have gotten
1350 /// pages that fault on instruction fetch in release builds — the
1351 /// debug_assert in `os_protect` only catches write-without-read in dev.
1352 /// The fix routes W+X through `PAGE_EXECUTE_READWRITE`.
1353 ///
1354 /// Exec-only `(F, F, T)` uses `PAGE_EXECUTE` (which Windows *does*
1355 /// support natively) so that callers that opt out of read on NX-capable
1356 /// hardware actually get exec-only semantics rather than an
1357 /// unconditional upgrade to RX.
1358 ///
1359 /// The mapping is tested in isolation (bypassing `os_protect`'s
1360 /// `debug_assert!(!(write && !read))`). Unix is unaffected — its
1361 /// `mprotect` path expresses each bit independently.
1362 #[cfg(windows)]
1363 #[test]
1364 #[cfg_attr(
1365 miri,
1366 ignore = "win32 import resolution requires actual Windows runtime"
1367 )]
1368 fn win32_prot_from_flags_preserves_every_requested_bit() {
1369 use windows_sys::Win32::System::Memory::{
1370 PAGE_EXECUTE, PAGE_EXECUTE_READ, PAGE_EXECUTE_READWRITE, PAGE_NOACCESS, PAGE_READONLY,
1371 PAGE_READWRITE,
1372 };
1373 // ProtectFlags is #[non_exhaustive] — build via base + field assigns.
1374 let mut none = ProtectFlags::NONE;
1375 let mut r = ProtectFlags::NONE;
1376 r.read = true;
1377 let mut w = ProtectFlags::NONE;
1378 w.write = true;
1379 let mut x = ProtectFlags::NONE;
1380 x.exec = true;
1381 let mut rw = ProtectFlags::NONE;
1382 rw.read = true;
1383 rw.write = true;
1384 let mut rx = ProtectFlags::NONE;
1385 rx.read = true;
1386 rx.exec = true;
1387 let mut rwx = ProtectFlags::NONE;
1388 rwx.read = true;
1389 rwx.write = true;
1390 rwx.exec = true;
1391 let mut wx = ProtectFlags::NONE;
1392 wx.write = true;
1393 wx.exec = true;
1394 // Suppress unused_mut on `none` — clippy/rustc otherwise gripe.
1395 let _ = &mut none;
1396
1397 assert_eq!(win32_prot_from_flags(none), PAGE_NOACCESS);
1398 assert_eq!(win32_prot_from_flags(r), PAGE_READONLY);
1399 assert_eq!(win32_prot_from_flags(rw), PAGE_READWRITE);
1400 assert_eq!(win32_prot_from_flags(rx), PAGE_EXECUTE_READ);
1401 assert_eq!(win32_prot_from_flags(rwx), PAGE_EXECUTE_READWRITE);
1402 // Exec-only is exact on NX-capable HW — Windows has PAGE_EXECUTE.
1403 assert_eq!(
1404 win32_prot_from_flags(x),
1405 PAGE_EXECUTE,
1406 "exec-only must use PAGE_EXECUTE (exec-only on NX-capable HW), \
1407 not PAGE_EXECUTE_READ which would unconditionally add read",
1408 );
1409 // Write-without-read upgrades — Windows cannot express write-only.
1410 assert_eq!(
1411 win32_prot_from_flags(w),
1412 PAGE_READWRITE,
1413 "W upgrades to RW (Windows has no write-only primitive)",
1414 );
1415 assert_eq!(
1416 win32_prot_from_flags(wx),
1417 PAGE_EXECUTE_READWRITE,
1418 "W+X must upgrade to RWX, not collapse to PAGE_READWRITE — \
1419 silently dropping the exec bit would fault on instruction fetch",
1420 );
1421 }
1422}