Skip to main content

forge_alloc/backing/
mmap.rs

1//! `MmapBacked` — OS-managed anonymous memory region.
2//!
3//! Linux/macOS: `mmap(MAP_ANONYMOUS | MAP_PRIVATE)`.
//! Windows: `VirtualAlloc(MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE)`, or
//! `MEM_RESERVE` alone when the mapping is created with `lazy_commit`.
5//!
6//! On drop, the region is returned to the OS (`munmap` / `VirtualFree`). The
7//! region is laid out bump-style — each `allocate` advances a cursor through
8//! the mapping; `deallocate` is a no-op. Higher layers (`BumpArena`, `Slab`)
9//! impose their own structure on what they receive.
10//!
11//! # See also
12//!
13//! [`HeapBytes`](crate::HeapBytes) is the heap twin of `MmapBacked`'s
14//! region-ownership half: a `FixedRange`-only owner of a single
15//! global-allocator block, no syscalls. Use `HeapBytes` (under a
16//! `BumpArena`) when you need a contiguous bounded region but the
17//! mmap-level isolation (separate VM area, guard-page potential) isn't
18//! worth the ~10-50 µs `mmap` / `VirtualAlloc` cost.
19
20use core::cell::UnsafeCell;
21use core::ptr::NonNull;
22
23use forge_alloc_core::{
24    AllocError, Allocator, Deallocator, FixedRange, NonZeroLayout, OsBacked, ProtectFlags,
25};
26
std::thread_local! {
    /// Per-thread record of the most recent OS error captured by this module.
    ///
    /// Holds the raw platform code (errno on Unix, the `GetLastError` value
    /// on Windows) so that [`mmap_last_os_error`] can rebuild it into a
    /// `std::io::Error` later. The slot is `None` until a failure is
    /// recorded on this thread, and goes back to `None` when it is
    /// explicitly cleared.
    static LAST_OS_ERROR: core::cell::Cell<Option<i32>> =
        const { core::cell::Cell::new(Option::None) };
}
36
37/// Record the most recent failing-syscall error from the platform's
38/// thread-local errno / GetLastError into this module's slot. Must be
39/// called *immediately* after the failing syscall, before any other libc /
40/// Win32 call can clobber the underlying thread-local.
41#[inline]
42fn capture_os_error() {
43    let raw = std::io::Error::last_os_error().raw_os_error();
44    LAST_OS_ERROR.with(|c| c.set(raw));
45}
46
47/// Return the most recent failing-syscall error captured into this
48/// module's slot on the current thread, or `None` if none has been
49/// recorded since thread start or [`mmap_clear_last_os_error`] was last
50/// called.
51///
52/// Code is platform-specific (errno on Unix, `GetLastError` on Windows).
53/// Read this *immediately* after a `MmapBacked` constructor or OS-call
54/// returns an error — subsequent libc/Win32 calls in other crates may
55/// overwrite the platform's underlying thread-local. The snapshot in
56/// THIS module is only updated when (a) a syscall inside `MmapBacked`
57/// fails, (b) a pre-syscall validation path in `MmapBacked::with_flags`
58/// rejects its argument (synthetic `EINVAL`), or (c) a composing crate
59/// pushes its own errno via [`mmap_record_os_error`].
60#[must_use]
61pub fn mmap_last_os_error() -> Option<std::io::Error> {
62    LAST_OS_ERROR
63        .with(|c| c.get())
64        .map(std::io::Error::from_raw_os_error)
65}
66
67/// Clear the per-thread last-error slot. Mainly useful in tests; callers
68/// in production typically just read [`mmap_last_os_error`] after a
69/// failure.
70pub fn mmap_clear_last_os_error() {
71    LAST_OS_ERROR.with(|c| c.set(None));
72}
73
74/// Record the *current* platform errno / GetLastError into the per-thread
75/// last-error slot from an external crate. Use this immediately after a
76/// failing syscall (e.g. `mbind`, `madvise`, `pthread_*`) in a crate that
77/// composes with `MmapBacked` so callers can read a single
78/// [`mmap_last_os_error`] regardless of which layer's syscall failed.
79///
80/// # Ordering contract
81///
82/// Must be called **immediately after the failing syscall returns** and
83/// **before any other libc / Win32 call**. The platform thread-local
84/// that backs `std::io::Error::last_os_error()` (errno on Unix,
85/// `GetLastError` on Windows) is volatile — any subsequent call (even a
86/// no-failure one — e.g. an allocator's bookkeeping `free` or a logging
87/// `write`) may clobber it before this function gets a chance to read
88/// the failing code.
89///
90/// # Thread safety
91///
92/// The slot is thread-local. Concurrent calls from different threads
93/// touch disjoint storage and cannot race; concurrent calls within the
94/// same thread are impossible (single-threaded execution within a
95/// thread). Each thread sees the most recent error captured *on that
96/// thread*, regardless of which crate captured it.
97#[inline]
98pub fn mmap_record_os_error() {
99    capture_os_error();
100}
101
102/// Record a synthetic `EINVAL` into the per-thread last-error slot. Used
103/// when a [`MmapBacked`] constructor rejects its argument (size==0, page-
104/// rounding overflow) without invoking the kernel — otherwise callers
105/// reading [`mmap_last_os_error`] would see whatever stale value a prior
106/// syscall failure left on this thread, or `None`, both of which are
107/// misleading. EINVAL is the universal "validation failed" signal on
108/// Unix (errno 22) and Windows (`ERROR_INVALID_PARAMETER`, 87).
109///
110/// `pub(super)` so the sibling `huge_page_backed` module can reuse the
111/// same per-thread slot for its own pre-syscall rejections without
112/// round-tripping through the platform's `errno` / `GetLastError`
113/// thread-local (which any allocator call in between could clobber).
114#[inline]
115pub(super) fn capture_synthetic_einval() {
116    #[cfg(unix)]
117    let code: i32 = libc::EINVAL;
118    #[cfg(windows)]
119    // `ERROR_INVALID_PARAMETER` is defined as `u32` in `windows-sys`; the
120    // crate-wide slot stores `i32` to match `std::io::Error::raw_os_error`,
121    // so cast at the boundary. The value (87) is within `i32::MAX` and
122    // round-trips through `Error::from_raw_os_error` without loss.
123    let code: i32 = windows_sys::Win32::Foundation::ERROR_INVALID_PARAMETER as i32;
124    LAST_OS_ERROR.with(|c| c.set(Some(code)));
125}
126
127/// Write a raw OS error code into the per-thread last-error slot. Used by
128/// composing crates that need to preserve a previously captured error across
129/// a Drop (e.g. `LockedMmapBacked` saving the lock errno before the inner
130/// `MmapBacked` drops and potentially overwrites it with a munmap error).
131///
132/// Mirrors [`capture_synthetic_einval`]: both write directly to `LAST_OS_ERROR`
133/// with a caller-supplied `i32` value.
134#[inline]
135pub(crate) fn mmap_set_last_os_error(code: i32) {
136    LAST_OS_ERROR.with(|c| c.set(Some(code)));
137}
138
/// Optional flags for [`MmapBacked::with_flags`].
///
/// Several flags (`huge_pages`, `numa_node`, `guard_at_end`) are accepted
/// for forward compatibility but are no-ops at this layer — each is
/// implemented by a dedicated wrapper (`HugePageAligned`, `NumaLocal`,
/// `GuardPage`). The flags this layer itself acts on are `populate`
/// (honored on Linux only) and `lazy_commit` (effective on Windows only);
/// see the individual field docs for the platform details.
///
/// `#[non_exhaustive]` so future bits (`MAP_NORESERVE`, MTE enable) can be
/// added without an API break.
///
/// **Note on `MAP_LOCKED` / `mlock`:** page-locking for cryptographic secrets
/// is available as a separate backing type — [`LockedMmapBacked`] — rather
/// than as a flag here. Using a distinct type enforces the fail-closed
/// guarantee (no silent unlocked fallback) at the type level and makes the
/// security intent visible in caller code.
///
/// [`LockedMmapBacked`]: crate::LockedMmapBacked
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct MmapFlags {
    /// Request transparent / explicit huge pages. Implemented via
    /// `HugePageAligned`; ignored at this layer.
    pub huge_pages: bool,
    /// Fault all pages at allocation time so subsequent accesses don't take
    /// page-fault latency.
    ///
    /// **Platform support is asymmetric** — setting this to `true` does
    /// not guarantee eager paging on every platform:
    ///
    /// - **Linux**: maps to `MAP_POPULATE`. Kernel walks the page tables
    ///   at `mmap` time so subsequent accesses don't fault.
    /// - **macOS / BSD**: **silently ignored**. There is no portable
    ///   equivalent that operates at `mmap` time; eager paging on Darwin
    ///   requires `madvise(MADV_WILLNEED)` over the region after mapping,
    ///   which `MmapBacked` does not currently perform.
    /// - **Windows**: **silently ignored** — but *not* because eager
    ///   paging already happened. `VirtualAlloc(MEM_RESERVE|MEM_COMMIT)`
    ///   charges the full region against the system commit limit at
    ///   construction, yet does **not** make pages resident: the working
    ///   set is still populated lazily by demand-zero faults on first
    ///   access, exactly as on Unix. So the per-page fault latency this
    ///   flag targets is unchanged on Windows; there is nothing for it to
    ///   populate eagerly. The real Windows-specific divergence is a cost,
    ///   not a benefit: because Windows does not overcommit, that commit
    ///   charge is reserved whether or not the pages are ever touched, so a
    ///   large `MmapBacked` whose consumer writes only a fraction of it
    ///   (e.g. a bump arena sized for headroom) still consumes its full
    ///   size of commit budget — and the mapping fails at construction once
    ///   the cumulative charge exceeds the limit, even with physical RAM
    ///   free. Opt out of the up-front charge with [`lazy_commit`](Self::lazy_commit)
    ///   (reserve-only + incremental `MEM_COMMIT` driven by
    ///   [`FixedRange::commit`] as a [`BumpArena`](crate::BumpArena) cursor
    ///   advances).
    ///
    /// Use [`Self::populate_supported`] to test at runtime whether
    /// setting this flag will have any effect, or branch on `cfg!` in
    /// caller code.
    pub populate: bool,
    /// Bind to a specific NUMA node. Implemented via `NumaLocal`; ignored
    /// at this layer.
    pub numa_node: Option<u32>,
    /// Append one unmapped guard page after the region. Implemented via
    /// `GuardPage`; ignored at this layer.
    pub guard_at_end: bool,
    /// Reserve the address range without committing it up front, leaving
    /// per-page commit to [`FixedRange::commit`] as a consumer's cursor
    /// advances.
    ///
    /// **Windows-only effect.** On Windows, `VirtualAlloc(MEM_RESERVE)`
    /// reserves address space without charging the system commit limit;
    /// pages are committed lazily via `commit`. On Unix this flag is
    /// inert — `mmap(MAP_ANONYMOUS|MAP_PRIVATE)` is already demand-paged,
    /// so the region is created exactly as without the flag and `commit`
    /// is a no-op.
    ///
    /// # Safety / usage contract
    ///
    /// A `lazy_commit` mapping hands back **reserved-but-uncommitted**
    /// pages on Windows; writing one before [`FixedRange::commit`] has
    /// committed it faults (access violation). Supported consumers commit
    /// before any write reaches the page:
    ///
    /// - [`BumpArena`] / [`StackAlloc`](crate::StackAlloc) commit each block
    ///   as the cursor advances — true demand-commit, the intended use.
    /// - A pass-through `FixedRange` wrapper interposed between the arena and
    ///   the mapping (`Statistics`, `PoisonOnFree`, `Quarantine`,
    ///   `Watermark`, `Canary`, `CacheJitter`, `Faulty`, `HugePageAligned`,
    ///   `NumaLocal`, `SplitMetadata`) forwards `commit`, so it stays safe.
    /// - `Slab`, `SizeClassed`, and direct [`Allocator::allocate`] carve via
    ///   `allocate`, which commits the block up front — safe, but commits
    ///   *eagerly* (no demand-paging benefit on this path).
    ///
    /// Two consumers still fault and are **unsupported** over a lazy mapping
    /// — use [`MmapBacked::new`] (eager) for them:
    ///
    /// - `SharedBumpArena` — it is `Sync` and would race the `!Sync` commit
    ///   watermark, so it deliberately does not call `commit`.
    /// - `GuardPage` — its usable range starts past a guard page and its
    ///   inner bound is only `OsBacked`, so it has no `commit` to forward.
    ///
    /// [`BumpArena`]: crate::BumpArena
    /// [`FixedRange::commit`]: forge_alloc_core::FixedRange::commit
    pub lazy_commit: bool,
}
242
243impl MmapFlags {
244    /// Empty flag set — equivalent to [`MmapBacked::new`].
245    pub const NONE: Self = Self {
246        huge_pages: false,
247        populate: false,
248        numa_node: None,
249        guard_at_end: false,
250        lazy_commit: false,
251    };
252
253    /// Returns `true` if `populate: true` will actually be honored on the
254    /// current platform — `true` on Linux, `false` on macOS / BSD /
255    /// Windows. Allows callers to branch on whether the eager-paging
256    /// performance hint is meaningful without resorting to `cfg!` checks
257    /// scattered through application code.
258    #[inline]
259    pub const fn populate_supported() -> bool {
260        cfg!(target_os = "linux")
261    }
262}
263
264impl Default for MmapFlags {
265    fn default() -> Self {
266        Self::NONE
267    }
268}
269
/// OS-mapped anonymous region.
///
/// `len` is rounded up to a multiple of the page size at construction. The
/// `Allocator` impl serves requests bump-style from the mapping.
///
/// # Thread safety
///
/// `Send`: yes — the mapping is identified by `(ptr, len)`, both `Send`-safe
/// values; we restore `Send` via an `unsafe impl` since `NonNull<u8>` is
/// `!Send` by default.
/// `Sync`: NO. The cursor uses `UnsafeCell` for `&self` allocation; concurrent
/// `&self` allocators would race. `UnsafeCell` is `!Sync`, which gives us the
/// right behavior without any extra marker field. Cross-thread allocation
/// belongs to higher layers (`SharedBumpArena`, `SlabRemote`).
pub struct MmapBacked {
    /// Base address of the mapping, as returned by `os_map` at construction.
    ptr: NonNull<u8>,
    /// Size of the mapping in bytes — the requested size rounded up to a
    /// multiple of the page size.
    len: usize,
    /// Bump cursor: byte offset from `ptr` of the end of the most recent
    /// allocation (starts at `0`). `allocate` aligns upward from here and
    /// then advances it.
    cursor: UnsafeCell<usize>,
    /// Page-aligned high-water mark of committed bytes from `ptr`, in
    /// `[0, len]`. Everything in `[ptr, ptr + committed)` is committed and
    /// writable. Only consulted on Windows: eager mappings initialise it
    /// to `len` (whole region committed at construction) so `commit` is a
    /// cheap watermark hit; `lazy_commit` mappings start at `0` and grow it
    /// one `VirtualAlloc(MEM_COMMIT)` per page-crossing as [`commit`] runs.
    /// `UnsafeCell` (not atomic) because `MmapBacked` is `!Sync` — the
    /// commit-aware single-writer consumers ([`BumpArena`] / `StackAlloc`,
    /// and `MmapBacked::allocate` itself) hold exclusive access.
    ///
    /// [`commit`]: forge_alloc_core::FixedRange::commit
    /// [`BumpArena`]: crate::BumpArena
    committed: UnsafeCell<usize>,
}
302
303impl MmapBacked {
304    /// Allocate an anonymous OS-mapped region of at least `size` bytes (rounded
305    /// up to the page size).
306    pub fn new(size: usize) -> Result<Self, AllocError> {
307        Self::with_flags(size, MmapFlags::NONE)
308    }
309
310    /// Allocate with huge-pages requested. This layer ignores the hint;
311    /// `HugePageAligned` enforces 2 MiB / 32 MiB alignment.
312    pub fn with_huge_pages(size: usize) -> Result<Self, AllocError> {
313        Self::with_flags(
314            size,
315            MmapFlags {
316                huge_pages: true,
317                ..MmapFlags::NONE
318            },
319        )
320    }
321
322    /// Reserve an anonymous OS-mapped region of at least `size` bytes
323    /// without committing it up front (see [`MmapFlags::lazy_commit`]).
324    ///
325    /// **Windows-only effect.** On Windows the region is `MEM_RESERVE`-only
326    /// and consumes no commit charge until [`FixedRange::commit`] commits
327    /// pages on demand; on Unix this is identical to [`new`](Self::new)
328    /// because `mmap` is already demand-paged.
329    ///
330    /// # Safety / usage contract
331    ///
332    /// The returned region hands back reserved-but-uncommitted pages on
333    /// Windows. It is safe under [`BumpArena`] / [`StackAlloc`](crate::StackAlloc)
334    /// (true demand-commit), under any pass-through `FixedRange` wrapper over
335    /// those, and under `Slab` / `SizeClassed` / direct
336    /// [`Allocator::allocate`] (safe, but committed eagerly). It faults under
337    /// `SharedBumpArena` and `GuardPage`. See [`MmapFlags::lazy_commit`] for
338    /// the full contract.
339    ///
340    /// [`FixedRange::commit`]: forge_alloc_core::FixedRange::commit
341    /// [`BumpArena`]: crate::BumpArena
342    pub fn new_lazy(size: usize) -> Result<Self, AllocError> {
343        Self::with_flags(
344            size,
345            MmapFlags {
346                lazy_commit: true,
347                ..MmapFlags::NONE
348            },
349        )
350    }
351
352    /// Allocate with the supplied [`MmapFlags`].
353    pub fn with_flags(size: usize, flags: MmapFlags) -> Result<Self, AllocError> {
354        if size == 0 {
355            // Pre-syscall rejection: record a synthetic EINVAL so
356            // `mmap_last_os_error()` callers see an honest diagnostic
357            // rather than whatever stale value lingers from a prior
358            // failure on this thread.
359            capture_synthetic_einval();
360            return Err(AllocError);
361        }
362        let page = page_size();
363        let len = match size.checked_add(page - 1).map(|s| s & !(page - 1)) {
364            Some(l) => l,
365            None => {
366                capture_synthetic_einval();
367                return Err(AllocError);
368            }
369        };
370        // SAFETY: platform-specific os_map enforces its own invariants and
371        // returns a non-null pointer to `len` writable bytes on success.
372        let ptr = unsafe { os_map(len, &flags)? };
373        // Eager mappings have the whole region committed at construction, so
374        // the watermark starts at `len` and `commit` always hits it. Lazy
375        // mappings (Windows MEM_RESERVE) start uncommitted at `0`. On Unix
376        // the watermark is never consulted (mmap is demand-paged; `commit`
377        // is a no-op), so `len` is a harmless default there regardless.
378        let committed = if cfg!(windows) && flags.lazy_commit {
379            0
380        } else {
381            len
382        };
383        Ok(Self {
384            ptr,
385            len,
386            cursor: UnsafeCell::new(0),
387            committed: UnsafeCell::new(committed),
388        })
389    }
390
391    /// Bytes already allocated from this backing.
392    #[inline]
393    pub fn allocated(&self) -> usize {
394        // SAFETY: !Sync — no concurrent access to cursor.
395        unsafe { *self.cursor.get() }
396    }
397
398    /// Total size of the OS-mapped region (page-aligned).
399    #[inline]
400    pub const fn capacity(&self) -> usize {
401        self.len
402    }
403
404    /// Bytes remaining for allocation.
405    #[inline]
406    pub fn remaining(&self) -> usize {
407        self.len - self.allocated()
408    }
409}
410
411impl Drop for MmapBacked {
412    fn drop(&mut self) {
413        // SAFETY: ptr/len pair came from os_map on construction; no copies of
414        // either escape this struct (no Clone impl). Caller of MmapBacked has
415        // by contract guaranteed no outstanding pointers into the region at
416        // drop time (the same caller-discipline that BumpArena::reset requires).
417        unsafe { os_unmap(self.ptr, self.len) };
418    }
419}
420
421unsafe impl Deallocator for MmapBacked {
422    #[inline]
423    unsafe fn deallocate(&self, _ptr: NonNull<u8>, _layout: NonZeroLayout) {
424        // No-op. Bump-style; reclaim via drop.
425    }
426}
427
428unsafe impl Allocator for MmapBacked {
429    fn allocate(&self, layout: NonZeroLayout) -> Result<NonNull<[u8]>, AllocError> {
430        let align = layout.align().get();
431        let size = layout.size().get();
432        // SAFETY: !Sync — no concurrent access to cursor.
433        unsafe {
434            let cursor_ptr = self.cursor.get();
435            let cur = *cursor_ptr;
436            let base = self.ptr.as_ptr() as usize;
437            let next = base
438                .checked_add(cur)
439                .and_then(|v| v.checked_add(align - 1))
440                .ok_or(AllocError)?
441                & !(align - 1);
442            let aligned_off = next - base;
443            let end_off = aligned_off.checked_add(size).ok_or(AllocError)?;
444            if end_off > self.len {
445                return Err(AllocError);
446            }
447            // Commit the block before handing it out, so consumers that
448            // carve a region via `allocate` and then write it by raw offset
449            // (`Slab`, `SizeClassed`, direct callers) are safe on a
450            // `lazy_commit` mapping instead of faulting — they degrade to
451            // commit-at-allocate (effectively eager) rather than demand-
452            // paged. No-op for an eager mapping (watermark starts at `len`).
453            // Commit before publishing the cursor so a declined commit
454            // leaves the backing unchanged. `BumpArena` bypasses this path
455            // (it writes via `base()+offset` and drives `commit` itself), so
456            // there is no double-commit.
457            os_commit(self.ptr, self.len, &self.committed, aligned_off, size)?;
458            *cursor_ptr = end_off;
459            let p = self.ptr.as_ptr().add(aligned_off);
460            // SAFETY: aligned_off <= len, and p derives from self.ptr which
461            // is non-null; the result is non-null.
462            Ok(NonNull::slice_from_raw_parts(
463                NonNull::new_unchecked(p),
464                size,
465            ))
466        }
467    }
468
469    #[inline]
470    fn capacity_bytes(&self) -> Option<usize> {
471        Some(self.len)
472    }
473}
474
475impl FixedRange for MmapBacked {
476    #[inline]
477    fn base(&self) -> NonNull<u8> {
478        self.ptr
479    }
480
481    #[inline]
482    fn size(&self) -> usize {
483        self.len
484    }
485
486    #[inline]
487    fn commit(&self, offset: usize, len: usize) -> Result<(), AllocError> {
488        // SAFETY: !Sync — the single commit-aware consumer (BumpArena) has
489        // exclusive access to the `committed` watermark; no concurrent
490        // commit can race the UnsafeCell. This relies on the invariant that
491        // only the owning BumpArena calls `commit` (and `&self` allocators
492        // can't run concurrently on a `!Sync` type). Calling `commit`
493        // directly on a shared `&MmapBacked` while an allocator advances the
494        // watermark would violate that and race the cell — don't.
495        unsafe { os_commit(self.ptr, self.len, &self.committed, offset, len) }
496    }
497}
498
499unsafe impl OsBacked for MmapBacked {
500    #[inline]
501    fn base_ptr(&self) -> NonNull<u8> {
502        self.ptr
503    }
504
505    #[inline]
506    fn region_size(&self) -> usize {
507        self.len
508    }
509
510    /// # Caveat (shared with [`commit`](Self::commit))
511    ///
512    /// On Windows this reads the `committed` high-water mark through `&self`
513    /// under the `!Sync` single-writer contract. Do NOT call `release_pages`
514    /// on a `&MmapBacked` that is shared while an allocator (or `commit`)
515    /// advances the watermark — that races the `UnsafeCell`, exactly as
516    /// [`commit`](Self::commit)'s own caveat warns.
517    #[inline]
518    unsafe fn release_pages(&self, ptr: NonNull<u8>, size: usize) {
519        // The commit watermark is a Windows-only construct: `os_commit` is a
520        // no-op on Unix and never advances it. So the clamp below is gated to
521        // Windows rather than applied cross-platform — on Unix `madvise`
522        // tolerates untouched pages, so the full range is always safe and a
523        // clamp would be dead code that could silently under-release if a
524        // future Unix lazy path ever set `committed < len`.
525        #[cfg(windows)]
526        {
527            // On a `lazy_commit` mapping, pages past the high-water mark are
528            // reserved-but-uncommitted, and `VirtualAlloc(MEM_RESET)` rejects
529            // uncommitted pages with ERROR_INVALID_PARAMETER — a silent no-op
530            // that also leaves a stale error on the `mmap_last_os_error` probe.
531            // Clamp the release range to the committed prefix so the reset only
532            // ever touches committed pages. For eager mappings `committed ==
533            // len`, so this is a no-op.
534            //
535            // SAFETY: `!Sync` single-writer — exclusive access to the watermark
536            // (see the method caveat above), the same contract `commit` relies on.
537            let committed = unsafe { *self.committed.get() };
538            let off = (ptr.as_ptr() as usize).saturating_sub(self.ptr.as_ptr() as usize);
539            let clamped = off.saturating_add(size).min(committed).saturating_sub(off);
540            if clamped == 0 {
541                return;
542            }
543            // SAFETY: caller has promised [ptr, ptr+size) lies wholly inside our
544            // region and has no live allocations; clamping only shrinks the range.
545            unsafe { os_release_pages(ptr, clamped) };
546        }
547        #[cfg(not(windows))]
548        {
549            // SAFETY: caller has promised [ptr, ptr+size) lies wholly inside our
550            // region and has no live allocations.
551            unsafe { os_release_pages(ptr, size) };
552        }
553    }
554
555    #[inline]
556    unsafe fn protect(&self, ptr: NonNull<u8>, size: usize, flags: ProtectFlags) {
557        // SAFETY: caller has promised [ptr, ptr+size) lies inside our region.
558        unsafe { os_protect(ptr, size, flags) };
559    }
560}
561
// `MmapBacked` owns a `NonNull<u8>` to a non-shared OS mapping. Moving the
// owner to another thread is fine: the mapping outlives the move because
// unmapping is keyed on `(ptr, len)`, not on thread identity. `!Sync` is
// inherited from the `UnsafeCell<usize>` fields, so no extra marker field
// is needed to keep the type from being shared across threads.
//
// SAFETY: see the rationale above; no aliasing reference into the mapping
// escapes the struct. Callers only ever receive raw `NonNull` pointers,
// which carry no borrow of `self`.
unsafe impl Send for MmapBacked {}
571
572// ============================================================================
573// Platform glue
574// ============================================================================
575
576/// The OS memory page size in bytes — 4 KiB on most x86-64, 16 KiB on
577/// Apple Silicon. Pass this where a primitive needs a page-size argument
578/// (such as `GuardPage`) rather than hard-coding a value that is wrong
579/// on 16 KiB-page platforms.
580#[cfg(unix)]
581pub fn page_size() -> usize {
582    use core::sync::atomic::{AtomicUsize, Ordering};
583
584    // Cached for symmetry with the Windows path. `sysconf` is typically cheap
585    // on glibc/musl (resolved from the auxv at startup), but the cache makes
586    // the cost unconditionally a single relaxed load after the first call.
587    // `0` is the "not yet computed" sentinel (a real page size is always > 0);
588    // the race is benign because every first caller computes the same value.
589    static CACHED: AtomicUsize = AtomicUsize::new(0);
590    let cached = CACHED.load(Ordering::Relaxed);
591    if cached != 0 {
592        return cached;
593    }
594    // SAFETY: sysconf is async-signal-safe and always returns >= 0 for
595    // _SC_PAGESIZE on conforming Unix; we still fall back defensively when
596    // the call reports -1 (errno) so `with_flags` cannot hit `page - 1`
597    // underflow on a pathological kernel.
598    //
599    // NOTE: the 4096 fallback may undersize on 16K-page systems (Apple
600    // Silicon, some ARMv8) if sysconf ever fails — we'd round to 4K instead
601    // of 16K, then mmap would still align internally to 16K. The behavioral
602    // consequence is over-reservation at the round-up step, not unsoundness.
603    let p = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
604    // The returned value must be a power of two: the round-up masks downstream
605    // (`with_flags`, `os_commit`, `allocate`) all use `& !(page - 1)`, which is
606    // only a correct "round up to page" mask when `page` is a power of two. A
607    // pathological positive-but-non-pow2 value would pass a bare `> 0` guard yet
608    // silently round *down*, undersizing the mapping. Reject it here at the one
609    // chokepoint so every downstream mask has an enforced precondition.
610    let ps = match usize::try_from(p) {
611        Ok(p) if p > 0 && p.is_power_of_two() => p,
612        _ => 4096,
613    };
614    CACHED.store(ps, Ordering::Relaxed);
615    ps
616}
617
/// Size of one OS memory page, in bytes — 4 KiB on most x86-64, 16 KiB on
/// Apple Silicon.
///
/// Use this wherever a primitive takes a page-size argument (such as
/// `GuardPage`) instead of hard-coding a value that would be wrong on
/// 16 KiB-page platforms. The result is always a non-zero power of two.
#[cfg(windows)]
pub fn page_size() -> usize {
    use core::sync::atomic::{AtomicUsize, Ordering};
    use windows_sys::Win32::System::SystemInformation::{GetSystemInfo, SYSTEM_INFO};

    // Used if the OS ever reports a degenerate page size.
    const FALLBACK: usize = 4096;

    // The page size never changes at runtime, but `GetSystemInfo` is a real
    // call and `os_commit` asks for the page size on every demand-commit —
    // i.e. per allocation on the hot path — so the first answer is cached.
    // `0` means "not computed yet" (a real page size is never zero).
    // Concurrent first callers each store the *same* value, so the race is
    // benign and `Relaxed` suffices: the cached integer has no other state
    // ordered against it.
    static PAGE: AtomicUsize = AtomicUsize::new(0);

    match PAGE.load(Ordering::Relaxed) {
        0 => {}
        known => return known,
    }

    // SAFETY: `SYSTEM_INFO` is plain data for which an all-zero bit pattern
    // is a valid value.
    let mut info: SYSTEM_INFO = unsafe { core::mem::zeroed() };
    // SAFETY: `GetSystemInfo` fills in the `SYSTEM_INFO` behind its
    // out-pointer; we pass a correctly sized stack slot.
    unsafe { GetSystemInfo(&mut info) };

    // `dwPageSize` is documented as non-zero, so the fallback is purely
    // defensive. It must also be a power of two: the round-up masks
    // downstream (`with_flags`, `os_commit`, `allocate`) use
    // `& !(page - 1)`, which only rounds up correctly for powers of two.
    // Zero is not a power of two, so this single check also prevents a
    // `page - 1` underflow in `with_flags`.
    let reported = info.dwPageSize as usize;
    let resolved = if reported.is_power_of_two() {
        reported
    } else {
        FALLBACK
    };

    PAGE.store(resolved, Ordering::Relaxed);
    resolved
}
660
661#[cfg(unix)]
662unsafe fn os_map(len: usize, flags: &MmapFlags) -> Result<NonNull<u8>, AllocError> {
663    // `mut` is needed only on Linux, where the `MAP_POPULATE` branch below
664    // reassigns it; on macOS / other Unix the binding is never mutated.
665    #[cfg_attr(not(target_os = "linux"), allow(unused_mut))]
666    let mut mmap_flags = libc::MAP_ANONYMOUS | libc::MAP_PRIVATE;
667    if flags.populate {
668        // MAP_POPULATE exists on Linux; on macOS the call still succeeds but
669        // the flag is silently ignored.
670        #[cfg(target_os = "linux")]
671        {
672            mmap_flags |= libc::MAP_POPULATE;
673        }
674    }
675    // SAFETY: mmap with MAP_ANONYMOUS+MAP_PRIVATE, non-zero len, non-conflicting
676    // flags. Returns MAP_FAILED on error which we check.
677    let ptr = unsafe {
678        libc::mmap(
679            core::ptr::null_mut(),
680            len,
681            libc::PROT_READ | libc::PROT_WRITE,
682            mmap_flags,
683            -1,
684            0,
685        )
686    };
687    if ptr == libc::MAP_FAILED {
688        capture_os_error();
689        return Err(AllocError);
690    }
691    // SAFETY: mmap returned non-MAP_FAILED, so ptr is a valid non-null mapping.
692    Ok(unsafe { NonNull::new_unchecked(ptr as *mut u8) })
693}
694
/// Reserves (and, unless `flags.lazy_commit` is set, also commits) `len`
/// bytes of address space with `VirtualAlloc` (Windows).
///
/// * Eager (default): `MEM_RESERVE | MEM_COMMIT` charges the whole `len`
///   against the system commit limit immediately — Windows does not
///   overcommit, unlike the demand-paged Unix mapping.
/// * Lazy: `MEM_RESERVE` only. No commit charge is taken; pages are committed
///   later through `FixedRange::commit`. Until then the `PAGE_READWRITE`
///   passed here is inert and any access to the range faults, which is the
///   intended trap.
///
/// On failure the OS error is recorded through `capture_os_error` and
/// `AllocError` is returned.
#[cfg(windows)]
unsafe fn os_map(len: usize, flags: &MmapFlags) -> Result<NonNull<u8>, AllocError> {
    use windows_sys::Win32::System::Memory::{
        VirtualAlloc, MEM_COMMIT, MEM_RESERVE, PAGE_READWRITE,
    };
    let mut alloc_type = MEM_RESERVE;
    if !flags.lazy_commit {
        alloc_type |= MEM_COMMIT;
    }
    // SAFETY: a NULL base with a valid MEM_* type and PAGE_* protection is the
    // standard anonymous-mapping call; NULL is returned on error.
    let raw = unsafe { VirtualAlloc(core::ptr::null_mut(), len, alloc_type, PAGE_READWRITE) };
    match NonNull::new(raw as *mut u8) {
        Some(mapping) => Ok(mapping),
        None => {
            // Nothing may run between the failed call and this capture.
            capture_os_error();
            Err(AllocError)
        }
    }
}
724
/// Unix counterpart of the Windows commit step: always succeeds without
/// touching any of its arguments.
#[cfg(unix)]
unsafe fn os_commit(
    _base: NonNull<u8>,
    _region_len: usize,
    _committed: &UnsafeCell<usize>,
    _offset: usize,
    _len: usize,
) -> Result<(), AllocError> {
    // mmap(MAP_ANONYMOUS|MAP_PRIVATE) is demand-paged: pages are committed
    // (and commit-charged, under the kernel's overcommit policy) lazily on
    // first touch with no per-page action required here. The watermark is
    // never consulted on Unix, so `lazy_commit` is inert.
    Ok(())
}
739
/// Ensures `[offset, offset + len)` of a reserved region is committed
/// (Windows), growing the high-water mark stored in `committed` as needed.
///
/// The requested end is rounded up to a page boundary and clamped to
/// `region_len`. If that end lies at or below the current watermark nothing
/// is done. Otherwise everything from the watermark up to the rounded end is
/// committed in one `VirtualAlloc(MEM_COMMIT)` call and the watermark is
/// advanced.
///
/// Returns `AllocError` when `offset + len` or the page rounding overflows,
/// or when the OS refuses the commit; in the latter case the OS error is
/// recorded through `capture_os_error` and the watermark is left untouched.
#[cfg(windows)]
unsafe fn os_commit(
    base: NonNull<u8>,
    region_len: usize,
    committed: &UnsafeCell<usize>,
    offset: usize,
    len: usize,
) -> Result<(), AllocError> {
    use windows_sys::Win32::System::Memory::{VirtualAlloc, MEM_COMMIT, PAGE_READWRITE};
    let requested_end = offset.checked_add(len).ok_or(AllocError)?;
    let page_mask = page_size() - 1;
    let rounded_end = requested_end.checked_add(page_mask).ok_or(AllocError)? & !page_mask;
    // The clamp is purely defensive: the caller (BumpArena) has already
    // bounds-checked the request and `region_len` is page-aligned, so a valid
    // request never overshoots the region after rounding.
    let target = core::cmp::min(rounded_end, region_len);
    let watermark = committed.get();
    // SAFETY: !Sync — exclusive access to the watermark.
    let committed_len = unsafe { *watermark };
    // At or below the watermark the range is already committed. That is the
    // common case once the cursor has passed these pages, and the only case
    // for an eager mapping, whose watermark starts at `region_len`.
    if target > committed_len {
        // Always extend from the watermark rather than from `offset`, so the
        // committed prefix `[base, base + committed)` never develops a hole
        // and a single `usize` suffices to describe it. A far-ahead `offset`
        // therefore over-commits the gap below it; the in-tree callers
        // (BumpArena, StackAlloc, allocate) advance monotonically, so that
        // does not arise in practice.
        // SAFETY: `committed_len <= region_len` (watermark invariant), so the
        // resulting pointer stays inside the reserved region.
        let start = unsafe { base.as_ptr().add(committed_len) };
        let grow_by = target - committed_len;
        // SAFETY: [base + committed_len, base + target) lies within the
        // reservation; MEM_COMMIT over reserved pages is the documented
        // demand-commit pattern and is idempotent for pages already committed.
        let res = unsafe { VirtualAlloc(start as *mut _, grow_by, MEM_COMMIT, PAGE_READWRITE) };
        if res.is_null() {
            // The OS declined (commit limit). Keep the watermark where it was
            // so the range stays officially uncommitted, and report a clean
            // failure instead of letting the caller write to unbacked pages.
            capture_os_error();
            return Err(AllocError);
        }
        // SAFETY: !Sync — exclusive access to the watermark.
        unsafe { *watermark = target };
    }
    Ok(())
}
806
807#[cfg(unix)]
808unsafe fn os_unmap(ptr: NonNull<u8>, len: usize) {
809    // SAFETY: ptr/len pair came from os_map; munmap of an active mapping is
810    // the only safe way to release it.
811    let rc = unsafe { libc::munmap(ptr.as_ptr() as *mut libc::c_void, len) };
812    if rc != 0 {
813        // Drop path can't propagate Err; record errno so callers can detect
814        // a previous unmap failure via `mmap_last_os_error()` if they choose.
815        capture_os_error();
816    }
817}
818
/// Releases a region obtained from `os_map` with `VirtualFree(MEM_RELEASE)`
/// (Windows).
///
/// `_len` is unused: `MEM_RELEASE` takes the original base pointer and a size
/// of 0, and frees both the reservation and any commit. This runs on the drop
/// path, so a failure is only recorded through `capture_os_error`.
#[cfg(windows)]
unsafe fn os_unmap(ptr: NonNull<u8>, _len: usize) {
    use windows_sys::Win32::System::Memory::{VirtualFree, MEM_RELEASE};
    // SAFETY: `ptr` is the base returned by VirtualAlloc and the size is 0,
    // exactly what MEM_RELEASE requires.
    let released = unsafe { VirtualFree(ptr.as_ptr() as *mut _, 0, MEM_RELEASE) } != 0;
    if !released {
        capture_os_error();
    }
}
830
831#[cfg(unix)]
832unsafe fn os_release_pages(ptr: NonNull<u8>, size: usize) {
833    // Choose advice by platform:
834    //
835    // Linux: MADV_DONTNEED on a private anonymous mapping immediately
836    // releases the physical pages; subsequent reads see zero-filled
837    // pages. This is the canonical "release-but-keep-vma" path.
838    //
839    // macOS: MADV_DONTNEED on a private mapping is only a hint — the
840    // kernel may ignore it. MADV_FREE (added 10.12 / macOS Sierra) is
841    // the documented path: the kernel may reclaim the pages under
842    // memory pressure, and a subsequent read sees either old data or
843    // zeros (the new contents are undefined). For "I really don't
844    // need this anymore" semantics, MADV_FREE is the right choice on
845    // macOS.
846    //
847    // Other Unix (BSD): MADV_FREE has the BSD semantics — same as
848    // macOS.
849    #[cfg(target_os = "linux")]
850    let advice = libc::MADV_DONTNEED;
851    #[cfg(not(target_os = "linux"))]
852    let advice = libc::MADV_FREE;
853    // SAFETY: ptr/size lie wholly inside our own mapping (per the
854    // OsBacked::release_pages caller contract); advice is a valid flag.
855    let rc = unsafe { libc::madvise(ptr.as_ptr() as *mut libc::c_void, size, advice) };
856    if rc != 0 {
857        capture_os_error();
858    }
859}
860
/// Marks `[ptr, ptr + size)` as discardable with `VirtualAlloc(MEM_RESET)`
/// (Windows).
///
/// `MEM_RESET` works on page-granular ranges. A misaligned `ptr` or `size`
/// makes the call return NULL with `ERROR_INVALID_PARAMETER`, which is
/// recorded through `capture_os_error` rather than propagated. Debug builds
/// assert the alignment first so the misuse shows up during development.
#[cfg(windows)]
unsafe fn os_release_pages(ptr: NonNull<u8>, size: usize) {
    use windows_sys::Win32::System::Memory::{VirtualAlloc, MEM_RESET, PAGE_READWRITE};
    let page = page_size();
    let addr = ptr.as_ptr() as usize;
    debug_assert_eq!(
        addr % page,
        0,
        "os_release_pages: ptr must be page-aligned on Windows MEM_RESET",
    );
    debug_assert_eq!(
        size % page,
        0,
        "os_release_pages: size must be page-aligned on Windows MEM_RESET",
    );
    // SAFETY: MEM_RESET on an existing region only tells the OS the contents
    // may be thrown away, so it is free to reclaim the physical pages.
    // lpProtect is ignored for MEM_RESET but still has to be a valid value.
    let res = unsafe { VirtualAlloc(ptr.as_ptr() as *mut _, size, MEM_RESET, PAGE_READWRITE) };
    if res.is_null() {
        capture_os_error();
    }
}
888
889/// Map forge-alloc-core's `ProtectFlags` to a Unix `mprotect` `prot` argument.
890///
891/// Unlike Windows, the Unix ABI exposes each protection bit independently
892/// (`PROT_READ`, `PROT_WRITE`, `PROT_EXEC`), so every one of the eight
893/// `(read, write, exec)` combinations maps bit-exactly to a `mprotect`
894/// argument with no over-grant or down-grade at this layer.
895///
896/// | `(read, write, exec)` | `prot`                              | Notes |
897/// |-----------------------|-------------------------------------|-------|
898/// | `(F, F, F)`           | `PROT_NONE` (== 0)                  | exact |
899/// | `(T, F, F)`           | `PROT_READ`                         | exact |
900/// | `(F, T, F)`           | `PROT_WRITE`                        | exact at the syscall ABI; some archs (older x86_64) implicitly grant read when write is set, but that's below this layer |
901/// | `(F, F, T)`           | `PROT_EXEC`                         | exact on NX-capable HW; pre-NX implicit read |
902/// | `(T, T, F)`           | `PROT_READ \| PROT_WRITE`           | exact |
903/// | `(T, F, T)`           | `PROT_READ \| PROT_EXEC`            | exact |
904/// | `(F, T, T)`           | `PROT_WRITE \| PROT_EXEC`           | exact (some kernels enforce W^X via seccomp/LSM; that's surfaced as `EINVAL` on the syscall, not silently masked here) |
905/// | `(T, T, T)`           | `PROT_READ \| PROT_WRITE \| PROT_EXEC` | exact; some hardened kernels reject and surface `EACCES`/`EINVAL` — propagated unchanged |
906///
907/// Extracted so unit tests can verify the mapping table without invoking
908/// `mprotect` on the host (the test runs cross-platform; only the table
909/// math is platform-neutral). This is the Unix structural parallel to
910/// [`win32_prot_from_flags`] — each Unix arm maps bit-exactly, unlike
911/// Win32 which cannot express write-without-read combinations natively.
912#[cfg(unix)]
913pub(super) fn unix_prot_from_flags(flags: ProtectFlags) -> i32 {
914    // PROT_NONE is 0 on every Unix; the explicit assignment in the
915    // "all-false" branch below documents intent without changing bits.
916    let mut prot = 0i32;
917    if flags.read {
918        prot |= libc::PROT_READ;
919    }
920    if flags.write {
921        prot |= libc::PROT_WRITE;
922    }
923    if flags.exec {
924        prot |= libc::PROT_EXEC;
925    }
926    if !flags.read && !flags.write && !flags.exec {
927        prot = libc::PROT_NONE;
928    }
929    prot
930}
931
932#[cfg(unix)]
933unsafe fn os_protect(ptr: NonNull<u8>, size: usize, flags: ProtectFlags) {
934    let prot = unix_prot_from_flags(flags);
935    // SAFETY: mprotect on a region we own with valid flag bits.
936    let rc = unsafe { libc::mprotect(ptr.as_ptr() as *mut libc::c_void, size, prot) };
937    if rc != 0 {
938        capture_os_error();
939    }
940}
941
/// Translate `ProtectFlags` into a Windows `PAGE_*` protection constant.
///
/// Every combination Win32 can express is mapped exactly; the remaining ones
/// are upgraded to the smallest constant that still grants everything the
/// caller asked for. Win32 does have a genuine exec-only mode
/// (`PAGE_EXECUTE`, value 16): on NX-capable hardware (every supported
/// x86_64 / aarch64 chip) reading such a page raises an access violation. On
/// the few legacy CPUs without NX the kernel implicitly grants read, which
/// cannot be avoided at this layer. What Win32 lacks is any mode with write
/// but without read, so those requests gain read access.
///
/// | `(read, write, exec)`     | Win32 constant            | Notes |
/// |---------------------------|---------------------------|-------|
/// | `(F, F, F)`               | `PAGE_NOACCESS`           | exact |
/// | `(T, F, F)`               | `PAGE_READONLY`           | exact |
/// | `(T, T, F)`               | `PAGE_READWRITE`          | exact |
/// | `(T, F, T)`               | `PAGE_EXECUTE_READ`       | exact |
/// | `(T, T, T)`               | `PAGE_EXECUTE_READWRITE`  | exact |
/// | `(F, F, T)`               | `PAGE_EXECUTE`            | exact on NX-capable HW |
/// | `(F, T, F)`               | `PAGE_READWRITE`          | over-grants read |
/// | `(F, T, T)`               | `PAGE_EXECUTE_READWRITE`  | over-grants read |
///
/// Kept separate from [`os_protect`] so unit tests can check the table
/// without tripping that function's debug assertion on write-without-read.
#[cfg(windows)]
pub(super) fn win32_prot_from_flags(flags: ProtectFlags) -> u32 {
    use windows_sys::Win32::System::Memory::{
        PAGE_EXECUTE, PAGE_EXECUTE_READ, PAGE_EXECUTE_READWRITE, PAGE_NOACCESS, PAGE_READONLY,
        PAGE_READWRITE,
    };
    let access = (flags.read, flags.write);
    if flags.exec {
        match access {
            // Exec-only is native. PAGE_EXECUTE_READ here would over-grant
            // read on every machine, whereas PAGE_EXECUTE only does so on
            // legacy NX-less CPUs, mirroring `mprotect(PROT_EXEC)` there.
            (false, false) => PAGE_EXECUTE,
            (true, false) => PAGE_EXECUTE_READ,
            // Any write request, with or without read. The write-only case
            // must stay in the EXECUTE family: falling back to
            // PAGE_READWRITE would silently drop the exec bit.
            (_, true) => PAGE_EXECUTE_READWRITE,
        }
    } else {
        match access {
            (false, false) => PAGE_NOACCESS,
            (true, false) => PAGE_READONLY,
            // Write-only is upgraded to read/write: Windows has no
            // "write but not read" primitive.
            (_, true) => PAGE_READWRITE,
        }
    }
}
999
/// Applies `flags` to `[ptr, ptr + size)` with `VirtualProtect` (Windows).
///
/// A failing call is not propagated; it is only recorded through
/// `capture_os_error`.
#[cfg(windows)]
unsafe fn os_protect(ptr: NonNull<u8>, size: usize, flags: ProtectFlags) {
    use windows_sys::Win32::System::Memory::VirtualProtect;
    // Only the write-without-read requests — `(F, T, F)` and `(F, T, T)` —
    // cannot be expressed exactly on Win32; `win32_prot_from_flags` upgrades
    // them to `PAGE_READWRITE` / `PAGE_EXECUTE_READWRITE`. All other
    // combinations, exec-only via `PAGE_EXECUTE` included, map exactly on
    // NX-capable hardware. Debug builds flag the unavoidable upgrade so that
    // misuse surfaces in tests.
    let read_implied_by_write = flags.read || !flags.write;
    debug_assert!(
        read_implied_by_write,
        "os_protect: write-without-read upgrades to RW/RWX on Windows — \
         caller relying on no-read semantics will not get them. Set flags.read=true \
         explicitly to silence this assertion.",
    );
    let new_prot = win32_prot_from_flags(flags);
    // VirtualProtect requires somewhere to store the previous protection.
    let mut previous: u32 = 0;
    // SAFETY: VirtualProtect on a region returned by VirtualAlloc with valid
    // PAGE_* protection constants.
    let changed =
        unsafe { VirtualProtect(ptr.as_ptr() as *mut _, size, new_prot, &mut previous) } != 0;
    if !changed {
        capture_os_error();
    }
}
1026
1027#[cfg(test)]
1028mod tests {
1029    use super::*;
1030
    // Most tests in this module exercise real OS mmap / VirtualAlloc paths.
    // Miri cannot model `mmap` / `VirtualAlloc` syscalls, so each of those
    // tests is individually ignored under Miri via `#[cfg_attr(miri, ignore)]`.
    // The underlying invariants the tests protect (page rounding, alignment,
    // capacity, OS-error capture) are unaffected by Miri's interpretation
    // model — Miri's job here is to detect UB in the *consumers* of
    // MmapBacked (Slab / Bump / etc.) when they're driven over InlineBacked.
1038
1039    #[test]
1040    #[cfg_attr(miri, ignore = "miri can't shim Win32 GetSystemInfo / sysconf")]
1041    fn page_size_is_reasonable() {
1042        let p = page_size();
1043        assert!(p >= 4096, "page size suspiciously small: {p}");
1044        assert!(p.is_power_of_two());
1045    }
1046
1047    #[test]
1048    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1049    fn alloc_then_write_then_read_back() {
1050        let m = MmapBacked::new(16 * 1024).expect("mmap should succeed for 16 KiB");
1051        let layout = NonZeroLayout::from_size_align(256, 8).unwrap();
1052        let block = m.allocate(layout).unwrap();
1053        let p = block.cast::<u8>();
1054        unsafe {
1055            core::ptr::write_bytes(p.as_ptr(), 0xCD, 256);
1056            for i in 0..256 {
1057                assert_eq!(*p.as_ptr().add(i), 0xCD);
1058            }
1059        }
1060    }
1061
1062    #[test]
1063    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1064    fn alloc_returns_aligned_pointer() {
1065        let m = MmapBacked::new(64 * 1024).unwrap();
1066        // First, push the cursor off zero with an odd-size allocation.
1067        let _ = m
1068            .allocate(NonZeroLayout::from_size_align(3, 1).unwrap())
1069            .unwrap();
1070        let layout = NonZeroLayout::from_size_align(64, 64).unwrap();
1071        let block = m.allocate(layout).unwrap();
1072        assert_eq!(block.cast::<u8>().as_ptr() as usize % 64, 0);
1073    }
1074
1075    #[test]
1076    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1077    fn alloc_fails_when_exhausted() {
1078        let m = MmapBacked::new(8 * 1024).unwrap();
1079        let cap = m.capacity();
1080        let layout = NonZeroLayout::from_size_align(cap, 8).unwrap();
1081        let _ = m.allocate(layout).unwrap();
1082        assert!(m
1083            .allocate(NonZeroLayout::from_size_align(1, 1).unwrap())
1084            .is_err());
1085    }
1086
1087    #[test]
1088    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1089    fn fixed_range_contains_allocations() {
1090        let m = MmapBacked::new(8 * 1024).unwrap();
1091        let layout = NonZeroLayout::from_size_align(64, 8).unwrap();
1092        let block = m.allocate(layout).unwrap();
1093        assert!(m.contains(block.cast::<u8>()));
1094    }
1095
1096    #[test]
1097    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1098    fn capacity_is_page_rounded() {
1099        let m = MmapBacked::new(1).unwrap();
1100        let cap = m.capacity();
1101        let page = page_size();
1102        assert_eq!(cap % page, 0);
1103        assert!(cap >= page);
1104    }
1105
1106    #[test]
1107    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1108    fn zero_size_request_errors() {
1109        assert!(MmapBacked::new(0).is_err());
1110    }
1111
1112    #[test]
1113    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1114    fn pre_syscall_rejection_sets_synthetic_einval() {
1115        // Pre-syscall failure paths (size==0, page-rounding overflow) must
1116        // populate the thread-local last-error slot with EINVAL rather
1117        // than leaving stale data from prior failures. Without this,
1118        // mmap_last_os_error() would silently lie about what just failed.
1119        mmap_clear_last_os_error();
1120        assert!(MmapBacked::new(0).is_err());
1121        let e = mmap_last_os_error().expect("synthetic EINVAL captured");
1122        #[cfg(unix)]
1123        assert_eq!(e.raw_os_error(), Some(libc::EINVAL));
1124        #[cfg(windows)]
1125        assert_eq!(
1126            e.raw_os_error(),
1127            Some(windows_sys::Win32::Foundation::ERROR_INVALID_PARAMETER as i32),
1128        );
1129
1130        // Overflow path: size + (page-1) wraps.
1131        mmap_clear_last_os_error();
1132        assert!(MmapBacked::new(usize::MAX).is_err());
1133        let e = mmap_last_os_error().expect("synthetic EINVAL on overflow");
1134        #[cfg(unix)]
1135        assert_eq!(e.raw_os_error(), Some(libc::EINVAL));
1136        #[cfg(windows)]
1137        assert_eq!(
1138            e.raw_os_error(),
1139            Some(windows_sys::Win32::Foundation::ERROR_INVALID_PARAMETER as i32),
1140        );
1141    }
1142
1143    #[test]
1144    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1145    fn last_os_error_captured_on_failure() {
1146        // Request an impossibly large mapping — both unix and Windows should
1147        // reject and set their thread-local error. We can't predict the exact
1148        // code (ENOMEM, EINVAL, EOVERFLOW, ERROR_NOT_ENOUGH_MEMORY, …) so we
1149        // only assert that *something* was captured.
1150        mmap_clear_last_os_error();
1151        assert!(mmap_last_os_error().is_none());
1152        // usize::MAX/2 rounds to usize::MAX-(page-1) which exceeds any
1153        // realistic address space, forcing a syscall failure.
1154        let huge = usize::MAX / 2;
1155        assert!(MmapBacked::new(huge).is_err());
1156        assert!(
1157            mmap_last_os_error().is_some(),
1158            "expected captured OS error after impossibly large mmap request",
1159        );
1160    }
1161
1162    #[test]
1163    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1164    fn os_backed_release_pages_after_alloc() {
1165        let m = MmapBacked::new(64 * 1024).unwrap();
1166        let p = m.base_ptr();
1167        // Write something, release, write again — must not crash.
1168        unsafe {
1169            core::ptr::write_bytes(p.as_ptr(), 0xEE, page_size());
1170            m.release_pages(p, page_size());
1171            core::ptr::write_bytes(p.as_ptr(), 0x11, page_size());
1172        }
1173    }
1174
1175    #[test]
1176    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1177    fn lazy_commit_then_write_round_trips() {
1178        // `new_lazy` reserves on Windows (no commit charge) and is identical
1179        // to `new` on Unix. After `commit`, the range must be writable on
1180        // every platform.
1181        let m = MmapBacked::new_lazy(64 * 1024).expect("lazy reserve should succeed");
1182        let len = page_size();
1183        m.commit(0, len)
1184            .expect("commit of a reserved range should succeed");
1185        let base = m.base().as_ptr();
1186        unsafe {
1187            core::ptr::write_bytes(base, 0xAB, len);
1188            assert_eq!(*base, 0xAB);
1189            assert_eq!(*base.add(len - 1), 0xAB);
1190        }
1191    }
1192
1193    #[test]
1194    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1195    fn commit_is_idempotent_and_monotonic() {
1196        let m = MmapBacked::new_lazy(64 * 1024).unwrap();
1197        let page = page_size();
1198        // Re-committing the same range and a sub-range is a no-op success.
1199        m.commit(0, page).unwrap();
1200        m.commit(0, page).unwrap();
1201        m.commit(0, 1).unwrap();
1202        // Extend the watermark forward by one page.
1203        m.commit(page, page).unwrap();
1204        // A range already below the watermark stays Ok without a syscall.
1205        m.commit(0, 2 * page).unwrap();
1206    }
1207
1208    #[test]
1209    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1210    fn eager_mapping_commit_is_noop() {
1211        // A default (eager) mapping has the whole region committed at
1212        // construction; the watermark starts at `len`, so `commit` succeeds
1213        // for any in-region range as a pure watermark hit.
1214        let m = MmapBacked::new(16 * 1024).unwrap();
1215        m.commit(0, 16 * 1024).unwrap();
1216        m.commit(page_size(), page_size()).unwrap();
1217    }
1218
1219    #[test]
1220    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1221    fn bump_arena_over_lazy_mmap_commits_on_alloc() {
1222        use crate::BumpArena;
1223        // BumpArena is the commit-aware consumer: each allocate commits the
1224        // block before returning it, so writing through every returned
1225        // pointer is sound even though the backing was only reserved.
1226        let arena = BumpArena::new(MmapBacked::new_lazy(256 * 1024).unwrap()).unwrap();
1227        let layout = NonZeroLayout::from_size_align(page_size(), 8).unwrap();
1228        for _ in 0..16 {
1229            let block = arena.allocate(layout).unwrap();
1230            let p = block.cast::<u8>().as_ptr();
1231            unsafe {
1232                core::ptr::write_bytes(p, 0xCD, page_size());
1233                assert_eq!(*p, 0xCD);
1234                assert_eq!(*p.add(page_size() - 1), 0xCD);
1235            }
1236        }
1237    }
1238
1239    #[test]
1240    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1241    fn bump_over_passthrough_wrapper_over_lazy_mmap_commits() {
1242        use crate::BumpArena;
1243        use crate::Statistics;
1244        // A pass-through FixedRange wrapper (Statistics) interposed between
1245        // BumpArena and a lazy mapping must forward `commit`, so writes are
1246        // still committed and don't fault.
1247        let arena =
1248            BumpArena::new(Statistics::new(MmapBacked::new_lazy(256 * 1024).unwrap())).unwrap();
1249        let layout = NonZeroLayout::from_size_align(page_size(), 8).unwrap();
1250        for _ in 0..8 {
1251            let block = arena.allocate(layout).unwrap();
1252            let p = block.cast::<u8>().as_ptr();
1253            unsafe {
1254                core::ptr::write_bytes(p, 0x5A, page_size());
1255                assert_eq!(*p.add(page_size() - 1), 0x5A);
1256            }
1257        }
1258    }
1259
1260    #[test]
1261    #[cfg_attr(miri, ignore = "miri can't shim mmap / VirtualAlloc")]
1262    fn slab_over_lazy_mmap_commits_via_allocate() {
1263        use crate::Slab;
1264        // Slab carves its region via `backing.allocate()`, which commits the
1265        // block up front on a lazy mapping (fix #2), so the slot writes that
1266        // follow don't fault.
1267        let s: Slab<u64, MmapBacked> =
1268            Slab::new(1024, MmapBacked::new_lazy(1 << 20).unwrap()).unwrap();
1269        let layout = NonZeroLayout::for_type::<u64>().unwrap();
1270        let p = s.allocate(layout).unwrap();
1271        unsafe {
1272            p.cast::<u64>().as_ptr().write(0xDEAD_BEEF);
1273            assert_eq!(p.cast::<u64>().as_ptr().read(), 0xDEAD_BEEF);
1274            s.deallocate(p.cast(), layout);
1275        }
1276    }
1277
1278    /// Structural parallel to the Windows
1279    /// `win32_prot_from_flags_preserves_every_requested_bit` regression
1280    /// test: confirm that every `(read, write, exec)` combination on Unix
1281    /// produces the corresponding bit-exact `PROT_*` mask with no over-
1282    /// grant (no spurious read added to exec-only) and no down-grade
1283    /// (W+X must not collapse to W). Unlike Win32 — which lacks primitives
1284    /// for write-without-read and exec-only — Unix `mprotect` exposes each
1285    /// bit independently, so the table is exact across all eight rows.
1286    ///
1287    /// Running this test on a non-Unix host (Windows) verifies the table
1288    /// math at compile time only when this `#[cfg(unix)]` gate is active;
1289    /// CI on Linux/macOS exercises the assertions at runtime.
1290    #[cfg(unix)]
1291    #[test]
1292    fn unix_prot_from_flags_preserves_every_requested_bit() {
1293        // ProtectFlags is #[non_exhaustive] — build via base + field assigns.
1294        let mut none = ProtectFlags::NONE;
1295        let mut r = ProtectFlags::NONE;
1296        r.read = true;
1297        let mut w = ProtectFlags::NONE;
1298        w.write = true;
1299        let mut x = ProtectFlags::NONE;
1300        x.exec = true;
1301        let mut rw = ProtectFlags::NONE;
1302        rw.read = true;
1303        rw.write = true;
1304        let mut rx = ProtectFlags::NONE;
1305        rx.read = true;
1306        rx.exec = true;
1307        let mut wx = ProtectFlags::NONE;
1308        wx.write = true;
1309        wx.exec = true;
1310        let mut rwx = ProtectFlags::NONE;
1311        rwx.read = true;
1312        rwx.write = true;
1313        rwx.exec = true;
1314        // Suppress unused_mut on `none` — clippy/rustc otherwise gripe.
1315        let _ = &mut none;
1316
1317        assert_eq!(unix_prot_from_flags(none), libc::PROT_NONE);
1318        assert_eq!(unix_prot_from_flags(r), libc::PROT_READ);
1319        assert_eq!(
1320            unix_prot_from_flags(w),
1321            libc::PROT_WRITE,
1322            "W must be PROT_WRITE only — Unix allows write-without-read at the syscall ABI; \
1323             any kernel-side implicit read-grant lives below this layer and is not our concern",
1324        );
1325        assert_eq!(
1326            unix_prot_from_flags(x),
1327            libc::PROT_EXEC,
1328            "X must be PROT_EXEC only — over-granting (e.g. adding PROT_READ) \
1329             must not appear on Unix",
1330        );
1331        assert_eq!(unix_prot_from_flags(rw), libc::PROT_READ | libc::PROT_WRITE);
1332        assert_eq!(unix_prot_from_flags(rx), libc::PROT_READ | libc::PROT_EXEC);
1333        assert_eq!(
1334            unix_prot_from_flags(wx),
1335            libc::PROT_WRITE | libc::PROT_EXEC,
1336            "W+X must be PROT_WRITE|PROT_EXEC — silently dropping the exec bit \
1337             must not appear on Unix. Hardened kernels that enforce W^X surface \
1338             EINVAL/EACCES at the mprotect syscall, not by silently masking bits here.",
1339        );
1340        assert_eq!(
1341            unix_prot_from_flags(rwx),
1342            libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC,
1343        );
1344    }
1345
1346    /// Regression: `win32_prot_from_flags` used to map `(read=false,
1347    /// write=true, exec=true)` to `PAGE_READWRITE`, silently dropping the
1348    /// caller's exec bit. Hardening wrappers that ask for W+X (uncommon but
1349    /// valid for JIT-like flows that don't need read) would have gotten
1350    /// pages that fault on instruction fetch in release builds — the
1351    /// debug_assert in `os_protect` only catches write-without-read in dev.
1352    /// The fix routes W+X through `PAGE_EXECUTE_READWRITE`.
1353    ///
1354    /// Exec-only `(F, F, T)` uses `PAGE_EXECUTE` (which Windows *does*
1355    /// support natively) so that callers that opt out of read on NX-capable
1356    /// hardware actually get exec-only semantics rather than an
1357    /// unconditional upgrade to RX.
1358    ///
1359    /// The mapping is tested in isolation (bypassing `os_protect`'s
1360    /// `debug_assert!(!(write && !read))`). Unix is unaffected — its
1361    /// `mprotect` path expresses each bit independently.
1362    #[cfg(windows)]
1363    #[test]
1364    #[cfg_attr(
1365        miri,
1366        ignore = "win32 import resolution requires actual Windows runtime"
1367    )]
1368    fn win32_prot_from_flags_preserves_every_requested_bit() {
1369        use windows_sys::Win32::System::Memory::{
1370            PAGE_EXECUTE, PAGE_EXECUTE_READ, PAGE_EXECUTE_READWRITE, PAGE_NOACCESS, PAGE_READONLY,
1371            PAGE_READWRITE,
1372        };
1373        // ProtectFlags is #[non_exhaustive] — build via base + field assigns.
1374        let mut none = ProtectFlags::NONE;
1375        let mut r = ProtectFlags::NONE;
1376        r.read = true;
1377        let mut w = ProtectFlags::NONE;
1378        w.write = true;
1379        let mut x = ProtectFlags::NONE;
1380        x.exec = true;
1381        let mut rw = ProtectFlags::NONE;
1382        rw.read = true;
1383        rw.write = true;
1384        let mut rx = ProtectFlags::NONE;
1385        rx.read = true;
1386        rx.exec = true;
1387        let mut rwx = ProtectFlags::NONE;
1388        rwx.read = true;
1389        rwx.write = true;
1390        rwx.exec = true;
1391        let mut wx = ProtectFlags::NONE;
1392        wx.write = true;
1393        wx.exec = true;
1394        // Suppress unused_mut on `none` — clippy/rustc otherwise gripe.
1395        let _ = &mut none;
1396
1397        assert_eq!(win32_prot_from_flags(none), PAGE_NOACCESS);
1398        assert_eq!(win32_prot_from_flags(r), PAGE_READONLY);
1399        assert_eq!(win32_prot_from_flags(rw), PAGE_READWRITE);
1400        assert_eq!(win32_prot_from_flags(rx), PAGE_EXECUTE_READ);
1401        assert_eq!(win32_prot_from_flags(rwx), PAGE_EXECUTE_READWRITE);
1402        // Exec-only is exact on NX-capable HW — Windows has PAGE_EXECUTE.
1403        assert_eq!(
1404            win32_prot_from_flags(x),
1405            PAGE_EXECUTE,
1406            "exec-only must use PAGE_EXECUTE (exec-only on NX-capable HW), \
1407             not PAGE_EXECUTE_READ which would unconditionally add read",
1408        );
1409        // Write-without-read upgrades — Windows cannot express write-only.
1410        assert_eq!(
1411            win32_prot_from_flags(w),
1412            PAGE_READWRITE,
1413            "W upgrades to RW (Windows has no write-only primitive)",
1414        );
1415        assert_eq!(
1416            win32_prot_from_flags(wx),
1417            PAGE_EXECUTE_READWRITE,
1418            "W+X must upgrade to RWX, not collapse to PAGE_READWRITE — \
1419             silently dropping the exec bit would fault on instruction fetch",
1420        );
1421    }
1422}