// hyperlight_common/arch/amd64/vmem.rs
/*
Copyright 2025  The Hyperlight Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
 */
16
//! x86-64 4-level page table manipulation code.
//!
//! This module implements page table setup for x86-64 long mode using 4-level paging:
//! - PML4 (Page Map Level 4) - bits 47:39 - 512 entries, each covering 512GB
//! - PDPT (Page Directory Pointer Table) - bits 38:30 - 512 entries, each covering 1GB
//! - PD (Page Directory) - bits 29:21 - 512 entries, each covering 2MB
//! - PT (Page Table) - bits 20:12 - 512 entries, each covering 4KB pages
//!
//! The code uses an iterator-based approach to walk the page table hierarchy,
//! allocating intermediate tables as needed and setting appropriate flags on leaf PTEs.
27
28use crate::vmem::{
29    BasicMapping, CowMapping, MapRequest, MapResponse, Mapping, MappingKind, TableMovabilityBase,
30    TableOps, TableReadOps, UpdateParent, UpdateParentNone, Void, modify_ptes,
31    write_entry_updating,
32};
33
/// Parent is another page table whose ancestors may also need
/// updating when it relocates.
pub struct UpdateParentTable<Op: TableOps, P: UpdateParent<Op>> {
    /// The parent table's own [`UpdateParent`] handle, so that a
    /// relocation notification can propagate all the way to the root.
    pub(crate) parent: P,
    /// Address of the entry *within the parent table* that points at
    /// the table this handle describes.
    pub(crate) entry_ptr: Op::TableAddr,
}
// Manual Clone/Copy impls rather than #[derive]: derive would place
// `Clone`/`Copy` bounds on the `Op` type parameter itself, which is not
// required here (only the stored fields need to be copyable).
impl<Op: TableOps, P: UpdateParent<Op>> Clone for UpdateParentTable<Op, P> {
    fn clone(&self) -> Self {
        *self
    }
}
impl<Op: TableOps, P: UpdateParent<Op>> Copy for UpdateParentTable<Op, P> {}
impl<Op: TableOps, P: UpdateParent<Op>> UpdateParentTable<Op, P> {
    /// Pair a parent handle with the address of the parent-table entry
    /// that points at the child table.
    pub(crate) fn new(parent: P, entry_ptr: Op::TableAddr) -> Self {
        UpdateParentTable { parent, entry_ptr }
    }
}
51
/// Parent is the root (e.g. CR3): if the table this refers to moves,
/// the root pointer itself is rewritten via `Op::update_root` rather
/// than any in-table entry.
#[derive(Copy, Clone)]
pub struct UpdateParentRoot {}
55
56/// Read a PTE and return it (widened to u64) if the present bit is
57/// set. The amd64 "present" encoding is a single bit (bit 0); other
58/// architectures may need richer semantics, which is why this lives
59/// per-arch rather than in the common module.
60///
61/// # Safety
62/// `entry_ptr` must point to a valid page table entry.
63#[inline(always)]
64#[allow(clippy::useless_conversion)]
65pub(super) unsafe fn read_pte_if_present<Op: TableReadOps>(
66    op: &Op,
67    entry_ptr: Op::TableAddr,
68) -> Option<u64> {
69    let pte: u64 = unsafe { op.read_entry(entry_ptr) }.into();
70    if (pte & PAGE_PRESENT) != 0 {
71        Some(pte)
72    } else {
73        None
74    }
75}
76
77/// Require that a PTE is present and descend to the next-level table.
78///
79/// # Safety
80/// `op` must provide valid page table memory.
81pub(super) unsafe fn require_pte_exist<Op: TableReadOps, P: UpdateParent<Op>>(
82    op: &Op,
83    x: MapResponse<Op, P>,
84) -> Option<MapRequest<Op, P::ChildType>>
85where
86    P::ChildType: UpdateParent<Op>,
87{
88    unsafe { read_pte_if_present(op, x.entry_ptr) }.map(|pte| MapRequest {
89        #[allow(clippy::unnecessary_cast)]
90        table_base: Op::from_phys((pte & PTE_ADDR_MASK) as PhysAddr),
91        vmin: x.vmin,
92        len: x.len,
93        update_parent: x.update_parent.for_child_at_entry(x.entry_ptr),
94    })
95}
96
97// Paging Flags
98//
99// See the following links explaining paging:
100//
101// * Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 3A: System Programming Guide, Part 1
102//  - Chapter 5 "Paging"
103//
104// https://cdrdv2.intel.com/v1/dl/getContent/671200
105//
106// * AMD64 Architecture Programmer’s Manual, Volume 2: System Programming, Section 5.3: Long-Mode Page Translation
107//
108// https://docs.amd.com/v/u/en-US/24593_3.43
109//
110// Or if you prefer something less formal:
111//
112// * Very basic description: https://stackoverflow.com/a/26945892
113// * More in-depth descriptions: https://wiki.osdev.org/Paging
114//
115
/// Page is Present (bit 0). A zeroed entry is therefore "not mapped".
pub const PAGE_PRESENT: u64 = 1;
/// Page is Read/Write (if not set page is read only so long as the WP bit in CR0 is set to 1 - which it is in Hyperlight)
const PAGE_RW: u64 = 1 << 1;
/// Execute Disable (if this bit is set then data in the page cannot be executed)
const PAGE_NX: u64 = 1 << 63;
/// Mask to extract the physical address from a PTE (bits 51:12)
/// This masks out the lower 12 flag bits AND the upper bits including NX (bit 63)
pub const PTE_ADDR_MASK: u64 = 0x000F_FFFF_FFFF_F000;
const PAGE_USER_ACCESS_DISABLED: u64 = 0 << 2; // U/S bit not set - supervisor mode only (no code runs in user mode for now)
const PAGE_DIRTY_SET: u64 = 1 << 6; // D - dirty bit (pre-set so the CPU never needs to write it)
const PAGE_ACCESSED_SET: u64 = 1 << 5; // A - accessed bit (pre-set so the CPU never needs to write it)
const PAGE_CACHE_ENABLED: u64 = 0 << 4; // PCD - page cache disable bit not set (caching enabled)
const PAGE_WRITE_BACK: u64 = 0 << 3; // PWT - page write-through bit not set (write-back caching)
const PAGE_PAT_WB: u64 = 0 << 7; // PAT - page attribute table index bit (0 for write-back memory when PCD=0, PWT=0)

// We use various patterns of the available-for-software-use bits
// (AVL, bits 11:9) to represent certain special mappings.
const PTE_AVL_MASK: u64 = 0x0000_0000_0000_0E00;
/// AVL pattern (bit 9 set) marking a copy-on-write mapping.
const PAGE_AVL_COW: u64 = 1 << 9;
136
137/// Returns PAGE_RW if writable is true, 0 otherwise
138#[inline(always)]
139const fn page_rw_flag(writable: bool) -> u64 {
140    if writable { PAGE_RW } else { 0 }
141}
142
143/// Returns PAGE_NX if executable is false (NX = No Execute), 0 otherwise
144#[inline(always)]
145const fn page_nx_flag(executable: bool) -> u64 {
146    if executable { 0 } else { PAGE_NX }
147}
148
149/// Helper function to generate a page table entry that points to another table
150#[allow(clippy::identity_op)]
151#[allow(clippy::precedence)]
152fn pte_for_table<Op: TableOps>(table_addr: Op::TableAddr) -> u64 {
153    Op::to_phys(table_addr) |
154        PAGE_ACCESSED_SET | // prevent the CPU writing to the access flag
155        PAGE_CACHE_ENABLED | // leave caching enabled
156        PAGE_WRITE_BACK | // use write-back caching
157        PAGE_USER_ACCESS_DISABLED |// dont allow user access (no code runs in user mode for now)
158        PAGE_RW | // R/W - we don't use block-level permissions
159        PAGE_PRESENT // P   - this entry is present
160}
161
/// This trait is used to select appropriate implementations of
/// [`UpdateParent`] to be used, depending on whether a particular
/// implementation needs the ability to move tables.
pub trait TableMovability<Op: TableReadOps + ?Sized, TableMoveInfo> {
    /// The [`UpdateParent`] implementation to install at the root of
    /// the page table hierarchy.
    type RootUpdateParent: UpdateParent<Op, TableMoveInfo = TableMoveInfo>;
    /// Construct the root-level [`UpdateParent`] value.
    fn root_update_parent() -> Self::RootUpdateParent;
}
// When tables may move, relocations must propagate upwards, so the root
// uses `UpdateParentRoot` (which rewrites the root pointer on a move).
impl<Op: TableOps<TableMovability = crate::vmem::MayMoveTable>> TableMovability<Op, Op::TableAddr>
    for crate::vmem::MayMoveTable
{
    type RootUpdateParent = UpdateParentRoot;
    fn root_update_parent() -> Self::RootUpdateParent {
        UpdateParentRoot {}
    }
}
// When tables can never move, no parent update can ever be required,
// so the no-op `UpdateParentNone` suffices at every level.
impl<Op: TableReadOps> TableMovability<Op, Void> for crate::vmem::MayNotMoveTable {
    type RootUpdateParent = UpdateParentNone;
    fn root_update_parent() -> Self::RootUpdateParent {
        UpdateParentNone {}
    }
}
183
// An intermediate table's parent entry must be rewritten when the table
// relocates; the rewrite itself goes through `write_entry_updating` so
// that the *parent's* ancestors are also updated if the parent moves in
// turn.
impl<
    Op: TableOps<TableMovability = crate::vmem::MayMoveTable>,
    P: UpdateParent<Op, TableMoveInfo = Op::TableAddr>,
> UpdateParent<Op> for UpdateParentTable<Op, P>
{
    type TableMoveInfo = Op::TableAddr;
    type ChildType = UpdateParentTable<Op, Self>;
    /// Point this table's entry in its parent at `new_ptr`.
    fn update_parent(self, op: &Op, new_ptr: Op::TableAddr) {
        let pte = pte_for_table::<Op>(new_ptr);
        // SAFETY: `self.entry_ptr` is the parent-table entry recorded for
        // this table when the handle was built in `for_child_at_entry`.
        unsafe {
            write_entry_updating(op, self.parent, self.entry_ptr, pte);
        }
    }
    /// Build the handle for the child table referenced by the entry at
    /// `entry_ptr` within this table.
    fn for_child_at_entry(self, entry_ptr: Op::TableAddr) -> Self::ChildType {
        Self::ChildType::new(self, entry_ptr)
    }
}
201
// The root table has no parent entry; a relocation instead replaces the
// root pointer itself (e.g. CR3) via `Op::update_root`.
impl<Op: TableOps<TableMovability = crate::vmem::MayMoveTable>> UpdateParent<Op>
    for UpdateParentRoot
{
    type TableMoveInfo = Op::TableAddr;
    type ChildType = UpdateParentTable<Op, Self>;
    fn update_parent(self, op: &Op, new_ptr: Op::TableAddr) {
        // SAFETY: `new_ptr` is the relocated root table, so installing it
        // as the new root is exactly what the move requires.
        unsafe {
            op.update_root(new_ptr);
        }
    }
    fn for_child_at_entry(self, entry_ptr: Op::TableAddr) -> Self::ChildType {
        Self::ChildType::new(self, entry_ptr)
    }
}
216
217/// Page-mapping callback to allocate a next-level page table if necessary.
218/// # Safety
219/// This function modifies page table data structures, and should not be called concurrently
220/// with any other operations that modify the page tables.
221unsafe fn alloc_pte_if_needed<
222    Op: TableOps,
223    P: UpdateParent<
224            Op,
225            TableMoveInfo = <Op::TableMovability as TableMovabilityBase<Op>>::TableMoveInfo,
226        >,
227>(
228    op: &Op,
229    x: MapResponse<Op, P>,
230) -> MapRequest<Op, P::ChildType>
231where
232    P::ChildType: UpdateParent<Op>,
233{
234    let new_update_parent = x.update_parent.for_child_at_entry(x.entry_ptr);
235    if let Some(pte) = unsafe { read_pte_if_present(op, x.entry_ptr) } {
236        return MapRequest {
237            table_base: Op::from_phys(pte & PTE_ADDR_MASK),
238            vmin: x.vmin,
239            len: x.len,
240            update_parent: new_update_parent,
241        };
242    }
243
244    let page_addr = unsafe { op.alloc_table() };
245
246    let pte = pte_for_table::<Op>(page_addr);
247    unsafe {
248        write_entry_updating(op, x.update_parent, x.entry_ptr, pte);
249    };
250    MapRequest {
251        table_base: page_addr,
252        vmin: x.vmin,
253        len: x.len,
254        update_parent: new_update_parent,
255    }
256}
257
258/// Map a normal memory page
259/// # Safety
260/// This function modifies page table data structures, and should not be called concurrently
261/// with any other operations that modify the page tables.
262#[allow(clippy::identity_op)]
263#[allow(clippy::precedence)]
264unsafe fn map_page<
265    Op: TableOps,
266    P: UpdateParent<
267            Op,
268            TableMoveInfo = <Op::TableMovability as TableMovabilityBase<Op>>::TableMoveInfo,
269        >,
270>(
271    op: &Op,
272    mapping: &Mapping,
273    r: MapResponse<Op, P>,
274) {
275    let pte = match &mapping.kind {
276        MappingKind::Basic(bm) =>
277        // TODO: Support not readable
278        // NOTE: On x86-64, there is no separate "readable" bit in the page table entry.
279        // This means that pages cannot be made write-only or execute-only without also being readable.
280        // All pages that are mapped as writable or executable are also implicitly readable.
281        // If support for "not readable" mappings is required in the future, it would need to be
282        // implemented using additional mechanisms (e.g., page-fault handling or memory protection keys),
283        // but for now, this architectural limitation is accepted.
284        {
285            (mapping.phys_base + (r.vmin - mapping.virt_base)) |
286                page_nx_flag(bm.executable) | // NX - no execute unless allowed
287                PAGE_PAT_WB | // PAT index bit for write-back memory
288                PAGE_DIRTY_SET | // prevent the CPU writing to the dirty bit
289                PAGE_ACCESSED_SET | // prevent the CPU writing to the access flag
290                PAGE_CACHE_ENABLED | // leave caching enabled
291                PAGE_WRITE_BACK | // use write-back caching
292                PAGE_USER_ACCESS_DISABLED | // dont allow user access (no code runs in user mode for now)
293                page_rw_flag(bm.writable) | // R/W - set if writable
294                PAGE_PRESENT // P   - this entry is present
295        }
296        MappingKind::Cow(cm) => {
297            (mapping.phys_base + (r.vmin - mapping.virt_base)) |
298                page_nx_flag(cm.executable) | // NX - no execute unless allowed
299                PAGE_AVL_COW |
300                PAGE_PAT_WB | // PAT index bit for write-back memory
301                PAGE_DIRTY_SET | // prevent the CPU writing to the dirty bit
302                PAGE_ACCESSED_SET | // prevent the CPU writing to the access flag
303                PAGE_CACHE_ENABLED | // leave caching enabled
304                PAGE_WRITE_BACK | // use write-back caching
305                PAGE_USER_ACCESS_DISABLED | // dont allow user access (no code runs in user mode for now)
306                0 | // R/W - Cow page is never writable
307                PAGE_PRESENT // P   - this entry is present
308        }
309        MappingKind::Unmapped => 0,
310    };
311    unsafe {
312        write_entry_updating(op, r.update_parent, r.entry_ptr, pte);
313    }
314}
315
316// There are no notable architecture-specific safety considerations
317// here, and the general conditions are documented in the
318// architecture-independent re-export in vmem.rs
319
320/// Maps a contiguous virtual address range to physical memory.
321///
322/// This function walks the 4-level page table hierarchy (PML4 → PDPT → PD → PT),
323/// allocating intermediate tables as needed via `alloc_pte_if_needed`, and finally
324/// writing the leaf page table entries with the requested permissions via `map_page`.
325///
326/// The iterator chain processes each level:
327/// 1. PML4 (47:39) - allocate PDPT if needed
328/// 2. PDPT (38:30) - allocate PD if needed
329/// 3. PD (29:21) - allocate PT if needed
330/// 4. PT (20:12) - write final PTE with physical address and flags
331///
332/// Multi-space page-table walking on amd64: walks each root
333/// independently and emits all leaves as `ThisSpace`. Aliased
334/// intermediate-table detection is not implemented here because no
335/// current embedder exercises that pattern on amd64.
336///
337/// TODO: align with the i686 implementation and detect aliased
338/// intermediate tables to avoid semantic divergence across arches.
339/// Tracking: follow-up issue.
340#[allow(clippy::missing_safety_doc)]
341pub unsafe fn walk_va_spaces<Op: TableReadOps>(
342    op: &Op,
343    roots: &[Op::TableAddr],
344    address: u64,
345    len: u64,
346) -> ::alloc::vec::Vec<(
347    crate::vmem::SpaceId,
348    ::alloc::vec::Vec<crate::vmem::SpaceAwareMapping>,
349)> {
350    use ::alloc::vec::Vec;
351
352    let mut out: Vec<(crate::vmem::SpaceId, Vec<crate::vmem::SpaceAwareMapping>)> =
353        Vec::with_capacity(roots.len());
354
355    let addr = address & ((1u64 << VA_BITS) - 1);
356    let vmin = addr & !(PAGE_SIZE as u64 - 1);
357    let vmax = core::cmp::min(addr + len, 1u64 << VA_BITS);
358
359    for &root in roots {
360        #[allow(clippy::unnecessary_cast)]
361        let root_id: crate::vmem::SpaceId = Op::to_phys(root) as u64;
362        let mut mappings: Vec<crate::vmem::SpaceAwareMapping> = Vec::new();
363
364        let iter = modify_ptes::<47, 39, Op, _>(MapRequest {
365            table_base: root,
366            vmin,
367            len: vmax.saturating_sub(vmin),
368            update_parent: UpdateParentNone {},
369        })
370        .filter_map(|r| unsafe { require_pte_exist(op, r) })
371        .flat_map(modify_ptes::<38, 30, Op, _>)
372        .filter_map(|r| unsafe { require_pte_exist(op, r) })
373        .flat_map(modify_ptes::<29, 21, Op, _>)
374        .filter_map(|r| unsafe { require_pte_exist(op, r) })
375        .flat_map(modify_ptes::<20, 12, Op, _>);
376
377        for r in iter {
378            let Some(pte) = (unsafe { read_pte_if_present(op, r.entry_ptr) }) else {
379                continue;
380            };
381            let phys_addr = pte & PTE_ADDR_MASK;
382            let sgn_bit = r.vmin >> (VA_BITS - 1);
383            let sgn_bits = 0u64.wrapping_sub(sgn_bit) << VA_BITS;
384            let virt_addr = sgn_bits | r.vmin;
385
386            let executable = (pte & PAGE_NX) == 0;
387            let avl = pte & PTE_AVL_MASK;
388            let kind = if avl == PAGE_AVL_COW {
389                MappingKind::Cow(CowMapping {
390                    readable: true,
391                    executable,
392                })
393            } else {
394                MappingKind::Basic(BasicMapping {
395                    readable: true,
396                    writable: (pte & PAGE_RW) != 0,
397                    executable,
398                })
399            };
400            mappings.push(crate::vmem::SpaceAwareMapping::ThisSpace(Mapping {
401                phys_base: phys_addr,
402                virt_base: virt_addr,
403                len: PAGE_SIZE as u64,
404                kind,
405                user_accessible: false,
406            }));
407        }
408
409        out.push((root_id, mappings));
410    }
411
412    out
413}
414
/// See [`walk_va_spaces`]: amd64 never emits `AnotherSpace`, so this
/// is unreachable in practice. It silently no-ops (rather than
/// panicking) to keep the architecture-independent re-export usable.
#[allow(clippy::missing_safety_doc)]
pub unsafe fn space_aware_map<Op: TableOps>(
    _op: &Op,
    _ref_map: crate::vmem::SpaceReferenceMapping,
    _built_roots: &::alloc::collections::BTreeMap<crate::vmem::SpaceId, Op::TableAddr>,
) {
    // Intentionally empty: no amd64 walk produces a cross-space
    // reference (see walk_va_spaces), so there is nothing to map here.
}
425
426#[allow(clippy::missing_safety_doc)]
427pub unsafe fn map<Op: TableOps>(op: &Op, mapping: Mapping) {
428    modify_ptes::<47, 39, Op, _>(MapRequest {
429        table_base: op.root_table(),
430        vmin: mapping.virt_base,
431        len: mapping.len,
432        update_parent: Op::TableMovability::root_update_parent(),
433    })
434    .map(|r| unsafe { alloc_pte_if_needed(op, r) })
435    .flat_map(modify_ptes::<38, 30, Op, _>)
436    .map(|r| unsafe { alloc_pte_if_needed(op, r) })
437    .flat_map(modify_ptes::<29, 21, Op, _>)
438    .map(|r| unsafe { alloc_pte_if_needed(op, r) })
439    .flat_map(modify_ptes::<20, 12, Op, _>)
440    .map(|r| unsafe { map_page(op, &mapping, r) })
441    .for_each(drop);
442}
443
444// There are no notable architecture-specific safety considerations
445// here, and the general conditions are documented in the
446// architecture-independent re-export in vmem.rs
447
448/// Translates a virtual address range to the physical address pages
449/// that back it by walking the page tables.
450///
451/// Returns an iterator with an entry for each mapped page that
452/// intersects the given range.
453///
454/// This takes AsRef<Op> + Copy so that on targets where the
455/// operations have little state (e.g. the guest) the operations state
456/// can be copied into the closure(s) in the iterator, allowing for a
457/// nicer result lifetime.  On targets like the
458/// building-an-original-snapshot portion of the host, where the
459/// operations structure owns a large buffer, a reference can instead
460/// be passed.
461#[allow(clippy::missing_safety_doc)]
462pub unsafe fn virt_to_phys<'a, Op: TableReadOps + 'a>(
463    op: impl core::convert::AsRef<Op> + Copy + 'a,
464    address: u64,
465    len: u64,
466) -> impl Iterator<Item = Mapping> + 'a {
467    // Undo sign-extension
468    let addr = address & ((1u64 << VA_BITS) - 1);
469    // Mask off any sub-page bits
470    let vmin = addr & !(PAGE_SIZE as u64 - 1);
471    // Calculate the maximum virtual address we need to look at based on the starting
472    // address and length ensuring we don't go past the end of the address space
473    let vmax = core::cmp::min(addr + len, 1u64 << VA_BITS);
474    modify_ptes::<47, 39, Op, _>(MapRequest {
475        table_base: op.as_ref().root_table(),
476        vmin,
477        len: vmax - vmin,
478        update_parent: UpdateParentNone {},
479    })
480    .filter_map(move |r| unsafe { require_pte_exist(op.as_ref(), r) })
481    .flat_map(modify_ptes::<38, 30, Op, _>)
482    .filter_map(move |r| unsafe { require_pte_exist(op.as_ref(), r) })
483    .flat_map(modify_ptes::<29, 21, Op, _>)
484    .filter_map(move |r| unsafe { require_pte_exist(op.as_ref(), r) })
485    .flat_map(modify_ptes::<20, 12, Op, _>)
486    .filter_map(move |r| {
487        let pte = unsafe { read_pte_if_present(op.as_ref(), r.entry_ptr) }?;
488        let phys_addr = pte & PTE_ADDR_MASK;
489        // Re-do the sign extension
490        let sgn_bit = r.vmin >> (VA_BITS - 1);
491        let sgn_bits = 0u64.wrapping_sub(sgn_bit) << VA_BITS;
492        let virt_addr = sgn_bits | r.vmin;
493
494        let executable = (pte & PAGE_NX) == 0;
495        let avl = pte & PTE_AVL_MASK;
496        let kind = if avl == PAGE_AVL_COW {
497            MappingKind::Cow(CowMapping {
498                readable: true,
499                executable,
500            })
501        } else {
502            MappingKind::Basic(BasicMapping {
503                readable: true,
504                writable: (pte & PAGE_RW) != 0,
505                executable,
506            })
507        };
508        Some(Mapping {
509            phys_base: phys_addr,
510            virt_base: virt_addr,
511            len: PAGE_SIZE as u64,
512            kind,
513            user_accessible: false,
514        })
515    })
516}
517
const VA_BITS: usize = 48; // We use 48-bit virtual addresses at the moment.

/// Size in bytes of one 4KB page.
pub const PAGE_SIZE: usize = 4096;
/// Size in bytes of one page table (one page holding 8-byte entries).
pub const PAGE_TABLE_SIZE: usize = 4096;
pub type PageTableEntry = u64;
pub type VirtAddr = u64;
pub type PhysAddr = u64;
525
526#[cfg(test)]
527mod tests {
528    use alloc::vec;
529    use alloc::vec::Vec;
530    use core::cell::RefCell;
531
532    use super::*;
533    use crate::vmem::{
534        BasicMapping, Mapping, MappingKind, MayNotMoveTable, PAGE_TABLE_ENTRIES_PER_TABLE,
535        TableOps, TableReadOps, Void, bits,
536    };
537
    /// A mock TableOps implementation for testing that stores page tables in memory
    /// needed because the `GuestPageTableBuffer` is in hyperlight_host which would cause a circular dependency
    struct MockTableOps {
        // Backing store: one fixed-size entry array per allocated table;
        // index 0 is the root (PML4). RefCell provides the interior
        // mutability the &self TableOps methods need.
        tables: RefCell<Vec<[u64; PAGE_TABLE_ENTRIES_PER_TABLE]>>,
    }
543
    // for virt_to_phys, which takes `impl AsRef<Op> + Copy`; a shared
    // reference is Copy, so `&MockTableOps` then satisfies the bound.
    impl core::convert::AsRef<MockTableOps> for MockTableOps {
        fn as_ref(&self) -> &Self {
            self
        }
    }
550
    impl MockTableOps {
        /// Create a mock with just the root (PML4) table allocated.
        fn new() -> Self {
            // Start with one table (the root/PML4)
            Self {
                tables: RefCell::new(vec![[0u64; PAGE_TABLE_ENTRIES_PER_TABLE]]),
            }
        }

        /// Number of tables allocated so far (including the root).
        fn table_count(&self) -> usize {
            self.tables.borrow().len()
        }

        /// Read entry `entry_idx` of table `table_idx` directly,
        /// bypassing the TableReadOps interface (test inspection only).
        fn get_entry(&self, table_idx: usize, entry_idx: usize) -> u64 {
            self.tables.borrow()[table_idx][entry_idx]
        }
    }
567
568    impl TableReadOps for MockTableOps {
569        type TableAddr = (usize, usize); // (table_index, entry_index)
570
571        fn entry_addr(addr: Self::TableAddr, entry_offset: u64) -> Self::TableAddr {
572            // Convert to physical address, add offset, convert back
573            let phys = Self::to_phys(addr) + entry_offset;
574            Self::from_phys(phys)
575        }
576
577        unsafe fn read_entry(&self, addr: Self::TableAddr) -> u64 {
578            self.tables.borrow()[addr.0][addr.1]
579        }
580
581        fn to_phys(addr: Self::TableAddr) -> PhysAddr {
582            // Each table is 4KB, entries are 8 bytes
583            (addr.0 as u64 * PAGE_TABLE_SIZE as u64) + (addr.1 as u64 * 8)
584        }
585
586        fn from_phys(addr: PhysAddr) -> Self::TableAddr {
587            let table_idx = (addr / PAGE_TABLE_SIZE as u64) as usize;
588            let entry_idx = ((addr % PAGE_TABLE_SIZE as u64) / 8) as usize;
589            (table_idx, entry_idx)
590        }
591
592        fn root_table(&self) -> Self::TableAddr {
593            (0, 0)
594        }
595    }
596
    impl TableOps for MockTableOps {
        // Tables never move in this mock, so no parent-update plumbing is
        // needed and `update_root` can never actually be invoked.
        type TableMovability = MayNotMoveTable;

        unsafe fn alloc_table(&self) -> Self::TableAddr {
            // Append a fresh zeroed table; its index is its "address".
            let mut tables = self.tables.borrow_mut();
            let idx = tables.len();
            tables.push([0u64; PAGE_TABLE_ENTRIES_PER_TABLE]);
            (idx, 0)
        }

        unsafe fn write_entry(&self, addr: Self::TableAddr, entry: u64) -> Option<Void> {
            self.tables.borrow_mut()[addr.0][addr.1] = entry;
            // Tables never move, so there is never move info to return.
            None
        }

        unsafe fn update_root(&self, impossible: Void) {
            // Void is uninhabited; this empty match proves statically that
            // the call can never happen.
            match impossible {}
        }
    }
616
    // ==================== bits() function tests ====================
    // bits::<HI, LO>(addr) extracts the table index encoded in address
    // bits HI:LO for the corresponding paging level.

    #[test]
    fn test_bits_extracts_pml4_index() {
        // PML4 uses bits 47:39
        // Address 0x0000_0080_0000_0000 should have PML4 index 1
        let addr: u64 = 0x0000_0080_0000_0000;
        assert_eq!(bits::<47, 39>(addr), 1);
    }

    #[test]
    fn test_bits_extracts_pdpt_index() {
        // PDPT uses bits 38:30
        // Address with PDPT index 1: bit 30 set = 0x4000_0000 (1GB)
        let addr: u64 = 0x4000_0000;
        assert_eq!(bits::<38, 30>(addr), 1);
    }

    #[test]
    fn test_bits_extracts_pd_index() {
        // PD uses bits 29:21
        // Address 0x0000_0000_0020_0000 (2MB) should have PD index 1
        let addr: u64 = 0x0000_0000_0020_0000;
        assert_eq!(bits::<29, 21>(addr), 1);
    }

    #[test]
    fn test_bits_extracts_pt_index() {
        // PT uses bits 20:12
        // Address 0x0000_0000_0000_1000 (4KB) should have PT index 1
        let addr: u64 = 0x0000_0000_0000_1000;
        assert_eq!(bits::<20, 12>(addr), 1);
    }

    #[test]
    fn test_bits_max_index() {
        // Maximum 9-bit index is 511
        // PML4 index 511 = bits 47:39 all set = 0x0000_FF80_0000_0000
        let addr: u64 = 0x0000_FF80_0000_0000;
        assert_eq!(bits::<47, 39>(addr), 511);
    }
658
    // ==================== PTE flag tests ====================

    #[test]
    fn test_page_rw_flag_writable() {
        // Writable pages get the R/W bit (bit 1).
        assert_eq!(page_rw_flag(true), PAGE_RW);
    }

    #[test]
    fn test_page_rw_flag_readonly() {
        // Read-only pages contribute no flag bits.
        assert_eq!(page_rw_flag(false), 0);
    }

    #[test]
    fn test_page_nx_flag_executable() {
        assert_eq!(page_nx_flag(true), 0); // Executable = no NX bit
    }

    #[test]
    fn test_page_nx_flag_not_executable() {
        // Non-executable pages get the NX bit (bit 63).
        assert_eq!(page_nx_flag(false), PAGE_NX);
    }
680
    // ==================== map() function tests ====================

    #[test]
    fn test_map_single_page() {
        let ops = MockTableOps::new();
        // One writable, non-executable page identity-mapped at 0x1000.
        let mapping = Mapping {
            phys_base: 0x1000,
            virt_base: 0x1000,
            len: PAGE_SIZE as u64,
            kind: MappingKind::Basic(BasicMapping {
                readable: true,
                writable: true,
                executable: false,
            }),
            user_accessible: false,
        };

        unsafe { map(&ops, mapping) };

        // Should have allocated: PML4(exists) + PDPT + PD + PT = 4 tables
        assert_eq!(ops.table_count(), 4);

        // Check PML4 entry 0 points to PDPT (table 1) with correct flags
        let pml4_entry = ops.get_entry(0, 0);
        assert_ne!(pml4_entry & PAGE_PRESENT, 0, "PML4 entry should be present");
        assert_ne!(pml4_entry & PAGE_RW, 0, "PML4 entry should be writable");

        // Check the leaf PTE has correct flags
        // PT is table 3 (allocation order PDPT=1, PD=2, PT=3), entry 1
        // because virt 0x1000 has PT index 1 (bits 20:12).
        let pte = ops.get_entry(3, 1);
        assert_ne!(pte & PAGE_PRESENT, 0, "PTE should be present");
        assert_ne!(pte & PAGE_RW, 0, "PTE should be writable");
        assert_ne!(pte & PAGE_NX, 0, "PTE should have NX set (not executable)");
        assert_eq!(pte & PTE_ADDR_MASK, 0x1000, "PTE should map to phys 0x1000");
    }
716
    #[test]
    fn test_map_executable_page() {
        let ops = MockTableOps::new();
        // One read-only, executable page identity-mapped at 0x2000.
        let mapping = Mapping {
            phys_base: 0x2000,
            virt_base: 0x2000,
            len: PAGE_SIZE as u64,
            kind: MappingKind::Basic(BasicMapping {
                readable: true,
                writable: false,
                executable: true,
            }),
            user_accessible: false,
        };

        unsafe { map(&ops, mapping) };

        // PT is table 3, entry 2 (virt 0x2000 has PT index 2)
        let pte = ops.get_entry(3, 2);
        assert_ne!(pte & PAGE_PRESENT, 0, "PTE should be present");
        assert_eq!(pte & PAGE_RW, 0, "PTE should be read-only");
        assert_eq!(pte & PAGE_NX, 0, "PTE should NOT have NX set (executable)");
    }
740
    #[test]
    fn test_map_multiple_pages() {
        let ops = MockTableOps::new();
        // A 4-page identity mapping starting at 0x10000; map() should
        // emit one leaf PTE per page with consecutive physical frames.
        let mapping = Mapping {
            phys_base: 0x10000,
            virt_base: 0x10000,
            len: 4 * PAGE_SIZE as u64, // 4 pages = 16KB
            kind: MappingKind::Basic(BasicMapping {
                readable: true,
                writable: true,
                executable: false,
            }),
            user_accessible: false,
        };

        unsafe { map(&ops, mapping) };

        // Check all 4 PTEs are present
        for i in 0..4 {
            let entry_idx = 16 + i; // 0x10000 / 0x1000 = 16
            let pte = ops.get_entry(3, entry_idx);
            assert_ne!(pte & PAGE_PRESENT, 0, "PTE {} should be present", i);
            let expected_phys = 0x10000 + (i as u64 * PAGE_SIZE as u64);
            assert_eq!(
                pte & PTE_ADDR_MASK,
                expected_phys,
                "PTE {} should map to correct phys addr",
                i
            );
        }
    }
772
    #[test]
    fn test_map_reuses_existing_tables() {
        let ops = MockTableOps::new();

        // Map first region
        let mapping1 = Mapping {
            phys_base: 0x1000,
            virt_base: 0x1000,
            len: PAGE_SIZE as u64,
            kind: MappingKind::Basic(BasicMapping {
                readable: true,
                writable: true,
                executable: false,
            }),
            user_accessible: false,
        };
        unsafe { map(&ops, mapping1) };
        let tables_after_first = ops.table_count();

        // Map second region in same PT (different page). Both 0x1000 and
        // 0x5000 share the same PML4/PDPT/PD/PT indices above the leaf.
        let mapping2 = Mapping {
            phys_base: 0x5000,
            virt_base: 0x5000,
            len: PAGE_SIZE as u64,
            kind: MappingKind::Basic(BasicMapping {
                readable: true,
                writable: true,
                executable: false,
            }),
            user_accessible: false,
        };
        unsafe { map(&ops, mapping2) };

        // Should NOT allocate new tables (reuses existing hierarchy)
        assert_eq!(
            ops.table_count(),
            tables_after_first,
            "Should reuse existing page tables"
        );
    }
813
    // ==================== virt_to_phys() tests ====================

    #[test]
    fn test_virt_to_phys_mapped_address() {
        let ops = MockTableOps::new();
        let mapping = Mapping {
            phys_base: 0x1000,
            virt_base: 0x1000,
            len: PAGE_SIZE as u64,
            kind: MappingKind::Basic(BasicMapping {
                readable: true,
                writable: true,
                executable: false,
            }),
            user_accessible: false,
        };

        unsafe { map(&ops, mapping) };

        // Walk back the mapping that was just installed.
        let result = unsafe { virt_to_phys(&ops, 0x1000, 1).next() };
        assert!(result.is_some(), "Should find mapped address");
        let mapping = result.unwrap();
        assert_eq!(mapping.phys_base, 0x1000);
    }
838
839    #[test]
840    fn test_virt_to_phys_unaligned_virt() {
841        let ops = MockTableOps::new();
842        let mapping = Mapping {
843            phys_base: 0x1000,
844            virt_base: 0x1000,
845            len: PAGE_SIZE as u64,
846            kind: MappingKind::Basic(BasicMapping {
847                readable: true,
848                writable: true,
849                executable: false,
850            }),
851            user_accessible: false,
852        };
853
854        unsafe { map(&ops, mapping) };
855
856        let result = unsafe { virt_to_phys(&ops, 0x1234, 1).next() };
857        assert!(result.is_some(), "Should find mapped address");
858        let mapping = result.unwrap();
859        assert_eq!(mapping.phys_base, 0x1000);
860    }
861
862    #[test]
863    fn test_virt_to_phys_unaligned_virt_and_across_pages_len() {
864        let ops = MockTableOps::new();
865        let mapping = Mapping {
866            phys_base: 0x1000,
867            virt_base: 0x1000,
868            len: 2 * PAGE_SIZE as u64, // 2 page
869            kind: MappingKind::Basic(BasicMapping {
870                readable: true,
871                writable: true,
872                executable: false,
873            }),
874            user_accessible: false,
875        };
876
877        unsafe { map(&ops, mapping) };
878
879        let mappings = unsafe { virt_to_phys(&ops, 0x1F00, 0x300).collect::<Vec<_>>() };
880        assert_eq!(mappings.len(), 2, "Should return 2 mappings for 2 pages");
881        assert_eq!(mappings[0].phys_base, 0x1000);
882        assert_eq!(mappings[1].phys_base, 0x2000);
883    }
884
885    #[test]
886    fn test_virt_to_phys_unaligned_virt_and_multiple_page_len() {
887        let ops = MockTableOps::new();
888        let mapping = Mapping {
889            phys_base: 0x1000,
890            virt_base: 0x1000,
891            len: PAGE_SIZE as u64 * 2 + 0x200, // 2 page + 512 bytes
892            kind: MappingKind::Basic(BasicMapping {
893                readable: true,
894                writable: true,
895                executable: false,
896            }),
897            user_accessible: false,
898        };
899
900        unsafe { map(&ops, mapping) };
901
902        let mappings =
903            unsafe { virt_to_phys(&ops, 0x1234, PAGE_SIZE as u64 * 2 + 0x10).collect::<Vec<_>>() };
904        assert_eq!(mappings.len(), 3, "Should return 3 mappings for 3 pages");
905        assert_eq!(mappings[0].phys_base, 0x1000);
906        assert_eq!(mappings[1].phys_base, 0x2000);
907        assert_eq!(mappings[2].phys_base, 0x3000);
908    }
909
910    #[test]
911    fn test_virt_to_phys_perms() {
912        let test = |kind| {
913            let ops = MockTableOps::new();
914            let mapping = Mapping {
915                phys_base: 0x1000,
916                virt_base: 0x1000,
917                len: PAGE_SIZE as u64,
918                kind,
919                user_accessible: false,
920            };
921            unsafe { map(&ops, mapping) };
922            let result = unsafe { virt_to_phys(&ops, 0x1000, 1).next() };
923            let mapping = result.unwrap();
924            assert_eq!(mapping.kind, kind);
925        };
926        test(MappingKind::Basic(BasicMapping {
927            readable: true,
928            writable: false,
929            executable: false,
930        }));
931        test(MappingKind::Basic(BasicMapping {
932            readable: true,
933            writable: false,
934            executable: true,
935        }));
936        test(MappingKind::Basic(BasicMapping {
937            readable: true,
938            writable: true,
939            executable: false,
940        }));
941        test(MappingKind::Basic(BasicMapping {
942            readable: true,
943            writable: true,
944            executable: true,
945        }));
946        test(MappingKind::Cow(CowMapping {
947            readable: true,
948            executable: false,
949        }));
950        test(MappingKind::Cow(CowMapping {
951            readable: true,
952            executable: true,
953        }));
954    }
955
956    #[test]
957    fn test_virt_to_phys_unmapped_address() {
958        let ops = MockTableOps::new();
959        // Don't map anything
960
961        let result = unsafe { virt_to_phys(&ops, 0x1000, 1).next() };
962        assert!(result.is_none(), "Should return None for unmapped address");
963    }
964
965    #[test]
966    fn test_virt_to_phys_partially_mapped() {
967        let ops = MockTableOps::new();
968        let mapping = Mapping {
969            phys_base: 0x1000,
970            virt_base: 0x1000,
971            len: PAGE_SIZE as u64,
972            kind: MappingKind::Basic(BasicMapping {
973                readable: true,
974                writable: true,
975                executable: false,
976            }),
977            user_accessible: false,
978        };
979
980        unsafe { map(&ops, mapping) };
981
982        // Query an address in a different PT entry (unmapped)
983        let result = unsafe { virt_to_phys(&ops, 0x5000, 1).next() };
984        assert!(
985            result.is_none(),
986            "Should return None for unmapped address in same PT"
987        );
988    }
989
    // ==================== ModifyPteIterator tests ====================
991
992    #[test]
993    fn test_modify_pte_iterator_single_page() {
994        let ops = MockTableOps::new();
995        let request = MapRequest {
996            table_base: ops.root_table(),
997            vmin: 0x1000,
998            len: PAGE_SIZE as u64,
999            update_parent: UpdateParentNone {},
1000        };
1001
1002        let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps, _>(request).collect();
1003        assert_eq!(responses.len(), 1, "Single page should yield one response");
1004        assert_eq!(responses[0].vmin, 0x1000);
1005        assert_eq!(responses[0].len, PAGE_SIZE as u64);
1006    }
1007
1008    #[test]
1009    fn test_modify_pte_iterator_multiple_pages() {
1010        let ops = MockTableOps::new();
1011        let request = MapRequest {
1012            table_base: ops.root_table(),
1013            vmin: 0x1000,
1014            len: 3 * PAGE_SIZE as u64,
1015            update_parent: UpdateParentNone {},
1016        };
1017
1018        let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps, _>(request).collect();
1019        assert_eq!(responses.len(), 3, "3 pages should yield 3 responses");
1020    }
1021
1022    #[test]
1023    fn test_modify_pte_iterator_zero_length() {
1024        let ops = MockTableOps::new();
1025        let request = MapRequest {
1026            table_base: ops.root_table(),
1027            vmin: 0x1000,
1028            len: 0,
1029            update_parent: UpdateParentNone {},
1030        };
1031
1032        let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps, _>(request).collect();
1033        assert_eq!(responses.len(), 0, "Zero length should yield no responses");
1034    }
1035
1036    #[test]
1037    fn test_modify_pte_iterator_unaligned_start() {
1038        let ops = MockTableOps::new();
1039        // Start at 0x1800 (mid-page), map 0x1000 bytes
1040        // Should cover 0x1800-0x1FFF (first page) and 0x2000-0x27FF (second page)
1041        let request = MapRequest {
1042            table_base: ops.root_table(),
1043            vmin: 0x1800,
1044            len: 0x1000,
1045            update_parent: UpdateParentNone {},
1046        };
1047
1048        let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps, _>(request).collect();
1049        assert_eq!(
1050            responses.len(),
1051            2,
1052            "Unaligned mapping spanning 2 pages should yield 2 responses"
1053        );
1054        assert_eq!(responses[0].vmin, 0x1800);
1055        assert_eq!(responses[0].len, 0x800); // Remaining in first page
1056        assert_eq!(responses[1].vmin, 0x2000);
1057        assert_eq!(responses[1].len, 0x800); // Continuing in second page
1058    }
1059
    // ==================== TableOps entry_addr tests ====================
1061
1062    #[test]
1063    fn test_entry_addr_from_table_base() {
1064        // entry_addr is called with a table base (entry_index = 0) and a byte offset
1065        // offset = entry_index * 8, so offset 40 means entry 5
1066        let result = MockTableOps::entry_addr((2, 0), 40);
1067        assert_eq!(result, (2, 5), "Should return (table 2, entry 5)");
1068    }
1069
1070    #[test]
1071    fn test_entry_addr_with_nonzero_base_entry() {
1072        // Even though entry_addr is typically called with entry_index=0,
1073        // it should handle non-zero base correctly by adding the offset
1074        // Base: table 1, entry 10 (phys = 1*4096 + 10*8 = 4176)
1075        // Offset: 16 bytes (2 entries)
1076        // Result phys: 4176 + 16 = 4192 = 1*4096 + 12*8 → (1, 12)
1077        let result = MockTableOps::entry_addr((1, 10), 16);
1078        assert_eq!(result, (1, 12), "Should add offset to base entry");
1079    }
1080
1081    #[test]
1082    fn test_to_phys_from_phys_roundtrip() {
1083        // Verify to_phys and from_phys are inverses
1084        let addr = (3, 42);
1085        let phys = MockTableOps::to_phys(addr);
1086        let back = MockTableOps::from_phys(phys);
1087        assert_eq!(back, addr, "to_phys/from_phys should roundtrip");
1088    }
1089}