Skip to main content

hyperlight_host/mem/
mgr.rs

1/*
2Copyright 2025  The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15 */
16#[cfg(feature = "nanvix-unstable")]
17use std::mem::offset_of;
18
19use flatbuffers::FlatBufferBuilder;
20use hyperlight_common::flatbuffer_wrappers::function_call::{
21    FunctionCall, validate_guest_function_call_buffer,
22};
23use hyperlight_common::flatbuffer_wrappers::function_types::FunctionCallResult;
24use hyperlight_common::flatbuffer_wrappers::guest_log_data::GuestLogData;
25use hyperlight_common::vmem::{self, PAGE_TABLE_SIZE};
26#[cfg(all(feature = "crashdump", not(feature = "i686-guest")))]
27use hyperlight_common::vmem::{BasicMapping, MappingKind};
28use tracing::{Span, instrument};
29
30use super::layout::SandboxMemoryLayout;
31use super::shared_mem::{
32    ExclusiveSharedMemory, GuestSharedMemory, HostSharedMemory, ReadonlySharedMemory, SharedMemory,
33};
34use crate::hypervisor::regs::CommonSpecialRegisters;
35use crate::mem::memory_region::MemoryRegion;
36#[cfg(crashdump)]
37use crate::mem::memory_region::{CrashDumpRegion, MemoryRegionFlags, MemoryRegionType};
38use crate::sandbox::snapshot::{NextAction, Snapshot};
39use crate::{Result, new_error};
40
/// Translate a guest page-table `MappingKind` into the host-side
/// `(flags, region type)` pair used when describing crash-dump regions.
///
/// Basic mappings carry their R/W/X permissions directly and are reported
/// as snapshot-backed; CoW mappings are reported as scratch-backed and
/// only ever contribute READ/EXECUTE here; unmapped ranges get empty flags.
#[cfg(all(feature = "crashdump", not(feature = "i686-guest")))]
fn mapping_kind_to_flags(kind: &MappingKind) -> (MemoryRegionFlags, MemoryRegionType) {
    // Fold a list of (flag, enabled) pairs into a single flags value.
    fn fold_flags(perms: &[(MemoryRegionFlags, bool)]) -> MemoryRegionFlags {
        perms
            .iter()
            .filter(|(_, enabled)| *enabled)
            .fold(MemoryRegionFlags::empty(), |acc, (flag, _)| acc | *flag)
    }

    match kind {
        MappingKind::Basic(BasicMapping {
            readable,
            writable,
            executable,
        }) => (
            fold_flags(&[
                (MemoryRegionFlags::READ, *readable),
                (MemoryRegionFlags::WRITE, *writable),
                (MemoryRegionFlags::EXECUTE, *executable),
            ]),
            MemoryRegionType::Snapshot,
        ),
        MappingKind::Cow(cow) => (
            fold_flags(&[
                (MemoryRegionFlags::READ, cow.readable),
                (MemoryRegionFlags::EXECUTE, cow.executable),
            ]),
            MemoryRegionType::Scratch,
        ),
        MappingKind::Unmapped => (MemoryRegionFlags::empty(), MemoryRegionType::Snapshot),
    }
}
74
/// Try to extend the last region in `regions` if the new page is contiguous
/// in both guest and host address space and has the same flags.
///
/// Returns `true` if the region was coalesced, `false` if a new region is needed.
// NOTE(review): `region_type` is not part of the coalescing check, so two
// adjacent regions with identical flags but different types would merge,
// keeping the first region's type — confirm callers cannot produce that.
#[cfg(all(feature = "crashdump", not(feature = "i686-guest")))]
fn try_coalesce_region(
    regions: &mut [CrashDumpRegion],
    virt_base: usize,
    virt_end: usize,
    host_base: usize,
    flags: MemoryRegionFlags,
) -> bool {
    // Nothing to extend if there is no previous region.
    let Some(last) = regions.last_mut() else {
        return false;
    };
    // Contiguity must hold in guest *and* host space, with matching flags.
    let contiguous = last.guest_region.end == virt_base && last.host_region.end == host_base;
    if !contiguous || last.flags != flags {
        return false;
    }
    let len = virt_end - virt_base;
    last.guest_region.end = virt_end;
    last.host_region.end = host_base + len;
    true
}
98
// It would be nice to have a simple type alias
// `SnapshotSharedMemory<S: SharedMemory>` that abstracts over the
// fact that the snapshot shared memory is `ReadonlySharedMemory`
// normally, but there is (temporary) support for writable
// `GuestSharedMemory` with `#[cfg(feature =
// "i686-guest")]`. Unfortunately, rustc gets annoyed about an
// unused type parameter, unless one goes to a little bit of effort to
// trick it...
mod unused_hack {
    #[cfg(not(unshared_snapshot_mem))]
    use crate::mem::shared_mem::ReadonlySharedMemory;
    use crate::mem::shared_mem::SharedMemory;
    /// Trait whose only job is to host the GAT `T<S>`, so the `S`
    /// parameter is always "used" even when the chosen alias ignores it.
    pub trait SnapshotSharedMemoryT {
        type T<S: SharedMemory>;
    }
    /// Zero-sized selector type implementing the trait above.
    pub struct SnapshotSharedMemory_;
    impl SnapshotSharedMemoryT for SnapshotSharedMemory_ {
        // Default: snapshot memory is read-only, regardless of `S`.
        #[cfg(not(unshared_snapshot_mem))]
        type T<S: SharedMemory> = ReadonlySharedMemory;
        // With `unshared_snapshot_mem`, the snapshot uses the same
        // (writable) shared-memory type as the sandbox itself.
        #[cfg(unshared_snapshot_mem)]
        type T<S: SharedMemory> = S;
    }
    /// Public alias resolving to `ReadonlySharedMemory` or `S` per the cfg.
    pub type SnapshotSharedMemory<S> = <SnapshotSharedMemory_ as SnapshotSharedMemoryT>::T<S>;
}
impl ReadonlySharedMemory {
    /// Convert this snapshot memory into the form the memory manager
    /// stores: a clone of the read-only mapping by default, or a fresh
    /// writable copy when `unshared_snapshot_mem` is enabled.
    pub(crate) fn to_mgr_snapshot_mem(
        &self,
    ) -> Result<SnapshotSharedMemory<ExclusiveSharedMemory>> {
        #[cfg(not(unshared_snapshot_mem))]
        let ret = self.clone();
        #[cfg(unshared_snapshot_mem)]
        let ret = self.copy_to_writable()?;
        Ok(ret)
    }
}
134pub(crate) use unused_hack::SnapshotSharedMemory;
/// A struct that is responsible for laying out and managing the memory
/// for a given `Sandbox`.
#[derive(Clone)]
pub(crate) struct SandboxMemoryManager<S: SharedMemory> {
    /// Shared (snapshot-backed) memory for the Sandbox
    pub(crate) shared_mem: SnapshotSharedMemory<S>,
    /// Scratch memory for the Sandbox
    pub(crate) scratch_mem: S,
    /// The memory layout of the underlying shared memory
    pub(crate) layout: SandboxMemoryLayout,
    /// Entrypoint state for the sandbox.
    /// NOTE(review): the field is a `NextAction`, not an offset — the
    /// previous "offset from `load_addr`" description appears stale;
    /// confirm against `NextAction`'s definition.
    pub(crate) entrypoint: NextAction,
    /// How many memory regions were mapped after sandbox creation
    pub(crate) mapped_rgns: u64,
    /// Buffer for accumulating guest abort messages
    pub(crate) abort_buffer: Vec<u8>,
    /// Generation counter: how many snapshots have been taken from
    /// this sandbox's execution path from init to here. Incremented
    /// on each `snapshot` call; on `restore_snapshot` we inherit the
    /// restored snapshot's own generation number so the guest-visible
    /// counter tracks which snapshot the sandbox is a clone of.
    pub(crate) snapshot_count: u64,
}
158
/// Buffer for building guest page tables during snapshot creation.
/// `TableAddr` is an absolute GPA (u64) so the same address space is
/// used regardless of entry size.
pub(crate) struct GuestPageTableBuffer {
    /// Raw bytes of every table allocated so far; grown in
    /// `PAGE_TABLE_SIZE` increments by `alloc_table`.
    buffer: std::cell::RefCell<Vec<u8>>,
    /// GPA corresponding to offset 0 of `buffer`.
    phys_base: usize,
    /// Absolute GPA of the currently-active root table. For
    /// multi-root guests, `set_root` switches which root subsequent
    /// `vmem::map` / `vmem::space_aware_map` calls target — typically
    /// to an address previously returned by `alloc_table`.
    root: std::cell::Cell<u64>,
}
171
172impl vmem::TableReadOps for GuestPageTableBuffer {
173    type TableAddr = u64;
174
175    fn entry_addr(addr: u64, offset: u64) -> u64 {
176        addr + offset
177    }
178
179    unsafe fn read_entry(&self, addr: u64) -> vmem::PageTableEntry {
180        let buffer = self.buffer.borrow();
181        let byte_offset = addr as usize - self.phys_base;
182        let pte_size = core::mem::size_of::<vmem::PageTableEntry>();
183        let Some(bytes) = buffer.get(byte_offset..byte_offset + pte_size) else {
184            return 0;
185        };
186        let mut buf = [0u8; 8];
187        buf[..pte_size].copy_from_slice(bytes);
188        vmem::PageTableEntry::from_le_bytes(buf[..pte_size].try_into().unwrap_or_default())
189    }
190
191    fn to_phys(addr: u64) -> vmem::PhysAddr {
192        addr as vmem::PhysAddr
193    }
194
195    fn from_phys(addr: vmem::PhysAddr) -> u64 {
196        #[allow(clippy::unnecessary_cast)]
197        {
198            addr as u64
199        }
200    }
201
202    fn root_table(&self) -> u64 {
203        self.root.get()
204    }
205}
206
207impl vmem::TableOps for GuestPageTableBuffer {
208    type TableMovability = vmem::MayNotMoveTable;
209
210    unsafe fn alloc_table(&self) -> u64 {
211        let mut b = self.buffer.borrow_mut();
212        let offset = b.len();
213        b.resize(offset + PAGE_TABLE_SIZE, 0);
214        (self.phys_base + offset) as u64
215    }
216
217    unsafe fn write_entry(&self, addr: u64, entry: vmem::PageTableEntry) -> Option<vmem::Void> {
218        let mut b = self.buffer.borrow_mut();
219        let byte_offset = addr as usize - self.phys_base;
220        let pte_size = core::mem::size_of::<vmem::PageTableEntry>();
221        if let Some(slice) = b.get_mut(byte_offset..byte_offset + pte_size) {
222            slice.copy_from_slice(&entry.to_le_bytes()[..pte_size]);
223        }
224        None
225    }
226
227    unsafe fn update_root(&self, impossible: vmem::Void) {
228        match impossible {}
229    }
230}
231
// Identity `AsRef` so a `GuestPageTableBuffer` can be passed where an
// `impl AsRef<GuestPageTableBuffer>` is expected — presumably by the
// vmem walker APIs; confirm against their signatures.
impl core::convert::AsRef<GuestPageTableBuffer> for GuestPageTableBuffer {
    fn as_ref(&self) -> &Self {
        self
    }
}
237
238impl GuestPageTableBuffer {
239    /// Create a new buffer with an initial zeroed root table at
240    /// `phys_base`. The returned buffer's current root is `phys_base`;
241    /// additional roots can be obtained by calling `alloc_table`.
242    pub(crate) fn new(phys_base: usize) -> Self {
243        GuestPageTableBuffer {
244            buffer: std::cell::RefCell::new(vec![0u8; PAGE_TABLE_SIZE]),
245            phys_base,
246            root: std::cell::Cell::new(phys_base as u64),
247        }
248    }
249
250    /// Switch the active root. `addr` must have been obtained either
251    /// as the initial root GPA (`phys_base`) or via `alloc_table`.
252    pub(crate) fn set_root(&self, addr: u64) {
253        self.root.set(addr);
254    }
255
256    /// GPA of the initial root allocated by `new`.
257    pub(crate) fn initial_root(&self) -> u64 {
258        self.phys_base as u64
259    }
260
261    #[cfg(test)]
262    #[allow(dead_code)]
263    pub(crate) fn size(&self) -> usize {
264        self.buffer.borrow().len()
265    }
266
267    pub(crate) fn into_bytes(self) -> Box<[u8]> {
268        self.buffer.into_inner().into_boxed_slice()
269    }
270}
271
272impl<S> SandboxMemoryManager<S>
273where
274    S: SharedMemory,
275{
276    /// Create a new `SandboxMemoryManager` with the given parameters
277    #[instrument(skip_all, parent = Span::current(), level= "Trace")]
278    pub(crate) fn new(
279        layout: SandboxMemoryLayout,
280        shared_mem: SnapshotSharedMemory<S>,
281        scratch_mem: S,
282        entrypoint: NextAction,
283    ) -> Self {
284        Self {
285            layout,
286            shared_mem,
287            scratch_mem,
288            entrypoint,
289            mapped_rgns: 0,
290            abort_buffer: Vec::new(),
291            snapshot_count: 0,
292        }
293    }
294
295    /// Get mutable access to the abort buffer
296    pub(crate) fn get_abort_buffer_mut(&mut self) -> &mut Vec<u8> {
297        &mut self.abort_buffer
298    }
299
300    /// Create a snapshot with the given mapped regions
301    pub(crate) fn snapshot(
302        &mut self,
303        sandbox_id: u64,
304        mapped_regions: Vec<MemoryRegion>,
305        root_pt_gpas: &[u64],
306        rsp_gva: u64,
307        sregs: CommonSpecialRegisters,
308        entrypoint: NextAction,
309    ) -> Result<Snapshot> {
310        self.snapshot_count += 1;
311        Snapshot::new(
312            &mut self.shared_mem,
313            &mut self.scratch_mem,
314            sandbox_id,
315            self.layout,
316            crate::mem::exe::LoadInfo::dummy(),
317            mapped_regions,
318            root_pt_gpas,
319            rsp_gva,
320            sregs,
321            entrypoint,
322            self.snapshot_count,
323        )
324    }
325}
326
327impl SandboxMemoryManager<ExclusiveSharedMemory> {
328    pub(crate) fn from_snapshot(s: &Snapshot) -> Result<Self> {
329        let layout = *s.layout();
330        let shared_mem = s.memory().to_mgr_snapshot_mem()?;
331        let scratch_mem = ExclusiveSharedMemory::new(s.layout().get_scratch_size())?;
332        let entrypoint = s.entrypoint();
333        Ok(Self::new(layout, shared_mem, scratch_mem, entrypoint))
334    }
335
336    /// Wraps ExclusiveSharedMemory::build
337    // Morally, this should not have to be a Result: this operation is
338    // infallible. The source of the Result is
339    // update_scratch_bookkeeping(), which calls functions that can
340    // fail due to bounds checks (which are statically known to be ok
341    // in this situation) or due to failing to take the scratch shared
342    // memory lock, but the scratch shared memory is built in this
343    // function, its lock does not escape before the end of the
344    // function, and the lock is taken by no other code path, so we
345    // know it is not contended.
346    pub fn build(
347        self,
348    ) -> Result<(
349        SandboxMemoryManager<HostSharedMemory>,
350        SandboxMemoryManager<GuestSharedMemory>,
351    )> {
352        let (hshm, gshm) = self.shared_mem.build();
353        let (hscratch, gscratch) = self.scratch_mem.build();
354        let mut host_mgr = SandboxMemoryManager {
355            shared_mem: hshm,
356            scratch_mem: hscratch,
357            layout: self.layout,
358            entrypoint: self.entrypoint,
359            mapped_rgns: self.mapped_rgns,
360            abort_buffer: self.abort_buffer,
361            snapshot_count: self.snapshot_count,
362        };
363        let guest_mgr = SandboxMemoryManager {
364            shared_mem: gshm,
365            scratch_mem: gscratch,
366            layout: self.layout,
367            entrypoint: self.entrypoint,
368            mapped_rgns: self.mapped_rgns,
369            abort_buffer: Vec::new(), // Guest doesn't need abort buffer
370            snapshot_count: self.snapshot_count,
371        };
372        host_mgr.update_scratch_bookkeeping()?;
373        Ok((host_mgr, guest_mgr))
374    }
375}
376
377impl SandboxMemoryManager<HostSharedMemory> {
378    /// Write a [`FileMappingInfo`] entry into the PEB's preallocated array.
379    ///
380    /// Reads the current entry count from the PEB, validates that the
381    /// array isn't full ([`MAX_FILE_MAPPINGS`]), writes the entry at the
382    /// next available slot, and increments the count.
383    ///
384    /// This is the **only** place that writes to the PEB file mappings
385    /// array — both `MultiUseSandbox::map_file_cow` and the evolve loop
386    /// call through here so the logic is not duplicated.
387    ///
388    /// # Errors
389    ///
390    /// Returns an error if [`MAX_FILE_MAPPINGS`] has been reached.
391    ///
392    /// [`FileMappingInfo`]: hyperlight_common::mem::FileMappingInfo
393    /// [`MAX_FILE_MAPPINGS`]: hyperlight_common::mem::MAX_FILE_MAPPINGS
394    #[cfg(feature = "nanvix-unstable")]
395    pub(crate) fn write_file_mapping_entry(
396        &mut self,
397        guest_addr: u64,
398        size: u64,
399        label: &[u8; hyperlight_common::mem::FILE_MAPPING_LABEL_MAX_LEN + 1],
400    ) -> Result<()> {
401        use hyperlight_common::mem::{FileMappingInfo, MAX_FILE_MAPPINGS};
402
403        // Read the current entry count from the PEB. This is the source
404        // of truth — it survives snapshot/restore because the PEB is
405        // part of shared memory that gets snapshotted.
406        let current_count =
407            self.shared_mem
408                .read::<u64>(self.layout.get_file_mappings_size_offset())? as usize;
409
410        if current_count >= MAX_FILE_MAPPINGS {
411            return Err(crate::new_error!(
412                "file mapping limit reached ({} of {})",
413                current_count,
414                MAX_FILE_MAPPINGS,
415            ));
416        }
417
418        // Write the entry into the next available slot.
419        let entry_offset = self.layout.get_file_mappings_array_offset()
420            + current_count * std::mem::size_of::<FileMappingInfo>();
421        let guest_addr_offset = offset_of!(FileMappingInfo, guest_addr);
422        let size_offset = offset_of!(FileMappingInfo, size);
423        let label_offset = offset_of!(FileMappingInfo, label);
424        self.shared_mem
425            .write::<u64>(entry_offset + guest_addr_offset, guest_addr)?;
426        self.shared_mem
427            .write::<u64>(entry_offset + size_offset, size)?;
428        self.shared_mem
429            .copy_from_slice(label, entry_offset + label_offset)?;
430
431        // Increment the entry count.
432        let new_count = (current_count + 1) as u64;
433        self.shared_mem
434            .write::<u64>(self.layout.get_file_mappings_size_offset(), new_count)?;
435
436        Ok(())
437    }
438
439    /// Reads a host function call from memory
440    #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
441    pub(crate) fn get_host_function_call(&mut self) -> Result<FunctionCall> {
442        self.scratch_mem.try_pop_buffer_into::<FunctionCall>(
443            self.layout.get_output_data_buffer_scratch_host_offset(),
444            self.layout.sandbox_memory_config.get_output_data_size(),
445        )
446    }
447
448    /// Writes a host function call result to memory
449    #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
450    pub(crate) fn write_response_from_host_function_call(
451        &mut self,
452        res: &FunctionCallResult,
453    ) -> Result<()> {
454        let mut builder = FlatBufferBuilder::new();
455        let data = res.encode(&mut builder);
456
457        self.scratch_mem.push_buffer(
458            self.layout.get_input_data_buffer_scratch_host_offset(),
459            self.layout.sandbox_memory_config.get_input_data_size(),
460            data,
461        )
462    }
463
464    /// Writes a guest function call to memory
465    #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
466    pub(crate) fn write_guest_function_call(&mut self, buffer: &[u8]) -> Result<()> {
467        validate_guest_function_call_buffer(buffer).map_err(|e| {
468            new_error!(
469                "Guest function call buffer validation failed: {}",
470                e.to_string()
471            )
472        })?;
473
474        self.scratch_mem.push_buffer(
475            self.layout.get_input_data_buffer_scratch_host_offset(),
476            self.layout.sandbox_memory_config.get_input_data_size(),
477            buffer,
478        )?;
479        Ok(())
480    }
481
482    /// Reads a function call result from memory.
483    /// A function call result can be either an error or a successful return value.
484    #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
485    pub(crate) fn get_guest_function_call_result(&mut self) -> Result<FunctionCallResult> {
486        self.scratch_mem.try_pop_buffer_into::<FunctionCallResult>(
487            self.layout.get_output_data_buffer_scratch_host_offset(),
488            self.layout.sandbox_memory_config.get_output_data_size(),
489        )
490    }
491
492    /// Read guest log data from the `SharedMemory` contained within `self`
493    #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
494    pub(crate) fn read_guest_log_data(&mut self) -> Result<GuestLogData> {
495        self.scratch_mem.try_pop_buffer_into::<GuestLogData>(
496            self.layout.get_output_data_buffer_scratch_host_offset(),
497            self.layout.sandbox_memory_config.get_output_data_size(),
498        )
499    }
500
501    pub(crate) fn clear_io_buffers(&mut self) {
502        // Clear the output data buffer
503        loop {
504            let Ok(_) = self.scratch_mem.try_pop_buffer_into::<Vec<u8>>(
505                self.layout.get_output_data_buffer_scratch_host_offset(),
506                self.layout.sandbox_memory_config.get_output_data_size(),
507            ) else {
508                break;
509            };
510        }
511        // Clear the input data buffer
512        loop {
513            let Ok(_) = self.scratch_mem.try_pop_buffer_into::<Vec<u8>>(
514                self.layout.get_input_data_buffer_scratch_host_offset(),
515                self.layout.sandbox_memory_config.get_input_data_size(),
516            ) else {
517                break;
518            };
519        }
520    }
521
522    /// This function restores a memory snapshot from a given snapshot.
523    pub(crate) fn restore_snapshot(
524        &mut self,
525        snapshot: &Snapshot,
526    ) -> Result<(
527        Option<SnapshotSharedMemory<GuestSharedMemory>>,
528        Option<GuestSharedMemory>,
529    )> {
530        let gsnapshot = if *snapshot.memory() == self.shared_mem {
531            // If the snapshot memory is already the correct memory,
532            // which is readonly, don't bother with restoring it,
533            // since its contents must be the same.  Note that in the
534            // #[cfg(unshared_snapshot_mem)] case, this condition will
535            // never be true, since even immediately after a restore,
536            // self.shared_mem is a (writable) copy, not the original
537            // shared_mem.
538            None
539        } else {
540            let new_snapshot_mem = snapshot.memory().to_mgr_snapshot_mem()?;
541            let (hsnapshot, gsnapshot) = new_snapshot_mem.build();
542            self.shared_mem = hsnapshot;
543            Some(gsnapshot)
544        };
545        let new_scratch_size = snapshot.layout().get_scratch_size();
546        let gscratch = if new_scratch_size == self.scratch_mem.mem_size() {
547            self.scratch_mem.zero()?;
548            None
549        } else {
550            let new_scratch_mem = ExclusiveSharedMemory::new(new_scratch_size)?;
551            let (hscratch, gscratch) = new_scratch_mem.build();
552            // Even though this destroys the reference to the host
553            // side of the old scratch mapping, the VM should still
554            // own the reference to the guest side of the old scratch
555            // mapping, so it won't actually be deallocated until it
556            // has been unmapped from the VM.
557            self.scratch_mem = hscratch;
558
559            Some(gscratch)
560        };
561        self.layout = *snapshot.layout();
562        // Inherit the snapshot's own generation number — the
563        // guest-visible counter reflects "which snapshot is the
564        // sandbox currently a clone of", not "how many restores have
565        // happened into this (possibly-reused) partition".
566        self.snapshot_count = snapshot.snapshot_generation();
567
568        self.update_scratch_bookkeeping()?;
569        Ok((gsnapshot, gscratch))
570    }
571
572    #[inline]
573    fn update_scratch_bookkeeping_item(&mut self, offset: u64, value: u64) -> Result<()> {
574        let scratch_size = self.scratch_mem.mem_size();
575        let base_offset = scratch_size - offset as usize;
576        self.scratch_mem.write::<u64>(base_offset, value)
577    }
578
    /// Refresh the bookkeeping values at the top of scratch memory,
    /// (re)initialise the guest I/O stacks, and copy the snapshot's
    /// page tables into scratch. Called after `build` and after
    /// `restore_snapshot`, whenever the scratch mapping or layout changes.
    fn update_scratch_bookkeeping(&mut self) -> Result<()> {
        use hyperlight_common::layout::*;
        let scratch_size = self.scratch_mem.mem_size();
        // Tell the guest how big scratch is and where its allocator starts.
        self.update_scratch_bookkeeping_item(SCRATCH_TOP_SIZE_OFFSET, scratch_size as u64)?;
        self.update_scratch_bookkeeping_item(
            SCRATCH_TOP_ALLOCATOR_OFFSET,
            self.layout.get_first_free_scratch_gpa(),
        )?;
        // Record the GPA of the snapshot's copy of the page tables.
        // The copy lives at the tail of the snapshot blob; we copy it
        // into scratch below so the guest walker can run against
        // mutable, TLB-fresh tables. The guest reads this GPA during
        // CoW fault-in to follow the original PTs on the first write
        // — until the HV can execute directly out of the
        // snapshot-resident PTs, at which point the whole split goes
        // away.
        self.update_scratch_bookkeeping_item(
            SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET,
            self.layout.get_pt_base_gpa(),
        )?;
        self.update_scratch_bookkeeping_item(
            SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET,
            self.snapshot_count,
        )?;

        // Initialise the guest input and output data buffers in
        // scratch memory. TODO: remove the need for this.
        self.scratch_mem.write::<u64>(
            self.layout.get_input_data_buffer_scratch_host_offset(),
            SandboxMemoryLayout::STACK_POINTER_SIZE_BYTES,
        )?;
        self.scratch_mem.write::<u64>(
            self.layout.get_output_data_buffer_scratch_host_offset(),
            SandboxMemoryLayout::STACK_POINTER_SIZE_BYTES,
        )?;

        // Copy page tables from `shared_mem` into scratch. PT bytes
        // are appended to the snapshot blob at build time and live
        // just past the end of the guest-visible KVM slot (see
        // `Snapshot::new`). Keeping them outside the KVM slot avoids
        // overlapping with `map_file_cow` regions installed
        // immediately after the snapshot in the guest PA space.
        let snapshot_pt_end = self.shared_mem.mem_size();
        let snapshot_pt_size = self.layout.get_pt_size();
        let snapshot_pt_start = snapshot_pt_end - snapshot_pt_size;
        self.scratch_mem.with_exclusivity(|scratch| {
            // Default cfg: borrow the PT bytes straight out of the shared
            // mapping. With `unshared_snapshot_mem` there is no direct
            // slice access, so copy into a temporary buffer first.
            #[cfg(not(unshared_snapshot_mem))]
            let bytes = &self.shared_mem.as_slice()[snapshot_pt_start..snapshot_pt_end];
            #[cfg(unshared_snapshot_mem)]
            let bytes = {
                let mut bytes = vec![0u8; snapshot_pt_size];
                self.shared_mem
                    .copy_to_slice(&mut bytes, snapshot_pt_start)?;
                bytes
            };
            #[allow(clippy::needless_borrow)]
            scratch.copy_from_slice(&bytes, self.layout.get_pt_base_scratch_offset())
        })??;

        Ok(())
    }
640
    /// Build the list of guest memory regions for a crash dump.
    ///
    /// By default, walks the guest page tables to discover
    /// GVA→GPA mappings and translates them to host-backed regions.
    #[cfg(all(feature = "crashdump", not(feature = "i686-guest")))]
    pub(crate) fn get_guest_memory_regions(
        &mut self,
        root_pt: u64,
        mmap_regions: &[MemoryRegion],
    ) -> Result<Vec<CrashDumpRegion>> {
        use crate::sandbox::snapshot::SharedMemoryPageTableBuffer;

        // Walk the entire guest virtual address space.
        let len = hyperlight_common::layout::MAX_GVA;

        // Both backing memories must stay borrowed while we hold raw host
        // pointers into them, hence the nested with_contents closures
        // (and the triple `?` unwrapping their nested Results below).
        let regions = self.shared_mem.with_contents(|snapshot| {
            self.scratch_mem.with_contents(|scratch| {
                let pt_buf =
                    SharedMemoryPageTableBuffer::new(snapshot, scratch, self.layout, root_pt);

                let mappings: Vec<_> =
                    unsafe { hyperlight_common::vmem::virt_to_phys(&pt_buf, 0, len as u64) }
                        .collect();

                if mappings.is_empty() {
                    return Err(new_error!("No page table mappings found (len {len})",));
                }

                let mut regions: Vec<CrashDumpRegion> = Vec::new();
                for mapping in &mappings {
                    let virt_base = mapping.virt_base as usize;
                    let virt_end = (mapping.virt_base + mapping.len) as usize;

                    // GPAs with no backing (e.g. holes) are silently skipped.
                    if let Some(resolved) = self.layout.resolve_gpa(mapping.phys_base, mmap_regions)
                    {
                        let (flags, region_type) = mapping_kind_to_flags(&mapping.kind);
                        let resolved = resolved.with_memories(snapshot, scratch);
                        let contents = resolved.as_ref();
                        let host_base = contents.as_ptr() as usize;
                        // Clamp to the backing slice so the host range never
                        // extends past what is actually mapped.
                        let host_len = (mapping.len as usize).min(contents.len());

                        // Merge with the previous region when contiguous.
                        if try_coalesce_region(&mut regions, virt_base, virt_end, host_base, flags)
                        {
                            continue;
                        }

                        regions.push(CrashDumpRegion {
                            guest_region: virt_base..virt_end,
                            host_region: host_base..host_base + host_len,
                            flags,
                            region_type,
                        });
                    }
                }

                Ok(regions)
            })
        })???;

        Ok(regions)
    }
701
702    /// Build the list of guest memory regions for a crash dump (non-paging).
703    ///
704    /// Without paging, GVA == GPA (identity mapped), so we return the
705    /// snapshot and scratch regions directly at their known addresses
706    /// alongside any dynamic mmap regions.
707    #[cfg(all(feature = "crashdump", feature = "i686-guest"))]
708    pub(crate) fn get_guest_memory_regions(
709        &mut self,
710        _root_pt: u64,
711        mmap_regions: &[MemoryRegion],
712    ) -> Result<Vec<CrashDumpRegion>> {
713        use crate::mem::memory_region::HostGuestMemoryRegion;
714
715        let snapshot_base = SandboxMemoryLayout::BASE_ADDRESS;
716        let snapshot_size = self.shared_mem.mem_size();
717        let snapshot_host = self.shared_mem.base_addr();
718
719        let scratch_size = self.scratch_mem.mem_size();
720        let scratch_gva = hyperlight_common::layout::scratch_base_gva(scratch_size) as usize;
721        let scratch_host = self.scratch_mem.base_addr();
722
723        let mut regions = vec![
724            CrashDumpRegion {
725                guest_region: snapshot_base..snapshot_base + snapshot_size,
726                host_region: snapshot_host..snapshot_host + snapshot_size,
727                flags: MemoryRegionFlags::READ | MemoryRegionFlags::EXECUTE,
728                region_type: MemoryRegionType::Snapshot,
729            },
730            CrashDumpRegion {
731                guest_region: scratch_gva..scratch_gva + scratch_size,
732                host_region: scratch_host..scratch_host + scratch_size,
733                flags: MemoryRegionFlags::READ
734                    | MemoryRegionFlags::WRITE
735                    | MemoryRegionFlags::EXECUTE,
736                region_type: MemoryRegionType::Scratch,
737            },
738        ];
739        for rgn in mmap_regions {
740            regions.push(CrashDumpRegion {
741                guest_region: rgn.guest_region.clone(),
742                host_region: HostGuestMemoryRegion::to_addr(rgn.host_region.start)
743                    ..HostGuestMemoryRegion::to_addr(rgn.host_region.end),
744                flags: rgn.flags,
745                region_type: rgn.region_type,
746            });
747        }
748
749        Ok(regions)
750    }
751
    /// Read guest memory at a Guest Virtual Address (GVA) by walking the
    /// page tables to translate GVA → GPA, then reading from the correct
    /// backing memory (shared_mem or scratch_mem).
    ///
    /// This is necessary because with Copy-on-Write (CoW) the guest's
    /// virtual pages are backed by physical pages in the scratch
    /// region rather than being identity-mapped.
    ///
    /// # Arguments
    /// * `gva` - The Guest Virtual Address to read from
    /// * `len` - The number of bytes to read
    /// * `root_pt` - The root page table physical address (CR3)
    ///
    /// # Errors
    /// Returns an error if no mapping covers the start of the GVA range, if
    /// the walker yields a mapping that starts past the current read
    /// position (a gap), if a translated GPA cannot be resolved to host
    /// memory, or if the mappings cover fewer than `len` bytes in total.
    #[cfg(feature = "trace_guest")]
    pub(crate) fn read_guest_memory_by_gva(
        &mut self,
        gva: u64,
        len: usize,
        root_pt: u64,
    ) -> Result<Vec<u8>> {
        use hyperlight_common::vmem::PAGE_SIZE;

        use crate::sandbox::snapshot::{SharedMemoryPageTableBuffer, access_gpa};

        self.shared_mem.with_contents(|snap| {
            self.scratch_mem.with_contents(|scratch| {
                // Present both backing buffers as a single page-table view so
                // the walker can follow entries wherever they physically live.
                let pt_buf = SharedMemoryPageTableBuffer::new(snap, scratch, self.layout, root_pt);

                // Walk page tables to get all mappings that cover the GVA range
                let mappings: Vec<_> = unsafe {
                    hyperlight_common::vmem::virt_to_phys(&pt_buf, gva, len as u64)
                }
                .collect();

                if mappings.is_empty() {
                    return Err(new_error!(
                        "No page table mappings found for GVA {:#x} (len {})",
                        gva,
                        len,
                    ));
                }

                // Resulting vector of bytes to return
                let mut result = Vec::with_capacity(len);
                // Tracks how far into the GVA range we have copied so far.
                let mut current_gva = gva;

                for mapping in &mappings {
                    // The page table walker should only return valid mappings
                    // that cover our current read position.
                    if mapping.virt_base > current_gva {
                        return Err(new_error!(
                            "Page table walker returned mapping with virt_base {:#x} > current read position {:#x}",
                            mapping.virt_base,
                            current_gva,
                        ));
                    }

                    // Calculate the offset within this page where to start copying
                    let page_offset = (current_gva - mapping.virt_base) as usize;

                    let bytes_remaining = len - result.len();
                    // NOTE(review): assumes the walker yields mappings at
                    // PAGE_SIZE granularity; if it ever returned a larger
                    // (huge-page) mapping, page_offset could exceed PAGE_SIZE
                    // and this subtraction would underflow — confirm against
                    // virt_to_phys's contract.
                    let available_in_page = PAGE_SIZE - page_offset;
                    let bytes_to_copy = bytes_remaining.min(available_in_page);

                    // Translate the GPA to host memory
                    let gpa = mapping.phys_base + page_offset as u64;
                    let (mem, offset) = access_gpa(snap, scratch, self.layout, gpa)
                        .ok_or_else(|| {
                            new_error!(
                                "Failed to resolve GPA {:#x} to host memory (GVA {:#x})",
                                gpa,
                                gva
                            )
                        })?;

                    // Bounds-checked slice of the host backing buffer for
                    // this page fragment; `get` avoids a panic on overrun.
                    let slice = mem
                        .get(offset..offset + bytes_to_copy)
                        .ok_or_else(|| {
                            new_error!(
                                "GPA {:#x} resolved to out-of-bounds host offset {} (need {} bytes)",
                                gpa,
                                offset,
                                bytes_to_copy
                            )
                        })?;

                    result.extend_from_slice(slice);
                    current_gva += bytes_to_copy as u64;
                }

                // A short read means the walker's mappings left a hole or
                // ended early; treat it as an error rather than returning
                // partial data.
                if result.len() != len {
                    tracing::error!(
                        "Page table walker returned mappings that don't cover the full requested length: got {}, expected {}",
                        result.len(),
                        len,
                    );
                    return Err(new_error!(
                        "Could not read full GVA range: got {} of {} bytes {:?}",
                        result.len(),
                        len,
                        mappings
                    ));
                }

                Ok(result)
            })
        // Each `with_contents` call wraps its closure's value in a Result;
        // the double `?` flattens both layers to the inner Result<Vec<u8>>.
        })??
    }
859}
860
#[cfg(test)]
#[cfg(all(not(feature = "i686-guest"), target_arch = "x86_64"))]
mod tests {
    use hyperlight_common::vmem::{MappingKind, PAGE_TABLE_SIZE};
    use hyperlight_testing::sandbox_sizes::{LARGE_HEAP_SIZE, MEDIUM_HEAP_SIZE, SMALL_HEAP_SIZE};
    use hyperlight_testing::simple_guest_as_string;

    use crate::GuestBinary;
    use crate::mem::memory_region::MemoryRegionFlags;
    use crate::sandbox::SandboxConfiguration;
    use crate::sandbox::snapshot::Snapshot;

    /// Build a Snapshot for `config` and check, page by page, that every
    /// region is identity-mapped with PTE permissions matching the region's
    /// flags, and that the NULL page is left unmapped.
    fn verify_page_tables(name: &str, config: SandboxConfiguration) {
        let guest_path = simple_guest_as_string().expect("failed to get simple guest path");
        let snapshot = match Snapshot::from_env(GuestBinary::FilePath(guest_path), config) {
            Ok(s) => s,
            Err(e) => panic!("{}: failed to create snapshot: {}", name, e),
        };

        // The NULL page (0x0) must never be mapped.
        let null_mapping =
            unsafe { hyperlight_common::vmem::virt_to_phys(&snapshot, 0, 1) }.next();
        assert!(
            null_mapping.is_none(),
            "{}: NULL page (0x0) should NOT be mapped",
            name
        );

        // Check each page of every region.
        for region in snapshot.regions() {
            let start = region.guest_region.start as u64;
            let end = region.guest_region.end as u64;

            for page in (start..end).step_by(PAGE_TABLE_SIZE as usize) {
                let walked =
                    unsafe { hyperlight_common::vmem::virt_to_phys(&snapshot, page, 1) }.next();
                let mapping = match walked {
                    Some(m) => m,
                    None => panic!(
                        "{}: {:?} region: address 0x{:x} is not mapped",
                        name, region.region_type, page
                    ),
                };

                // Low memory must be identity-mapped (phys == virt).
                assert_eq!(
                    mapping.phys_base, page,
                    "{}: {:?} region: address 0x{:x} should identity map, got phys 0x{:x}",
                    name, region.region_type, page, mapping.phys_base
                );

                // Only Basic mappings are expected here.
                let basic = match mapping.kind {
                    MappingKind::Basic(bm) => bm,
                    other => panic!(
                        "{}: {:?} region: address 0x{:x} should be kind basic, got {:?}",
                        name, region.region_type, page, other
                    ),
                };

                // The W bit must agree with the region flags.
                let got_writable = basic.writable;
                let want_writable = region.flags.contains(MemoryRegionFlags::WRITE);
                assert_eq!(
                    got_writable, want_writable,
                    "{}: {:?} region: address 0x{:x} has writable {}, expected {} (region flags: {:?})",
                    name, region.region_type, page, got_writable, want_writable, region.flags
                );

                // The X bit must agree with the region flags.
                let got_exec = basic.executable;
                let want_exec = region.flags.contains(MemoryRegionFlags::EXECUTE);
                assert_eq!(
                    got_exec, want_exec,
                    "{}: {:?} region: address 0x{:x} has executable {}, expected {} (region flags: {:?})",
                    name, region.region_type, page, got_exec, want_exec, region.flags
                );
            }
        }
    }

    #[test]
    fn test_page_tables_for_various_configurations() {
        // Shared builder for the heap-size-only variants.
        let with_heap = |heap| {
            let mut cfg = SandboxConfiguration::default();
            cfg.set_heap_size(heap);
            cfg
        };

        // The large case additionally enlarges the scratch region.
        let large = {
            let mut cfg = with_heap(LARGE_HEAP_SIZE);
            cfg.set_scratch_size(0x100000);
            cfg
        };

        let cases: [(&str, SandboxConfiguration); 4] = [
            ("default", SandboxConfiguration::default()),
            ("small (8MB heap)", with_heap(SMALL_HEAP_SIZE)),
            ("medium (64MB heap)", with_heap(MEDIUM_HEAP_SIZE)),
            ("large (256MB heap)", large),
        ];

        for (name, config) in cases {
            verify_page_tables(name, config);
        }
    }
}
969}