hyperlight_host/hypervisor/
hyperv_linux.rs

1/*
2Copyright 2025  The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17#[cfg(mshv2)]
18extern crate mshv_bindings2 as mshv_bindings;
19#[cfg(mshv2)]
20extern crate mshv_ioctls2 as mshv_ioctls;
21
22#[cfg(mshv3)]
23extern crate mshv_bindings3 as mshv_bindings;
24#[cfg(mshv3)]
25extern crate mshv_ioctls3 as mshv_ioctls;
26
27use std::fmt::{Debug, Formatter};
28use std::sync::Arc;
29use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
30
31use log::{LevelFilter, error};
32#[cfg(mshv2)]
33use mshv_bindings::hv_message;
34use mshv_bindings::{
35    FloatingPointUnit, SegmentRegister, SpecialRegisters, StandardRegisters, hv_message_type,
36    hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA,
37    hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, hv_register_assoc,
38    hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_user_mem_region,
39};
40#[cfg(gdb)]
41use mshv_bindings::{
42    HV_INTERCEPT_ACCESS_MASK_EXECUTE, hv_intercept_parameters,
43    hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT,
44    mshv_install_intercept,
45};
46#[cfg(mshv3)]
47use mshv_bindings::{
48    hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES,
49    hv_partition_synthetic_processor_features,
50};
51use mshv_ioctls::{Mshv, MshvError, VcpuFd, VmFd};
52use tracing::{Span, instrument};
53#[cfg(crashdump)]
54use {super::crashdump, std::path::Path};
55
56use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT};
57#[cfg(gdb)]
58use super::gdb::{
59    DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, MshvDebug, VcpuStopReason,
60};
61#[cfg(gdb)]
62use super::handlers::DbgMemAccessHandlerWrapper;
63use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper};
64#[cfg(feature = "init-paging")]
65use super::{
66    CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE,
67    EFER_LMA, EFER_LME, EFER_NX, EFER_SCE,
68};
69use super::{HyperlightExit, Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU};
70#[cfg(gdb)]
71use crate::HyperlightError;
72use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
73use crate::mem::ptr::{GuestPtr, RawPtr};
74use crate::sandbox::SandboxConfiguration;
75#[cfg(crashdump)]
76use crate::sandbox::uninitialized::SandboxRuntimeConfig;
77use crate::{Result, log_then_return, new_error};
78
79#[cfg(gdb)]
80mod debug {
81    use std::sync::{Arc, Mutex};
82
83    use super::mshv_bindings::hv_x64_exception_intercept_message;
84    use super::{HypervLinuxDriver, *};
85    use crate::hypervisor::gdb::{DebugMsg, DebugResponse, VcpuStopReason, X86_64Regs};
86    use crate::hypervisor::handlers::DbgMemAccessHandlerCaller;
87    use crate::{Result, new_error};
88
89    impl HypervLinuxDriver {
90        /// Resets the debug information to disable debugging
91        fn disable_debug(&mut self) -> Result<()> {
92            let mut debug = MshvDebug::default();
93
94            debug.set_single_step(&self.vcpu_fd, false)?;
95
96            self.debug = Some(debug);
97
98            Ok(())
99        }
100
101        /// Get the reason the vCPU has stopped
102        pub(crate) fn get_stop_reason(
103            &mut self,
104            ex_info: hv_x64_exception_intercept_message,
105        ) -> Result<VcpuStopReason> {
106            let debug = self
107                .debug
108                .as_mut()
109                .ok_or_else(|| new_error!("Debug is not enabled"))?;
110
111            debug.get_stop_reason(&self.vcpu_fd, ex_info.exception_vector, self.entrypoint)
112        }
113
114        pub(crate) fn process_dbg_request(
115            &mut self,
116            req: DebugMsg,
117            dbg_mem_access_fn: Arc<Mutex<dyn DbgMemAccessHandlerCaller>>,
118        ) -> Result<DebugResponse> {
119            if let Some(debug) = self.debug.as_mut() {
120                match req {
121                    DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint(
122                        debug
123                            .add_hw_breakpoint(&self.vcpu_fd, addr)
124                            .map_err(|e| {
125                                log::error!("Failed to add hw breakpoint: {:?}", e);
126
127                                e
128                            })
129                            .is_ok(),
130                    )),
131                    DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint(
132                        debug
133                            .add_sw_breakpoint(&self.vcpu_fd, addr, dbg_mem_access_fn)
134                            .map_err(|e| {
135                                log::error!("Failed to add sw breakpoint: {:?}", e);
136
137                                e
138                            })
139                            .is_ok(),
140                    )),
141                    DebugMsg::Continue => {
142                        debug.set_single_step(&self.vcpu_fd, false).map_err(|e| {
143                            log::error!("Failed to continue execution: {:?}", e);
144
145                            e
146                        })?;
147
148                        Ok(DebugResponse::Continue)
149                    }
150                    DebugMsg::DisableDebug => {
151                        self.disable_debug().map_err(|e| {
152                            log::error!("Failed to disable debugging: {:?}", e);
153
154                            e
155                        })?;
156
157                        Ok(DebugResponse::DisableDebug)
158                    }
159                    DebugMsg::GetCodeSectionOffset => {
160                        let offset = dbg_mem_access_fn
161                            .try_lock()
162                            .map_err(|e| {
163                                new_error!("Error locking at {}:{}: {}", file!(), line!(), e)
164                            })?
165                            .get_code_offset()
166                            .map_err(|e| {
167                                log::error!("Failed to get code offset: {:?}", e);
168
169                                e
170                            })?;
171
172                        Ok(DebugResponse::GetCodeSectionOffset(offset as u64))
173                    }
174                    DebugMsg::ReadAddr(addr, len) => {
175                        let mut data = vec![0u8; len];
176
177                        debug
178                            .read_addrs(&self.vcpu_fd, addr, &mut data, dbg_mem_access_fn)
179                            .map_err(|e| {
180                                log::error!("Failed to read from address: {:?}", e);
181
182                                e
183                            })?;
184
185                        Ok(DebugResponse::ReadAddr(data))
186                    }
187                    DebugMsg::ReadRegisters => {
188                        let mut regs = X86_64Regs::default();
189
190                        debug
191                            .read_regs(&self.vcpu_fd, &mut regs)
192                            .map_err(|e| {
193                                log::error!("Failed to read registers: {:?}", e);
194
195                                e
196                            })
197                            .map(|_| DebugResponse::ReadRegisters(regs))
198                    }
199                    DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint(
200                        debug
201                            .remove_hw_breakpoint(&self.vcpu_fd, addr)
202                            .map_err(|e| {
203                                log::error!("Failed to remove hw breakpoint: {:?}", e);
204
205                                e
206                            })
207                            .is_ok(),
208                    )),
209                    DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint(
210                        debug
211                            .remove_sw_breakpoint(&self.vcpu_fd, addr, dbg_mem_access_fn)
212                            .map_err(|e| {
213                                log::error!("Failed to remove sw breakpoint: {:?}", e);
214
215                                e
216                            })
217                            .is_ok(),
218                    )),
219                    DebugMsg::Step => {
220                        debug.set_single_step(&self.vcpu_fd, true).map_err(|e| {
221                            log::error!("Failed to enable step instruction: {:?}", e);
222
223                            e
224                        })?;
225
226                        Ok(DebugResponse::Step)
227                    }
228                    DebugMsg::WriteAddr(addr, data) => {
229                        debug
230                            .write_addrs(&self.vcpu_fd, addr, &data, dbg_mem_access_fn)
231                            .map_err(|e| {
232                                log::error!("Failed to write to address: {:?}", e);
233
234                                e
235                            })?;
236
237                        Ok(DebugResponse::WriteAddr)
238                    }
239                    DebugMsg::WriteRegisters(regs) => debug
240                        .write_regs(&self.vcpu_fd, &regs)
241                        .map_err(|e| {
242                            log::error!("Failed to write registers: {:?}", e);
243
244                            e
245                        })
246                        .map(|_| DebugResponse::WriteRegisters),
247                }
248            } else {
249                Err(new_error!("Debugging is not enabled"))
250            }
251        }
252
253        pub(crate) fn recv_dbg_msg(&mut self) -> Result<DebugMsg> {
254            let gdb_conn = self
255                .gdb_conn
256                .as_mut()
257                .ok_or_else(|| new_error!("Debug is not enabled"))?;
258
259            gdb_conn.recv().map_err(|e| {
260                new_error!(
261                    "Got an error while waiting to receive a
262                    message: {:?}",
263                    e
264                )
265            })
266        }
267
268        pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> {
269            log::debug!("Sending {:?}", cmd);
270
271            let gdb_conn = self
272                .gdb_conn
273                .as_mut()
274                .ok_or_else(|| new_error!("Debug is not enabled"))?;
275
276            gdb_conn
277                .send(cmd)
278                .map_err(|e| new_error!("Got an error while sending a response message {:?}", e))
279        }
280    }
281}
282
283/// Determine whether the HyperV for Linux hypervisor API is present
284/// and functional.
285#[instrument(skip_all, parent = Span::current(), level = "Trace")]
286pub(crate) fn is_hypervisor_present() -> bool {
287    match Mshv::new() {
288        Ok(_) => true,
289        Err(_) => {
290            log::info!("MSHV is not available on this system");
291            false
292        }
293    }
294}
295
/// A Hypervisor driver for HyperV-on-Linux. This hypervisor is often
/// called the Microsoft Hypervisor (MSHV)
///
/// An instance owns the hypervisor handle, one VM and one vCPU, together
/// with the bookkeeping needed to run guest code on that vCPU.
pub(crate) struct HypervLinuxDriver {
    /// Hypervisor handle obtained from `Mshv::new()`; stored in the struct but
    /// not read by the code in this file section.
    _mshv: Mshv,
    /// The VM (partition) created from the hypervisor handle.
    vm_fd: VmFd,
    /// The single vCPU (index 0) created on `vm_fd`.
    vcpu_fd: VcpuFd,
    /// Absolute address of the guest entrypoint; loaded into RIP by `initialise`.
    entrypoint: u64,
    /// The memory regions that were mapped into the VM at construction time.
    mem_regions: Vec<MemoryRegion>,
    /// Initial stack pointer; loaded into RSP before every guest invocation.
    orig_rsp: GuestPtr,
    /// Shared state used to cancel or interrupt a running vCPU.
    interrupt_handle: Arc<LinuxInterruptHandle>,

    /// Debug state (breakpoints, single-stepping); `None` when no gdb
    /// connection was supplied at construction time.
    #[cfg(gdb)]
    debug: Option<MshvDebug>,
    /// Channel used to exchange debug requests and responses; `None` when no
    /// gdb connection was supplied at construction time.
    #[cfg(gdb)]
    gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
    /// Runtime configuration handed to `new` in crashdump builds.
    /// NOTE(review): where it is consumed is not visible in this part of the file.
    #[cfg(crashdump)]
    rt_cfg: SandboxRuntimeConfig,
}
314
315impl HypervLinuxDriver {
316    /// Create a new `HypervLinuxDriver`, complete with all registers
317    /// set up to execute a Hyperlight binary inside a HyperV-powered
318    /// sandbox on Linux.
319    ///
320    /// While registers are set up, they will not have been applied to
321    /// the underlying virtual CPU after this function returns. Call the
322    /// `apply_registers` method to do that, or more likely call
323    /// `initialise` to do it for you.
324    #[instrument(skip_all, parent = Span::current(), level = "Trace")]
325    pub(crate) fn new(
326        mem_regions: Vec<MemoryRegion>,
327        entrypoint_ptr: GuestPtr,
328        rsp_ptr: GuestPtr,
329        pml4_ptr: GuestPtr,
330        config: &SandboxConfiguration,
331        #[cfg(gdb)] gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
332        #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig,
333    ) -> Result<Self> {
334        let mshv = Mshv::new()?;
335        let pr = Default::default();
336        #[cfg(mshv2)]
337        let vm_fd = mshv.create_vm_with_config(&pr)?;
338        #[cfg(mshv3)]
339        let vm_fd = {
340            // It's important to avoid create_vm() and explicitly use
341            // create_vm_with_args() with an empty arguments structure
342            // here, because otherwise the partition is set up with a SynIC.
343
344            let vm_fd = mshv.create_vm_with_args(&pr)?;
345            let features: hv_partition_synthetic_processor_features = Default::default();
346            vm_fd.hvcall_set_partition_property(
347                hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES,
348                unsafe { features.as_uint64[0] },
349            )?;
350            vm_fd.initialize()?;
351            vm_fd
352        };
353
354        let mut vcpu_fd = vm_fd.create_vcpu(0)?;
355
356        #[cfg(gdb)]
357        let (debug, gdb_conn) = if let Some(gdb_conn) = gdb_conn {
358            let mut debug = MshvDebug::new();
359            debug.add_hw_breakpoint(&vcpu_fd, entrypoint_ptr.absolute()?)?;
360
361            // The bellow intercepts make the vCPU exit with the Exception Intercept exit code
362            // Check Table 6-1. Exceptions and Interrupts at Page 6-13 Vol. 1
363            // of Intel 64 and IA-32 Architectures Software Developer's Manual
364            // Install intercept for #DB (1) exception
365            vm_fd
366                .install_intercept(mshv_install_intercept {
367                    access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE,
368                    intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION,
369                    // Exception handler #DB (1)
370                    intercept_parameter: hv_intercept_parameters {
371                        exception_vector: 0x1,
372                    },
373                })
374                .map_err(|e| new_error!("Cannot install debug exception intercept: {}", e))?;
375
376            // Install intercept for #BP (3) exception
377            vm_fd
378                .install_intercept(mshv_install_intercept {
379                    access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE,
380                    intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION,
381                    // Exception handler #BP (3)
382                    intercept_parameter: hv_intercept_parameters {
383                        exception_vector: 0x3,
384                    },
385                })
386                .map_err(|e| new_error!("Cannot install breakpoint exception intercept: {}", e))?;
387
388            (Some(debug), Some(gdb_conn))
389        } else {
390            (None, None)
391        };
392
393        mem_regions.iter().try_for_each(|region| {
394            let mshv_region = region.to_owned().into();
395            vm_fd.map_user_memory(mshv_region)
396        })?;
397
398        Self::setup_initial_sregs(&mut vcpu_fd, pml4_ptr.absolute()?)?;
399
400        let interrupt_handle = Arc::new(LinuxInterruptHandle {
401            running: AtomicU64::new(0),
402            cancel_requested: AtomicBool::new(false),
403            #[cfg(gdb)]
404            debug_interrupt: AtomicBool::new(false),
405            #[cfg(all(
406                target_arch = "x86_64",
407                target_vendor = "unknown",
408                target_os = "linux",
409                target_env = "musl"
410            ))]
411            tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }),
412            #[cfg(not(all(
413                target_arch = "x86_64",
414                target_vendor = "unknown",
415                target_os = "linux",
416                target_env = "musl"
417            )))]
418            tid: AtomicU64::new(unsafe { libc::pthread_self() }),
419            retry_delay: config.get_interrupt_retry_delay(),
420            sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(),
421            dropped: AtomicBool::new(false),
422        });
423
424        #[allow(unused_mut)]
425        let mut hv = Self {
426            _mshv: mshv,
427            vm_fd,
428            vcpu_fd,
429            mem_regions,
430            entrypoint: entrypoint_ptr.absolute()?,
431            orig_rsp: rsp_ptr,
432            interrupt_handle: interrupt_handle.clone(),
433            #[cfg(gdb)]
434            debug,
435            #[cfg(gdb)]
436            gdb_conn,
437            #[cfg(crashdump)]
438            rt_cfg,
439        };
440
441        // Send the interrupt handle to the GDB thread if debugging is enabled
442        // This is used to allow the GDB thread to stop the vCPU
443        #[cfg(gdb)]
444        hv.send_dbg_msg(DebugResponse::InterruptHandle(interrupt_handle))?;
445
446        Ok(hv)
447    }
448
449    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
450    fn setup_initial_sregs(vcpu: &mut VcpuFd, _pml4_addr: u64) -> Result<()> {
451        #[cfg(feature = "init-paging")]
452        let sregs = SpecialRegisters {
453            cr0: CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP,
454            cr4: CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT,
455            cr3: _pml4_addr,
456            efer: EFER_LME | EFER_LMA | EFER_SCE | EFER_NX,
457            cs: SegmentRegister {
458                type_: 11,
459                present: 1,
460                s: 1,
461                l: 1,
462                ..Default::default()
463            },
464            tr: SegmentRegister {
465                limit: 65535,
466                type_: 11,
467                present: 1,
468                ..Default::default()
469            },
470            ..Default::default()
471        };
472
473        #[cfg(not(feature = "init-paging"))]
474        let sregs = SpecialRegisters {
475            cs: SegmentRegister {
476                base: 0,
477                selector: 0,
478                ..Default::default()
479            },
480            ..Default::default()
481        };
482        vcpu.set_sregs(&sregs)?;
483        Ok(())
484    }
485}
486
487impl Debug for HypervLinuxDriver {
488    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
489        let mut f = f.debug_struct("Hyperv Linux Driver");
490
491        f.field("Entrypoint", &self.entrypoint)
492            .field("Original RSP", &self.orig_rsp);
493
494        for region in &self.mem_regions {
495            f.field("Memory Region", &region);
496        }
497
498        let regs = self.vcpu_fd.get_regs();
499
500        if let Ok(regs) = regs {
501            f.field("Registers", &regs);
502        }
503
504        let sregs = self.vcpu_fd.get_sregs();
505
506        if let Ok(sregs) = sregs {
507            f.field("Special Registers", &sregs);
508        }
509
510        f.finish()
511    }
512}
513
514impl Hypervisor for HypervLinuxDriver {
515    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
516    fn initialise(
517        &mut self,
518        peb_addr: RawPtr,
519        seed: u64,
520        page_size: u32,
521        outb_hdl: OutBHandlerWrapper,
522        mem_access_hdl: MemAccessHandlerWrapper,
523        max_guest_log_level: Option<LevelFilter>,
524        #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
525    ) -> Result<()> {
526        let max_guest_log_level: u64 = match max_guest_log_level {
527            Some(level) => level as u64,
528            None => self.get_max_log_level().into(),
529        };
530
531        let regs = StandardRegisters {
532            rip: self.entrypoint,
533            rsp: self.orig_rsp.absolute()?,
534            rflags: 2, //bit 1 of rlags is required to be set
535
536            // function args
537            rdi: peb_addr.into(),
538            rsi: seed,
539            rdx: page_size.into(),
540            rcx: max_guest_log_level,
541
542            ..Default::default()
543        };
544        self.vcpu_fd.set_regs(&regs)?;
545
546        VirtualCPU::run(
547            self.as_mut_hypervisor(),
548            outb_hdl,
549            mem_access_hdl,
550            #[cfg(gdb)]
551            dbg_mem_access_fn,
552        )?;
553
554        Ok(())
555    }
556
557    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
558    fn dispatch_call_from_host(
559        &mut self,
560        dispatch_func_addr: RawPtr,
561        outb_handle_fn: OutBHandlerWrapper,
562        mem_access_fn: MemAccessHandlerWrapper,
563        #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
564    ) -> Result<()> {
565        // Reset general purpose registers, then set RIP and RSP
566        let regs = StandardRegisters {
567            rip: dispatch_func_addr.into(),
568            rsp: self.orig_rsp.absolute()?,
569            rflags: 2, //bit 1 of rlags is required to be set
570            ..Default::default()
571        };
572        self.vcpu_fd.set_regs(&regs)?;
573
574        // reset fpu state
575        let fpu = FloatingPointUnit {
576            fcw: FP_CONTROL_WORD_DEFAULT,
577            ftwx: FP_TAG_WORD_DEFAULT,
578            mxcsr: MXCSR_DEFAULT,
579            ..Default::default() // zero out the rest
580        };
581        self.vcpu_fd.set_fpu(&fpu)?;
582
583        // run
584        VirtualCPU::run(
585            self.as_mut_hypervisor(),
586            outb_handle_fn,
587            mem_access_fn,
588            #[cfg(gdb)]
589            dbg_mem_access_fn,
590        )?;
591
592        Ok(())
593    }
594
595    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
596    fn handle_io(
597        &mut self,
598        port: u16,
599        data: Vec<u8>,
600        rip: u64,
601        instruction_length: u64,
602        outb_handle_fn: OutBHandlerWrapper,
603    ) -> Result<()> {
604        let mut padded = [0u8; 4];
605        let copy_len = data.len().min(4);
606        padded[..copy_len].copy_from_slice(&data[..copy_len]);
607        let val = u32::from_le_bytes(padded);
608
609        outb_handle_fn
610            .try_lock()
611            .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?
612            .call(port, val)?;
613
614        // update rip
615        self.vcpu_fd.set_reg(&[hv_register_assoc {
616            name: hv_register_name_HV_X64_REGISTER_RIP,
617            value: hv_register_value {
618                reg64: rip + instruction_length,
619            },
620            ..Default::default()
621        }])?;
622        Ok(())
623    }
624
    /// Run the vCPU once and translate the outcome into a `HyperlightExit`.
    ///
    /// The vCPU is not entered at all when cancellation (or, in gdb builds, a
    /// debug interrupt) was already requested; that case is handled exactly
    /// like an `EINTR` from the run call.
    ///
    /// Mapping of outcomes:
    /// - halt message -> `Halt`
    /// - IO port intercept -> `IoOut` with the port, RAX as little-endian
    ///   bytes, RIP and the instruction length
    /// - unmapped GPA -> `Mmio` with the faulting address
    /// - GPA intercept -> the violation reported by
    ///   `get_memory_access_violation`, or `Mmio` when there is none
    /// - exception intercept (gdb builds only) -> `Debug` with the stop reason
    /// - `EINTR` -> `Cancelled` when cancellation was requested (the flag is
    ///   cleared), `Debug(Interrupt)` for a debug interrupt in gdb builds (the
    ///   flag is cleared), `Retry` otherwise
    /// - `EAGAIN` -> `Retry`
    ///
    /// # Errors
    /// Any other message type or errno is logged and returned as an error, as
    /// are failures to decode a message or to update the running state.
    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
    fn run(&mut self) -> Result<super::HyperlightExit> {
        // Local aliases so the message types can be used as `match` patterns.
        const HALT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_HALT;
        const IO_PORT_INTERCEPT_MESSAGE: hv_message_type =
            hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT;
        const UNMAPPED_GPA_MESSAGE: hv_message_type = hv_message_type_HVMSG_UNMAPPED_GPA;
        const INVALID_GPA_ACCESS_MESSAGE: hv_message_type = hv_message_type_HVMSG_GPA_INTERCEPT;
        #[cfg(gdb)]
        const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT;

        // Record the id of the thread that is about to run the vCPU.
        self.interrupt_handle
            .tid
            .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed);
        // Note: if a `InterruptHandle::kill()` called while this thread is **here**
        // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
        self.interrupt_handle
            .set_running_and_increment_generation()
            .map_err(|e| {
                new_error!(
                    "Error setting running state and incrementing generation: {}",
                    e
                )
            })?;
        #[cfg(not(gdb))]
        let debug_interrupt = false;
        #[cfg(gdb)]
        let debug_interrupt = self
            .interrupt_handle
            .debug_interrupt
            .load(Ordering::Relaxed);

        // Don't run the vcpu if `cancel_requested` is true
        //
        // Note: if a `InterruptHandle::kill()` called while this thread is **here**
        // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
        let exit_reason = if self
            .interrupt_handle
            .cancel_requested
            .load(Ordering::Relaxed)
            || debug_interrupt
        {
            // Synthesize the same error an interrupted run would have produced.
            Err(MshvError::Errno(vmm_sys_util::errno::Error::new(
                libc::EINTR,
            )))
        } else {
            // Note: if a `InterruptHandle::kill()` called while this thread is **here**
            // Then the vcpu will run, but we will keep sending signals to this thread
            // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will
            // return either normally with an exit reason, or from being "kicked" by our signal handler, with an EINTR error,
            // both of which are fine.
            #[cfg(mshv2)]
            {
                let hv_message: hv_message = Default::default();
                self.vcpu_fd.run(hv_message)
            }
            #[cfg(mshv3)]
            self.vcpu_fd.run()
        };
        // Note: if a `InterruptHandle::kill()` called while this thread is **here**
        // Then signals will be sent to this thread until `running` is set to false.
        // This is fine since the signal handler is a no-op.
        let cancel_requested = self
            .interrupt_handle
            .cancel_requested
            .load(Ordering::Relaxed);
        // Re-read the flag: it may have been set while the vCPU was running.
        #[cfg(gdb)]
        let debug_interrupt = self
            .interrupt_handle
            .debug_interrupt
            .load(Ordering::Relaxed);
        // Note: if a `InterruptHandle::kill()` called while this thread is **here**
        // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
        // Additionally signals will be sent to this thread until `running` is set to false.
        // This is fine since the signal handler is a no-op.
        self.interrupt_handle.clear_running_bit();
        // At this point, `running` is false so no more signals will be sent to this thread,
        // but we may still receive async signals that were sent before this point.
        // To prevent those signals from interrupting subsequent calls to `run()`,
        // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below).
        let result = match exit_reason {
            Ok(m) => match m.header.message_type {
                HALT_MESSAGE => {
                    crate::debug!("mshv - Halt Details : {:#?}", &self);
                    HyperlightExit::Halt()
                }
                IO_PORT_INTERCEPT_MESSAGE => {
                    let io_message = m.to_ioport_info()?;
                    let port_number = io_message.port_number;
                    let rip = io_message.header.rip;
                    let rax = io_message.rax;
                    let instruction_length = io_message.header.instruction_length() as u64;
                    crate::debug!("mshv IO Details : \nPort : {}\n{:#?}", port_number, &self);
                    HyperlightExit::IoOut(
                        port_number,
                        rax.to_le_bytes().to_vec(),
                        rip,
                        instruction_length,
                    )
                }
                UNMAPPED_GPA_MESSAGE => {
                    let mimo_message = m.to_memory_info()?;
                    let addr = mimo_message.guest_physical_address;
                    crate::debug!(
                        "mshv MMIO unmapped GPA -Details: Address: {} \n {:#?}",
                        addr,
                        &self
                    );
                    HyperlightExit::Mmio(addr)
                }
                INVALID_GPA_ACCESS_MESSAGE => {
                    let mimo_message = m.to_memory_info()?;
                    let gpa = mimo_message.guest_physical_address;
                    let access_info = MemoryRegionFlags::try_from(mimo_message)?;
                    crate::debug!(
                        "mshv MMIO invalid GPA access -Details: Address: {} \n {:#?}",
                        gpa,
                        &self
                    );
                    // Report a specific violation when one is found for this
                    // address and access; otherwise treat it as plain MMIO.
                    match self.get_memory_access_violation(
                        gpa as usize,
                        &self.mem_regions,
                        access_info,
                    ) {
                        Some(access_info_violation) => access_info_violation,
                        None => HyperlightExit::Mmio(gpa),
                    }
                }
                // The only case an intercept exit is expected is when debugging is enabled
                // and the intercepts are installed.
                // Provide the extra information about the exception to accurately determine
                // the stop reason
                #[cfg(gdb)]
                EXCEPTION_INTERCEPT => {
                    // Extract exception info from the message so we can figure out
                    // more information about the vCPU state
                    let ex_info = match m.to_exception_info() {
                        Ok(info) => info,
                        Err(e) => {
                            log_then_return!("Error converting to exception info: {:?}", e);
                        }
                    };

                    match self.get_stop_reason(ex_info) {
                        Ok(reason) => HyperlightExit::Debug(reason),
                        Err(e) => {
                            log_then_return!("Error getting stop reason: {:?}", e);
                        }
                    }
                }
                other => {
                    crate::debug!("mshv Other Exit: Exit: {:#?} \n {:#?}", other, &self);
                    log_then_return!("unknown Hyper-V run message type {:?}", other);
                }
            },
            Err(e) => match e.errno() {
                // We send a signal to the thread to cancel execution; this results in EINTR
                // being returned by the vCPU run call, so we return Cancelled
                libc::EINTR => {
                    // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or
                    // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it
                    if cancel_requested {
                        self.interrupt_handle
                            .cancel_requested
                            .store(false, Ordering::Relaxed);
                        HyperlightExit::Cancelled()
                    } else {
                        #[cfg(gdb)]
                        if debug_interrupt {
                            self.interrupt_handle
                                .debug_interrupt
                                .store(false, Ordering::Relaxed);

                            // If the vCPU was stopped because of an interrupt, we need to
                            // return a special exit reason so that the gdb thread can handle it
                            // and resume execution
                            HyperlightExit::Debug(VcpuStopReason::Interrupt)
                        } else {
                            HyperlightExit::Retry()
                        }

                        #[cfg(not(gdb))]
                        HyperlightExit::Retry()
                    }
                }
                libc::EAGAIN => HyperlightExit::Retry(),
                _ => {
                    crate::debug!("mshv Error - Details: Error: {} \n {:#?}", e, &self);
                    log_then_return!("Error running VCPU {:?}", e);
                }
            },
        };
        Ok(result)
    }
817
818    #[instrument(skip_all, parent = Span::current(), level = "Trace")]
819    fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor {
820        self as &mut dyn Hypervisor
821    }
822
823    fn interrupt_handle(&self) -> Arc<dyn InterruptHandle> {
824        self.interrupt_handle.clone()
825    }
826
827    #[cfg(crashdump)]
828    fn crashdump_context(&self) -> Result<Option<super::crashdump::CrashDumpContext>> {
829        if self.rt_cfg.guest_core_dump {
830            let mut regs = [0; 27];
831
832            let vcpu_regs = self.vcpu_fd.get_regs()?;
833            let sregs = self.vcpu_fd.get_sregs()?;
834            let xsave = self.vcpu_fd.get_xsave()?;
835
836            // Set up the registers for the crash dump
837            regs[0] = vcpu_regs.r15; // r15
838            regs[1] = vcpu_regs.r14; // r14
839            regs[2] = vcpu_regs.r13; // r13
840            regs[3] = vcpu_regs.r12; // r12
841            regs[4] = vcpu_regs.rbp; // rbp
842            regs[5] = vcpu_regs.rbx; // rbx
843            regs[6] = vcpu_regs.r11; // r11
844            regs[7] = vcpu_regs.r10; // r10
845            regs[8] = vcpu_regs.r9; // r9
846            regs[9] = vcpu_regs.r8; // r8
847            regs[10] = vcpu_regs.rax; // rax
848            regs[11] = vcpu_regs.rcx; // rcx
849            regs[12] = vcpu_regs.rdx; // rdx
850            regs[13] = vcpu_regs.rsi; // rsi
851            regs[14] = vcpu_regs.rdi; // rdi
852            regs[15] = 0; // orig rax
853            regs[16] = vcpu_regs.rip; // rip
854            regs[17] = sregs.cs.selector as u64; // cs
855            regs[18] = vcpu_regs.rflags; // eflags
856            regs[19] = vcpu_regs.rsp; // rsp
857            regs[20] = sregs.ss.selector as u64; // ss
858            regs[21] = sregs.fs.base; // fs_base
859            regs[22] = sregs.gs.base; // gs_base
860            regs[23] = sregs.ds.selector as u64; // ds
861            regs[24] = sregs.es.selector as u64; // es
862            regs[25] = sregs.fs.selector as u64; // fs
863            regs[26] = sregs.gs.selector as u64; // gs
864
865            // Get the filename from the binary path
866            let filename = self.rt_cfg.binary_path.clone().and_then(|path| {
867                Path::new(&path)
868                    .file_name()
869                    .and_then(|name| name.to_os_string().into_string().ok())
870            });
871
872            Ok(Some(crashdump::CrashDumpContext::new(
873                &self.mem_regions,
874                regs,
875                xsave.buffer.to_vec(),
876                self.entrypoint,
877                self.rt_cfg.binary_path.clone(),
878                filename,
879            )))
880        } else {
881            Ok(None)
882        }
883    }
884
885    #[cfg(gdb)]
886    fn handle_debug(
887        &mut self,
888        dbg_mem_access_fn: std::sync::Arc<
889            std::sync::Mutex<dyn super::handlers::DbgMemAccessHandlerCaller>,
890        >,
891        stop_reason: VcpuStopReason,
892    ) -> Result<()> {
893        if self.debug.is_none() {
894            return Err(new_error!("Debugging is not enabled"));
895        }
896
897        match stop_reason {
898            // If the vCPU stopped because of a crash, we need to handle it differently
899            // We do not want to allow resuming execution or placing breakpoints
900            // because the guest has crashed.
901            // We only allow reading registers and memory
902            VcpuStopReason::Crash => {
903                self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason))
904                    .map_err(|e| {
905                        new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e)
906                    })?;
907
908                loop {
909                    log::debug!("Debug wait for event to resume vCPU");
910                    // Wait for a message from gdb
911                    let req = self.recv_dbg_msg()?;
912
913                    // Flag to store if we should deny continue or step requests
914                    let mut deny_continue = false;
915                    // Flag to store if we should detach from the gdb session
916                    let mut detach = false;
917
918                    let response = match req {
919                        // Allow the detach request to disable debugging by continuing resuming
920                        // hypervisor crash error reporting
921                        DebugMsg::DisableDebug => {
922                            detach = true;
923                            DebugResponse::DisableDebug
924                        }
925                        // Do not allow continue or step requests
926                        DebugMsg::Continue | DebugMsg::Step => {
927                            deny_continue = true;
928                            DebugResponse::NotAllowed
929                        }
930                        // Do not allow adding/removing breakpoints and writing to memory or registers
931                        DebugMsg::AddHwBreakpoint(_)
932                        | DebugMsg::AddSwBreakpoint(_)
933                        | DebugMsg::RemoveHwBreakpoint(_)
934                        | DebugMsg::RemoveSwBreakpoint(_)
935                        | DebugMsg::WriteAddr(_, _)
936                        | DebugMsg::WriteRegisters(_) => DebugResponse::NotAllowed,
937
938                        // For all other requests, we will process them normally
939                        _ => {
940                            let result = self.process_dbg_request(req, dbg_mem_access_fn.clone());
941                            match result {
942                                Ok(response) => response,
943                                Err(HyperlightError::TranslateGuestAddress(_)) => {
944                                    // Treat non fatal errors separately so the guest doesn't fail
945                                    DebugResponse::ErrorOccurred
946                                }
947                                Err(e) => {
948                                    log::error!("Error processing debug request: {:?}", e);
949                                    return Err(e);
950                                }
951                            }
952                        }
953                    };
954
955                    // Send the response to the request back to gdb
956                    self.send_dbg_msg(response)
957                        .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?;
958
959                    // If we are denying continue or step requests, the debugger assumes the
960                    // execution started so we need to report a stop reason as a crash and let
961                    // it request to read registers/memory to figure out what happened
962                    if deny_continue {
963                        self.send_dbg_msg(DebugResponse::VcpuStopped(VcpuStopReason::Crash))
964                            .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?;
965                    }
966
967                    // If we are detaching, we will break the loop and the Hypervisor will continue
968                    // to handle the Crash reason
969                    if detach {
970                        break;
971                    }
972                }
973            }
974            // If the vCPU stopped because of any other reason except a crash, we can handle it
975            // normally
976            _ => {
977                // Send the stop reason to the gdb thread
978                self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason))
979                    .map_err(|e| {
980                        new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e)
981                    })?;
982
983                loop {
984                    log::debug!("Debug wait for event to resume vCPU");
985                    // Wait for a message from gdb
986                    let req = self.recv_dbg_msg()?;
987
988                    let result = self.process_dbg_request(req, dbg_mem_access_fn.clone());
989
990                    let response = match result {
991                        Ok(response) => response,
992                        // Treat non fatal errors separately so the guest doesn't fail
993                        Err(HyperlightError::TranslateGuestAddress(_)) => {
994                            DebugResponse::ErrorOccurred
995                        }
996                        Err(e) => {
997                            return Err(e);
998                        }
999                    };
1000
1001                    let cont = matches!(
1002                        response,
1003                        DebugResponse::Continue | DebugResponse::Step | DebugResponse::DisableDebug
1004                    );
1005
1006                    self.send_dbg_msg(response)
1007                        .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?;
1008
1009                    // Check if we should continue execution
1010                    // We continue if the response is one of the following: Step, Continue, or DisableDebug
1011                    if cont {
1012                        break;
1013                    }
1014                }
1015            }
1016        }
1017
1018        Ok(())
1019    }
1020}
1021
1022impl Drop for HypervLinuxDriver {
1023    #[instrument(skip_all, parent = Span::current(), level = "Trace")]
1024    fn drop(&mut self) {
1025        self.interrupt_handle.dropped.store(true, Ordering::Relaxed);
1026        for region in &self.mem_regions {
1027            let mshv_region: mshv_user_mem_region = region.to_owned().into();
1028            match self.vm_fd.unmap_user_memory(mshv_region) {
1029                Ok(_) => (),
1030                Err(e) => error!("Failed to unmap user memory in HyperVOnLinux ({:?})", e),
1031            }
1032        }
1033    }
1034}
1035
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mem::memory_region::MemoryRegionVecBuilder;
    use crate::mem::shared_mem::{ExclusiveSharedMemory, SharedMemory};

    /// Small guest program that writes to port 0x3f8 and then halts.
    #[rustfmt::skip]
    const CODE: [u8; 12] = [
        0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
        0x00, 0xd8, /* add %bl, %al */
        0x04, b'0', /* add $'0', %al */
        0xee, /* out %al, (%dx) */
        /* send a 0 to indicate we're done */
        0xb0, b'\0', /* mov $'\0', %al */
        0xee, /* out %al, (%dx) */
        0xf4, /* HLT */
    ];

    /// Allocates `mem_size` bytes of exclusive shared memory and copies `code`
    /// into it at `load_offset`.
    ///
    /// Fails if `load_offset` is larger than `mem_size`, or if allocating or
    /// copying fails.
    fn shared_mem_with_code(
        code: &[u8],
        mem_size: usize,
        load_offset: usize,
    ) -> Result<Box<ExclusiveSharedMemory>> {
        if load_offset > mem_size {
            log_then_return!(
                "code load offset ({}) > memory size ({})",
                load_offset,
                mem_size
            );
        }

        let mut guest_mem = ExclusiveSharedMemory::new(mem_size)?;
        guest_mem.copy_from_slice(code, load_offset)?;
        Ok(Box::new(guest_mem))
    }

    /// Creating a driver over a single read/write/execute code region succeeds.
    /// The test is a no-op when no hypervisor is present.
    #[test]
    fn create_driver() {
        if !super::is_hypervisor_present() {
            return;
        }

        const GUEST_MEM_SIZE: usize = 0x3000;

        let guest_mem = shared_mem_with_code(CODE.as_slice(), GUEST_MEM_SIZE, 0).unwrap();

        // Stack, page table and entrypoint pointers are all created from 0
        let rsp_ptr = GuestPtr::try_from(0).unwrap();
        let pml4_ptr = GuestPtr::try_from(0).unwrap();
        let entrypoint_ptr = GuestPtr::try_from(0).unwrap();

        let rwx_flags =
            MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE;
        let mut region_builder = MemoryRegionVecBuilder::new(0, guest_mem.base_addr());
        region_builder.push_page_aligned(
            GUEST_MEM_SIZE,
            rwx_flags,
            crate::mem::memory_region::MemoryRegionType::Code,
        );

        let config = SandboxConfiguration::default();

        // The runtime config argument only exists when crash dumps are compiled in
        #[cfg(crashdump)]
        let rt_cfg = SandboxRuntimeConfig {
            binary_path: None,
            #[cfg(gdb)]
            debug_info: None,
            guest_core_dump: true,
        };

        super::HypervLinuxDriver::new(
            region_builder.build(),
            entrypoint_ptr,
            rsp_ptr,
            pml4_ptr,
            &config,
            #[cfg(gdb)]
            None,
            #[cfg(crashdump)]
            rt_cfg,
        )
        .unwrap();
    }
}