hyperlight_host/hypervisor/
hyperv_linux.rs

1/*
2Copyright 2024 The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17#[cfg(mshv2)]
18extern crate mshv_bindings2 as mshv_bindings;
19#[cfg(mshv2)]
20extern crate mshv_ioctls2 as mshv_ioctls;
21
22#[cfg(mshv3)]
23extern crate mshv_bindings3 as mshv_bindings;
24#[cfg(mshv3)]
25extern crate mshv_ioctls3 as mshv_ioctls;
26
27use std::fmt::{Debug, Formatter};
28
29use log::error;
30#[cfg(mshv2)]
31use mshv_bindings::hv_message;
32use mshv_bindings::{
33    hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA,
34    hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, hv_register_assoc,
35    hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_user_mem_region,
36    FloatingPointUnit, SegmentRegister, SpecialRegisters, StandardRegisters,
37};
38#[cfg(mshv3)]
39use mshv_bindings::{
40    hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES,
41    hv_partition_synthetic_processor_features,
42};
43use mshv_ioctls::{Mshv, VcpuFd, VmFd};
44use tracing::{instrument, Span};
45
46use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT};
47#[cfg(gdb)]
48use super::handlers::DbgMemAccessHandlerWrapper;
49use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper};
50use super::{
51    Hypervisor, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR,
52    CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE,
53};
54use crate::hypervisor::hypervisor_handler::HypervisorHandler;
55use crate::hypervisor::HyperlightExit;
56use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
57use crate::mem::ptr::{GuestPtr, RawPtr};
58use crate::{log_then_return, new_error, Result};
59
60/// Determine whether the HyperV for Linux hypervisor API is present
61/// and functional.
62#[instrument(skip_all, parent = Span::current(), level = "Trace")]
63pub(crate) fn is_hypervisor_present() -> bool {
64    match Mshv::open_with_cloexec(true) {
65        Ok(fd) => {
66            unsafe {
67                libc::close(fd);
68            } // must explicitly close fd to avoid a leak
69            true
70        }
71        Err(e) => {
72            log::info!("Error creating MSHV object: {:?}", e);
73            false
74        }
75    }
76}
77
/// A Hypervisor driver for HyperV-on-Linux. This hypervisor is often
/// called the Microsoft Hypervisor (MSHV)
pub(super) struct HypervLinuxDriver {
    /// Handle returned by `Mshv::new()`. It is not read after construction
    /// in this file; it is stored so it lives as long as the driver.
    _mshv: Mshv,
    /// VM handle used to map the memory regions in `new` and to unmap them
    /// again when the driver is dropped.
    vm_fd: VmFd,
    /// Handle for vCPU 0, used for every register read/write and for
    /// running the guest.
    vcpu_fd: VcpuFd,
    /// Absolute address of the guest entrypoint; loaded into RIP by
    /// `initialise`.
    entrypoint: u64,
    /// The memory regions that were mapped into the VM by `new`.
    mem_regions: Vec<MemoryRegion>,
    /// Stack pointer handed to `new`; `initialise` loads it into RSP before
    /// running the guest and restores RSP to it afterwards.
    orig_rsp: GuestPtr,
}
88
89impl HypervLinuxDriver {
90    /// Create a new `HypervLinuxDriver`, complete with all registers
91    /// set up to execute a Hyperlight binary inside a HyperV-powered
92    /// sandbox on Linux.
93    ///
94    /// While registers are set up, they will not have been applied to
95    /// the underlying virtual CPU after this function returns. Call the
96    /// `apply_registers` method to do that, or more likely call
97    /// `initialise` to do it for you.
98    #[instrument(skip_all, parent = Span::current(), level = "Trace")]
99    pub(super) fn new(
100        mem_regions: Vec<MemoryRegion>,
101        entrypoint_ptr: GuestPtr,
102        rsp_ptr: GuestPtr,
103        pml4_ptr: GuestPtr,
104    ) -> Result<Self> {
105        let mshv = Mshv::new()?;
106        let pr = Default::default();
107        #[cfg(mshv2)]
108        let vm_fd = mshv.create_vm_with_config(&pr)?;
109        #[cfg(mshv3)]
110        let vm_fd = {
111            // It's important to avoid create_vm() and explicitly use
112            // create_vm_with_args() with an empty arguments structure
113            // here, because otherwise the partition is set up with a SynIC.
114
115            let vm_fd = mshv.create_vm_with_args(&pr)?;
116            let features: hv_partition_synthetic_processor_features = Default::default();
117            vm_fd.hvcall_set_partition_property(
118                hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES,
119                unsafe { features.as_uint64[0] },
120            )?;
121            vm_fd.initialize()?;
122            vm_fd
123        };
124
125        let mut vcpu_fd = vm_fd.create_vcpu(0)?;
126
127        mem_regions.iter().try_for_each(|region| {
128            let mshv_region = region.to_owned().into();
129            vm_fd.map_user_memory(mshv_region)
130        })?;
131
132        Self::setup_initial_sregs(&mut vcpu_fd, pml4_ptr.absolute()?)?;
133
134        Ok(Self {
135            _mshv: mshv,
136            vm_fd,
137            vcpu_fd,
138            mem_regions,
139            entrypoint: entrypoint_ptr.absolute()?,
140            orig_rsp: rsp_ptr,
141        })
142    }
143
144    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
145    fn setup_initial_sregs(vcpu: &mut VcpuFd, pml4_addr: u64) -> Result<()> {
146        let sregs = SpecialRegisters {
147            cr0: CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP,
148            cr4: CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT,
149            cr3: pml4_addr,
150            efer: EFER_LME | EFER_LMA | EFER_SCE | EFER_NX,
151            cs: SegmentRegister {
152                type_: 11,
153                present: 1,
154                s: 1,
155                l: 1,
156                ..Default::default()
157            },
158            tr: SegmentRegister {
159                limit: 65535,
160                type_: 11,
161                present: 1,
162                ..Default::default()
163            },
164            ..Default::default()
165        };
166        vcpu.set_sregs(&sregs)?;
167        Ok(())
168    }
169}
170
171impl Debug for HypervLinuxDriver {
172    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
173        let mut f = f.debug_struct("Hyperv Linux Driver");
174
175        f.field("Entrypoint", &self.entrypoint)
176            .field("Original RSP", &self.orig_rsp);
177
178        for region in &self.mem_regions {
179            f.field("Memory Region", &region);
180        }
181
182        let regs = self.vcpu_fd.get_regs();
183
184        if let Ok(regs) = regs {
185            f.field("Registers", &regs);
186        }
187
188        let sregs = self.vcpu_fd.get_sregs();
189
190        if let Ok(sregs) = sregs {
191            f.field("Special Registers", &sregs);
192        }
193
194        f.finish()
195    }
196}
197
198impl Hypervisor for HypervLinuxDriver {
199    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
200    fn initialise(
201        &mut self,
202        peb_addr: RawPtr,
203        seed: u64,
204        page_size: u32,
205        outb_hdl: OutBHandlerWrapper,
206        mem_access_hdl: MemAccessHandlerWrapper,
207        hv_handler: Option<HypervisorHandler>,
208        #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
209    ) -> Result<()> {
210        let regs = StandardRegisters {
211            rip: self.entrypoint,
212            rsp: self.orig_rsp.absolute()?,
213            rflags: 2, //bit 1 of rlags is required to be set
214
215            // function args
216            rcx: peb_addr.into(),
217            rdx: seed,
218            r8: page_size.into(),
219            r9: self.get_max_log_level().into(),
220
221            ..Default::default()
222        };
223        self.vcpu_fd.set_regs(&regs)?;
224
225        VirtualCPU::run(
226            self.as_mut_hypervisor(),
227            hv_handler,
228            outb_hdl,
229            mem_access_hdl,
230            #[cfg(gdb)]
231            dbg_mem_access_fn,
232        )?;
233
234        // reset RSP to what it was before initialise
235        self.vcpu_fd.set_regs(&StandardRegisters {
236            rsp: self.orig_rsp.absolute()?,
237            rflags: 2, //bit 1 of rlags is required to be set
238            ..Default::default()
239        })?;
240        Ok(())
241    }
242
243    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
244    fn dispatch_call_from_host(
245        &mut self,
246        dispatch_func_addr: RawPtr,
247        outb_handle_fn: OutBHandlerWrapper,
248        mem_access_fn: MemAccessHandlerWrapper,
249        hv_handler: Option<HypervisorHandler>,
250        #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
251    ) -> Result<()> {
252        // Reset general purpose registers except RSP, then set RIP
253        let rsp_before = self.vcpu_fd.get_regs()?.rsp;
254        let regs = StandardRegisters {
255            rip: dispatch_func_addr.into(),
256            rsp: rsp_before,
257            rflags: 2, //bit 1 of rlags is required to be set
258            ..Default::default()
259        };
260        self.vcpu_fd.set_regs(&regs)?;
261
262        // reset fpu state
263        let fpu = FloatingPointUnit {
264            fcw: FP_CONTROL_WORD_DEFAULT,
265            ftwx: FP_TAG_WORD_DEFAULT,
266            mxcsr: MXCSR_DEFAULT,
267            ..Default::default() // zero out the rest
268        };
269        self.vcpu_fd.set_fpu(&fpu)?;
270
271        // run
272        VirtualCPU::run(
273            self.as_mut_hypervisor(),
274            hv_handler,
275            outb_handle_fn,
276            mem_access_fn,
277            #[cfg(gdb)]
278            dbg_mem_access_fn,
279        )?;
280
281        // reset RSP to what it was before function call
282        self.vcpu_fd.set_regs(&StandardRegisters {
283            rsp: rsp_before,
284            rflags: 2, //bit 1 of rlags is required to be set
285            ..Default::default()
286        })?;
287        Ok(())
288    }
289
290    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
291    fn handle_io(
292        &mut self,
293        port: u16,
294        data: Vec<u8>,
295        rip: u64,
296        instruction_length: u64,
297        outb_handle_fn: OutBHandlerWrapper,
298    ) -> Result<()> {
299        let payload = data[..8].try_into()?;
300        outb_handle_fn
301            .try_lock()
302            .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?
303            .call(port, u64::from_le_bytes(payload))?;
304
305        // update rip
306        self.vcpu_fd.set_reg(&[hv_register_assoc {
307            name: hv_register_name_HV_X64_REGISTER_RIP,
308            value: hv_register_value {
309                reg64: rip + instruction_length,
310            },
311            ..Default::default()
312        }])?;
313        Ok(())
314    }
315
316    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
317    fn run(&mut self) -> Result<super::HyperlightExit> {
318        const HALT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_HALT;
319        const IO_PORT_INTERCEPT_MESSAGE: hv_message_type =
320            hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT;
321        const UNMAPPED_GPA_MESSAGE: hv_message_type = hv_message_type_HVMSG_UNMAPPED_GPA;
322        const INVALID_GPA_ACCESS_MESSAGE: hv_message_type = hv_message_type_HVMSG_GPA_INTERCEPT;
323
324        #[cfg(mshv2)]
325        let run_result = {
326            let hv_message: hv_message = Default::default();
327            &self.vcpu_fd.run(hv_message)
328        };
329        #[cfg(mshv3)]
330        let run_result = &self.vcpu_fd.run();
331
332        let result = match run_result {
333            Ok(m) => match m.header.message_type {
334                HALT_MESSAGE => {
335                    crate::debug!("mshv - Halt Details : {:#?}", &self);
336                    HyperlightExit::Halt()
337                }
338                IO_PORT_INTERCEPT_MESSAGE => {
339                    let io_message = m.to_ioport_info()?;
340                    let port_number = io_message.port_number;
341                    let rip = io_message.header.rip;
342                    let rax = io_message.rax;
343                    let instruction_length = io_message.header.instruction_length() as u64;
344                    crate::debug!("mshv IO Details : \nPort : {}\n{:#?}", port_number, &self);
345                    HyperlightExit::IoOut(
346                        port_number,
347                        rax.to_le_bytes().to_vec(),
348                        rip,
349                        instruction_length,
350                    )
351                }
352                UNMAPPED_GPA_MESSAGE => {
353                    let mimo_message = m.to_memory_info()?;
354                    let addr = mimo_message.guest_physical_address;
355                    crate::debug!(
356                        "mshv MMIO unmapped GPA -Details: Address: {} \n {:#?}",
357                        addr,
358                        &self
359                    );
360                    HyperlightExit::Mmio(addr)
361                }
362                INVALID_GPA_ACCESS_MESSAGE => {
363                    let mimo_message = m.to_memory_info()?;
364                    let gpa = mimo_message.guest_physical_address;
365                    let access_info = MemoryRegionFlags::try_from(mimo_message)?;
366                    crate::debug!(
367                        "mshv MMIO invalid GPA access -Details: Address: {} \n {:#?}",
368                        gpa,
369                        &self
370                    );
371                    match self.get_memory_access_violation(
372                        gpa as usize,
373                        &self.mem_regions,
374                        access_info,
375                    ) {
376                        Some(access_info_violation) => access_info_violation,
377                        None => HyperlightExit::Mmio(gpa),
378                    }
379                }
380                other => {
381                    crate::debug!("mshv Other Exit: Exit: {:#?} \n {:#?}", other, &self);
382                    log_then_return!("unknown Hyper-V run message type {:?}", other);
383                }
384            },
385            Err(e) => match e.errno() {
386                // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled
387                libc::EINTR => HyperlightExit::Cancelled(),
388                libc::EAGAIN => HyperlightExit::Retry(),
389                _ => {
390                    crate::debug!("mshv Error - Details: Error: {} \n {:#?}", e, &self);
391                    log_then_return!("Error running VCPU {:?}", e);
392                }
393            },
394        };
395        Ok(result)
396    }
397
398    #[instrument(skip_all, parent = Span::current(), level = "Trace")]
399    fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor {
400        self as &mut dyn Hypervisor
401    }
402
403    #[cfg(crashdump)]
404    fn get_memory_regions(&self) -> &[MemoryRegion] {
405        &self.mem_regions
406    }
407}
408
409impl Drop for HypervLinuxDriver {
410    #[instrument(skip_all, parent = Span::current(), level = "Trace")]
411    fn drop(&mut self) {
412        for region in &self.mem_regions {
413            let mshv_region: mshv_user_mem_region = region.to_owned().into();
414            match self.vm_fd.unmap_user_memory(mshv_region) {
415                Ok(_) => (),
416                Err(e) => error!("Failed to unmap user memory in HyperVOnLinux ({:?})", e),
417            }
418        }
419    }
420}
421
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mem::memory_region::MemoryRegionVecBuilder;
    use crate::mem::shared_mem::{ExclusiveSharedMemory, SharedMemory};

    /// Small guest program used as the payload of the test memory.
    #[rustfmt::skip]
    const CODE: [u8; 12] = [
        0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
        0x00, 0xd8, /* add %bl, %al */
        0x04, b'0', /* add $'0', %al */
        0xee, /* out %al, (%dx) */
        /* send a 0 to indicate we're done */
        0xb0, b'\0', /* mov $'\0', %al */
        0xee, /* out %al, (%dx) */
        0xf4, /* HLT */
    ];

    /// Allocate `mem_size` bytes of exclusive shared memory and copy
    /// `code` into it at `load_offset`.
    ///
    /// Fails if `load_offset` is larger than `mem_size`, or if the
    /// allocation or the copy fails.
    fn shared_mem_with_code(
        code: &[u8],
        mem_size: usize,
        load_offset: usize,
    ) -> Result<Box<ExclusiveSharedMemory>> {
        if load_offset > mem_size {
            log_then_return!(
                "code load offset ({}) > memory size ({})",
                load_offset,
                mem_size
            );
        }

        let mut memory = ExclusiveSharedMemory::new(mem_size)?;
        memory.copy_from_slice(code, load_offset)?;
        Ok(Box::new(memory))
    }

    /// Creating a driver over a single read/write/execute code region
    /// succeeds. The test is a no-op when the hypervisor is not present.
    #[test]
    fn create_driver() {
        if !super::is_hypervisor_present() {
            return;
        }

        const MEM_SIZE: usize = 0x3000;
        let guest_mem = shared_mem_with_code(CODE.as_slice(), MEM_SIZE, 0).unwrap();

        let rsp_ptr = GuestPtr::try_from(0).unwrap();
        let pml4_ptr = GuestPtr::try_from(0).unwrap();
        let entrypoint_ptr = GuestPtr::try_from(0).unwrap();

        let region_flags =
            MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE;
        let mut builder = MemoryRegionVecBuilder::new(0, guest_mem.base_addr());
        builder.push_page_aligned(
            MEM_SIZE,
            region_flags,
            crate::mem::memory_region::MemoryRegionType::Code,
        );

        super::HypervLinuxDriver::new(builder.build(), entrypoint_ptr, rsp_ptr, pml4_ptr).unwrap();
    }
}