hyperlight_host/hypervisor/
kvm.rs

1/*
2Copyright 2024 The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17use std::convert::TryFrom;
18use std::fmt::Debug;
19#[cfg(gdb)]
20use std::sync::{Arc, Mutex};
21
22use kvm_bindings::{kvm_fpu, kvm_regs, kvm_userspace_memory_region, KVM_MEM_READONLY};
23use kvm_ioctls::Cap::UserMemory;
24use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd};
25use tracing::{instrument, Span};
26
27use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT};
28#[cfg(gdb)]
29use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, VcpuStopReason};
30#[cfg(gdb)]
31use super::handlers::DbgMemAccessHandlerWrapper;
32use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper};
33use super::{
34    HyperlightExit, Hypervisor, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP,
35    CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE,
36};
37use crate::hypervisor::hypervisor_handler::HypervisorHandler;
38use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
39use crate::mem::ptr::{GuestPtr, RawPtr};
40#[cfg(gdb)]
41use crate::HyperlightError;
42use crate::{log_then_return, new_error, Result};
43
44/// Return `true` if the KVM API is available, version 12, and has UserMemory capability, or `false` otherwise
45#[instrument(skip_all, parent = Span::current(), level = "Trace")]
46pub(crate) fn is_hypervisor_present() -> bool {
47    if let Ok(kvm) = Kvm::new() {
48        let api_version = kvm.get_api_version();
49        match api_version {
50            version if version == 12 && kvm.check_extension(UserMemory) => true,
51            12 => {
52                log::info!("KVM does not have KVM_CAP_USER_MEMORY capability");
53                false
54            }
55            version => {
56                log::info!("KVM GET_API_VERSION returned {}, expected 12", version);
57                false
58            }
59        }
60    } else {
61        log::info!("Error creating KVM object");
62        false
63    }
64}
65
66#[cfg(gdb)]
67mod debug {
68    use std::collections::HashMap;
69    use std::sync::{Arc, Mutex};
70
71    use hyperlight_common::mem::PAGE_SIZE;
72    use kvm_bindings::{
73        kvm_guest_debug, kvm_regs, KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP,
74        KVM_GUESTDBG_USE_HW_BP, KVM_GUESTDBG_USE_SW_BP,
75    };
76    use kvm_ioctls::VcpuFd;
77
78    use super::KVMDriver;
79    use crate::hypervisor::gdb::{DebugMsg, DebugResponse, VcpuStopReason, X86_64Regs};
80    use crate::hypervisor::handlers::DbgMemAccessHandlerCaller;
81    use crate::mem::layout::SandboxMemoryLayout;
82    use crate::{new_error, HyperlightError, Result};
83
84    /// Software Breakpoint size in memory
85    pub const SW_BP_SIZE: usize = 1;
86    /// Software Breakpoint opcode
87    const SW_BP_OP: u8 = 0xCC;
88    /// Software Breakpoint written to memory
89    pub const SW_BP: [u8; SW_BP_SIZE] = [SW_BP_OP];
90
    /// KVM Debug struct
    ///
    /// Abstracts the internal details of the KVM guest debugging settings:
    /// it tracks the stepping state and the installed breakpoints, and owns
    /// the `kvm_guest_debug` configuration that is sent to the vCPU.
    ///
    /// `KvmDebug::new` produces a configuration with guest debugging and
    /// software breakpoints enabled, while the derived `Default` is what
    /// `disable_debug` installs to reset the debug state.
    #[derive(Default)]
    pub struct KvmDebug {
        /// vCPU stepping state; updated after each successful call to
        /// `set_debug_config`
        single_step: bool,

        /// Addresses of the currently installed HW breakpoints
        hw_breakpoints: Vec<u64>,
        /// Original guest bytes that were overwritten to install SW
        /// breakpoints, keyed by the breakpoint address
        sw_breakpoints: HashMap<u64, [u8; SW_BP_SIZE]>,

        /// Configuration sent to KVM for enabling guest debug
        pub dbg_cfg: kvm_guest_debug,
    }
107
108    impl KvmDebug {
109        const MAX_NO_OF_HW_BP: usize = 4;
110
111        pub fn new() -> Self {
112            let dbg = kvm_guest_debug {
113                control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
114                ..Default::default()
115            };
116
117            Self {
118                single_step: false,
119                hw_breakpoints: vec![],
120                sw_breakpoints: HashMap::new(),
121                dbg_cfg: dbg,
122            }
123        }
124
125        /// This method sets the kvm debugreg fields to enable breakpoints at
126        /// specific addresses
127        ///
128        /// The first 4 debug registers are used to set the addresses
129        /// The 4th and 5th debug registers are obsolete and not used
130        /// The 7th debug register is used to enable the breakpoints
131        /// For more information see: DEBUG REGISTERS chapter in the architecture
132        /// manual
133        fn set_debug_config(&mut self, vcpu_fd: &VcpuFd, step: bool) -> Result<()> {
134            let addrs = &self.hw_breakpoints;
135
136            self.dbg_cfg.arch.debugreg = [0; 8];
137            for (k, addr) in addrs.iter().enumerate() {
138                self.dbg_cfg.arch.debugreg[k] = *addr;
139                self.dbg_cfg.arch.debugreg[7] |= 1 << (k * 2);
140            }
141
142            if !addrs.is_empty() {
143                self.dbg_cfg.control |= KVM_GUESTDBG_USE_HW_BP;
144            } else {
145                self.dbg_cfg.control &= !KVM_GUESTDBG_USE_HW_BP;
146            }
147
148            if step {
149                self.dbg_cfg.control |= KVM_GUESTDBG_SINGLESTEP;
150            } else {
151                self.dbg_cfg.control &= !KVM_GUESTDBG_SINGLESTEP;
152            }
153
154            log::debug!("Setting bp: {:?} cfg: {:?}", addrs, self.dbg_cfg);
155            vcpu_fd
156                .set_guest_debug(&self.dbg_cfg)
157                .map_err(|e| new_error!("Could not set guest debug: {:?}", e))?;
158
159            self.single_step = step;
160
161            Ok(())
162        }
163
164        /// Method that adds a breakpoint
165        fn add_breakpoint(&mut self, vcpu_fd: &VcpuFd, addr: u64) -> Result<bool> {
166            if self.hw_breakpoints.len() >= Self::MAX_NO_OF_HW_BP {
167                Ok(false)
168            } else if self.hw_breakpoints.contains(&addr) {
169                Ok(true)
170            } else {
171                self.hw_breakpoints.push(addr);
172                self.set_debug_config(vcpu_fd, self.single_step)?;
173
174                Ok(true)
175            }
176        }
177
178        /// Method that removes a breakpoint
179        fn remove_breakpoint(&mut self, vcpu_fd: &VcpuFd, addr: u64) -> Result<bool> {
180            if self.hw_breakpoints.contains(&addr) {
181                self.hw_breakpoints.retain(|&a| a != addr);
182                self.set_debug_config(vcpu_fd, self.single_step)?;
183
184                Ok(true)
185            } else {
186                Ok(false)
187            }
188        }
189    }
190
191    impl KVMDriver {
192        /// Resets the debug information to disable debugging
193        fn disable_debug(&mut self) -> Result<()> {
194            self.debug = Some(KvmDebug::default());
195
196            self.set_single_step(false)
197        }
198
199        /// Returns the instruction pointer from the stopped vCPU
200        fn get_instruction_pointer(&self) -> Result<u64> {
201            let regs = self
202                .vcpu_fd
203                .get_regs()
204                .map_err(|e| new_error!("Could not retrieve registers from vCPU: {:?}", e))?;
205
206            Ok(regs.rip)
207        }
208
209        /// Sets or clears stepping for vCPU
210        fn set_single_step(&mut self, enable: bool) -> Result<()> {
211            let debug = self
212                .debug
213                .as_mut()
214                .ok_or_else(|| new_error!("Debug is not enabled"))?;
215
216            debug.set_debug_config(&self.vcpu_fd, enable)
217        }
218
219        /// Translates the guest address to physical address
220        fn translate_gva(&self, gva: u64) -> Result<u64> {
221            let tr = self
222                .vcpu_fd
223                .translate_gva(gva)
224                .map_err(|_| HyperlightError::TranslateGuestAddress(gva))?;
225
226            if tr.valid == 0 {
227                Err(HyperlightError::TranslateGuestAddress(gva))
228            } else {
229                Ok(tr.physical_address)
230            }
231        }
232
233        fn read_addrs(
234            &mut self,
235            mut gva: u64,
236            mut data: &mut [u8],
237            dbg_mem_access_fn: Arc<Mutex<dyn DbgMemAccessHandlerCaller>>,
238        ) -> Result<()> {
239            let data_len = data.len();
240            log::debug!("Read addr: {:X} len: {:X}", gva, data_len);
241
242            while !data.is_empty() {
243                let gpa = self.translate_gva(gva)?;
244
245                let read_len = std::cmp::min(
246                    data.len(),
247                    (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(),
248                );
249                let offset = gpa as usize - SandboxMemoryLayout::BASE_ADDRESS;
250
251                dbg_mem_access_fn
252                    .try_lock()
253                    .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?
254                    .read(offset, &mut data[..read_len])?;
255
256                data = &mut data[read_len..];
257                gva += read_len as u64;
258            }
259
260            Ok(())
261        }
262
263        fn write_addrs(
264            &mut self,
265            mut gva: u64,
266            mut data: &[u8],
267            dbg_mem_access_fn: Arc<Mutex<dyn DbgMemAccessHandlerCaller>>,
268        ) -> Result<()> {
269            let data_len = data.len();
270            log::debug!("Write addr: {:X} len: {:X}", gva, data_len);
271
272            while !data.is_empty() {
273                let gpa = self.translate_gva(gva)?;
274
275                let write_len = std::cmp::min(
276                    data.len(),
277                    (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(),
278                );
279                let offset = gpa as usize - SandboxMemoryLayout::BASE_ADDRESS;
280
281                dbg_mem_access_fn
282                    .try_lock()
283                    .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?
284                    .write(offset, data)?;
285
286                data = &data[write_len..];
287                gva += write_len as u64;
288            }
289
290            Ok(())
291        }
292
293        fn read_regs(&self, regs: &mut X86_64Regs) -> Result<()> {
294            log::debug!("Read registers");
295            let vcpu_regs = self
296                .vcpu_fd
297                .get_regs()
298                .map_err(|e| new_error!("Could not read guest registers: {:?}", e))?;
299
300            regs.rax = vcpu_regs.rax;
301            regs.rbx = vcpu_regs.rbx;
302            regs.rcx = vcpu_regs.rcx;
303            regs.rdx = vcpu_regs.rdx;
304            regs.rsi = vcpu_regs.rsi;
305            regs.rdi = vcpu_regs.rdi;
306            regs.rbp = vcpu_regs.rbp;
307            regs.rsp = vcpu_regs.rsp;
308            regs.r8 = vcpu_regs.r8;
309            regs.r9 = vcpu_regs.r9;
310            regs.r10 = vcpu_regs.r10;
311            regs.r11 = vcpu_regs.r11;
312            regs.r12 = vcpu_regs.r12;
313            regs.r13 = vcpu_regs.r13;
314            regs.r14 = vcpu_regs.r14;
315            regs.r15 = vcpu_regs.r15;
316
317            regs.rip = vcpu_regs.rip;
318            regs.rflags = vcpu_regs.rflags;
319
320            Ok(())
321        }
322
323        fn write_regs(&self, regs: &X86_64Regs) -> Result<()> {
324            log::debug!("Write registers");
325            let new_regs = kvm_regs {
326                rax: regs.rax,
327                rbx: regs.rbx,
328                rcx: regs.rcx,
329                rdx: regs.rdx,
330                rsi: regs.rsi,
331                rdi: regs.rdi,
332                rbp: regs.rbp,
333                rsp: regs.rsp,
334                r8: regs.r8,
335                r9: regs.r9,
336                r10: regs.r10,
337                r11: regs.r11,
338                r12: regs.r12,
339                r13: regs.r13,
340                r14: regs.r14,
341                r15: regs.r15,
342
343                rip: regs.rip,
344                rflags: regs.rflags,
345            };
346
347            self.vcpu_fd
348                .set_regs(&new_regs)
349                .map_err(|e| new_error!("Could not write guest registers: {:?}", e))
350        }
351
352        fn add_hw_breakpoint(&mut self, addr: u64) -> Result<bool> {
353            let addr = self.translate_gva(addr)?;
354
355            if let Some(debug) = self.debug.as_mut() {
356                debug.add_breakpoint(&self.vcpu_fd, addr)
357            } else {
358                Ok(false)
359            }
360        }
361
362        fn remove_hw_breakpoint(&mut self, addr: u64) -> Result<bool> {
363            let addr = self.translate_gva(addr)?;
364
365            if let Some(debug) = self.debug.as_mut() {
366                debug.remove_breakpoint(&self.vcpu_fd, addr)
367            } else {
368                Ok(false)
369            }
370        }
371
372        fn add_sw_breakpoint(
373            &mut self,
374            addr: u64,
375            dbg_mem_access_fn: Arc<Mutex<dyn DbgMemAccessHandlerCaller>>,
376        ) -> Result<bool> {
377            let addr = {
378                let debug = self
379                    .debug
380                    .as_ref()
381                    .ok_or_else(|| new_error!("Debug is not enabled"))?;
382                let addr = self.translate_gva(addr)?;
383                if debug.sw_breakpoints.contains_key(&addr) {
384                    return Ok(true);
385                }
386
387                addr
388            };
389
390            let mut save_data = [0; SW_BP_SIZE];
391            self.read_addrs(addr, &mut save_data[..], dbg_mem_access_fn.clone())?;
392            self.write_addrs(addr, &SW_BP, dbg_mem_access_fn)?;
393
394            {
395                let debug = self
396                    .debug
397                    .as_mut()
398                    .ok_or_else(|| new_error!("Debug is not enabled"))?;
399                debug.sw_breakpoints.insert(addr, save_data);
400            }
401
402            Ok(true)
403        }
404
405        fn remove_sw_breakpoint(
406            &mut self,
407            addr: u64,
408            dbg_mem_access_fn: Arc<Mutex<dyn DbgMemAccessHandlerCaller>>,
409        ) -> Result<bool> {
410            let (ret, data) = {
411                let addr = self.translate_gva(addr)?;
412                let debug = self
413                    .debug
414                    .as_mut()
415                    .ok_or_else(|| new_error!("Debug is not enabled"))?;
416
417                if debug.sw_breakpoints.contains_key(&addr) {
418                    let save_data = debug
419                        .sw_breakpoints
420                        .remove(&addr)
421                        .ok_or_else(|| new_error!("Expected the hashmap to contain the address"))?;
422
423                    (true, Some(save_data))
424                } else {
425                    (false, None)
426                }
427            };
428
429            if ret {
430                self.write_addrs(addr, &data.unwrap(), dbg_mem_access_fn)?;
431            }
432
433            Ok(ret)
434        }
435
436        /// Gdb expects the target to be stopped when connected.
437        /// This method provides a way to set a breakpoint at the entry point
438        /// it does not keep this breakpoint set after the vCPU already stopped at the address
439        pub fn set_entrypoint_bp(&self) -> Result<()> {
440            if self.debug.is_some() {
441                log::debug!("Setting entrypoint bp {:X}", self.entrypoint);
442                let mut entrypoint_debug = KvmDebug::new();
443                entrypoint_debug.add_breakpoint(&self.vcpu_fd, self.entrypoint)?;
444
445                Ok(())
446            } else {
447                Ok(())
448            }
449        }
450
451        /// Get the reason the vCPU has stopped
452        pub fn get_stop_reason(&self) -> Result<VcpuStopReason> {
453            let debug = self
454                .debug
455                .as_ref()
456                .ok_or_else(|| new_error!("Debug is not enabled"))?;
457
458            if debug.single_step {
459                return Ok(VcpuStopReason::DoneStep);
460            }
461
462            let ip = self.get_instruction_pointer()?;
463            let gpa = self.translate_gva(ip)?;
464            if debug.sw_breakpoints.contains_key(&gpa) {
465                return Ok(VcpuStopReason::SwBp);
466            }
467
468            if debug.hw_breakpoints.contains(&gpa) {
469                return Ok(VcpuStopReason::HwBp);
470            }
471
472            if ip == self.entrypoint {
473                return Ok(VcpuStopReason::HwBp);
474            }
475
476            Ok(VcpuStopReason::Unknown)
477        }
478
479        pub fn process_dbg_request(
480            &mut self,
481            req: DebugMsg,
482            dbg_mem_access_fn: Arc<Mutex<dyn DbgMemAccessHandlerCaller>>,
483        ) -> Result<DebugResponse> {
484            match req {
485                DebugMsg::AddHwBreakpoint(addr) => self
486                    .add_hw_breakpoint(addr)
487                    .map(DebugResponse::AddHwBreakpoint),
488                DebugMsg::AddSwBreakpoint(addr) => self
489                    .add_sw_breakpoint(addr, dbg_mem_access_fn)
490                    .map(DebugResponse::AddSwBreakpoint),
491                DebugMsg::Continue => {
492                    self.set_single_step(false)?;
493                    Ok(DebugResponse::Continue)
494                }
495                DebugMsg::DisableDebug => {
496                    self.disable_debug()?;
497
498                    Ok(DebugResponse::DisableDebug)
499                }
500                DebugMsg::GetCodeSectionOffset => {
501                    let offset = dbg_mem_access_fn
502                        .try_lock()
503                        .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?
504                        .get_code_offset()?;
505
506                    Ok(DebugResponse::GetCodeSectionOffset(offset as u64))
507                }
508                DebugMsg::ReadAddr(addr, len) => {
509                    let mut data = vec![0u8; len];
510
511                    self.read_addrs(addr, &mut data, dbg_mem_access_fn)?;
512
513                    Ok(DebugResponse::ReadAddr(data))
514                }
515                DebugMsg::ReadRegisters => {
516                    let mut regs = X86_64Regs::default();
517
518                    self.read_regs(&mut regs)
519                        .map(|_| DebugResponse::ReadRegisters(regs))
520                }
521                DebugMsg::RemoveHwBreakpoint(addr) => self
522                    .remove_hw_breakpoint(addr)
523                    .map(DebugResponse::RemoveHwBreakpoint),
524                DebugMsg::RemoveSwBreakpoint(addr) => self
525                    .remove_sw_breakpoint(addr, dbg_mem_access_fn)
526                    .map(DebugResponse::RemoveSwBreakpoint),
527                DebugMsg::Step => {
528                    self.set_single_step(true)?;
529                    Ok(DebugResponse::Step)
530                }
531                DebugMsg::WriteAddr(addr, data) => {
532                    self.write_addrs(addr, &data, dbg_mem_access_fn)?;
533
534                    Ok(DebugResponse::WriteAddr)
535                }
536                DebugMsg::WriteRegisters(regs) => self
537                    .write_regs(&regs)
538                    .map(|_| DebugResponse::WriteRegisters),
539            }
540        }
541
542        pub fn recv_dbg_msg(&mut self) -> Result<DebugMsg> {
543            let gdb_conn = self
544                .gdb_conn
545                .as_mut()
546                .ok_or_else(|| new_error!("Debug is not enabled"))?;
547
548            gdb_conn.recv().map_err(|e| {
549                new_error!(
550                    "Got an error while waiting to receive a message from the gdb thread: {:?}",
551                    e
552                )
553            })
554        }
555
556        pub fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> {
557            log::debug!("Sending {:?}", cmd);
558
559            let gdb_conn = self
560                .gdb_conn
561                .as_mut()
562                .ok_or_else(|| new_error!("Debug is not enabled"))?;
563
564            gdb_conn.send(cmd).map_err(|e| {
565                new_error!(
566                    "Got an error while sending a response message to the gdb thread: {:?}",
567                    e
568                )
569            })
570        }
571    }
572}
573
/// A Hypervisor driver for KVM on Linux
///
/// Owns the KVM handles for a single VM with a single vCPU together with the
/// guest memory layout the VM was created with.
pub(super) struct KVMDriver {
    // NOTE(review): `_kvm` and `_vm_fd` are only stored, never read in this
    // file; presumably they are kept so the handles outlive `vcpu_fd`.
    _kvm: Kvm,
    _vm_fd: VmFd,
    // Handle used to run the vCPU and access its registers
    vcpu_fd: VcpuFd,
    // Guest address loaded into RIP by `initialise`
    entrypoint: u64,
    // Stack pointer the guest starts with; restored after `initialise`
    orig_rsp: GuestPtr,
    // Memory regions registered with the VM in `new`
    mem_regions: Vec<MemoryRegion>,

    // Guest debugging state; `Some` only when a gdb connection was provided
    #[cfg(gdb)]
    debug: Option<debug::KvmDebug>,
    // Channel used to exchange messages with the gdb thread
    #[cfg(gdb)]
    gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
}
588
589impl KVMDriver {
590    /// Create a new instance of a `KVMDriver`, with only control registers
591    /// set. Standard registers will not be set, and `initialise` must
592    /// be called to do so.
593    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
594    pub(super) fn new(
595        mem_regions: Vec<MemoryRegion>,
596        pml4_addr: u64,
597        entrypoint: u64,
598        rsp: u64,
599        #[cfg(gdb)] gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
600    ) -> Result<Self> {
601        let kvm = Kvm::new()?;
602
603        let vm_fd = kvm.create_vm_with_type(0)?;
604
605        let perm_flags =
606            MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE;
607
608        mem_regions.iter().enumerate().try_for_each(|(i, region)| {
609            let perm_flags = perm_flags.intersection(region.flags);
610            let kvm_region = kvm_userspace_memory_region {
611                slot: i as u32,
612                guest_phys_addr: region.guest_region.start as u64,
613                memory_size: (region.guest_region.end - region.guest_region.start) as u64,
614                userspace_addr: region.host_region.start as u64,
615                flags: match perm_flags {
616                    MemoryRegionFlags::READ => KVM_MEM_READONLY,
617                    _ => 0, // normal, RWX
618                },
619            };
620            unsafe { vm_fd.set_user_memory_region(kvm_region) }
621        })?;
622
623        let mut vcpu_fd = vm_fd.create_vcpu(0)?;
624        Self::setup_initial_sregs(&mut vcpu_fd, pml4_addr)?;
625
626        #[cfg(gdb)]
627        let (debug, gdb_conn) = if let Some(gdb_conn) = gdb_conn {
628            (Some(debug::KvmDebug::new()), Some(gdb_conn))
629        } else {
630            (None, None)
631        };
632
633        let rsp_gp = GuestPtr::try_from(RawPtr::from(rsp))?;
634
635        let ret = Self {
636            _kvm: kvm,
637            _vm_fd: vm_fd,
638            vcpu_fd,
639            entrypoint,
640            orig_rsp: rsp_gp,
641            mem_regions,
642
643            #[cfg(gdb)]
644            debug,
645            #[cfg(gdb)]
646            gdb_conn,
647        };
648
649        #[cfg(gdb)]
650        ret.set_entrypoint_bp()?;
651
652        Ok(ret)
653    }
654
655    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
656    fn setup_initial_sregs(vcpu_fd: &mut VcpuFd, pml4_addr: u64) -> Result<()> {
657        // setup paging and IA-32e (64-bit) mode
658        let mut sregs = vcpu_fd.get_sregs()?;
659        sregs.cr3 = pml4_addr;
660        sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;
661        sregs.cr0 = CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP;
662        sregs.efer = EFER_LME | EFER_LMA | EFER_SCE | EFER_NX;
663        sregs.cs.l = 1; // required for 64-bit mode
664        vcpu_fd.set_sregs(&sregs)?;
665        Ok(())
666    }
667}
668
669impl Debug for KVMDriver {
670    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
671        let mut f = f.debug_struct("KVM Driver");
672        // Output each memory region
673
674        for region in &self.mem_regions {
675            f.field("Memory Region", &region);
676        }
677        let regs = self.vcpu_fd.get_regs();
678        // check that regs is OK and then set field in debug struct
679
680        if let Ok(regs) = regs {
681            f.field("Registers", &regs);
682        }
683
684        let sregs = self.vcpu_fd.get_sregs();
685
686        // check that sregs is OK and then set field in debug struct
687
688        if let Ok(sregs) = sregs {
689            f.field("Special Registers", &sregs);
690        }
691
692        f.finish()
693    }
694}
695
696impl Hypervisor for KVMDriver {
697    /// Implementation of initialise for Hypervisor trait.
698    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
699    fn initialise(
700        &mut self,
701        peb_addr: RawPtr,
702        seed: u64,
703        page_size: u32,
704        outb_hdl: OutBHandlerWrapper,
705        mem_access_hdl: MemAccessHandlerWrapper,
706        hv_handler: Option<HypervisorHandler>,
707        #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
708    ) -> Result<()> {
709        let regs = kvm_regs {
710            rip: self.entrypoint,
711            rsp: self.orig_rsp.absolute()?,
712
713            // function args
714            rcx: peb_addr.into(),
715            rdx: seed,
716            r8: page_size.into(),
717            r9: self.get_max_log_level().into(),
718
719            ..Default::default()
720        };
721        self.vcpu_fd.set_regs(&regs)?;
722
723        VirtualCPU::run(
724            self.as_mut_hypervisor(),
725            hv_handler,
726            outb_hdl,
727            mem_access_hdl,
728            #[cfg(gdb)]
729            dbg_mem_access_fn,
730        )?;
731
732        // reset RSP to what it was before initialise
733        self.vcpu_fd.set_regs(&kvm_regs {
734            rsp: self.orig_rsp.absolute()?,
735            ..Default::default()
736        })?;
737        Ok(())
738    }
739
740    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
741    fn dispatch_call_from_host(
742        &mut self,
743        dispatch_func_addr: RawPtr,
744        outb_handle_fn: OutBHandlerWrapper,
745        mem_access_fn: MemAccessHandlerWrapper,
746        hv_handler: Option<HypervisorHandler>,
747        #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
748    ) -> Result<()> {
749        // Reset general purpose registers except RSP, then set RIP
750        let rsp_before = self.vcpu_fd.get_regs()?.rsp;
751        let regs = kvm_regs {
752            rip: dispatch_func_addr.into(),
753            rsp: rsp_before,
754            ..Default::default()
755        };
756        self.vcpu_fd.set_regs(&regs)?;
757
758        // reset fpu state
759        let fpu = kvm_fpu {
760            fcw: FP_CONTROL_WORD_DEFAULT,
761            ftwx: FP_TAG_WORD_DEFAULT,
762            mxcsr: MXCSR_DEFAULT,
763            ..Default::default() // zero out the rest
764        };
765        self.vcpu_fd.set_fpu(&fpu)?;
766
767        // run
768        VirtualCPU::run(
769            self.as_mut_hypervisor(),
770            hv_handler,
771            outb_handle_fn,
772            mem_access_fn,
773            #[cfg(gdb)]
774            dbg_mem_access_fn,
775        )?;
776
777        // reset RSP to what it was before function call
778        self.vcpu_fd.set_regs(&kvm_regs {
779            rsp: rsp_before,
780            ..Default::default()
781        })?;
782        Ok(())
783    }
784
785    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
786    fn handle_io(
787        &mut self,
788        port: u16,
789        data: Vec<u8>,
790        _rip: u64,
791        _instruction_length: u64,
792        outb_handle_fn: OutBHandlerWrapper,
793    ) -> Result<()> {
794        // KVM does not need RIP or instruction length, as it automatically sets the RIP
795
796        // The payload param for the outb_handle_fn is the first byte
797        // of the data array cast to an u64. Thus, we need to make sure
798        // the data array has at least one u8, then convert that to an u64
799        if data.is_empty() {
800            log_then_return!("no data was given in IO interrupt");
801        } else {
802            let payload_u64 = u64::from(data[0]);
803            outb_handle_fn
804                .try_lock()
805                .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?
806                .call(port, payload_u64)?;
807        }
808
809        Ok(())
810    }
811
812    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
813    fn run(&mut self) -> Result<HyperlightExit> {
814        let exit_reason = self.vcpu_fd.run();
815        let result = match exit_reason {
816            Ok(VcpuExit::Hlt) => {
817                crate::debug!("KVM - Halt Details : {:#?}", &self);
818                HyperlightExit::Halt()
819            }
820            Ok(VcpuExit::IoOut(port, data)) => {
821                // because vcpufd.run() mutably borrows self we cannot pass self to crate::debug! macro here
822                crate::debug!("KVM IO Details : \nPort : {}\nData : {:?}", port, data);
823                // KVM does not need to set RIP or instruction length so these are set to 0
824                HyperlightExit::IoOut(port, data.to_vec(), 0, 0)
825            }
826            Ok(VcpuExit::MmioRead(addr, _)) => {
827                crate::debug!("KVM MMIO Read -Details: Address: {} \n {:#?}", addr, &self);
828
829                match self.get_memory_access_violation(
830                    addr as usize,
831                    &self.mem_regions,
832                    MemoryRegionFlags::READ,
833                ) {
834                    Some(access_violation_exit) => access_violation_exit,
835                    None => HyperlightExit::Mmio(addr),
836                }
837            }
838            Ok(VcpuExit::MmioWrite(addr, _)) => {
839                crate::debug!("KVM MMIO Write -Details: Address: {} \n {:#?}", addr, &self);
840
841                match self.get_memory_access_violation(
842                    addr as usize,
843                    &self.mem_regions,
844                    MemoryRegionFlags::WRITE,
845                ) {
846                    Some(access_violation_exit) => access_violation_exit,
847                    None => HyperlightExit::Mmio(addr),
848                }
849            }
850            #[cfg(gdb)]
851            Ok(VcpuExit::Debug(_)) => match self.get_stop_reason() {
852                Ok(reason) => HyperlightExit::Debug(reason),
853                Err(e) => {
854                    log_then_return!("Error getting stop reason: {:?}", e);
855                }
856            },
857            Err(e) => match e.errno() {
858                // In case of the gdb feature, the timeout is not enabled, this
859                // exit is because of a signal sent from the gdb thread to the
860                // hypervisor thread to cancel execution
861                #[cfg(gdb)]
862                libc::EINTR => HyperlightExit::Debug(VcpuStopReason::Interrupt),
863                // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled
864                #[cfg(not(gdb))]
865                libc::EINTR => HyperlightExit::Cancelled(),
866                libc::EAGAIN => HyperlightExit::Retry(),
867                _ => {
868                    crate::debug!("KVM Error -Details: Address: {} \n {:#?}", e, &self);
869                    log_then_return!("Error running VCPU {:?}", e);
870                }
871            },
872            Ok(other) => {
873                crate::debug!("KVM Other Exit {:?}", other);
874                HyperlightExit::Unknown(format!("Unexpected KVM Exit {:?}", other))
875            }
876        };
877        Ok(result)
878    }
879
880    #[instrument(skip_all, parent = Span::current(), level = "Trace")]
881    fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor {
882        self as &mut dyn Hypervisor
883    }
884
885    #[cfg(crashdump)]
886    fn get_memory_regions(&self) -> &[MemoryRegion] {
887        &self.mem_regions
888    }
889
890    #[cfg(gdb)]
891    fn handle_debug(
892        &mut self,
893        dbg_mem_access_fn: Arc<Mutex<dyn super::handlers::DbgMemAccessHandlerCaller>>,
894        stop_reason: VcpuStopReason,
895    ) -> Result<()> {
896        self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason))
897            .map_err(|e| new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e))?;
898
899        loop {
900            log::debug!("Debug wait for event to resume vCPU");
901            // Wait for a message from gdb
902            let req = self.recv_dbg_msg()?;
903
904            let result = self.process_dbg_request(req, dbg_mem_access_fn.clone());
905
906            let response = match result {
907                Ok(response) => response,
908                // Treat non fatal errors separately so the guest doesn't fail
909                Err(HyperlightError::TranslateGuestAddress(_)) => DebugResponse::ErrorOccurred,
910                Err(e) => {
911                    return Err(e);
912                }
913            };
914
915            // If the command was either step or continue, we need to run the vcpu
916            let cont = matches!(
917                response,
918                DebugResponse::Step | DebugResponse::Continue | DebugResponse::DisableDebug
919            );
920
921            self.send_dbg_msg(response)
922                .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?;
923
924            if cont {
925                break;
926            }
927        }
928
929        Ok(())
930    }
931}
932
#[cfg(test)]
mod tests {
    use std::sync::{Arc, Mutex};

    #[cfg(gdb)]
    use crate::hypervisor::handlers::DbgMemAccessHandlerCaller;
    use crate::hypervisor::handlers::{MemAccessHandler, OutBHandler};
    use crate::hypervisor::tests::test_initialise;
    use crate::Result;

    /// Debug memory access handler whose operations all succeed without
    /// touching any memory.
    #[cfg(gdb)]
    struct DbgMemAccessHandler {}

    #[cfg(gdb)]
    impl DbgMemAccessHandlerCaller for DbgMemAccessHandler {
        /// Leaves `_data` untouched and reports success.
        fn read(&mut self, _offset: usize, _data: &mut [u8]) -> Result<()> {
            Ok(())
        }

        /// Discards `_data` and reports success.
        fn write(&mut self, _offset: usize, _data: &[u8]) -> Result<()> {
            Ok(())
        }

        /// Always reports a code offset of zero.
        fn get_code_offset(&mut self) -> Result<usize> {
            Ok(0)
        }
    }

    /// Runs the shared `test_initialise` helper with no-op handlers.
    ///
    /// Returns early, without asserting anything, when
    /// `is_hypervisor_present` reports that KVM is unavailable.
    #[test]
    fn test_init() {
        if !super::is_hypervisor_present() {
            return;
        }

        // Handlers that accept every call and do nothing.
        let outb_callback: Box<dyn FnMut(u16, u64) -> Result<()> + Send> = Box::new(|_, _| Ok(()));
        let outb: Arc<Mutex<OutBHandler>> = Arc::new(Mutex::new(OutBHandler::from(outb_callback)));

        let mem_access_callback: Box<dyn FnMut() -> Result<()> + Send> = Box::new(|| Ok(()));
        let mem_access = Arc::new(Mutex::new(MemAccessHandler::from(mem_access_callback)));

        #[cfg(gdb)]
        let dbg_mem_access = Arc::new(Mutex::new(DbgMemAccessHandler {}));

        test_initialise(
            outb,
            mem_access,
            #[cfg(gdb)]
            dbg_mem_access,
        )
        .unwrap();
    }
}