use std::convert::TryFrom;
use std::fmt::Debug;
use std::sync::Arc;
#[cfg(gdb)]
use std::sync::Mutex;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};

use kvm_bindings::{KVM_MEM_READONLY, kvm_fpu, kvm_regs, kvm_userspace_memory_region};
use kvm_ioctls::Cap::UserMemory;
use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd};
use log::LevelFilter;
use tracing::{Span, instrument};

use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT};
#[cfg(gdb)]
use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, KvmDebug, VcpuStopReason};
#[cfg(gdb)]
use super::handlers::DbgMemAccessHandlerWrapper;
use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper};
use super::{
    CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE,
    EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, HyperlightExit, Hypervisor, InterruptHandle,
    LinuxInterruptHandle, VirtualCPU,
};
#[cfg(gdb)]
use crate::HyperlightError;
use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
use crate::mem::ptr::{GuestPtr, RawPtr};
use crate::sandbox::SandboxConfiguration;
use crate::{Result, log_then_return, new_error};

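/// Determine whether KVM is available on this machine: `/dev/kvm` must be
/// openable, the API version reported by `KVM_GET_API_VERSION` must be 12, and
/// the `KVM_CAP_USER_MEMORY` capability must be present.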
#[instrument(skip_all, parent = Span::current(), level = "Trace")]
pub(crate) fn is_hypervisor_present() -> bool {
    if let Ok(kvm) = Kvm::new() {
        let api_version = kvm.get_api_version();
        match api_version {
            version if version == 12 && kvm.check_extension(UserMemory) => true,
            12 => {
                log::info!("KVM does not have KVM_CAP_USER_MEMORY capability");
                false
            }
            version => {
                log::info!("KVM GET_API_VERSION returned {}, expected 12", version);
                false
            }
        }
    } else {
        log::info!("KVM is not available on this system");
        false
    }
}

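/// Debugging (gdb) support for `KVMDriver`, compiled only when the `gdb`
/// feature is enabled.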
#[cfg(gdb)]
mod debug {
    use std::sync::{Arc, Mutex};

    use kvm_bindings::kvm_debug_exit_arch;

    use super::KVMDriver;
    use crate::hypervisor::gdb::{
        DebugMsg, DebugResponse, GuestDebug, KvmDebug, VcpuStopReason, X86_64Regs,
    };
    use crate::hypervisor::handlers::DbgMemAccessHandlerCaller;
    use crate::{Result, new_error};

    impl KVMDriver {
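        /// Reset the vCPU debug state and switch off single stepping.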
        fn disable_debug(&mut self) -> Result<()> {
            let mut debug = KvmDebug::default();

            debug.set_single_step(&self.vcpu_fd, false)?;

            self.debug = Some(debug);

            Ok(())
        }

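        /// Translate a KVM debug exit into the reason the vCPU stopped
        /// (breakpoint, single step, etc.), using the current debug state.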
        pub(crate) fn get_stop_reason(
            &mut self,
            debug_exit: kvm_debug_exit_arch,
        ) -> Result<VcpuStopReason> {
            let debug = self
                .debug
                .as_mut()
                .ok_or_else(|| new_error!("Debug is not enabled"))?;

            debug.get_stop_reason(&self.vcpu_fd, debug_exit, self.entrypoint)
        }

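        /// Process a request received from the GDB thread and build the
        /// corresponding response. Returns an error if debugging is not enabled.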
        pub(crate) fn process_dbg_request(
            &mut self,
            req: DebugMsg,
            dbg_mem_access_fn: Arc<Mutex<dyn DbgMemAccessHandlerCaller>>,
        ) -> Result<DebugResponse> {
            if let Some(debug) = self.debug.as_mut() {
                match req {
                    DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint(
                        debug
                            .add_hw_breakpoint(&self.vcpu_fd, addr)
                            .map_err(|e| {
                                log::error!("Failed to add hw breakpoint: {:?}", e);

                                e
                            })
                            .is_ok(),
                    )),
                    DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint(
                        debug
                            .add_sw_breakpoint(&self.vcpu_fd, addr, dbg_mem_access_fn)
                            .map_err(|e| {
                                log::error!("Failed to add sw breakpoint: {:?}", e);

                                e
                            })
                            .is_ok(),
                    )),
                    DebugMsg::Continue => {
                        debug.set_single_step(&self.vcpu_fd, false).map_err(|e| {
                            log::error!("Failed to continue execution: {:?}", e);

                            e
                        })?;

                        Ok(DebugResponse::Continue)
                    }
                    DebugMsg::DisableDebug => {
                        self.disable_debug().map_err(|e| {
                            log::error!("Failed to disable debugging: {:?}", e);

                            e
                        })?;

                        Ok(DebugResponse::DisableDebug)
                    }
                    DebugMsg::GetCodeSectionOffset => {
                        let offset = dbg_mem_access_fn
                            .try_lock()
                            .map_err(|e| {
                                new_error!("Error locking at {}:{}: {}", file!(), line!(), e)
                            })?
                            .get_code_offset()
                            .map_err(|e| {
                                log::error!("Failed to get code offset: {:?}", e);

                                e
                            })?;

                        Ok(DebugResponse::GetCodeSectionOffset(offset as u64))
                    }
                    DebugMsg::ReadAddr(addr, len) => {
                        let mut data = vec![0u8; len];

                        debug
                            .read_addrs(&self.vcpu_fd, addr, &mut data, dbg_mem_access_fn)
                            .map_err(|e| {
                                log::error!("Failed to read from address: {:?}", e);

                                e
                            })?;

                        Ok(DebugResponse::ReadAddr(data))
                    }
                    DebugMsg::ReadRegisters => {
                        let mut regs = X86_64Regs::default();

                        debug
                            .read_regs(&self.vcpu_fd, &mut regs)
                            .map_err(|e| {
                                log::error!("Failed to read registers: {:?}", e);

                                e
                            })
                            .map(|_| DebugResponse::ReadRegisters(regs))
                    }
                    DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint(
                        debug
                            .remove_hw_breakpoint(&self.vcpu_fd, addr)
                            .map_err(|e| {
                                log::error!("Failed to remove hw breakpoint: {:?}", e);

                                e
                            })
                            .is_ok(),
                    )),
                    DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint(
                        debug
                            .remove_sw_breakpoint(&self.vcpu_fd, addr, dbg_mem_access_fn)
                            .map_err(|e| {
                                log::error!("Failed to remove sw breakpoint: {:?}", e);

                                e
                            })
                            .is_ok(),
                    )),
                    DebugMsg::Step => {
                        debug.set_single_step(&self.vcpu_fd, true).map_err(|e| {
                            log::error!("Failed to enable step instruction: {:?}", e);

                            e
                        })?;

                        Ok(DebugResponse::Step)
                    }
                    DebugMsg::WriteAddr(addr, data) => {
                        debug
                            .write_addrs(&self.vcpu_fd, addr, &data, dbg_mem_access_fn)
                            .map_err(|e| {
                                log::error!("Failed to write to address: {:?}", e);

                                e
                            })?;

                        Ok(DebugResponse::WriteAddr)
                    }
                    DebugMsg::WriteRegisters(regs) => debug
                        .write_regs(&self.vcpu_fd, &regs)
                        .map_err(|e| {
                            log::error!("Failed to write registers: {:?}", e);

                            e
                        })
                        .map(|_| DebugResponse::WriteRegisters),
                }
            } else {
                Err(new_error!("Debugging is not enabled"))
            }
        }

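        /// Block until a message arrives from the GDB thread.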
        pub(crate) fn recv_dbg_msg(&mut self) -> Result<DebugMsg> {
            let gdb_conn = self
                .gdb_conn
                .as_mut()
                .ok_or_else(|| new_error!("Debug is not enabled"))?;

            gdb_conn.recv().map_err(|e| {
                new_error!(
                    "Got an error while waiting to receive a message from the gdb thread: {:?}",
                    e
                )
            })
        }

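        /// Send a response message to the GDB thread.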
        pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> {
            log::debug!("Sending {:?}", cmd);

            let gdb_conn = self
                .gdb_conn
                .as_mut()
                .ok_or_else(|| new_error!("Debug is not enabled"))?;

            gdb_conn.send(cmd).map_err(|e| {
                new_error!(
                    "Got an error while sending a response message to the gdb thread: {:?}",
                    e
                )
            })
        }
    }
}

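/// A Hypervisor driver for KVM on Linux.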
pub(crate) struct KVMDriver {
    _kvm: Kvm,
    _vm_fd: VmFd,
    vcpu_fd: VcpuFd,
    entrypoint: u64,
    orig_rsp: GuestPtr,
    mem_regions: Vec<MemoryRegion>,
    interrupt_handle: Arc<LinuxInterruptHandle>,

    #[cfg(gdb)]
    debug: Option<KvmDebug>,
    #[cfg(gdb)]
    gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
}

impl KVMDriver {
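    /// Create a new instance of a `KVMDriver`, mapping the given memory regions
    /// into the guest and configuring the vCPU so the guest starts at
    /// `entrypoint` with page tables rooted at `pml4_addr` and its stack at `rsp`.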
    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
    pub(crate) fn new(
        mem_regions: Vec<MemoryRegion>,
        pml4_addr: u64,
        entrypoint: u64,
        rsp: u64,
        config: &SandboxConfiguration,
        #[cfg(gdb)] gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
    ) -> Result<Self> {
        let kvm = Kvm::new()?;

        let vm_fd = kvm.create_vm_with_type(0)?;

        let perm_flags =
            MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE;

        mem_regions.iter().enumerate().try_for_each(|(i, region)| {
            let perm_flags = perm_flags.intersection(region.flags);
            let kvm_region = kvm_userspace_memory_region {
                slot: i as u32,
                guest_phys_addr: region.guest_region.start as u64,
                memory_size: (region.guest_region.end - region.guest_region.start) as u64,
                userspace_addr: region.host_region.start as u64,
                flags: match perm_flags {
                    MemoryRegionFlags::READ => KVM_MEM_READONLY,
                    _ => 0,
                },
            };
            unsafe { vm_fd.set_user_memory_region(kvm_region) }
        })?;

        let mut vcpu_fd = vm_fd.create_vcpu(0)?;
        Self::setup_initial_sregs(&mut vcpu_fd, pml4_addr)?;

        #[cfg(gdb)]
        let (debug, gdb_conn) = if let Some(gdb_conn) = gdb_conn {
            let mut debug = KvmDebug::new();
            // Add a hardware breakpoint at the entrypoint so the debugger gets
            // control before any guest code runs.
            debug.add_hw_breakpoint(&vcpu_fd, entrypoint)?;

            (Some(debug), Some(gdb_conn))
        } else {
            (None, None)
        };

        let rsp_gp = GuestPtr::try_from(RawPtr::from(rsp))?;

        let ret = Self {
            _kvm: kvm,
            _vm_fd: vm_fd,
            vcpu_fd,
            entrypoint,
            orig_rsp: rsp_gp,
            mem_regions,
            interrupt_handle: Arc::new(LinuxInterruptHandle {
                running: AtomicU64::new(0),
                cancel_requested: AtomicBool::new(false),
                #[cfg(all(
                    target_arch = "x86_64",
                    target_vendor = "unknown",
                    target_os = "linux",
                    target_env = "musl"
                ))]
                tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }),
                #[cfg(not(all(
                    target_arch = "x86_64",
                    target_vendor = "unknown",
                    target_os = "linux",
                    target_env = "musl"
                )))]
                tid: AtomicU64::new(unsafe { libc::pthread_self() }),
                retry_delay: config.get_interrupt_retry_delay(),
                dropped: AtomicBool::new(false),
                sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(),
            }),

            #[cfg(gdb)]
            debug,
            #[cfg(gdb)]
            gdb_conn,
        };
        Ok(ret)
    }

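    /// Set the special registers needed to run the guest in 64-bit long mode
    /// with paging enabled: CR3 points at the PML4, CR0/CR4/EFER enable
    /// protected mode, paging, SSE and long mode, and the code segment is
    /// marked 64-bit.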
    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
    fn setup_initial_sregs(vcpu_fd: &mut VcpuFd, pml4_addr: u64) -> Result<()> {
        let mut sregs = vcpu_fd.get_sregs()?;
        sregs.cr3 = pml4_addr;
        sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;
        sregs.cr0 = CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP;
        sregs.efer = EFER_LME | EFER_LMA | EFER_SCE | EFER_NX;
        sregs.cs.l = 1;
        vcpu_fd.set_sregs(&sregs)?;
        Ok(())
    }
}

impl Debug for KVMDriver {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut f = f.debug_struct("KVM Driver");
        for region in &self.mem_regions {
            f.field("Memory Region", &region);
        }
        let regs = self.vcpu_fd.get_regs();
        if let Ok(regs) = regs {
            f.field("Registers", &regs);
        }

        let sregs = self.vcpu_fd.get_sregs();

        if let Ok(sregs) = sregs {
            f.field("Special Registers", &sregs);
        }

        f.finish()
    }
}

impl Hypervisor for KVMDriver {
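    /// Initialise the vCPU registers so the guest entrypoint is called with the
    /// PEB address, seed, page size and maximum log level as arguments, then run
    /// the vCPU until guest initialisation completes.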
    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
    fn initialise(
        &mut self,
        peb_addr: RawPtr,
        seed: u64,
        page_size: u32,
        outb_hdl: OutBHandlerWrapper,
        mem_access_hdl: MemAccessHandlerWrapper,
        max_guest_log_level: Option<LevelFilter>,
        #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
    ) -> Result<()> {
        let max_guest_log_level: u64 = match max_guest_log_level {
            Some(level) => level as u64,
            None => self.get_max_log_level().into(),
        };

        let regs = kvm_regs {
            rip: self.entrypoint,
            rsp: self.orig_rsp.absolute()?,

            // arguments passed to the guest entrypoint (rdi/rsi/rdx/rcx are the
            // first integer-argument registers in the System V AMD64 convention)
            rdi: peb_addr.into(),
            rsi: seed,
            rdx: page_size.into(),
            rcx: max_guest_log_level,

            ..Default::default()
        };
        self.vcpu_fd.set_regs(&regs)?;

        VirtualCPU::run(
            self.as_mut_hypervisor(),
            outb_hdl,
            mem_access_hdl,
            #[cfg(gdb)]
            dbg_mem_access_fn,
        )?;

        Ok(())
    }

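    /// Run the guest dispatch function at `dispatch_func_addr`, resetting the
    /// stack pointer and FPU state before entering the vCPU.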
    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
    fn dispatch_call_from_host(
        &mut self,
        dispatch_func_addr: RawPtr,
        outb_handle_fn: OutBHandlerWrapper,
        mem_access_fn: MemAccessHandlerWrapper,
        #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
    ) -> Result<()> {
        // Reset the general purpose registers, keeping only RIP and RSP
        let regs = kvm_regs {
            rip: dispatch_func_addr.into(),
            rsp: self.orig_rsp.absolute()?,
            ..Default::default()
        };
        self.vcpu_fd.set_regs(&regs)?;

        // Reset the FPU state to its defaults
        let fpu = kvm_fpu {
            fcw: FP_CONTROL_WORD_DEFAULT,
            ftwx: FP_TAG_WORD_DEFAULT,
            mxcsr: MXCSR_DEFAULT,
            ..Default::default()
        };
        self.vcpu_fd.set_fpu(&fpu)?;

        VirtualCPU::run(
            self.as_mut_hypervisor(),
            outb_handle_fn,
            mem_access_fn,
            #[cfg(gdb)]
            dbg_mem_access_fn,
        )?;

        Ok(())
    }

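    /// Handle an OUT port IO exit from the guest by forwarding the port and the
    /// (little-endian, up to 4-byte) value to the registered outb handler.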
    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
    fn handle_io(
        &mut self,
        port: u16,
        data: Vec<u8>,
        _rip: u64,
        _instruction_length: u64,
        outb_handle_fn: OutBHandlerWrapper,
    ) -> Result<()> {
        if data.is_empty() {
            log_then_return!("no data was given in IO interrupt");
        } else {
            let mut padded = [0u8; 4];
            let copy_len = data.len().min(4);
            padded[..copy_len].copy_from_slice(&data[..copy_len]);
            let value = u32::from_le_bytes(padded);

            outb_handle_fn
                .try_lock()
                .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?
                .call(port, value)?;
        }

        Ok(())
    }

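    /// Run the vCPU once and map the KVM exit reason to a `HyperlightExit`,
    /// checking the interrupt handle before and after the run so a requested
    /// cancellation is not missed.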
    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
    fn run(&mut self) -> Result<HyperlightExit> {
        // Record the thread that is about to run the vCPU so the interrupt
        // handle can signal it.
        self.interrupt_handle
            .tid
            .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed);
        // Mark the vCPU as running (and bump the run generation) before
        // entering KVM_RUN.
        self.interrupt_handle
            .set_running_and_increment_generation()
            .map_err(|e| {
                new_error!(
                    "Error setting running state and incrementing generation: {}",
                    e
                )
            })?;
        // If a cancellation was already requested, skip KVM_RUN entirely and
        // treat it as if the run had been interrupted.
        let exit_reason = if self
            .interrupt_handle
            .cancel_requested
            .load(Ordering::Relaxed)
        {
            Err(kvm_ioctls::Error::new(libc::EINTR))
        } else {
            self.vcpu_fd.run()
        };
        // Check whether a cancellation has been requested by the time the run
        // returned, then clear the running bit.
        #[allow(unused_variables)]
        let cancel_requested = self
            .interrupt_handle
            .cancel_requested
            .load(Ordering::Relaxed);
        self.interrupt_handle.clear_running_bit();
        let result = match exit_reason {
            Ok(VcpuExit::Hlt) => {
                crate::debug!("KVM - Halt Details : {:#?}", &self);
                HyperlightExit::Halt()
            }
            Ok(VcpuExit::IoOut(port, data)) => {
                crate::debug!("KVM IO Details : \nPort : {}\nData : {:?}", port, data);
                HyperlightExit::IoOut(port, data.to_vec(), 0, 0)
            }
            Ok(VcpuExit::MmioRead(addr, _)) => {
                crate::debug!("KVM MMIO Read -Details: Address: {} \n {:#?}", addr, &self);

                match self.get_memory_access_violation(
                    addr as usize,
                    &self.mem_regions,
                    MemoryRegionFlags::READ,
                ) {
                    Some(access_violation_exit) => access_violation_exit,
                    None => HyperlightExit::Mmio(addr),
                }
            }
            Ok(VcpuExit::MmioWrite(addr, _)) => {
                crate::debug!("KVM MMIO Write -Details: Address: {} \n {:#?}", addr, &self);

                match self.get_memory_access_violation(
                    addr as usize,
                    &self.mem_regions,
                    MemoryRegionFlags::WRITE,
                ) {
                    Some(access_violation_exit) => access_violation_exit,
                    None => HyperlightExit::Mmio(addr),
                }
            }
            #[cfg(gdb)]
            Ok(VcpuExit::Debug(debug_exit)) => match self.get_stop_reason(debug_exit) {
                Ok(reason) => HyperlightExit::Debug(reason),
                Err(e) => {
                    log_then_return!("Error getting stop reason: {:?}", e);
                }
            },
            Err(e) => match e.errno() {
                // With the gdb feature enabled, an interrupted run is reported
                // to the debugger as an interrupt stop.
                #[cfg(gdb)]
                libc::EINTR => HyperlightExit::Debug(VcpuStopReason::Interrupt),
                // KVM_RUN was interrupted by a signal: report Cancelled if a
                // cancellation was requested, otherwise retry.
                #[cfg(not(gdb))]
                libc::EINTR => {
                    if cancel_requested {
                        self.interrupt_handle
                            .cancel_requested
                            .store(false, Ordering::Relaxed);
                        HyperlightExit::Cancelled()
                    } else {
                        HyperlightExit::Retry()
                    }
                }
                libc::EAGAIN => HyperlightExit::Retry(),
                _ => {
                    crate::debug!("KVM Error -Details: Address: {} \n {:#?}", e, &self);
                    log_then_return!("Error running VCPU {:?}", e);
                }
            },
            Ok(other) => {
                let err_msg = format!("Unexpected KVM Exit {:?}", other);
                crate::debug!("KVM Other Exit Details: {:#?}", &self);
                HyperlightExit::Unknown(err_msg)
            }
        };
        Ok(result)
    }

    #[instrument(skip_all, parent = Span::current(), level = "Trace")]
    fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor {
        self as &mut dyn Hypervisor
    }

    fn interrupt_handle(&self) -> Arc<dyn InterruptHandle> {
        self.interrupt_handle.clone()
    }

    #[cfg(crashdump)]
    fn get_memory_regions(&self) -> &[MemoryRegion] {
        &self.mem_regions
    }

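    /// Notify the GDB thread that the vCPU stopped, then process debug requests
    /// until one of them (Step, Continue or DisableDebug) resumes execution.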
    #[cfg(gdb)]
    fn handle_debug(
        &mut self,
        dbg_mem_access_fn: Arc<Mutex<dyn super::handlers::DbgMemAccessHandlerCaller>>,
        stop_reason: VcpuStopReason,
    ) -> Result<()> {
        self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason))
            .map_err(|e| new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e))?;

        loop {
            log::debug!("Debug wait for event to resume vCPU");
            let req = self.recv_dbg_msg()?;

            let result = self.process_dbg_request(req, dbg_mem_access_fn.clone());

            let response = match result {
                Ok(response) => response,
                Err(HyperlightError::TranslateGuestAddress(_)) => DebugResponse::ErrorOccurred,
                Err(e) => {
                    return Err(e);
                }
            };

            let cont = matches!(
                response,
                DebugResponse::Step | DebugResponse::Continue | DebugResponse::DisableDebug
            );

            self.send_dbg_msg(response)
                .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?;

            if cont {
                break;
            }
        }

        Ok(())
    }
}

impl Drop for KVMDriver {
    fn drop(&mut self) {
        self.interrupt_handle.dropped.store(true, Ordering::Relaxed);
    }
}