Skip to main content

arcbox_hypervisor/linux/
vcpu.rs

1//! Virtual CPU implementation for Linux KVM.
2
3use std::sync::Arc;
4use std::sync::atomic::{AtomicBool, Ordering};
5
6use crate::{
7    error::HypervisorError,
8    traits::Vcpu,
9    types::{CpuArch, Registers, VcpuExit, VcpuSnapshot},
10};
11
12use super::ffi::{
13    KVM_EXIT_DEBUG, KVM_EXIT_FAIL_ENTRY, KVM_EXIT_HLT, KVM_EXIT_INTERNAL_ERROR, KVM_EXIT_IO,
14    KVM_EXIT_IO_IN, KVM_EXIT_IO_OUT, KVM_EXIT_MMIO, KVM_EXIT_SHUTDOWN, KVM_EXIT_SYSTEM_EVENT,
15    KvmVcpuFd,
16};
17
18#[cfg(target_arch = "x86_64")]
19use super::ffi::{KvmRegs, KvmSegment, KvmSregs};
20
/// Virtual CPU implementation for Linux KVM.
///
/// Each vCPU represents a virtual processor that can execute guest code.
/// vCPUs are created via the KVM VM file descriptor and run using the
/// KVM_RUN ioctl.
pub struct KvmVcpu {
    /// vCPU ID. Also reported back to callers via `Vcpu::id()` and used to
    /// validate snapshots on restore.
    id: u32,
    /// KVM vCPU file descriptor. Exposes the shared `kvm_run` area
    /// (`kvm_run()` / `kvm_run_mut()`) used to decode exits and to pass
    /// I/O / MMIO results back to the guest.
    vcpu_fd: KvmVcpuFd,
    /// Whether the vCPU is currently inside KVM_RUN. Held behind an `Arc`
    /// so external monitors can observe it via `running_flag()`.
    running: Arc<AtomicBool>,
}
34
35impl KvmVcpu {
36    /// Creates a new vCPU wrapper.
37    pub(crate) fn new(id: u32, vcpu_fd: KvmVcpuFd) -> Result<Self, HypervisorError> {
38        let vcpu = Self {
39            id,
40            vcpu_fd,
41            running: Arc::new(AtomicBool::new(false)),
42        };
43
44        // Initialize architecture-specific state
45        #[cfg(target_arch = "x86_64")]
46        vcpu.init_x86()?;
47
48        #[cfg(target_arch = "aarch64")]
49        vcpu.init_arm64()?;
50
51        Ok(vcpu)
52    }
53
54    /// Initializes x86 vCPU state.
55    #[cfg(target_arch = "x86_64")]
56    fn init_x86(&self) -> Result<(), HypervisorError> {
57        // Set up initial special registers for real mode
58        let mut sregs =
59            self.vcpu_fd
60                .get_sregs()
61                .map_err(|e| HypervisorError::VcpuCreationFailed {
62                    id: self.id,
63                    reason: format!("Failed to get sregs: {}", e),
64                })?;
65
66        // Set up code segment for real mode
67        sregs.cs = KvmSegment {
68            base: 0,
69            limit: 0xffff,
70            selector: 0,
71            type_: 0xb, // Code: execute/read, accessed
72            present: 1,
73            dpl: 0,
74            db: 0,
75            s: 1,
76            l: 0,
77            g: 0,
78            avl: 0,
79            unusable: 0,
80            padding: 0,
81        };
82
83        // Set up data segment
84        sregs.ds = KvmSegment {
85            base: 0,
86            limit: 0xffff,
87            selector: 0,
88            type_: 0x3, // Data: read/write, accessed
89            present: 1,
90            dpl: 0,
91            db: 0,
92            s: 1,
93            l: 0,
94            g: 0,
95            avl: 0,
96            unusable: 0,
97            padding: 0,
98        };
99
100        sregs.es = sregs.ds.clone();
101        sregs.fs = sregs.ds.clone();
102        sregs.gs = sregs.ds.clone();
103        sregs.ss = sregs.ds.clone();
104
105        // CR0: PE=0 (real mode), disable paging
106        sregs.cr0 = 0x6000_0010; // ET=1, NE=1
107
108        self.vcpu_fd
109            .set_sregs(&sregs)
110            .map_err(|e| HypervisorError::VcpuCreationFailed {
111                id: self.id,
112                reason: format!("Failed to set sregs: {}", e),
113            })?;
114
115        Ok(())
116    }
117
118    /// Initializes ARM64 vCPU state.
119    #[cfg(target_arch = "aarch64")]
120    fn init_arm64(&self) -> Result<(), HypervisorError> {
121        // ARM64 vCPU initialization is handled differently
122        // The preferred target is set at VM creation time
123        // Individual registers are set using KVM_SET_ONE_REG
124
125        // For now, we don't do any special initialization here
126        // Real implementation would set up initial register state
127
128        Ok(())
129    }
130
131    /// Returns whether the vCPU is currently running.
132    #[must_use]
133    pub fn is_running(&self) -> bool {
134        self.running.load(Ordering::SeqCst)
135    }
136
137    /// Returns a clone of the running flag for external monitoring.
138    #[must_use]
139    pub fn running_flag(&self) -> Arc<AtomicBool> {
140        Arc::clone(&self.running)
141    }
142
143    /// Signals the vCPU to exit immediately.
144    ///
145    /// This causes the next KVM_RUN to return immediately.
146    pub fn signal_exit(&self) {
147        self.vcpu_fd.set_immediate_exit(true);
148    }
149
150    /// Sets up initial register state for Linux boot (x86_64).
151    #[cfg(target_arch = "x86_64")]
152    pub fn setup_linux_boot(
153        &self,
154        entry_point: u64,
155        boot_params_addr: u64,
156    ) -> Result<(), HypervisorError> {
157        // Set up special registers for protected mode
158        let mut sregs = self
159            .vcpu_fd
160            .get_sregs()
161            .map_err(|e| HypervisorError::VcpuRunError(format!("Failed to get sregs: {}", e)))?;
162
163        // Enable protected mode with paging disabled
164        // Linux 64-bit kernel expects to be entered in protected mode
165        sregs.cr0 = 0x6000_0011; // PE=1, ET=1, NE=1
166        sregs.cr3 = 0;
167        sregs.cr4 = 0;
168
169        // Set up code segment for 32-bit protected mode
170        // (Linux kernel will switch to long mode itself)
171        sregs.cs = KvmSegment {
172            base: 0,
173            limit: 0xffff_ffff,
174            selector: 0x10,
175            type_: 0xb, // Code: execute/read, accessed
176            present: 1,
177            dpl: 0,
178            db: 1, // 32-bit segment
179            s: 1,
180            l: 0,
181            g: 1, // 4KB granularity
182            avl: 0,
183            unusable: 0,
184            padding: 0,
185        };
186
187        // Set up data segment
188        sregs.ds = KvmSegment {
189            base: 0,
190            limit: 0xffff_ffff,
191            selector: 0x18,
192            type_: 0x3, // Data: read/write, accessed
193            present: 1,
194            dpl: 0,
195            db: 1,
196            s: 1,
197            l: 0,
198            g: 1,
199            avl: 0,
200            unusable: 0,
201            padding: 0,
202        };
203
204        sregs.es = sregs.ds.clone();
205        sregs.fs = sregs.ds.clone();
206        sregs.gs = sregs.ds.clone();
207        sregs.ss = sregs.ds.clone();
208
209        self.vcpu_fd
210            .set_sregs(&sregs)
211            .map_err(|e| HypervisorError::VcpuRunError(format!("Failed to set sregs: {}", e)))?;
212
213        // Set up general purpose registers
214        let regs = KvmRegs {
215            rip: entry_point,
216            rsi: boot_params_addr, // Linux boot protocol: RSI = boot_params
217            rflags: 0x2,           // Reserved bit always set
218            ..Default::default()
219        };
220
221        self.vcpu_fd
222            .set_regs(&regs)
223            .map_err(|e| HypervisorError::VcpuRunError(format!("Failed to set regs: {}", e)))?;
224
225        tracing::debug!(
226            "vCPU {} setup for Linux boot: entry={:#x}, boot_params={:#x}",
227            self.id,
228            entry_point,
229            boot_params_addr
230        );
231
232        Ok(())
233    }
234
235    /// Sets up initial register state for Linux boot (ARM64).
236    #[cfg(target_arch = "aarch64")]
237    pub fn setup_linux_boot(&self, entry_point: u64, dtb_addr: u64) -> Result<(), HypervisorError> {
238        use super::ffi::arm64_regs;
239
240        // ARM64 Linux boot protocol:
241        // x0 = physical address of DTB
242        // PC = kernel entry point
243        // All other registers should be 0
244        // PSTATE should be EL1h with interrupts masked
245
246        // Set x0 = DTB address
247        self.vcpu_fd
248            .set_one_reg(arm64_regs::X0, dtb_addr)
249            .map_err(|e| HypervisorError::VcpuRunError(format!("Failed to set x0: {}", e)))?;
250
251        // Set PC = entry point
252        self.vcpu_fd
253            .set_one_reg(arm64_regs::PC, entry_point)
254            .map_err(|e| HypervisorError::VcpuRunError(format!("Failed to set PC: {}", e)))?;
255
256        // Set PSTATE for EL1h with interrupts masked
257        let pstate = arm64_regs::PSTATE_EL1H
258            | arm64_regs::PSTATE_D
259            | arm64_regs::PSTATE_A
260            | arm64_regs::PSTATE_I
261            | arm64_regs::PSTATE_F;
262        self.vcpu_fd
263            .set_one_reg(arm64_regs::PSTATE, pstate)
264            .map_err(|e| HypervisorError::VcpuRunError(format!("Failed to set PSTATE: {}", e)))?;
265
266        // Clear other important registers
267        self.vcpu_fd.set_one_reg(arm64_regs::X1, 0).ok(); // Ignore errors for optional regs
268        self.vcpu_fd.set_one_reg(arm64_regs::X2, 0).ok();
269        self.vcpu_fd.set_one_reg(arm64_regs::X3, 0).ok();
270        self.vcpu_fd.set_one_reg(arm64_regs::SP, 0).ok();
271
272        tracing::debug!(
273            "vCPU {} setup for Linux boot: entry={:#x}, dtb={:#x}, pstate={:#x}",
274            self.id,
275            entry_point,
276            dtb_addr,
277            pstate
278        );
279
280        Ok(())
281    }
282
283    /// Converts KVM exit reason to our VcpuExit type.
284    fn convert_exit(&self) -> VcpuExit {
285        let exit_reason = self.vcpu_fd.exit_reason();
286
287        match exit_reason {
288            KVM_EXIT_HLT => VcpuExit::Halt,
289
290            KVM_EXIT_IO => {
291                let io = unsafe { (*self.vcpu_fd.kvm_run()).exit_data.io };
292                if io.direction == KVM_EXIT_IO_OUT {
293                    // For OUT instructions, data is at kvm_run + data_offset
294                    let data_ptr = unsafe {
295                        (self.vcpu_fd.kvm_run() as *const _ as *const u8)
296                            .add(io.data_offset as usize)
297                    };
298                    let data = match io.size {
299                        1 => (unsafe { *data_ptr }) as u64,
300                        2 => (unsafe { *(data_ptr as *const u16) }) as u64,
301                        4 => (unsafe { *(data_ptr as *const u32) }) as u64,
302                        _ => 0,
303                    };
304                    VcpuExit::IoOut {
305                        port: io.port,
306                        size: io.size,
307                        data,
308                    }
309                } else {
310                    VcpuExit::IoIn {
311                        port: io.port,
312                        size: io.size,
313                    }
314                }
315            }
316
317            KVM_EXIT_MMIO => {
318                let mmio = unsafe { (*self.vcpu_fd.kvm_run()).exit_data.mmio };
319                if mmio.is_write != 0 {
320                    let data = match mmio.len {
321                        1 => mmio.data[0] as u64,
322                        2 => u16::from_le_bytes([mmio.data[0], mmio.data[1]]) as u64,
323                        4 => u32::from_le_bytes([
324                            mmio.data[0],
325                            mmio.data[1],
326                            mmio.data[2],
327                            mmio.data[3],
328                        ]) as u64,
329                        8 => u64::from_le_bytes([
330                            mmio.data[0],
331                            mmio.data[1],
332                            mmio.data[2],
333                            mmio.data[3],
334                            mmio.data[4],
335                            mmio.data[5],
336                            mmio.data[6],
337                            mmio.data[7],
338                        ]),
339                        _ => 0,
340                    };
341                    VcpuExit::MmioWrite {
342                        addr: mmio.phys_addr,
343                        size: mmio.len as u8,
344                        data,
345                    }
346                } else {
347                    VcpuExit::MmioRead {
348                        addr: mmio.phys_addr,
349                        size: mmio.len as u8,
350                    }
351                }
352            }
353
354            KVM_EXIT_SHUTDOWN => VcpuExit::Shutdown,
355
356            KVM_EXIT_DEBUG => VcpuExit::Debug,
357
358            KVM_EXIT_SYSTEM_EVENT => {
359                let event = unsafe { (*self.vcpu_fd.kvm_run()).exit_data.system_event };
360                match event.type_ {
361                    1 => VcpuExit::Shutdown,    // KVM_SYSTEM_EVENT_SHUTDOWN
362                    2 => VcpuExit::SystemReset, // KVM_SYSTEM_EVENT_RESET
363                    _ => VcpuExit::Unknown(exit_reason as i32),
364                }
365            }
366
367            KVM_EXIT_FAIL_ENTRY | KVM_EXIT_INTERNAL_ERROR => {
368                tracing::error!(
369                    "vCPU {} internal error: exit_reason={}",
370                    self.id,
371                    exit_reason
372                );
373                VcpuExit::Unknown(exit_reason as i32)
374            }
375
376            _ => VcpuExit::Unknown(exit_reason as i32),
377        }
378    }
379
380    /// Provides data for an I/O IN instruction.
381    pub fn set_io_in_data(&self, data: &[u8]) {
382        unsafe {
383            let kvm_run = self.vcpu_fd.kvm_run_mut();
384            let io = kvm_run.exit_data.io;
385            let data_ptr = (kvm_run as *mut _ as *mut u8).add(io.data_offset as usize);
386            std::ptr::copy_nonoverlapping(data.as_ptr(), data_ptr, data.len());
387        }
388    }
389
390    /// Provides data for an MMIO read.
391    pub fn set_mmio_read_data(&self, data: &[u8]) {
392        unsafe {
393            let kvm_run = self.vcpu_fd.kvm_run_mut();
394            let mmio = &mut kvm_run.exit_data.mmio;
395            let len = std::cmp::min(data.len(), mmio.data.len());
396            mmio.data[..len].copy_from_slice(&data[..len]);
397        }
398    }
399}
400
impl Vcpu for KvmVcpu {
    /// Runs the vCPU until the next exit.
    ///
    /// Sets the shared `running` flag for the duration of KVM_RUN, clears
    /// any pending immediate-exit request first, and translates the KVM
    /// exit reason into a `VcpuExit` on success.
    fn run(&mut self) -> Result<VcpuExit, HypervisorError> {
        self.running.store(true, Ordering::SeqCst);

        // Clear immediate exit flag so a prior signal_exit() does not make
        // this run return instantly.
        self.vcpu_fd.set_immediate_exit(false);

        // Run the vCPU (blocks until the guest exits)
        let result = self.vcpu_fd.run();

        self.running.store(false, Ordering::SeqCst);

        match result {
            Ok(()) => Ok(self.convert_exit()),
            Err(e) => Err(HypervisorError::VcpuRunError(format!(
                "vCPU {} run failed: {}",
                self.id, e
            ))),
        }
    }

    /// Reads the general-purpose registers.
    ///
    /// On x86_64 this is a direct field-for-field copy of KVM's regs.
    /// On aarch64 the ARM registers are shoehorned into the x86-named
    /// `Registers` struct (x0..x5 -> rax..rdi, x29 -> rbp, pc -> rip,
    /// pstate -> rflags).
    fn get_regs(&self) -> Result<Registers, HypervisorError> {
        let kvm_regs = self.vcpu_fd.get_regs().map_err(|e| {
            HypervisorError::VcpuRunError(format!("Failed to get registers: {}", e))
        })?;

        #[cfg(target_arch = "x86_64")]
        {
            Ok(Registers {
                rax: kvm_regs.rax,
                rbx: kvm_regs.rbx,
                rcx: kvm_regs.rcx,
                rdx: kvm_regs.rdx,
                rsi: kvm_regs.rsi,
                rdi: kvm_regs.rdi,
                rsp: kvm_regs.rsp,
                rbp: kvm_regs.rbp,
                r8: kvm_regs.r8,
                r9: kvm_regs.r9,
                r10: kvm_regs.r10,
                r11: kvm_regs.r11,
                r12: kvm_regs.r12,
                r13: kvm_regs.r13,
                r14: kvm_regs.r14,
                r15: kvm_regs.r15,
                rip: kvm_regs.rip,
                rflags: kvm_regs.rflags,
            })
        }

        #[cfg(target_arch = "aarch64")]
        {
            // ARM64 uses a different register structure.
            // Map to our generic x86-style Registers for now.
            // Note: x6/x7 are not surfaced; rbp carries x29 (the AArch64
            // frame pointer register).
            Ok(Registers {
                rax: kvm_regs.regs[0],
                rbx: kvm_regs.regs[1],
                rcx: kvm_regs.regs[2],
                rdx: kvm_regs.regs[3],
                rsi: kvm_regs.regs[4],
                rdi: kvm_regs.regs[5],
                rsp: kvm_regs.sp,
                rbp: kvm_regs.regs[29],
                r8: kvm_regs.regs[8],
                r9: kvm_regs.regs[9],
                r10: kvm_regs.regs[10],
                r11: kvm_regs.regs[11],
                r12: kvm_regs.regs[12],
                r13: kvm_regs.regs[13],
                r14: kvm_regs.regs[14],
                r15: kvm_regs.regs[15],
                rip: kvm_regs.pc,
                rflags: kvm_regs.pstate,
            })
        }
    }

    /// Writes the general-purpose registers.
    ///
    /// Fully implemented on x86_64 only; on aarch64 this currently returns
    /// an error (registers would have to be written one at a time via
    /// KVM_SET_ONE_REG).
    fn set_regs(&mut self, regs: &Registers) -> Result<(), HypervisorError> {
        #[cfg(target_arch = "x86_64")]
        {
            let kvm_regs = KvmRegs {
                rax: regs.rax,
                rbx: regs.rbx,
                rcx: regs.rcx,
                rdx: regs.rdx,
                rsi: regs.rsi,
                rdi: regs.rdi,
                rsp: regs.rsp,
                rbp: regs.rbp,
                r8: regs.r8,
                r9: regs.r9,
                r10: regs.r10,
                r11: regs.r11,
                r12: regs.r12,
                r13: regs.r13,
                r14: regs.r14,
                r15: regs.r15,
                rip: regs.rip,
                rflags: regs.rflags,
            };

            self.vcpu_fd.set_regs(&kvm_regs).map_err(|e| {
                HypervisorError::VcpuRunError(format!("Failed to set registers: {}", e))
            })?;
        }

        #[cfg(target_arch = "aarch64")]
        {
            // ARM64 registers would need to be set individually
            // using KVM_SET_ONE_REG
            let _ = regs; // Suppress unused warning
            return Err(HypervisorError::VcpuRunError(
                "ARM64 register setting not fully implemented".to_string(),
            ));
        }

        Ok(())
    }

    /// Returns this vCPU's ID.
    fn id(&self) -> u32 {
        self.id
    }

    /// Completes an I/O IN exit by writing `value` into the kvm_run data
    /// slot (little-endian, truncated to the access size).
    fn set_io_result(&mut self, value: u64) -> Result<(), HypervisorError> {
        // For I/O IN operations, write the result back to the data area
        let bytes = value.to_le_bytes();
        unsafe {
            let kvm_run = self.vcpu_fd.kvm_run_mut();
            let io = kvm_run.exit_data.io;
            let size = io.size as usize;
            // SAFETY: data_offset is provided by KVM and lies within the
            // kvm_run mapping; the copy is capped at the 8 bytes of `bytes`.
            let data_ptr = (kvm_run as *mut _ as *mut u8).add(io.data_offset as usize);
            std::ptr::copy_nonoverlapping(bytes.as_ptr(), data_ptr, size.min(8));
        }
        Ok(())
    }

    /// Completes an MMIO read exit by writing `value` into
    /// `kvm_run.exit_data.mmio.data` (little-endian, truncated to the
    /// access length).
    fn set_mmio_result(&mut self, value: u64) -> Result<(), HypervisorError> {
        // For MMIO read operations, write the result back to the mmio data area
        let bytes = value.to_le_bytes();
        unsafe {
            let kvm_run = self.vcpu_fd.kvm_run_mut();
            let mmio = &mut kvm_run.exit_data.mmio;
            let len = (mmio.len as usize).min(8);
            mmio.data[..len].copy_from_slice(&bytes[..len]);
        }
        Ok(())
    }

    /// Captures the vCPU's register state into a snapshot.
    ///
    /// On aarch64, per-register read failures are silently replaced with 0
    /// (best-effort snapshot); FP/SIMD state is not captured.
    fn snapshot(&self) -> Result<VcpuSnapshot, HypervisorError> {
        #[cfg(target_arch = "x86_64")]
        {
            let regs = self.get_regs()?;
            Ok(VcpuSnapshot::new_x86(self.id, regs))
        }

        #[cfg(target_arch = "aarch64")]
        {
            use super::ffi::arm64_regs;

            // Read ARM64 registers
            let mut arm_regs = crate::types::Arm64Registers::default();

            // Read X0-X30
            for i in 0..31 {
                let reg_id = arm64_regs::X0 + (i as u64) * 2; // Each reg is 2 u64s apart in encoding
                if let Ok(val) = self.vcpu_fd.get_one_reg(reg_id) {
                    arm_regs.x[i] = val;
                }
            }

            // Read SP, PC, PSTATE
            arm_regs.sp = self.vcpu_fd.get_one_reg(arm64_regs::SP).unwrap_or(0);
            arm_regs.pc = self.vcpu_fd.get_one_reg(arm64_regs::PC).unwrap_or(0);
            arm_regs.pstate = self.vcpu_fd.get_one_reg(arm64_regs::PSTATE).unwrap_or(0);

            // Note: FPCR, FPSR, and vector registers would need additional KVM_GET_ONE_REG calls

            Ok(VcpuSnapshot::new_arm64(self.id, arm_regs))
        }
    }

    /// Restores register state from a snapshot taken on the same vCPU ID.
    ///
    /// # Errors
    ///
    /// Returns `HypervisorError::SnapshotError` if the snapshot belongs to
    /// a different vCPU. On aarch64, individual register write failures are
    /// ignored (best-effort restore).
    fn restore(&mut self, snapshot: &VcpuSnapshot) -> Result<(), HypervisorError> {
        if snapshot.id != self.id {
            return Err(HypervisorError::SnapshotError(format!(
                "vCPU ID mismatch: expected {}, got {}",
                self.id, snapshot.id
            )));
        }

        #[cfg(target_arch = "x86_64")]
        {
            if let Some(regs) = &snapshot.x86_regs {
                self.set_regs(regs)?;
            }
        }

        #[cfg(target_arch = "aarch64")]
        {
            use super::ffi::arm64_regs;

            if let Some(arm_regs) = &snapshot.arm64_regs {
                // Restore X0-X30
                for i in 0..31 {
                    let reg_id = arm64_regs::X0 + (i as u64) * 2;
                    let _ = self.vcpu_fd.set_one_reg(reg_id, arm_regs.x[i]);
                }

                // Restore SP, PC, PSTATE
                let _ = self.vcpu_fd.set_one_reg(arm64_regs::SP, arm_regs.sp);
                let _ = self.vcpu_fd.set_one_reg(arm64_regs::PC, arm_regs.pc);
                let _ = self
                    .vcpu_fd
                    .set_one_reg(arm64_regs::PSTATE, arm_regs.pstate);
            }
        }

        Ok(())
    }
}
620
/// Extended vCPU state for x86_64.
///
/// Mirrors the x86 special-register set (segments, descriptor tables,
/// control registers) in crate-local types, independent of the raw KVM
/// FFI structs.
#[cfg(target_arch = "x86_64")]
#[derive(Debug, Clone, Default)]
pub struct SpecialRegisters {
    /// Code segment.
    pub cs: SegmentRegister,
    /// Data segment.
    pub ds: SegmentRegister,
    /// Stack segment.
    pub ss: SegmentRegister,
    /// Extra segment.
    pub es: SegmentRegister,
    /// FS segment.
    pub fs: SegmentRegister,
    /// GS segment.
    pub gs: SegmentRegister,
    /// Global descriptor table.
    pub gdt: DescriptorTable,
    /// Interrupt descriptor table.
    pub idt: DescriptorTable,
    /// Control register 0 (PE, PG, and other mode bits).
    pub cr0: u64,
    /// Control register 3 (page table base).
    pub cr3: u64,
    /// Control register 4.
    pub cr4: u64,
    /// Extended feature enable register (MSR 0xC0000080).
    pub efer: u64,
}
650
/// Segment register.
///
/// Crate-local view of an x86 segment descriptor.
/// NOTE(review): this carries fewer fields than the FFI `KvmSegment`
/// (no `s`, `avl`, `unusable`) — confirm any conversion code accounts
/// for the difference.
#[cfg(target_arch = "x86_64")]
#[derive(Debug, Clone, Default)]
pub struct SegmentRegister {
    /// Base address.
    pub base: u64,
    /// Segment limit.
    pub limit: u32,
    /// Segment selector.
    pub selector: u16,
    /// Descriptor type bits.
    pub type_: u8,
    /// Present flag.
    pub present: u8,
    /// Descriptor privilege level (0-3).
    pub dpl: u8,
    /// Default operation size (D/B flag; 1 = 32-bit).
    pub db: u8,
    /// Granularity flag (1 = limit in 4 KiB units).
    pub granularity: u8,
    /// Long mode flag (L bit; 1 = 64-bit code segment).
    pub long_mode: u8,
}
674
/// Descriptor table (GDT/IDT).
///
/// Matches the base/limit pair loaded by the LGDT/LIDT instructions.
#[cfg(target_arch = "x86_64")]
#[derive(Debug, Clone, Default)]
pub struct DescriptorTable {
    /// Linear base address of the table.
    pub base: u64,
    /// Table limit in bytes (size - 1).
    pub limit: u16,
}
684
#[cfg(test)]
mod tests {
    use super::*;

    /// The memory ordering `KvmVcpu` uses for its running flag.
    const ORD: Ordering = Ordering::SeqCst;

    #[test]
    fn test_vcpu_running_flag() {
        // Exercise the shared flag the same way run()/is_running() do:
        // SeqCst store/load through an Arc'd AtomicBool.
        let flag = Arc::new(AtomicBool::new(false));
        assert!(!flag.load(ORD));
        flag.store(true, ORD);
        assert!(flag.load(ORD));
    }
}