Skip to main content

krun_vmm/
lib.rs

1// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the THIRD-PARTY file.
7
8//! Virtual Machine Monitor that leverages the Linux Kernel-based Virtual Machine (KVM),
9//! and other virtualization features to run a single lightweight micro-virtual
10//! machine (microVM).
11//#![deny(missing_docs)]
12
13#[macro_use]
14extern crate log;
15
/// Handles setup and initialization of a `Vmm` object.
17pub mod builder;
18pub(crate) mod device_manager;
19/// Resource store for configured microVM resources.
20pub mod resources;
21/// Signal handling utilities.
22#[cfg(target_os = "linux")]
23pub mod signal_handler;
24/// Wrappers over structures used to configure the VMM.
25pub mod vmm_config;
26
27#[cfg(target_os = "linux")]
28mod linux;
29#[cfg(target_os = "linux")]
30use crate::linux::vstate;
31#[cfg(target_os = "macos")]
32mod macos;
33mod terminal;
34pub mod worker;
35
36#[cfg(target_os = "macos")]
37use macos::vstate;
38
39use std::fmt::{Display, Formatter};
40use std::io;
41use std::os::unix::io::AsRawFd;
42use std::sync::atomic::{AtomicI32, Ordering};
43use std::sync::{Arc, Mutex};
44#[cfg(target_os = "linux")]
45use std::time::Duration;
46
47#[cfg(target_arch = "x86_64")]
48use crate::device_manager::legacy::PortIODeviceManager;
49use crate::device_manager::mmio::MMIODeviceManager;
50#[cfg(target_os = "linux")]
51use crate::vstate::VcpuEvent;
52use crate::vstate::{Vcpu, VcpuHandle, VcpuResponse, Vm};
53
54use arch::{ArchMemoryInfo, InitrdConfig};
55#[cfg(target_os = "macos")]
56use crossbeam_channel::Sender;
57#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
58use devices::fdt;
59use devices::legacy::IrqChip;
60use devices::virtio::VmmExitObserver;
61use devices::{BusDevice, DeviceType};
62use kernel::cmdline::Cmdline as KernelCmdline;
63use polly::event_manager::{self, EventManager, Subscriber};
64use utils::epoll::{EpollEvent, EventSet};
65use utils::eventfd::EventFd;
66use vm_memory::GuestMemoryMmap;
67
// Process exit codes. All of them are `u8` values.

/// Success exit code.
///
/// Also used by `Vmm::process` as the fallback when no vCPU reported an exit code.
pub const FC_EXIT_CODE_OK: u8 = 0;
/// Generic error exit code.
pub const FC_EXIT_CODE_GENERIC_ERROR: u8 = 1;
/// Generic exit code for an error considered not possible to occur if the program logic is sound.
pub const FC_EXIT_CODE_UNEXPECTED_ERROR: u8 = 2;
/// Firecracker was shut down after intercepting a restricted system call.
pub const FC_EXIT_CODE_BAD_SYSCALL: u8 = 148;
/// Firecracker was shut down after intercepting `SIGBUS`.
pub const FC_EXIT_CODE_SIGBUS: u8 = 149;
/// Firecracker was shut down after intercepting `SIGSEGV`.
pub const FC_EXIT_CODE_SIGSEGV: u8 = 150;
/// Bad configuration for microvm's resources, when using a single json.
pub const FC_EXIT_CODE_BAD_CONFIGURATION: u8 = 152;
/// Command line arguments parsing error.
pub const FC_EXIT_CODE_ARG_PARSING: u8 = 153;
84
/// Errors associated with the VMM internal logic. These errors cannot be generated by direct user
/// input, but can result from bad configuration of the host (for example if Firecracker doesn't
/// have permissions to open the KVM fd).
#[derive(Debug)]
pub enum Error {
    /// This error is thrown by the minimal boot loader implementation.
    ///
    /// Returned by `Vmm::configure_system` when the architecture-specific
    /// `configure_system` call fails.
    ConfigureSystem(arch::Error),
    /// Legacy devices work with Event file descriptors and the creation can fail because
    /// of resource exhaustion.
    #[cfg(target_arch = "x86_64")]
    CreateLegacyDevice(device_manager::legacy::Error),
    /// Cannot read from an Event file descriptor.
    EventFd(io::Error),
    /// Polly error wrapper.
    EventManager(event_manager::Error),
    /// I8042 Error.
    ///
    /// Returned by `Vmm::send_ctrl_alt_del` when the i8042 device fails to trigger the
    /// key combination.
    #[cfg(target_arch = "x86_64")]
    I8042Error(devices::legacy::I8042DeviceError),
    /// Cannot access kernel file.
    KernelFile(io::Error),
    /// Cannot open /dev/kvm. Either the host does not have KVM or Firecracker does not have
    /// permission to open the file descriptor.
    KvmContext(vstate::Error),
    /// Cannot add devices to the Legacy I/O Bus.
    #[cfg(target_arch = "x86_64")]
    LegacyIOBus(device_manager::legacy::Error),
    /// Cannot load command line.
    LoadCommandline(kernel::cmdline::Error),
    /// Cannot add a device to the MMIO Bus.
    RegisterMMIODevice(device_manager::mmio::Error),
    /// Write to the serial console failed.
    Serial(io::Error),
    /// Cannot generate or write FDT.
    ///
    /// Returned by `Vmm::configure_system` when `fdt::create_fdt` fails.
    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    SetupFDT(devices::fdt::Error),
    /// Cannot create Timer file descriptor.
    TimerFd(io::Error),
    /// Vcpu error.
    Vcpu(vstate::Error),
    /// Cannot send event to vCPU.
    ///
    /// Returned by the Linux `Vmm::resume_vcpus` when sending `VcpuEvent::Resume` fails.
    VcpuEvent(vstate::Error),
    /// Cannot create a vCPU handle.
    ///
    /// Returned by `Vmm::start_vcpus` when `Vcpu::start_threaded` fails.
    VcpuHandle(vstate::Error),
    /// vCPU resume failed.
    ///
    /// Returned by the Linux `Vmm::resume_vcpus` when a vCPU does not answer with
    /// `VcpuResponse::Resumed` before the receive timeout expires.
    VcpuResume,
    /// Cannot spawn a new Vcpu thread.
    VcpuSpawn(std::io::Error),
    /// Vm error.
    Vm(vstate::Error),
    /// Error thrown by observer object on Vmm initialization.
    VmmObserverInit(utils::errno::Error),
    /// Error thrown by observer object on Vmm teardown.
    VmmObserverTeardown(utils::errno::Error),
}
139
140impl Display for Error {
141    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
142        use self::Error::*;
143
144        match self {
145            ConfigureSystem(e) => write!(f, "System configuration error: {e:?}"),
146            #[cfg(target_arch = "x86_64")]
147            CreateLegacyDevice(e) => write!(f, "Error creating legacy device: {e:?}"),
148            EventFd(e) => write!(f, "Event fd error: {e}"),
149            EventManager(e) => write!(f, "Event manager error: {e:?}"),
150            #[cfg(target_arch = "x86_64")]
151            I8042Error(e) => write!(f, "I8042 error: {e}"),
152            KernelFile(e) => write!(f, "Cannot access kernel file: {e}"),
153            KvmContext(e) => write!(f, "Failed to validate KVM support: {e:?}"),
154            #[cfg(target_arch = "x86_64")]
155            LegacyIOBus(e) => write!(f, "Cannot add devices to the legacy I/O Bus. {e}"),
156            LoadCommandline(e) => write!(f, "Cannot load command line: {e}"),
157            RegisterMMIODevice(e) => write!(f, "Cannot add a device to the MMIO Bus. {e}"),
158            Serial(e) => write!(f, "Error writing to the serial console: {e:?}"),
159            #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
160            SetupFDT(e) => write!(f, "Error generating or writing FDT: {e:?}"),
161            TimerFd(e) => write!(f, "Error creating timer fd: {e}"),
162            Vcpu(e) => write!(f, "Vcpu error: {e}"),
163            VcpuEvent(e) => write!(f, "Cannot send event to vCPU. {e:?}"),
164            VcpuHandle(e) => write!(f, "Cannot create a vCPU handle. {e}"),
165            VcpuResume => write!(f, "vCPUs resume failed."),
166            VcpuSpawn(e) => write!(f, "Cannot spawn Vcpu thread: {e}"),
167            Vm(e) => write!(f, "Vm error: {e}"),
168            VmmObserverInit(e) => write!(
169                f,
170                "Error thrown by observer object on Vmm initialization: {e}"
171            ),
172            VmmObserverTeardown(e) => {
173                write!(f, "Error thrown by observer object on Vmm teardown: {e}")
174            }
175        }
176    }
177}
178
/// Trait for objects that need custom initialization and teardown during the Vmm lifetime.
///
/// Both hooks come with default implementations that do nothing and return `Ok(())`, so an
/// implementor only needs to override the ones it cares about.
pub trait VmmEventsObserver {
    /// This function will be called during microVm boot.
    ///
    /// The default implementation is a no-op that returns `Ok(())`.
    fn on_vmm_boot(&mut self) -> std::result::Result<(), utils::errno::Error> {
        Ok(())
    }
    /// This function will be called on microVm teardown.
    ///
    /// The default implementation is a no-op that returns `Ok(())`.
    fn on_vmm_stop(&mut self) -> std::result::Result<(), utils::errno::Error> {
        Ok(())
    }
}
190
/// Shorthand result type for internal VMM commands.
///
/// The error type is fixed to this module's [`Error`] enum.
pub type Result<T> = std::result::Result<T, Error>;
193
/// Contains the state and associated methods required for the Firecracker VMM.
pub struct Vmm {
    // Guest VM core resources.
    // Guest memory; handed to the architecture-specific setup in `configure_system`.
    guest_memory: GuestMemoryMmap,
    // Passed alongside `guest_memory` to the architecture-specific setup code.
    arch_memory_info: ArchMemoryInfo,

    // Kernel command line: its length is used on x86_64, its text goes into the FDT on
    // aarch64 and riscv64.
    kernel_cmdline: KernelCmdline,

    // One handle per vCPU started through `start_vcpus`.
    vcpus_handles: Vec<VcpuHandle>,
    // Event fd registered in `interest_list`; a read event on it makes `process` stop the VMM.
    exit_evt: EventFd,
    // The VM object exposed through `kvm_vm`.
    vm: Vm,
    // Observers whose `on_vmm_exit` is invoked by `stop` before the process exits.
    exit_observers: Vec<Arc<Mutex<dyn VmmExitObserver>>>,
    // Exit code read by `process`; the value `i32::MAX` is treated there as "not set".
    exit_code: Arc<AtomicI32>,

    // Guest VM devices.
    // MMIO devices; its bus is cloned into every vCPU in `start_vcpus`.
    mmio_device_manager: MMIODeviceManager,
    // Port I/O devices (x86_64 only); holds the i8042 device used by `send_ctrl_alt_del`.
    #[cfg(target_arch = "x86_64")]
    pio_device_manager: PortIODeviceManager,
}
213
214impl Vmm {
    /// Gets the specified bus device.
    ///
    /// The lookup is delegated to the MMIO device manager, keyed by `device_type` and
    /// `device_id`; the result of `MMIODeviceManager::get_device` is returned unchanged.
    // NOTE(review): presumably `None` means no matching device is registered — confirm
    // against `MMIODeviceManager::get_device`.
    pub fn get_bus_device(
        &self,
        device_type: DeviceType,
        device_id: &str,
    ) -> Option<&Mutex<dyn BusDevice>> {
        self.mmio_device_manager.get_device(device_type, device_id)
    }
223
224    /// Starts the microVM vcpus.
225    pub fn start_vcpus(&mut self, mut vcpus: Vec<Vcpu>) -> Result<()> {
226        let vcpu_count = vcpus.len();
227
228        Vcpu::register_kick_signal_handler();
229
230        self.vcpus_handles.reserve(vcpu_count);
231
232        for mut vcpu in vcpus.drain(..) {
233            vcpu.set_mmio_bus(self.mmio_device_manager.bus.clone());
234
235            self.vcpus_handles
236                .push(vcpu.start_threaded().map_err(Error::VcpuHandle)?);
237        }
238
239        // The vcpus start off in the `Paused` state, let them run.
240        self.resume_vcpus()?;
241
242        Ok(())
243    }
244
245    /// Sends a resume command to the vcpus.
246    #[cfg(target_os = "linux")]
247    pub fn resume_vcpus(&mut self) -> Result<()> {
248        for handle in self.vcpus_handles.iter() {
249            handle
250                .send_event(VcpuEvent::Resume)
251                .map_err(Error::VcpuEvent)?;
252        }
253        for handle in self.vcpus_handles.iter() {
254            match handle
255                .response_receiver()
256                .recv_timeout(Duration::from_millis(1000))
257            {
258                Ok(VcpuResponse::Resumed) => (),
259                _ => return Err(Error::VcpuResume),
260            }
261        }
262        Ok(())
263    }
264
    /// Sends a resume command to the vcpus.
    ///
    /// On macOS this is a no-op: no event is sent to the vCPU handles and the call always
    /// returns `Ok(())`.
    #[cfg(target_os = "macos")]
    pub fn resume_vcpus(&mut self) -> Result<()> {
        Ok(())
    }
269
270    /// Configures the system for boot.
271    pub fn configure_system(
272        &self,
273        vcpus: &[Vcpu],
274        _intc: &IrqChip,
275        initrd: &Option<InitrdConfig>,
276        _smbios_oem_strings: &Option<Vec<String>>,
277    ) -> Result<()> {
278        #[cfg(target_arch = "x86_64")]
279        {
280            let cmdline_len = if cfg!(feature = "tee") {
281                arch::x86_64::layout::CMDLINE_SEV_SIZE
282            } else {
283                self.kernel_cmdline.len() + 1
284            };
285
286            arch::x86_64::configure_system(
287                &self.guest_memory,
288                &self.arch_memory_info,
289                vm_memory::GuestAddress(arch::x86_64::layout::CMDLINE_START),
290                cmdline_len,
291                initrd,
292                vcpus.len() as u8,
293            )
294            .map_err(Error::ConfigureSystem)?;
295        }
296
297        #[cfg(target_arch = "aarch64")]
298        {
299            let vcpu_mpidr = vcpus.iter().map(|cpu| cpu.get_mpidr()).collect();
300            fdt::create_fdt(
301                &self.guest_memory,
302                &self.arch_memory_info,
303                vcpu_mpidr,
304                self.kernel_cmdline.as_str(),
305                self.mmio_device_manager.get_device_info(),
306                _intc,
307                initrd,
308            )
309            .map_err(Error::SetupFDT)?;
310        }
311
312        #[cfg(target_arch = "aarch64")]
313        {
314            arch::aarch64::configure_system(
315                &self.guest_memory,
316                &self.arch_memory_info,
317                _smbios_oem_strings,
318            )
319            .map_err(Error::ConfigureSystem)?;
320        }
321
322        #[cfg(target_arch = "riscv64")]
323        {
324            fdt::create_fdt(
325                &self.guest_memory,
326                &self.arch_memory_info,
327                vcpus.len() as u32,
328                self.kernel_cmdline.as_str(),
329                self.mmio_device_manager.get_device_info(),
330                _intc,
331                initrd,
332            )
333            .map_err(Error::SetupFDT)?;
334
335            arch::riscv64::configure_system(&self.guest_memory, _smbios_oem_strings)
336                .map_err(Error::ConfigureSystem)?;
337        }
338
339        Ok(())
340    }
341
    /// Returns a reference to the inner `GuestMemoryMmap` object.
    ///
    /// The guest memory is always present, so this never returns `None`.
    pub fn guest_memory(&self) -> &GuestMemoryMmap {
        &self.guest_memory
    }
346
347    /// Injects CTRL+ALT+DEL keystroke combo in the i8042 device.
348    #[cfg(target_arch = "x86_64")]
349    pub fn send_ctrl_alt_del(&mut self) -> Result<()> {
350        self.pio_device_manager
351            .i8042
352            .lock()
353            .expect("i8042 lock was poisoned")
354            .trigger_ctrl_alt_del()
355            .map_err(Error::I8042Error)
356    }
357
358    /// Waits for all vCPUs to exit and terminates the Firecracker process.
359    pub fn stop(&mut self, exit_code: i32) {
360        info!("Vmm is stopping.");
361
362        for observer in &self.exit_observers {
363            observer
364                .lock()
365                .expect("Poisoned mutex for exit observer")
366                .on_vmm_exit();
367        }
368
369        // Exit from Firecracker using the provided exit code. Safe because we're terminating
370        // the process anyway.
371        unsafe {
372            libc::_exit(exit_code);
373        }
374    }
375
    /// Returns a reference to the inner `Vm` object.
    ///
    /// `Vm` comes from the platform-specific `vstate` module, so despite the method name it
    /// is only a KVM VM on Linux; on macOS it is the `macos::vstate` implementation.
    pub fn kvm_vm(&self) -> &Vm {
        &self.vm
    }
380
    /// Forwards a mapping request to the inner `Vm` (macOS only).
    ///
    /// All arguments are passed unchanged to `Vm::add_mapping`; nothing is returned here.
    // NOTE(review): presumably this maps `len` bytes of host memory at `host_addr` into the
    // guest at `guest_addr` and reports the outcome through `reply_sender` — confirm against
    // `macos::vstate::Vm::add_mapping`.
    #[cfg(target_os = "macos")]
    pub fn add_mapping(
        &self,
        reply_sender: Sender<bool>,
        host_addr: u64,
        guest_addr: u64,
        len: u64,
    ) {
        self.vm
            .add_mapping(reply_sender, host_addr, guest_addr, len);
    }
392
    /// Forwards a mapping removal request to the inner `Vm` (macOS only).
    ///
    /// All arguments are passed unchanged to `Vm::remove_mapping`; nothing is returned here.
    // NOTE(review): presumably this unmaps `len` bytes at `guest_addr` and reports the
    // outcome through `reply_sender` — confirm against `macos::vstate::Vm::remove_mapping`.
    #[cfg(target_os = "macos")]
    pub fn remove_mapping(&self, reply_sender: Sender<bool>, guest_addr: u64, len: u64) {
        self.vm.remove_mapping(reply_sender, guest_addr, len);
    }
397}
398
399impl Subscriber for Vmm {
400    /// Handle a read event (EPOLLIN).
401    fn process(&mut self, event: &EpollEvent, _: &mut EventManager) {
402        let source = event.fd();
403        let event_set = event.event_set();
404
405        if source == self.exit_evt.as_raw_fd() && event_set == EventSet::IN {
406            let _ = self.exit_evt.read();
407            // Query each vcpu for the exit_code.
408            // If the exit_code can't be found on any vcpu, it means that the exit signal
409            // has been issued by the i8042 controller in which case we exit with
410            // FC_EXIT_CODE_OK.
411            //
412            // The exit code set up by the guest takes preference over the one reported
413            // by either a vcpu or the i8042 controller.
414            let vcpu_exit_code = self
415                .vcpus_handles
416                .iter()
417                .find_map(|handle| match handle.response_receiver().try_recv() {
418                    Ok(VcpuResponse::Exited(exit_code)) => Some(exit_code),
419                    _ => None,
420                })
421                .unwrap_or(FC_EXIT_CODE_OK);
422            let vmm_exit_code = self.exit_code.load(Ordering::SeqCst);
423            let exit_code = if vmm_exit_code != i32::MAX {
424                debug!("using vmm exit code: {vmm_exit_code}");
425                vmm_exit_code
426            } else {
427                debug!("using vcpu exit code: {vcpu_exit_code}");
428                vcpu_exit_code as i32
429            };
430            self.stop(exit_code);
431        } else {
432            error!("Spurious EventManager event for handler: Vmm");
433        }
434    }
435
436    fn interest_list(&self) -> Vec<EpollEvent> {
437        vec![EpollEvent::new(
438            EventSet::IN,
439            self.exit_evt.as_raw_fd() as u64,
440        )]
441    }
442}