use std::convert::Infallible;
use std::path::PathBuf;
use std::time::SystemTime;
#[cfg(target_os = "linux")]
use std::env;
#[cfg(target_os = "linux")]
use std::ffi::CString;
use crossbeam_channel::unbounded;
use log::error;
use polly::event_manager::EventManager;
use utils::eventfd::EventFd;
use vmm::resources::VmResources;
use vmm::vmm_config::kernel_bundle::KernelBundle;
use vmm::vmm_config::kernel_cmdline::KernelCmdlineConfig;
use vmm::vmm_config::vsock::VsockDeviceConfig;
use super::error::{BuildError, Error, Result, RuntimeError};
const INIT_PATH: &str = "/init.krun";
pub struct Vm {
vmr: VmResources,
kernel_cmdline: Option<String>,
exec_path: Option<String>,
args: Option<String>,
env: Option<String>,
workdir: Option<String>,
rlimits: Option<String>,
krunfw_path: Option<PathBuf>,
init_path: Option<String>,
_krunfw_library: Option<libloading::Library>,
}
impl Vm {
#[allow(clippy::too_many_arguments)]
pub(crate) fn new(
vmr: VmResources,
kernel_cmdline: Option<String>,
exec_path: Option<String>,
args: Option<String>,
env: Option<String>,
workdir: Option<String>,
rlimits: Option<String>,
krunfw_path: Option<PathBuf>,
init_path: Option<String>,
) -> Self {
Self {
vmr,
kernel_cmdline,
exec_path,
args,
env,
workdir,
rlimits,
krunfw_path,
init_path,
_krunfw_library: None,
}
}
pub fn enter(mut self) -> Result<Infallible> {
#[cfg(target_os = "linux")]
{
let prname = match env::var("HOSTNAME") {
Ok(val) => CString::new(format!("VM:{val}")).unwrap_or_default(),
Err(_) => CString::new("libkrun VM").unwrap_or_default(),
};
unsafe { libc::prctl(libc::PR_SET_NAME, prname.as_ptr()) };
}
let mut event_manager = EventManager::new()
.map_err(|e| Error::Build(BuildError::Start(format!("EventManager: {e:?}"))))?;
if self.vmr.external_kernel.is_none()
&& self.vmr.kernel_bundle.is_none()
&& self.vmr.firmware_config.is_none()
&& cfg!(not(feature = "efi"))
{
self.load_krunfw()?;
}
let boot_start_ns = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_nanos() as u64;
let kernel_cmdline = self.build_kernel_cmdline(boot_start_ns);
self.vmr
.set_kernel_cmdline(kernel_cmdline)
.map_err(|e| Error::Build(BuildError::Start(format!("kernel cmdline: {e:?}"))))?;
self.configure_vsock()?;
let shutdown_efd = if cfg!(target_arch = "aarch64") && cfg!(target_os = "macos") {
Some(
EventFd::new(utils::eventfd::EFD_NONBLOCK)
.map_err(|e| Error::Build(BuildError::Start(format!("shutdown_efd: {e:?}"))))?,
)
} else {
None
};
let (sender, _receiver) = unbounded();
let _vmm = vmm::builder::build_microvm(&self.vmr, &mut event_manager, shutdown_efd, sender)
.map_err(|e| Error::Build(BuildError::Start(format!("build_microvm: {e:?}"))))?;
#[cfg(target_os = "macos")]
if self.vmr.gpu_virgl_flags.is_some() {
vmm::worker::start_worker_thread(_vmm.clone(), _receiver)
.map_err(|e| Error::Runtime(RuntimeError::EventLoop(format!("{e:?}"))))?;
}
#[cfg(target_arch = "x86_64")]
if self.vmr.split_irqchip {
vmm::worker::start_worker_thread(_vmm.clone(), _receiver.clone())
.map_err(|e| Error::Runtime(RuntimeError::EventLoop(format!("{e:?}"))))?;
}
#[cfg(any(feature = "amd-sev", feature = "tdx"))]
vmm::worker::start_worker_thread(_vmm.clone(), _receiver.clone())
.map_err(|e| Error::Runtime(RuntimeError::EventLoop(format!("{e:?}"))))?;
loop {
match event_manager.run() {
Ok(_) => {}
Err(e) => {
error!("Error in EventManager loop: {e:?}");
return Err(Error::Runtime(RuntimeError::EventLoop(format!("{e:?}"))));
}
}
}
}
fn load_krunfw(&mut self) -> Result<()> {
let krunfw = load_krunfw_library(self.krunfw_path.as_deref())?;
let mut kernel_guest_addr: u64 = 0;
let mut kernel_entry_addr: u64 = 0;
let mut kernel_size: usize = 0;
let kernel_host_addr = unsafe {
(krunfw.get_kernel)(
&mut kernel_guest_addr as *mut u64,
&mut kernel_entry_addr as *mut u64,
&mut kernel_size as *mut usize,
)
};
let kernel_bundle = KernelBundle {
host_addr: kernel_host_addr as u64,
guest_addr: kernel_guest_addr,
entry_addr: kernel_entry_addr,
size: kernel_size,
};
self.vmr
.set_kernel_bundle(kernel_bundle)
.map_err(|e| Error::Build(BuildError::Krunfw(format!("{e:?}"))))?;
self._krunfw_library = Some(krunfw.library);
Ok(())
}
fn configure_vsock(&mut self) -> Result<()> {
use devices::virtio::TsiFlags;
let mut tsi_flags = TsiFlags::empty();
#[cfg(feature = "net")]
if self.vmr.net.list.is_empty() {
tsi_flags |= TsiFlags::HIJACK_INET;
}
#[cfg(not(feature = "net"))]
{
tsi_flags |= TsiFlags::HIJACK_INET;
}
#[cfg(not(feature = "tee"))]
{
tsi_flags = self.maybe_enable_hijack_unix(tsi_flags);
}
let vsock_config = VsockDeviceConfig {
vsock_id: "vsock0".to_string(),
guest_cid: 3,
host_port_map: None,
unix_ipc_port_map: None,
tsi_flags,
};
self.vmr
.set_vsock_device(vsock_config)
.map_err(|e| Error::Build(BuildError::DeviceRegistration(format!("vsock: {e:?}"))))?;
Ok(())
}
fn get_exec_path(&self) -> String {
self.exec_path
.as_ref()
.map(|p| format!("KRUN_INIT={p}"))
.unwrap_or_default()
}
fn get_workdir(&self) -> String {
self.workdir
.as_ref()
.map(|p| format!("KRUN_WORKDIR={p}"))
.unwrap_or_default()
}
fn get_rlimits(&self) -> String {
self.rlimits
.as_ref()
.map(|r| format!("KRUN_RLIMITS={r}"))
.unwrap_or_default()
}
fn get_env(&self) -> String {
self.env
.as_ref()
.map(|e| format!("KRUN_ENV={e}"))
.unwrap_or_default()
}
fn get_args(&self) -> String {
self.args.clone().unwrap_or_default()
}
fn build_kernel_cmdline(&self, boot_start_ns: u64) -> KernelCmdlineConfig {
let init = self.init_path.as_deref().unwrap_or(INIT_PATH);
let user_cmdline = self
.kernel_cmdline
.as_deref()
.map(|cmdline| format!(" {cmdline}"))
.unwrap_or_default();
KernelCmdlineConfig {
prolog: Some(format!(
"{}{} root=/dev/root init={init}",
vmm::vmm_config::kernel_cmdline::DEFAULT_KERNEL_CMDLINE,
user_cmdline,
)),
krun_env: Some(format!(
" {} {} {} {} KRUN_BOOT_START_NS={boot_start_ns}",
self.get_exec_path(),
self.get_workdir(),
self.get_rlimits(),
self.get_env(),
)),
epilog: Some(format!(" -- {}", self.get_args())),
}
}
#[cfg(not(feature = "tee"))]
fn maybe_enable_hijack_unix(
&self,
mut tsi_flags: devices::virtio::TsiFlags,
) -> devices::virtio::TsiFlags {
if cfg!(target_os = "macos") {
return tsi_flags;
}
if tsi_flags.contains(devices::virtio::TsiFlags::HIJACK_INET)
&& self.vmr.fs.len() == 1
&& self.vmr.fs[0].fs_id == "/dev/root"
{
tsi_flags |= devices::virtio::TsiFlags::HIJACK_UNIX;
}
tsi_flags
}
}
struct KrunfwBindings {
get_kernel: unsafe extern "C" fn(*mut u64, *mut u64, *mut usize) -> *mut std::ffi::c_char,
library: libloading::Library,
}
#[cfg(target_os = "linux")]
const KRUNFW_NAME: &str = "libkrunfw.so.5";
#[cfg(target_os = "macos")]
const KRUNFW_NAME: &str = "libkrunfw.5.dylib";
fn load_krunfw_library(path: Option<&std::path::Path>) -> Result<KrunfwBindings> {
let name = path
.map(|p| p.as_os_str().to_os_string())
.unwrap_or_else(|| std::ffi::OsString::from(KRUNFW_NAME));
let library = unsafe { libloading::Library::new(&name) }.map_err(|e| {
Error::Build(BuildError::Krunfw(format!(
"load {}: {e}",
name.to_string_lossy()
)))
})?;
let get_kernel = unsafe {
*library
.get::<unsafe extern "C" fn(*mut u64, *mut u64, *mut usize) -> *mut std::ffi::c_char>(
b"krunfw_get_kernel\0",
)
.map_err(|e| Error::Build(BuildError::Krunfw(format!("krunfw_get_kernel: {e}"))))?
};
Ok(KrunfwBindings {
get_kernel,
library,
})
}
#[cfg(test)]
mod tests {
use super::*;
use devices::virtio::TsiFlags;
#[cfg(not(feature = "tee"))]
use vmm::vmm_config::fs::FsDeviceConfig;
fn make_vm() -> Vm {
Vm::new(
VmResources::default(),
Some("debug loglevel=7".to_string()),
None,
Some("\"--flag\"".to_string()),
None,
None,
None,
None,
None,
)
}
#[test]
fn build_kernel_cmdline_keeps_user_cmdline() {
let vm = make_vm();
let cmdline = vm.build_kernel_cmdline(42);
let prolog = cmdline.prolog.expect("missing prolog");
assert!(prolog.contains("debug loglevel=7"));
assert!(prolog.contains("init=/init.krun"));
}
#[cfg(not(feature = "tee"))]
#[test]
fn maybe_enable_hijack_unix_respects_platform_support() {
let mut vm = make_vm();
vm.vmr.fs.push(FsDeviceConfig {
fs_id: "/dev/root".to_string(),
shared_dir: "/tmp/rootfs".to_string(),
shm_size: None,
allow_root_dir_delete: false,
});
let flags = vm.maybe_enable_hijack_unix(TsiFlags::HIJACK_INET);
#[cfg(target_os = "macos")]
assert!(!flags.contains(TsiFlags::HIJACK_UNIX));
#[cfg(not(target_os = "macos"))]
assert!(flags.contains(TsiFlags::HIJACK_UNIX));
}
#[cfg(all(not(feature = "tee"), not(target_os = "macos")))]
#[test]
fn maybe_enable_hijack_unix_requires_root_fs_id() {
let mut vm = make_vm();
vm.vmr.fs.push(FsDeviceConfig {
fs_id: "data".to_string(),
shared_dir: "/".to_string(),
shm_size: None,
allow_root_dir_delete: false,
});
let flags = vm.maybe_enable_hijack_unix(TsiFlags::HIJACK_INET);
assert!(!flags.contains(TsiFlags::HIJACK_UNIX));
}
}