use std::collections::HashMap;
use std::fs;
use std::io::{Read, Write};
use std::os::unix::net::UnixStream;
use std::path::{Path, PathBuf};
use std::process::{Child, Command, Stdio};
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use std::time::{Duration, Instant};
use crate::{
composer::FirecrackerComposer,
console::{ConsoleCapture, ConsoleConfig},
error::{VmRuntimeError, VmRuntimeResult},
jailer::{self, VmJail},
model::{
DriveSpec, NetworkInterface, RateLimiter, SnapshotRef, TokenBucket, VmSpec, VmStatus,
VmView, VsockSpec,
},
provider::{VmProvider, VmQuery},
shutdown::graceful_shutdown,
uffd::{UffdConfig, UffdHandler, snapshot_load_mem_backend_uffd},
};
/// Firecracker binary path used when `MICROVM_FIRECRACKER_BIN` is not set.
const DEFAULT_FIRECRACKER_BIN: &str = "/usr/local/bin/firecracker";
/// Guest kernel image used when `MICROVM_FIRECRACKER_KERNEL` is not set.
const DEFAULT_KERNEL_PATH: &str = "/var/lib/firecracker/vmlinux";
/// Root filesystem image used when `MICROVM_FIRECRACKER_ROOTFS` is not set.
const DEFAULT_ROOTFS_PATH: &str = "/var/lib/firecracker/rootfs/default.ext4";
/// Kernel command line used when `MICROVM_FIRECRACKER_BOOT_ARGS` is not set.
const DEFAULT_BOOT_ARGS: &str =
"console=ttyS0 reboot=k panic=1 pci=off quiet i8042.nokbd i8042.noaux";
/// API socket read/write timeout in milliseconds, used when
/// `MICROVM_FIRECRACKER_API_TIMEOUT_MS` is unset, unparsable, or zero.
const DEFAULT_API_TIMEOUT_MS: u64 = 5_000;
/// How long to wait (milliseconds) for the API socket to answer after spawn, used when
/// `MICROVM_FIRECRACKER_SOCKET_READY_TIMEOUT_MS` is unset, unparsable, or zero.
const DEFAULT_SOCKET_READY_TIMEOUT_MS: u64 = 5_000;
/// File name of the UFFD handler socket, joined onto either the jail chroot or the
/// VM state directory when restoring a snapshot with the UFFD memory backend.
const UFFD_SOCKET_BASENAME: &str = "uffd.sock";
/// How guest memory is supplied to Firecracker when a snapshot is loaded.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MemBackend {
    /// Memory is read from the snapshot's memory file (the default).
    #[default]
    File,
    /// Memory is served through a userfaultfd handler socket.
    Uffd,
}

impl std::str::FromStr for MemBackend {
    type Err = String;

    /// Parses `"file"` or `"uffd"`, ignoring surrounding whitespace and ASCII case.
    ///
    /// # Errors
    /// Returns a message naming the (trimmed, lower-cased) rejected value.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let normalized = s.trim().to_ascii_lowercase();
        if normalized == "file" {
            return Ok(Self::File);
        }
        if normalized == "uffd" {
            return Ok(Self::Uffd);
        }
        Err(format!(
            "invalid memory backend '{normalized}' (expected 'file' or 'uffd')"
        ))
    }
}
/// Resolves the memory backend from an optional `MICROVM_MEM_BACKEND` value.
///
/// `None` selects [`MemBackend::default`].
///
/// # Panics
/// Panics with a `MICROVM_MEM_BACKEND: ...` message when the value does not parse.
fn mem_backend_from_env_value(value: Option<&str>) -> MemBackend {
    let Some(raw) = value else {
        return MemBackend::default();
    };
    match raw.parse::<MemBackend>() {
        Ok(backend) => backend,
        Err(e) => panic!("MICROVM_MEM_BACKEND: {e}"),
    }
}
/// Reads one HTTP/1.1 response (headers plus `Content-Length` body bytes) from `stream`.
///
/// The returned buffer holds the raw header block, including the terminating blank
/// line, followed by exactly `Content-Length` body bytes. Any surplus bytes that were
/// read are discarded. A response without a `Content-Length` header is treated as
/// having an empty body.
///
/// # Errors
/// * `InvalidData` when the headers exceed 64 KiB without a terminator, when a
///   `Transfer-Encoding` header is present, or when `Content-Length` is not a valid
///   `usize` (or overflows when added to the header size).
/// * `UnexpectedEof` when the peer closes before the headers or the body are complete.
/// * Any error returned by the underlying `read` (for example a read timeout).
fn read_http_response(stream: &mut UnixStream) -> std::io::Result<Vec<u8>> {
    const MAX_HEADER_BYTES: usize = 64 * 1024;
    const HEADER_TERMINATOR: &[u8] = b"\r\n\r\n";

    let mut response: Vec<u8> = Vec::with_capacity(1024);
    let mut chunk = [0u8; 4096];
    // Offset from which the terminator search resumes; everything before it has
    // already been scanned, so each byte is examined a bounded number of times.
    let mut search_from = 0usize;
    let header_end = loop {
        if let Some(pos) = find_subslice(&response[search_from..], HEADER_TERMINATOR) {
            break search_from + pos + HEADER_TERMINATOR.len();
        }
        // A terminator may straddle two reads, so keep the last three bytes in play.
        search_from = response.len().saturating_sub(HEADER_TERMINATOR.len() - 1);
        if response.len() > MAX_HEADER_BYTES {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "response headers exceed 64 KiB without terminator",
            ));
        }
        let n = stream.read(&mut chunk)?;
        if n == 0 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                "connection closed before response headers completed",
            ));
        }
        response.extend_from_slice(&chunk[..n]);
    };

    let headers_text = String::from_utf8_lossy(&response[..header_end]);
    if headers_text
        .lines()
        .any(|l| l.to_ascii_lowercase().starts_with("transfer-encoding:"))
    {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            "transfer-encoded responses are not supported",
        ));
    }

    // First `Content-Length` header wins; a missing header means "no body".
    let content_length = headers_text
        .lines()
        .find_map(|l| {
            let (name, value) = l.split_once(':')?;
            name.trim()
                .eq_ignore_ascii_case("content-length")
                .then(|| value.trim().parse::<usize>())
        })
        .transpose()
        .map_err(|e| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!("invalid Content-Length: {e}"),
            )
        })?
        .unwrap_or(0);
    let total = header_end.checked_add(content_length).ok_or_else(|| {
        std::io::Error::new(std::io::ErrorKind::InvalidData, "Content-Length overflow")
    })?;

    while response.len() < total {
        let n = stream.read(&mut chunk)?;
        if n == 0 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                "connection closed mid-body",
            ));
        }
        response.extend_from_slice(&chunk[..n]);
    }
    // Drop anything read past the declared body length.
    response.truncate(total);
    Ok(response)
}

/// Returns the index of the first occurrence of `needle` in `haystack`.
///
/// An empty `needle` matches at index 0 (previously this panicked, because
/// `slice::windows` rejects a window size of zero).
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
/// In-memory bookkeeping for one VM known to the provider.
#[derive(Debug, Clone)]
struct VmRecord {
    /// Current lifecycle status as tracked by the provider.
    status: VmStatus,
    /// Ids of snapshots successfully taken through this provider instance.
    snapshots: Vec<String>,
    /// Host path of the Firecracker API socket for this VM.
    socket_path: PathBuf,
    /// Per-VM state directory (snapshots are stored beneath it).
    state_dir: PathBuf,
}

impl VmRecord {
    /// Builds the public [`VmView`] for this record under the given id.
    fn view(&self, vm_id: &str) -> VmView {
        let snapshots = self.snapshots.clone();
        VmView {
            vm_id: String::from(vm_id),
            status: self.status,
            snapshots,
        }
    }
}
/// Static configuration for the Firecracker provider; see `from_env` for the
/// environment variables and defaults that populate it.
#[derive(Debug, Clone)]
pub struct FirecrackerConfig {
/// Firecracker executable to spawn (also handed to the jailer when one is composed).
pub binary_path: PathBuf,
/// Kernel image used when a `VmSpec` does not name one.
pub kernel_path: PathBuf,
/// Root filesystem image used when a `VmSpec` does not name one.
pub rootfs_path: PathBuf,
/// Kernel command line used when a `VmSpec` does not provide boot args.
pub boot_args: String,
/// Directory under which per-VM API socket directories are created.
pub socket_dir: PathBuf,
/// Directory under which per-VM state directories are created.
pub state_dir: PathBuf,
/// vCPU count used when a `VmSpec` does not set one.
pub vcpu_count: u8,
/// Guest memory size in MiB used when a `VmSpec` does not set one.
pub mem_size_mib: u32,
/// Whether the root drive is attached read-only when a `VmSpec` does not say.
pub rootfs_read_only: bool,
/// Read and write timeout applied to every API socket request.
pub api_timeout: Duration,
/// Maximum time to wait for the API socket to answer after spawning Firecracker.
pub socket_ready_timeout: Duration,
/// Memory backend used when loading snapshots.
pub mem_backend: MemBackend,
}
impl FirecrackerConfig {
    /// Builds a configuration from `MICROVM_FIRECRACKER_*` environment variables.
    ///
    /// Unset variables fall back to built-in defaults. Numeric variables also fall
    /// back when they fail to parse or are zero. `MICROVM_FIRECRACKER_ROOTFS_RO` is
    /// true only for `1`, `true`, `TRUE` or `True` (and defaults to true when unset).
    ///
    /// # Panics
    /// Panics when `MICROVM_MEM_BACKEND` is set to an unrecognised value.
    pub fn from_env() -> Self {
        /// Path from `key`, or `fallback` when the variable is unset or not Unicode.
        fn path_var(key: &str, fallback: &str) -> PathBuf {
            match std::env::var(key) {
                Ok(value) => PathBuf::from(value),
                Err(_) => PathBuf::from(fallback),
            }
        }

        /// Strictly positive number from `key`, or `fallback` otherwise.
        fn positive_var<T>(key: &str, fallback: T) -> T
        where
            T: std::str::FromStr + PartialOrd + Default,
        {
            std::env::var(key)
                .ok()
                .and_then(|raw| raw.parse::<T>().ok())
                .filter(|parsed| *parsed > T::default())
                .unwrap_or(fallback)
        }

        let rootfs_read_only = match std::env::var("MICROVM_FIRECRACKER_ROOTFS_RO") {
            Ok(flag) => matches!(flag.as_str(), "1" | "true" | "TRUE" | "True"),
            Err(_) => true,
        };
        let boot_args = match std::env::var("MICROVM_FIRECRACKER_BOOT_ARGS") {
            Ok(args) => args,
            Err(_) => DEFAULT_BOOT_ARGS.to_string(),
        };
        let api_timeout_ms =
            positive_var::<u64>("MICROVM_FIRECRACKER_API_TIMEOUT_MS", DEFAULT_API_TIMEOUT_MS);
        let socket_ready_timeout_ms = positive_var::<u64>(
            "MICROVM_FIRECRACKER_SOCKET_READY_TIMEOUT_MS",
            DEFAULT_SOCKET_READY_TIMEOUT_MS,
        );
        let mem_backend_raw = std::env::var("MICROVM_MEM_BACKEND").ok();

        Self {
            binary_path: path_var("MICROVM_FIRECRACKER_BIN", DEFAULT_FIRECRACKER_BIN),
            kernel_path: path_var("MICROVM_FIRECRACKER_KERNEL", DEFAULT_KERNEL_PATH),
            rootfs_path: path_var("MICROVM_FIRECRACKER_ROOTFS", DEFAULT_ROOTFS_PATH),
            boot_args,
            socket_dir: path_var(
                "MICROVM_FIRECRACKER_SOCKET_DIR",
                "/tmp/microvm-firecracker/sockets",
            ),
            state_dir: path_var(
                "MICROVM_FIRECRACKER_STATE_DIR",
                "/tmp/microvm-firecracker/state",
            ),
            vcpu_count: positive_var::<u8>("MICROVM_FIRECRACKER_VCPU_COUNT", 2),
            mem_size_mib: positive_var::<u32>("MICROVM_FIRECRACKER_MEM_MIB", 1024),
            rootfs_read_only,
            api_timeout: Duration::from_millis(api_timeout_ms),
            socket_ready_timeout: Duration::from_millis(socket_ready_timeout_ms),
            mem_backend: mem_backend_from_env_value(mem_backend_raw.as_deref()),
        }
    }
}
/// The three views of a snapshot's two artifacts (vmstate and memory file).
#[derive(Debug, Clone, PartialEq, Eq)]
struct SnapshotArtifactPaths {
    /// Final host location of the vmstate file.
    durable_vmstate: PathBuf,
    /// Final host location of the memory file.
    durable_mem: PathBuf,
    /// vmstate path as passed to Firecracker in API requests.
    fc_vmstate: PathBuf,
    /// Memory-file path as passed to Firecracker in API requests.
    fc_mem: PathBuf,
    /// Host path of the vmstate file Firecracker actually reads or writes.
    staged_vmstate: PathBuf,
    /// Host path of the memory file Firecracker actually reads or writes.
    staged_mem: PathBuf,
}

impl SnapshotArtifactPaths {
    /// True when the staged vmstate path differs from the durable one, meaning the
    /// artifacts must be moved between the staging location and the durable location.
    fn is_staged(&self) -> bool {
        self.durable_vmstate.as_path() != self.staged_vmstate.as_path()
    }
}
/// Computes where a snapshot's artifacts live on the host and how Firecracker
/// should address them.
///
/// * `snap_dir` - durable snapshot directory on the host.
/// * `snapshot_id` - must be non-empty, consist only of `[A-Za-z0-9._-]`, and not
///   contain `..`, because it is used verbatim as a file-name stem.
/// * `chroot` - when present, Firecracker sees the files at `/<name>` and the host
///   sees them under the chroot; otherwise all three views equal the durable path.
///
/// # Errors
/// Returns `VmRuntimeError::Unsupported` when `snapshot_id` is not a safe file name.
fn snapshot_artifact_paths(
    snap_dir: &Path,
    snapshot_id: &str,
    chroot: Option<&Path>,
) -> VmRuntimeResult<SnapshotArtifactPaths> {
    let allowed_char = |c: char| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.');
    let is_safe = !snapshot_id.is_empty()
        && snapshot_id.chars().all(allowed_char)
        && !snapshot_id.contains("..");
    if !is_safe {
        return Err(VmRuntimeError::Unsupported(format!(
            "snapshot id '{snapshot_id}' is not a safe filename \
             (allowed: [A-Za-z0-9._-], no '..')"
        )));
    }

    let vmstate_name = format!("{snapshot_id}.vmstate");
    let mem_name = format!("{snapshot_id}.mem");
    let durable_vmstate = snap_dir.join(&vmstate_name);
    let durable_mem = snap_dir.join(&mem_name);

    let (fc_vmstate, fc_mem, staged_vmstate, staged_mem) = if let Some(root) = chroot {
        (
            PathBuf::from("/").join(&vmstate_name),
            PathBuf::from("/").join(&mem_name),
            root.join(&vmstate_name),
            root.join(&mem_name),
        )
    } else {
        (
            durable_vmstate.clone(),
            durable_mem.clone(),
            durable_vmstate.clone(),
            durable_mem.clone(),
        )
    };

    Ok(SnapshotArtifactPaths {
        durable_vmstate,
        durable_mem,
        fc_vmstate,
        fc_mem,
        staged_vmstate,
        staged_mem,
    })
}
/// Moves a snapshot artifact from `from` to `to`.
///
/// Tries a rename first. When that fails with `EXDEV` the file is copied instead
/// and the source is removed on a best-effort basis.
///
/// # Errors
/// Returns `VmRuntimeError::Unsupported` when the rename fails for any other reason
/// or when the fallback copy fails.
fn move_into_place(from: &Path, to: &Path) -> VmRuntimeResult<()> {
    let rename_err = match fs::rename(from, to) {
        Ok(()) => return Ok(()),
        Err(e) => e,
    };

    let crosses_devices = rename_err.raw_os_error() == Some(nix::errno::Errno::EXDEV as i32);
    if !crosses_devices {
        let e = rename_err;
        return Err(VmRuntimeError::Unsupported(format!(
            "failed moving snapshot artifact {} -> {}: {e}",
            from.display(),
            to.display()
        )));
    }

    if let Err(e) = fs::copy(from, to) {
        return Err(VmRuntimeError::Unsupported(format!(
            "failed copying snapshot artifact {} -> {}: {e}",
            from.display(),
            to.display()
        )));
    }
    // The copy is authoritative now; a leftover source file is harmless.
    let _ = fs::remove_file(from);
    Ok(())
}
/// Builds the JSON body for `PUT /snapshot/load`.
///
/// `fc_vmstate` is the vmstate path as Firecracker should see it and `mem_backend`
/// is the already-built backend object. A `network_interfaces` array is added only
/// when the snapshot carries network overrides; each entry includes `guest_mac`
/// only when one is set.
fn build_snapshot_load_body(
    snapshot: &SnapshotRef,
    fc_vmstate: &Path,
    mem_backend: serde_json::Value,
) -> serde_json::Value {
    let mut body = serde_json::json!({
        "snapshot_path": fc_vmstate,
        "mem_backend": mem_backend,
        "enable_diff_snapshots": false,
        "resume_vm": snapshot.resume_immediately,
    });
    if snapshot.network_overrides.is_empty() {
        return body;
    }

    let mut overrides = Vec::new();
    for iface in snapshot.network_overrides.iter() {
        let mut entry = serde_json::json!({
            "iface_id": iface.iface_id,
            "host_dev_name": iface.host_dev_name,
        });
        if let Some(mac) = iface.guest_mac.as_ref() {
            entry["guest_mac"] = serde_json::Value::String(mac.clone());
        }
        overrides.push(entry);
    }
    body["network_interfaces"] = serde_json::Value::Array(overrides);
    body
}
#[derive(Clone)]
pub struct FirecrackerVmProvider {
pub config: FirecrackerConfig,
composer: Option<Arc<FirecrackerComposer>>,
state: Arc<RwLock<HashMap<String, VmRecord>>>,
processes: Arc<Mutex<HashMap<String, Child>>>,
#[cfg(feature = "firecracker")]
consoles: Arc<Mutex<HashMap<String, ConsoleCapture>>>,
composed: Arc<Mutex<HashMap<String, ComposedAttachments>>>,
uffd_handlers: Arc<Mutex<HashMap<String, UffdHandler>>>,
}
/// Records which composer-managed resources were set up for one VM, so that
/// `compose_release` tears down only what was actually attached.
#[derive(Default, Clone)]
struct ComposedAttachments {
/// The composer's network component attached this VM.
network_attached: bool,
/// The composer's vsock component attached this VM.
vsock_attached: bool,
/// The composer's firewall component installed rules for this VM.
firewall_installed: bool,
/// Jail prepared by the composer's jailer, if any.
jail: Option<crate::jailer::VmJail>,
}
impl std::fmt::Debug for FirecrackerVmProvider {
    /// Prints the configuration and whether a composer is present; the runtime maps
    /// are omitted (the output is marked non-exhaustive).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let has_composer = self.composer.is_some();
        let mut out = f.debug_struct("FirecrackerVmProvider");
        out.field("config", &self.config);
        out.field("composer", &has_composer);
        out.finish_non_exhaustive()
    }
}
impl FirecrackerVmProvider {
/// Creates a provider with the given configuration, no composer, and empty
/// VM, process, console, attachment and UFFD-handler tables.
pub fn new(config: FirecrackerConfig) -> Self {
    Self {
        config,
        composer: None,
        state: Default::default(),
        processes: Default::default(),
        consoles: Default::default(),
        composed: Default::default(),
        uffd_handlers: Default::default(),
    }
}
/// Creates a provider whose configuration comes from the environment
/// (see `FirecrackerConfig::from_env`).
pub fn from_env() -> Self {
    let config = FirecrackerConfig::from_env();
    Self::new(config)
}
pub fn with_composer(mut self, composer: FirecrackerComposer) -> Self {
self.composer = Some(Arc::new(composer));
self
}
/// Creates a provider from the environment and attaches a composer that is also
/// built from the environment.
pub fn from_env_composed() -> Self {
    let provider = Self::from_env();
    let composer = FirecrackerComposer::from_env();
    provider.with_composer(composer)
}
/// Host path of the (non-jailed) API socket for `vm_id`:
/// `<socket_dir>/<sanitised id>/api.sock`.
pub fn api_socket_path(&self, vm_id: &str) -> PathBuf {
    let mut path = self.config.socket_dir.join(self.safe_vm_id(vm_id));
    path.push("api.sock");
    path
}
/// Per-VM state directory: `<state_dir>/<sanitised id>`.
pub fn vm_state_path(&self, vm_id: &str) -> PathBuf {
    let dir_name = self.safe_vm_id(vm_id);
    self.config.state_dir.join(dir_name)
}
/// Maps a VM id to a single safe path component.
///
/// Every character outside `[A-Za-z0-9_-]` becomes `_`. An empty id maps to `"_"`
/// rather than `""`: joining an empty component would make `vm_state_path` resolve
/// to the shared state root itself, which `destroy_vm` removes recursively.
///
/// Distinct ids can map to the same component (for example `a/b` and `a_b`).
fn safe_vm_id(&self, vm_id: &str) -> String {
    let sanitized: String = vm_id
        .chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || c == '-' || c == '_' {
                c
            } else {
                '_'
            }
        })
        .collect();
    if sanitized.is_empty() {
        String::from("_")
    } else {
        sanitized
    }
}
/// Verifies that the files needed to launch `spec` exist and creates the socket
/// and state root directories.
///
/// The kernel and rootfs images are only checked for a fresh boot, not when the
/// spec restores from a snapshot.
///
/// # Errors
/// Returns `VmRuntimeError::Unsupported` naming the missing file or the directory
/// that could not be created.
fn ensure_prereqs(&self, spec: &VmSpec) -> VmRuntimeResult<()> {
    let config = &self.config;
    if !config.binary_path.exists() {
        return Err(VmRuntimeError::Unsupported(format!(
            "firecracker binary not found: {}",
            config.binary_path.display()
        )));
    }

    let fresh_boot = spec.restore_from.is_none();
    if fresh_boot {
        let kernel = match spec.kernel.as_ref() {
            Some(path) => path,
            None => &config.kernel_path,
        };
        if !kernel.exists() {
            return Err(VmRuntimeError::Unsupported(format!(
                "kernel image not found: {}",
                kernel.display()
            )));
        }
        let rootfs = match spec.rootfs.as_ref() {
            Some(path) => path,
            None => &config.rootfs_path,
        };
        if !rootfs.exists() {
            return Err(VmRuntimeError::Unsupported(format!(
                "rootfs image not found: {}",
                rootfs.display()
            )));
        }
    }

    if let Err(e) = fs::create_dir_all(&config.socket_dir) {
        return Err(VmRuntimeError::Unsupported(format!(
            "failed to create socket dir {}: {e}",
            config.socket_dir.display()
        )));
    }
    if let Err(e) = fs::create_dir_all(&config.state_dir) {
        return Err(VmRuntimeError::Unsupported(format!(
            "failed to create state dir {}: {e}",
            config.state_dir.display()
        )));
    }
    Ok(())
}
/// Removes a leftover socket file at `socket_path`, if there is one.
///
/// The removal is attempted directly and a `NotFound` result counts as success.
/// This avoids the check-then-remove race of testing `exists()` first, and it also
/// removes a dangling symlink, which `exists()` reports as absent.
///
/// # Errors
/// Returns `VmRuntimeError::Unsupported` when the file cannot be removed for any
/// reason other than not existing.
fn remove_stale_socket(socket_path: &Path) -> VmRuntimeResult<()> {
    match fs::remove_file(socket_path) {
        Ok(()) => Ok(()),
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(e) => Err(VmRuntimeError::Unsupported(format!(
            "failed to remove stale socket {}: {e}",
            socket_path.display()
        ))),
    }
}
/// Spawns the Firecracker process for `vm_id`.
///
/// Creates the socket's parent directory, removes any stale socket, then launches
/// either the jailer-built command (when `jail` is given) or the binary directly
/// with `--api-sock <socket_path>`. stdin and stdout are discarded; stderr is piped
/// only when `capture_stderr` is true.
///
/// # Errors
/// * `VmRuntimeError::Jailer` when a jail is supplied but the composer has no jailer.
/// * `VmRuntimeError::Unsupported` for an invalid socket path, directory creation
///   failure, stale-socket removal failure, or a failed spawn.
/// * Whatever the jailer's `build_command` returns on failure.
fn spawn_firecracker_for_compose(
    &self,
    vm_id: &str,
    socket_path: &Path,
    capture_stderr: bool,
    jail: Option<&crate::jailer::VmJail>,
) -> VmRuntimeResult<Child> {
    let Some(parent) = socket_path.parent() else {
        return Err(VmRuntimeError::Unsupported(format!(
            "invalid api socket path for vm {vm_id}: {}",
            socket_path.display()
        )));
    };
    if let Err(e) = fs::create_dir_all(parent) {
        return Err(VmRuntimeError::Unsupported(format!(
            "failed to create socket parent {}: {e}",
            parent.display()
        )));
    }
    Self::remove_stale_socket(socket_path)?;

    let mut command = if let Some(j) = jail {
        let composed_jailer = self.composer.as_ref().and_then(|c| c.jailer.clone());
        let Some(jailer) = composed_jailer else {
            return Err(VmRuntimeError::Jailer(format!(
                "spawn requested jailed mode for vm {vm_id} but no jailer is on the composer"
            )));
        };
        jailer.build_command(vm_id, j, &self.config.binary_path)?
    } else {
        let mut direct = Command::new(&self.config.binary_path);
        direct.arg("--api-sock").arg(socket_path);
        direct
    };

    command.stdin(Stdio::null());
    command.stdout(Stdio::null());
    command.stderr(if capture_stderr {
        Stdio::piped()
    } else {
        Stdio::null()
    });

    match command.spawn() {
        Ok(child) => Ok(child),
        Err(e) => Err(VmRuntimeError::Unsupported(format!(
            "failed spawning firecracker for {vm_id} ({}): {e}",
            self.config.binary_path.display()
        ))),
    }
}
/// Sets up composer-managed resources for `vm_id` before Firecracker is spawned.
///
/// Without a composer the spec is returned unchanged with empty attachments.
/// For a fresh boot the network, vsock and firewall components (when present) are
/// attached and the spec is extended with the resulting `eth0` interface and vsock
/// device. For a snapshot restore those steps are skipped. In both cases the jailer
/// (when present) prepares a jail from the effective kernel, rootfs and extra drives.
///
/// # Errors
/// Propagates the first component failure. Everything attached up to that point
/// is released via `compose_release` before returning, because the caller never
/// receives the attachments on the error path and could not release them itself.
fn compose_pre_spawn(
    &self,
    vm_id: &str,
    mut spec: VmSpec,
) -> VmRuntimeResult<(VmSpec, ComposedAttachments)> {
    let Some(composer) = self.composer.clone() else {
        return Ok((spec, ComposedAttachments::default()));
    };

    let mut attachments = ComposedAttachments::default();
    let outcome = (|| -> VmRuntimeResult<()> {
        // Restores skip the device attachments below and only prepare the jail.
        if spec.restore_from.is_none() {
            if let Some(network) = composer.network.as_ref() {
                network.ensure_host()?;
                let vm_network = network.attach(vm_id)?;
                attachments.network_attached = true;
                let guest_mac = vm_network.mac_string();
                spec.network_interfaces.push(NetworkInterface {
                    iface_id: "eth0".into(),
                    host_dev_name: vm_network.tap_name,
                    guest_mac: Some(guest_mac),
                    rx_rate_limiter: None,
                    tx_rate_limiter: None,
                });
            }
            if let Some(vsock) = composer.vsock.as_ref() {
                let attachment = vsock.attach(vm_id)?;
                // Flag immediately so a failure just below still detaches it.
                attachments.vsock_attached = true;
                vsock.ensure_uds_parent(&attachment.uds_path)?;
                spec.vsock = Some(VsockSpec {
                    cid: attachment.cid,
                    uds_path: attachment.uds_path,
                });
            }
            if let Some(firewall) = composer.firewall.as_ref() {
                // Rules are bound to the most recently added interface, if any.
                if let Some(tap) = spec
                    .network_interfaces
                    .last()
                    .map(|i| i.host_dev_name.clone())
                {
                    firewall.install(vm_id, &tap, &[])?;
                    attachments.firewall_installed = true;
                }
            }
        }

        if let Some(jailer) = composer.jailer.as_ref() {
            let kernel = spec
                .kernel
                .clone()
                .unwrap_or_else(|| self.config.kernel_path.clone());
            let rootfs = spec
                .rootfs
                .clone()
                .unwrap_or_else(|| self.config.rootfs_path.clone());
            let extra: Vec<PathBuf> = spec
                .extra_drives
                .iter()
                .map(|d| d.path_on_host.clone())
                .collect();
            attachments.jail = Some(jailer.prepare(vm_id, &kernel, &rootfs, &extra)?);
        }
        Ok(())
    })();

    match outcome {
        Ok(()) => Ok((spec, attachments)),
        Err(err) => {
            // Roll back whatever was attached before the failing step.
            self.compose_release(vm_id, &attachments);
            Err(err)
        }
    }
}
fn compose_release(&self, vm_id: &str, attachments: &ComposedAttachments) {
let Some(composer) = self.composer.clone() else {
return;
};
if attachments.firewall_installed
&& let Some(firewall) = composer.firewall.as_ref()
{
let _ = firewall.uninstall(vm_id);
}
if attachments.vsock_attached
&& let Some(vsock) = composer.vsock.as_ref()
{
let _ = vsock.detach(vm_id);
}
if attachments.network_attached
&& let Some(network) = composer.network.as_ref()
{
let _ = network.detach(vm_id);
}
if attachments.jail.is_some()
&& let Some(jailer) = composer.jailer.as_ref()
{
let _ = jailer.teardown(vm_id);
}
}
/// Polls every 100 ms until the API socket exists and answers `GET /`, or until
/// `socket_ready_timeout` has elapsed.
///
/// # Errors
/// Returns `VmRuntimeError::Unsupported` when the socket is not ready in time.
fn wait_for_socket_ready(&self, socket_path: &Path) -> VmRuntimeResult<()> {
    let timeout = self.config.socket_ready_timeout;
    let deadline = Instant::now() + timeout;
    loop {
        if Instant::now() >= deadline {
            break;
        }
        let answered = socket_path.exists()
            && self
                .firecracker_request(socket_path, "GET", "/", None)
                .is_ok();
        if answered {
            return Ok(());
        }
        thread::sleep(Duration::from_millis(100));
    }
    Err(VmRuntimeError::Unsupported(format!(
        "firecracker api socket not ready within {:?}: {}",
        timeout,
        socket_path.display()
    )))
}
/// Configures a freshly spawned Firecracker instance for a cold boot.
///
/// Requests are issued in this order: machine config, boot source, root drive,
/// network interfaces, extra drives, vsock. When `jail` is given, the kernel,
/// rootfs and extra-drive paths sent to Firecracker are chroot-relative (`/<name>`).
///
/// # Errors
/// Returns `VmRuntimeError::Unsupported` up front when a vsock device is requested
/// together with a jail; otherwise propagates the first failing API request.
fn configure_vm(
    &self,
    socket_path: &Path,
    spec: &VmSpec,
    jail: Option<&VmJail>,
) -> VmRuntimeResult<()> {
    let jailed = jail.is_some();
    if spec.vsock.is_some() && jailed {
        return Err(VmRuntimeError::Unsupported(
            "vsock under the jailer is not yet supported: the UDS path cannot \
             resolve both inside the chroot (for FC) and on the host (for \
             clients); set MICROVM_COMPOSE_VSOCK=0 or run without the jailer"
                .into(),
        ));
    }
    let defaults = &self.config;

    // 1. Machine shape.
    let machine = serde_json::json!({
        "vcpu_count": spec.vcpu_count.unwrap_or(defaults.vcpu_count),
        "mem_size_mib": spec.mem_size_mib.unwrap_or(defaults.mem_size_mib),
        "smt": false,
        "track_dirty_pages": spec.track_dirty_pages.unwrap_or(true)
    });
    self.firecracker_request(socket_path, "PUT", "/machine-config", Some(machine))?;

    // 2. Kernel and command line.
    let kernel_path: PathBuf = match jail {
        Some(_) => PathBuf::from("/").join(jailer::KERNEL_BASENAME),
        None => spec
            .kernel
            .clone()
            .unwrap_or_else(|| defaults.kernel_path.clone()),
    };
    let boot_args = match spec.boot_args.as_deref() {
        Some(args) => args,
        None => &defaults.boot_args,
    };
    let boot = serde_json::json!({
        "kernel_image_path": kernel_path,
        "boot_args": boot_args
    });
    self.firecracker_request(socket_path, "PUT", "/boot-source", Some(boot))?;

    // 3. Root drive.
    let rootfs_path: PathBuf = match jail {
        Some(_) => PathBuf::from("/").join(jailer::ROOTFS_BASENAME),
        None => spec
            .rootfs
            .clone()
            .unwrap_or_else(|| defaults.rootfs_path.clone()),
    };
    let mut root_drive = serde_json::json!({
        "drive_id": "rootfs",
        "path_on_host": rootfs_path,
        "is_root_device": true,
        "is_read_only": spec.rootfs_read_only.unwrap_or(defaults.rootfs_read_only)
    });
    if let Some(limiter) = spec.rootfs_rate_limit.as_ref() {
        root_drive["rate_limiter"] = rate_limiter_to_json(limiter);
    }
    self.firecracker_request(socket_path, "PUT", "/drives/rootfs", Some(root_drive))?;

    // 4. Network interfaces.
    for iface in spec.network_interfaces.iter() {
        self.put_network_interface(socket_path, iface)?;
    }

    // 5. Extra drives; inside a jail they are addressed by their staged basename.
    for (idx, drive) in spec.extra_drives.iter().enumerate() {
        if !jailed {
            self.put_extra_drive(socket_path, drive)?;
            continue;
        }
        let in_jail = DriveSpec {
            path_on_host: PathBuf::from("/")
                .join(jailer::staged_drive_basename(&drive.path_on_host, idx)),
            ..drive.clone()
        };
        self.put_extra_drive(socket_path, &in_jail)?;
    }

    // 6. Vsock (only reachable when not jailed, see the guard above).
    match spec.vsock.as_ref() {
        Some(vsock) => self.put_vsock(socket_path, vsock),
        None => Ok(()),
    }
}
/// Registers a non-root drive via `PUT /drives/<drive_id>`.
///
/// # Errors
/// Returns `VmRuntimeError::Unsupported` when `drive_id` is `rootfs` (reserved for
/// the root device); otherwise propagates the API request failure.
fn put_extra_drive(&self, socket_path: &Path, drive: &DriveSpec) -> VmRuntimeResult<()> {
    let reserved = drive.drive_id == "rootfs";
    if reserved {
        return Err(VmRuntimeError::Unsupported(
            "drive_id 'rootfs' is reserved for the root device".into(),
        ));
    }

    let mut payload = serde_json::json!({
        "drive_id": drive.drive_id,
        "path_on_host": drive.path_on_host,
        "is_root_device": false,
        "is_read_only": drive.is_read_only,
    });
    if let Some(limiter) = &drive.rate_limiter {
        payload["rate_limiter"] = rate_limiter_to_json(limiter);
    }

    let endpoint = format!("/drives/{}", drive.drive_id);
    self.firecracker_request(socket_path, "PUT", &endpoint, Some(payload))
        .map(|_| ())
}
/// Registers the vsock device via `PUT /vsock` with the spec's CID and UDS path.
fn put_vsock(&self, socket_path: &Path, vsock: &VsockSpec) -> VmRuntimeResult<()> {
    let payload = serde_json::json!({
        "guest_cid": vsock.cid,
        "uds_path": vsock.uds_path,
    });
    self.firecracker_request(socket_path, "PUT", "/vsock", Some(payload))
        .map(|_| ())
}
/// Registers a network interface via `PUT /network-interfaces/<iface_id>`.
///
/// `guest_mac`, `rx_rate_limiter` and `tx_rate_limiter` are included in the body
/// only when set on `iface`.
fn put_network_interface(
    &self,
    socket_path: &Path,
    iface: &NetworkInterface,
) -> VmRuntimeResult<()> {
    let mut payload = serde_json::json!({
        "iface_id": iface.iface_id,
        "host_dev_name": iface.host_dev_name,
    });
    if let Some(mac) = iface.guest_mac.as_ref() {
        payload["guest_mac"] = serde_json::Value::String(mac.clone());
    }
    if let Some(rx) = iface.rx_rate_limiter.as_ref() {
        payload["rx_rate_limiter"] = rate_limiter_to_json(rx);
    }
    if let Some(tx) = iface.tx_rate_limiter.as_ref() {
        payload["tx_rate_limiter"] = rate_limiter_to_json(tx);
    }

    let endpoint = format!("/network-interfaces/{}", iface.iface_id);
    self.firecracker_request(socket_path, "PUT", &endpoint, Some(payload))
        .map(|_| ())
}
/// Returns the `(uid, gid)` configured on the composer's jailer.
///
/// # Errors
/// Returns `VmRuntimeError::Jailer` when no composer or no jailer is present.
fn jailer_identity(&self) -> VmRuntimeResult<(u32, u32)> {
    let composed_jailer = self.composer.as_ref().and_then(|c| c.jailer.clone());
    let Some(jailer) = composed_jailer else {
        return Err(VmRuntimeError::Jailer(
            "vm has a jail but no jailer is composed on the provider".into(),
        ));
    };
    let uid = jailer.config().uid;
    let gid = jailer.config().gid;
    Ok((uid, gid))
}
/// Loads `snapshot` into the Firecracker instance behind `socket_path`.
///
/// The artifacts are looked up under the *source* VM's state directory. With a
/// jail, the vmstate file (and, for the file backend, the memory file) is staged
/// into the chroot first. With the UFFD backend a handler is started on a socket
/// in the chroot (jailed) or in the *target* VM's state directory, and is returned
/// so the caller can keep it alive.
///
/// # Errors
/// * `VmRuntimeError::SnapshotNotFound` when either durable artifact is missing.
/// * Errors from path validation, staging, the jailer lookup, the UFFD handler
///   start, or the `/snapshot/load` request.
///
/// A failed `chown` of the UFFD socket is only logged to stderr.
fn load_snapshot(
    &self,
    socket_path: &Path,
    snapshot: &SnapshotRef,
    target_vm_id: &str,
    jail: Option<&VmJail>,
) -> VmRuntimeResult<Option<UffdHandler>> {
    let snap_dir = self.vm_state_path(&snapshot.vm_id).join("snapshots");
    let chroot = jail.map(|j| j.chroot_path.as_path());
    let paths = snapshot_artifact_paths(&snap_dir, &snapshot.snapshot_id, chroot)?;

    let artifacts_present = paths.durable_vmstate.exists() && paths.durable_mem.exists();
    if !artifacts_present {
        return Err(VmRuntimeError::SnapshotNotFound {
            vm_id: snapshot.vm_id.clone(),
            snapshot_id: snapshot.snapshot_id.clone(),
        });
    }

    let backend = self.config.mem_backend;
    if jail.is_some() {
        let (uid, gid) = self.jailer_identity()?;
        jailer::stage_chroot_file(&paths.durable_vmstate, &paths.staged_vmstate, uid, gid)?;
        // The UFFD handler reads the durable memory file itself, so only the file
        // backend needs the memory file inside the chroot.
        if backend == MemBackend::File {
            jailer::stage_chroot_file(&paths.durable_mem, &paths.staged_mem, uid, gid)?;
        }
    }

    let mut uffd_handler: Option<UffdHandler> = None;
    let mem_backend = match backend {
        MemBackend::File => serde_json::json!({
            "backend_type": "File",
            "backend_path": paths.fc_mem,
        }),
        MemBackend::Uffd => {
            let (host_socket, fc_socket) = if let Some(jail) = jail {
                (
                    jail.chroot_path.join(UFFD_SOCKET_BASENAME),
                    PathBuf::from("/").join(UFFD_SOCKET_BASENAME),
                )
            } else {
                let shared = self.vm_state_path(target_vm_id).join(UFFD_SOCKET_BASENAME);
                (shared.clone(), shared)
            };
            let handler = UffdHandler::start(UffdConfig {
                socket_path: host_socket.clone(),
                mem_file_path: paths.durable_mem.clone(),
            })?;
            if jail.is_some() {
                let (uid, gid) = self.jailer_identity()?;
                let chowned = nix::unistd::chown(
                    &host_socket,
                    Some(nix::unistd::Uid::from_raw(uid)),
                    Some(nix::unistd::Gid::from_raw(gid)),
                );
                if let Err(err) = chowned {
                    eprintln!(
                        "[microvm-uffd] chown {} to {uid}:{gid} failed ({err}); \
                         the jailed firecracker may be unable to connect",
                        host_socket.display()
                    );
                }
            }
            let backend_json = snapshot_load_mem_backend_uffd(&fc_socket);
            uffd_handler = Some(handler);
            backend_json
        }
    };

    let body = build_snapshot_load_body(snapshot, &paths.fc_vmstate, mem_backend);
    self.firecracker_request(socket_path, "PUT", "/snapshot/load", Some(body))
        .map(|_| uffd_handler)
}
fn firecracker_request(
&self,
socket_path: &Path,
method: &str,
endpoint: &str,
body: Option<serde_json::Value>,
) -> VmRuntimeResult<Option<serde_json::Value>> {
let mut stream = UnixStream::connect(socket_path).map_err(|e| {
VmRuntimeError::Unsupported(format!(
"failed connecting to firecracker socket {}: {e}",
socket_path.display()
))
})?;
stream
.set_read_timeout(Some(self.config.api_timeout))
.map_err(|e| {
VmRuntimeError::Unsupported(format!(
"failed setting read timeout on {}: {e}",
socket_path.display()
))
})?;
stream
.set_write_timeout(Some(self.config.api_timeout))
.map_err(|e| {
VmRuntimeError::Unsupported(format!(
"failed setting write timeout on {}: {e}",
socket_path.display()
))
})?;
let body_str = body.map(|v| v.to_string()).unwrap_or_default();
let has_body = !body_str.is_empty();
let request = if has_body {
format!(
"{method} {endpoint} HTTP/1.1\r\nHost: localhost\r\nAccept: application/json\r\nContent-Type: application/json\r\nConnection: close\r\nContent-Length: {}\r\n\r\n{}",
body_str.len(),
body_str
)
} else {
format!(
"{method} {endpoint} HTTP/1.1\r\nHost: localhost\r\nAccept: application/json\r\nConnection: close\r\n\r\n"
)
};
stream.write_all(request.as_bytes()).map_err(|e| {
VmRuntimeError::Unsupported(format!(
"failed writing firecracker request {method} {endpoint}: {e}"
))
})?;
stream.flush().map_err(|e| {
VmRuntimeError::Unsupported(format!(
"failed flushing firecracker request {method} {endpoint}: {e}"
))
})?;
let response = read_http_response(&mut stream).map_err(|e| {
VmRuntimeError::Unsupported(format!(
"failed reading firecracker response {method} {endpoint}: {e}"
))
})?;
let response_text = String::from_utf8_lossy(&response);
let (headers, body) = response_text.split_once("\r\n\r\n").unwrap_or_default();
let status_line = headers.lines().next().unwrap_or_default();
let status_code = status_line
.split_whitespace()
.nth(1)
.and_then(|code| code.parse::<u16>().ok())
.unwrap_or(0);
if !(200..300).contains(&status_code) {
return Err(VmRuntimeError::Unsupported(format!(
"firecracker api error {method} {endpoint}: status={status_code}, body={body}"
)));
}
if body.trim().is_empty() {
return Ok(None);
}
let json = serde_json::from_str::<serde_json::Value>(body).map_err(|e| {
VmRuntimeError::Unsupported(format!(
"failed parsing firecracker response JSON for {method} {endpoint}: {e}"
))
})?;
Ok(Some(json))
}
/// Boots the configured VM via `PUT /actions` with `InstanceStart`.
fn action_instance_start(&self, socket_path: &Path) -> VmRuntimeResult<()> {
    let action = serde_json::json!({ "action_type": "InstanceStart" });
    self.firecracker_request(socket_path, "PUT", "/actions", Some(action))
        .map(|_| ())
}
/// Pauses the VM via `PATCH /vm` with state `Paused`.
fn action_pause(&self, socket_path: &Path) -> VmRuntimeResult<()> {
    let patch = serde_json::json!({ "state": "Paused" });
    self.firecracker_request(socket_path, "PATCH", "/vm", Some(patch))
        .map(|_| ())
}
/// Resumes the VM via `PATCH /vm` with state `Resumed`.
fn action_resume(&self, socket_path: &Path) -> VmRuntimeResult<()> {
    let patch = serde_json::json!({ "state": "Resumed" });
    self.firecracker_request(socket_path, "PATCH", "/vm", Some(patch))
        .map(|_| ())
}
/// Takes a full snapshot of the VM behind `socket_path` into
/// `<state_dir>/snapshots/<snapshot_id>.{vmstate,mem}`.
///
/// With a jail, Firecracker writes the files inside the chroot and they are then
/// moved to the durable location. On any failure both the durable files and (when
/// staged) the staged files are removed on a best-effort basis.
///
/// # Errors
/// Propagates directory-creation, id-validation, API-request and move failures.
fn create_snapshot(
    &self,
    socket_path: &Path,
    state_dir: &Path,
    snapshot_id: &str,
    jail: Option<&VmJail>,
) -> VmRuntimeResult<()> {
    let snap_dir = state_dir.join("snapshots");
    if let Err(e) = fs::create_dir_all(&snap_dir) {
        return Err(VmRuntimeError::Unsupported(format!(
            "failed creating snapshot dir {}: {e}",
            snap_dir.display()
        )));
    }
    let chroot = jail.map(|j| j.chroot_path.as_path());
    let paths = snapshot_artifact_paths(&snap_dir, snapshot_id, chroot)?;

    let request_body = serde_json::json!({
        "snapshot_type": "Full",
        "snapshot_path": paths.fc_vmstate,
        "mem_file_path": paths.fc_mem
    });
    let mut result = self
        .firecracker_request(socket_path, "PUT", "/snapshot/create", Some(request_body))
        .map(|_| ());
    if result.is_ok() && paths.is_staged() {
        result = move_into_place(&paths.staged_vmstate, &paths.durable_vmstate)
            .and_then(|()| move_into_place(&paths.staged_mem, &paths.durable_mem));
    }

    if result.is_err() {
        // Leave no partial artifacts behind.
        let _ = fs::remove_file(&paths.durable_vmstate);
        let _ = fs::remove_file(&paths.durable_mem);
        if paths.is_staged() {
            let _ = fs::remove_file(&paths.staged_vmstate);
            let _ = fs::remove_file(&paths.staged_mem);
        }
    }
    result
}
/// Creates a VM: checks prerequisites, attaches composer resources, spawns
/// Firecracker, then either loads a snapshot or applies the boot configuration,
/// and finally records the VM in the provider's tables.
///
/// On failure while spawning or configuring, the child (if any) is killed and the
/// composer attachments are released before the error is returned.
///
/// # Errors
/// `VmAlreadyExists` when `vm_id` is already tracked, `StatePoisoned` when a lock
/// is poisoned, or the first error from the prerequisite, compose, spawn,
/// readiness, snapshot-load or configuration steps.
fn create_vm_inner(&self, vm_id: &str, spec: &VmSpec) -> VmRuntimeResult<()> {
self.ensure_prereqs(spec)?;
// NOTE(review): this check holds only a read lock that is released before the
// record is inserted at the end, so two concurrent calls with the same id could
// both get past it — confirm whether callers serialise creation.
{
let state = self
.state
.read()
.map_err(|_| VmRuntimeError::StatePoisoned)?;
if state.contains_key(vm_id) {
return Err(VmRuntimeError::VmAlreadyExists(vm_id.to_owned()));
}
}
let (effective_spec, attachments) = self.compose_pre_spawn(vm_id, spec.clone())?;
// A jailed VM exposes its API socket at the jail's host-side path.
let socket_path = attachments
.jail
.as_ref()
.map(|j| j.api_socket_on_host.clone())
.unwrap_or_else(|| self.api_socket_path(vm_id));
let state_dir = self.vm_state_path(vm_id);
fs::create_dir_all(&state_dir).map_err(|e| {
self.compose_release(vm_id, &attachments);
VmRuntimeError::Unsupported(format!(
"failed creating vm state dir {}: {e}",
state_dir.display()
))
})?;
// stderr is piped only when the composer asks for console capture.
let capture_stderr = self
.composer
.as_ref()
.map(|c| c.capture_console)
.unwrap_or(false);
let mut child = match self.spawn_firecracker_for_compose(
vm_id,
&socket_path,
capture_stderr,
attachments.jail.as_ref(),
) {
Ok(c) => c,
Err(e) => {
self.compose_release(vm_id, &attachments);
return Err(e);
}
};
let restoring = effective_spec.restore_from.is_some();
let mut uffd_handler: Option<UffdHandler> = None;
// Run the post-spawn steps in a closure so a single error path below can
// kill the child and release attachments.
let configure_result = (|| -> VmRuntimeResult<()> {
self.wait_for_socket_ready(&socket_path)?;
if let Some(snapshot) = effective_spec.restore_from.as_ref() {
uffd_handler =
self.load_snapshot(&socket_path, snapshot, vm_id, attachments.jail.as_ref())?;
} else {
self.configure_vm(&socket_path, &effective_spec, attachments.jail.as_ref())?;
}
Ok(())
})();
if let Err(err) = configure_result {
let _ = child.kill();
let _ = child.wait();
self.compose_release(vm_id, &attachments);
return Err(err);
}
// Console capture is best-effort: a poisoned console map is silently skipped.
if capture_stderr && let Some(stderr) = child.stderr.take() {
let capture = ConsoleCapture::attach(stderr, ConsoleConfig::default());
if let Ok(mut consoles) = self.consoles.lock() {
consoles.insert(vm_id.to_owned(), capture);
}
}
// NOTE(review): the `?` on the lock results below returns without killing the
// child or releasing attachments — confirm whether a poisoned lock is treated
// as unrecoverable.
self.processes
.lock()
.map_err(|_| VmRuntimeError::StatePoisoned)?
.insert(vm_id.to_owned(), child);
if let Some(handler) = uffd_handler {
self.uffd_handlers
.lock()
.map_err(|_| VmRuntimeError::StatePoisoned)?
.insert(vm_id.to_owned(), handler);
}
// Only remember attachments when there is something to release later.
if attachments.network_attached
|| attachments.vsock_attached
|| attachments.firewall_installed
|| attachments.jail.is_some()
{
self.composed
.lock()
.map_err(|_| VmRuntimeError::StatePoisoned)?
.insert(vm_id.to_owned(), attachments);
}
// Restored VMs are Running when the snapshot asked to resume immediately and
// Stopped otherwise; cold-booted VMs start as Created.
let initial_status = match (restoring, spec.restore_from.as_ref()) {
(true, Some(snap)) if snap.resume_immediately => VmStatus::Running,
(true, _) => VmStatus::Stopped,
(false, _) => VmStatus::Created,
};
self.state
.write()
.map_err(|_| VmRuntimeError::StatePoisoned)?
.insert(
vm_id.to_owned(),
VmRecord {
status: initial_status,
snapshots: Vec::new(),
socket_path,
state_dir,
},
);
Ok(())
}
/// Stops the Firecracker process for `vm_id` and releases everything tied to it.
///
/// The child is shut down gracefully when the composer enables that, otherwise it
/// is killed and reaped. The console capture and UFFD handler entries are then
/// dropped, and any recorded composer attachments are released. A VM without a
/// tracked process is not an error.
///
/// # Errors
/// Returns `VmRuntimeError::StatePoisoned` when the process or attachment map lock
/// is poisoned.
fn kill_process(&self, vm_id: &str) -> VmRuntimeResult<()> {
    let tracked_child = {
        let mut processes = self
            .processes
            .lock()
            .map_err(|_| VmRuntimeError::StatePoisoned)?;
        processes.remove(vm_id)
    };

    if let Some(mut child) = tracked_child {
        let graceful_composer = self.composer.as_ref().filter(|c| c.graceful_shutdown);
        match graceful_composer {
            Some(composer) => {
                let _ = graceful_shutdown(&mut child, &composer.shutdown_config);
            }
            None => {
                let _ = child.kill();
                let _ = child.wait();
            }
        }
    }

    // Console and UFFD cleanup is best-effort; a poisoned lock is skipped.
    if let Ok(mut consoles) = self.consoles.lock() {
        consoles.remove(vm_id);
    }
    if let Ok(mut handlers) = self.uffd_handlers.lock() {
        handlers.remove(vm_id);
    }

    let released = {
        let mut composed = self
            .composed
            .lock()
            .map_err(|_| VmRuntimeError::StatePoisoned)?;
        composed.remove(vm_id)
    };
    if let Some(attachments) = released {
        self.compose_release(vm_id, &attachments);
    }
    Ok(())
}
/// Returns the captured console tail for `vm_id`, or `None` when no capture is
/// registered for it or the console map lock is poisoned.
pub fn console_tail(&self, vm_id: &str) -> Option<Vec<String>> {
    match self.consoles.lock() {
        Ok(consoles) => consoles.get(vm_id).map(|capture| capture.tail()),
        Err(_) => None,
    }
}
}
impl VmProvider for FirecrackerVmProvider {
/// Creates a VM from the default `VmSpec`.
fn create_vm(&self, vm_id: &str) -> VmRuntimeResult<()> {
    let default_spec = VmSpec::default();
    self.create_vm_inner(vm_id, &default_spec)
}
fn create_vm_with_spec(&self, vm_id: &str, spec: &VmSpec) -> VmRuntimeResult<()> {
self.create_vm_inner(vm_id, spec)
}
/// Moves a VM to `Running`: a `Created` VM is booted with `InstanceStart`, a
/// `Stopped` VM is resumed.
///
/// The state write lock is held for the duration of the API call.
///
/// # Errors
/// `StatePoisoned`, `VmNotFound`, `InvalidTransition` from any other status, or
/// the API request failure (in which case the status is left unchanged).
fn start_vm(&self, vm_id: &str) -> VmRuntimeResult<()> {
    let mut state = self
        .state
        .write()
        .map_err(|_| VmRuntimeError::StatePoisoned)?;
    let Some(record) = state.get_mut(vm_id) else {
        return Err(VmRuntimeError::VmNotFound(vm_id.to_owned()));
    };

    let current = record.status;
    if current == VmStatus::Created {
        self.action_instance_start(&record.socket_path)?;
    } else if current == VmStatus::Stopped {
        self.action_resume(&record.socket_path)?;
    } else {
        return Err(VmRuntimeError::InvalidTransition {
            vm_id: vm_id.to_owned(),
            from: current.to_string(),
            to: "running",
        });
    }
    record.status = VmStatus::Running;
    Ok(())
}
/// Pauses a `Running` VM and records it as `Stopped`.
///
/// The state write lock is held for the duration of the API call.
///
/// # Errors
/// `StatePoisoned`, `VmNotFound`, `InvalidTransition` when the VM is not running,
/// or the API request failure (in which case the status is left unchanged).
fn stop_vm(&self, vm_id: &str) -> VmRuntimeResult<()> {
    let mut state = self
        .state
        .write()
        .map_err(|_| VmRuntimeError::StatePoisoned)?;
    let Some(record) = state.get_mut(vm_id) else {
        return Err(VmRuntimeError::VmNotFound(vm_id.to_owned()));
    };

    let current = record.status;
    if current != VmStatus::Running {
        return Err(VmRuntimeError::InvalidTransition {
            vm_id: vm_id.to_owned(),
            from: current.to_string(),
            to: "stopped",
        });
    }
    self.action_pause(&record.socket_path)?;
    record.status = VmStatus::Stopped;
    Ok(())
}
/// Takes a full snapshot of `vm_id` under `snapshot_id` and records the id on the
/// VM once the snapshot has been written.
///
/// The state write lock is held for the duration of the snapshot.
///
/// # Errors
/// `StatePoisoned`, `VmNotFound`, `InvalidTransition` for a destroyed VM,
/// `SnapshotAlreadyExists` when the id is already recorded for this VM, or any
/// error from creating the snapshot.
fn snapshot_vm(&self, vm_id: &str, snapshot_id: &str) -> VmRuntimeResult<()> {
    let mut state = self
        .state
        .write()
        .map_err(|_| VmRuntimeError::StatePoisoned)?;
    let Some(record) = state.get_mut(vm_id) else {
        return Err(VmRuntimeError::VmNotFound(vm_id.to_owned()));
    };

    if record.status == VmStatus::Destroyed {
        return Err(VmRuntimeError::InvalidTransition {
            vm_id: vm_id.to_owned(),
            from: VmStatus::Destroyed.to_string(),
            to: "snapshot",
        });
    }
    let already_taken = record
        .snapshots
        .iter()
        .any(|known| known.as_str() == snapshot_id);
    if already_taken {
        return Err(VmRuntimeError::SnapshotAlreadyExists {
            vm_id: vm_id.to_owned(),
            snapshot_id: snapshot_id.to_owned(),
        });
    }

    // A jailed VM writes its snapshot inside the chroot, so pass the jail along.
    let jail = {
        let composed = self
            .composed
            .lock()
            .map_err(|_| VmRuntimeError::StatePoisoned)?;
        composed.get(vm_id).and_then(|a| a.jail.clone())
    };
    self.create_snapshot(
        &record.socket_path,
        &record.state_dir,
        snapshot_id,
        jail.as_ref(),
    )?;
    record.snapshots.push(snapshot_id.to_owned());
    Ok(())
}
/// Tears down `vm_id` and marks its record as `Destroyed`.
///
/// After `kill_process` succeeded, the API socket file, the socket's parent
/// directory and the VM state directory are removed on a best-effort basis:
/// filesystem errors during that cleanup are deliberately ignored. The
/// record itself stays in the state map with status `Destroyed`.
///
/// # Errors
/// - `StatePoisoned` if the state lock is poisoned.
/// - `VmNotFound` if `vm_id` is not tracked.
/// - `InvalidTransition` if the VM is already destroyed.
/// - Whatever `kill_process` itself returns.
fn destroy_vm(&self, vm_id: &str) -> VmRuntimeResult<()> {
    let mut vms = self
        .state
        .write()
        .map_err(|_| VmRuntimeError::StatePoisoned)?;
    let Some(vm) = vms.get_mut(vm_id) else {
        return Err(VmRuntimeError::VmNotFound(vm_id.to_owned()));
    };
    if vm.status == VmStatus::Destroyed {
        return Err(VmRuntimeError::InvalidTransition {
            vm_id: vm_id.to_owned(),
            from: VmStatus::Destroyed.to_string(),
            to: "destroyed",
        });
    }
    self.kill_process(vm_id)?;
    // Best-effort cleanup: socket file first, then its directory, then the
    // state directory. Failures here must not fail the destroy.
    let _ = fs::remove_file(&vm.socket_path);
    let _ = vm.socket_path.parent().map(fs::remove_dir_all);
    let _ = fs::remove_dir_all(&vm.state_dir);
    vm.status = VmStatus::Destroyed;
    Ok(())
}
/// Re-keys a tracked VM from `old_vm_id` to `new_vm_id`.
///
/// Moves the VM state directory and the directory holding its API socket
/// (the jail's per-VM directory when a jail is recorded, otherwise the
/// per-VM socket directory), then re-keys the state record, composed
/// attachments, process handle, console capture and uffd handler.
/// Renaming a VM to its own id is a no-op.
///
/// Every fallible lock is taken *before* anything is moved on disk, so a
/// poisoned lock fails the call while the VM is still fully keyed by
/// `old_vm_id`. If the second directory move fails, the state directory
/// move is rolled back and the rollback outcome is included in the error.
///
/// # Errors
/// - `StatePoisoned` if the state, composed-attachment or process lock is
///   poisoned.
/// - `VmAlreadyExists` if `new_vm_id` is already tracked.
/// - `VmNotFound` if `old_vm_id` is not tracked.
/// - `InvalidTransition` if the VM is already destroyed.
/// - `Unsupported` if the VM has composed network/vsock/firewall
///   attachments, its socket path has no file name, or a directory move
///   fails.
/// - `Jailer` if the recorded chroot path lacks the expected parent
///   directories.
fn rename_vm(&self, old_vm_id: &str, new_vm_id: &str) -> VmRuntimeResult<()> {
    if old_vm_id == new_vm_id {
        return Ok(());
    }
    let mut state = self
        .state
        .write()
        .map_err(|_| VmRuntimeError::StatePoisoned)?;
    if state.contains_key(new_vm_id) {
        return Err(VmRuntimeError::VmAlreadyExists(new_vm_id.to_owned()));
    }
    let record = state
        .get(old_vm_id)
        .ok_or_else(|| VmRuntimeError::VmNotFound(old_vm_id.to_owned()))?;
    if record.status == VmStatus::Destroyed {
        return Err(VmRuntimeError::InvalidTransition {
            vm_id: old_vm_id.to_owned(),
            from: VmStatus::Destroyed.to_string(),
            to: "renamed",
        });
    }
    let mut composed = self
        .composed
        .lock()
        .map_err(|_| VmRuntimeError::StatePoisoned)?;
    // Attachments that are keyed by vm id outside this provider cannot
    // follow the rename, so refuse before touching anything.
    let jail = match composed.get(old_vm_id) {
        Some(a) if a.network_attached || a.vsock_attached || a.firewall_installed => {
            return Err(VmRuntimeError::Unsupported(format!(
                "rename_vm('{old_vm_id}' -> '{new_vm_id}'): composed network/vsock/\
                 firewall attachments are keyed by vm id in their host managers and \
                 cannot be re-keyed; warm-pool restores never compose these"
            )));
        }
        Some(a) => a.jail.clone(),
        None => None,
    };
    let old_state_dir = record.state_dir.clone();
    let new_state_dir = self.vm_state_path(new_vm_id);
    let socket_name = record.socket_path.file_name().ok_or_else(|| {
        VmRuntimeError::Unsupported(format!(
            "invalid api socket path for vm {old_vm_id}: {}",
            record.socket_path.display()
        ))
    })?;
    // Work out which directory holds the API socket and where it must go.
    let (old_holder, new_holder, new_socket_path, new_jail) = match jail.as_ref() {
        Some(jail) => {
            // Jailed: the per-VM directory is the chroot's parent, and its
            // sibling named after the new id becomes the new per-VM dir.
            let old_vm_dir = jail.chroot_path.parent().ok_or_else(|| {
                VmRuntimeError::Jailer(format!(
                    "jail chroot {} has no parent vm dir",
                    jail.chroot_path.display()
                ))
            })?;
            let new_vm_dir = old_vm_dir
                .parent()
                .ok_or_else(|| {
                    VmRuntimeError::Jailer(format!(
                        "jail vm dir {} has no firecracker base dir",
                        old_vm_dir.display()
                    ))
                })?
                .join(jailer::safe_vm_id(new_vm_id));
            let new_chroot = new_vm_dir.join("root");
            let new_socket = new_chroot.join(socket_name);
            let renamed_jail = VmJail {
                chroot_path: new_chroot,
                api_socket_in_chroot: jail.api_socket_in_chroot.clone(),
                api_socket_on_host: new_socket.clone(),
            };
            (
                old_vm_dir.to_path_buf(),
                new_vm_dir,
                new_socket,
                Some(renamed_jail),
            )
        }
        None => {
            let old_socket_dir = self.config.socket_dir.join(self.safe_vm_id(old_vm_id));
            let new_socket_dir = self.config.socket_dir.join(self.safe_vm_id(new_vm_id));
            let new_socket = new_socket_dir.join(socket_name);
            (old_socket_dir, new_socket_dir, new_socket, None)
        }
    };
    // Take the last fallible lock now: once the directories have moved and
    // the record is re-keyed there is no way to back out, so a poisoned
    // process lock must be detected before the first mutation. Lock order
    // (state -> composed -> processes) is the same as before.
    let mut processes = self
        .processes
        .lock()
        .map_err(|_| VmRuntimeError::StatePoisoned)?;
    fs::rename(&old_state_dir, &new_state_dir).map_err(|e| {
        VmRuntimeError::Unsupported(format!(
            "rename_vm: failed moving state dir {} -> {}: {e}",
            old_state_dir.display(),
            new_state_dir.display()
        ))
    })?;
    if let Err(e) = fs::rename(&old_holder, &new_holder) {
        // Undo the first move so the VM stays consistent under its old id.
        let rollback = fs::rename(&new_state_dir, &old_state_dir);
        return Err(VmRuntimeError::Unsupported(format!(
            "rename_vm: failed moving {} -> {}: {e} (state dir rollback: {})",
            old_holder.display(),
            new_holder.display(),
            match rollback {
                Ok(()) => "ok".to_owned(),
                Err(re) => format!("FAILED: {re}"),
            }
        )));
    }
    // From here on nothing can fail: re-key every registry.
    let mut record = state
        .remove(old_vm_id)
        .expect("checked above while holding the state write lock");
    record.state_dir = new_state_dir;
    record.socket_path = new_socket_path;
    state.insert(new_vm_id.to_owned(), record);
    if let Some(mut attachments) = composed.remove(old_vm_id) {
        attachments.jail = new_jail;
        composed.insert(new_vm_id.to_owned(), attachments);
    }
    if let Some(child) = processes.remove(old_vm_id) {
        processes.insert(new_vm_id.to_owned(), child);
    }
    drop(processes);
    // Console captures and uffd handlers are re-keyed best-effort: a
    // poisoned lock on either is skipped rather than failing the rename.
    if let Ok(mut consoles) = self.consoles.lock()
        && let Some(capture) = consoles.remove(old_vm_id)
    {
        consoles.insert(new_vm_id.to_owned(), capture);
    }
    if let Ok(mut handlers) = self.uffd_handlers.lock()
        && let Some(handler) = handlers.remove(old_vm_id)
    {
        handlers.insert(new_vm_id.to_owned(), handler);
    }
    Ok(())
}
}
/// Read-only queries over the provider's in-memory VM state.
///
/// Every method takes the state read lock and reports a poisoned lock as
/// `VmRuntimeError::StatePoisoned`.
impl VmQuery for FirecrackerVmProvider {
    /// Returns a view of every tracked VM, ordered by `vm_id`.
    fn list_vms(&self) -> VmRuntimeResult<Vec<VmView>> {
        let vms = self
            .state
            .read()
            .map_err(|_| VmRuntimeError::StatePoisoned)?;
        let mut views: Vec<VmView> = Vec::with_capacity(vms.len());
        for (id, entry) in vms.iter() {
            views.push(entry.view(id));
        }
        // The state map's iteration order is not relied upon; sort for a
        // stable listing.
        views.sort_by(|lhs, rhs| lhs.vm_id.cmp(&rhs.vm_id));
        Ok(views)
    }

    /// Returns the view of `vm_id`, or `None` when it is not tracked.
    fn get_vm(&self, vm_id: &str) -> VmRuntimeResult<Option<VmView>> {
        let vms = self
            .state
            .read()
            .map_err(|_| VmRuntimeError::StatePoisoned)?;
        let view = match vms.get(vm_id) {
            Some(entry) => Some(entry.view(vm_id)),
            None => None,
        };
        Ok(view)
    }

    /// Returns a copy of the snapshot ids recorded for `vm_id`, or `None`
    /// when the VM is not tracked.
    fn list_snapshots(&self, vm_id: &str) -> VmRuntimeResult<Option<Vec<String>>> {
        let vms = self
            .state
            .read()
            .map_err(|_| VmRuntimeError::StatePoisoned)?;
        let ids = match vms.get(vm_id) {
            Some(entry) => Some(entry.snapshots.clone()),
            None => None,
        };
        Ok(ids)
    }
}
/// Serialises a [`RateLimiter`] into a JSON object.
///
/// The `bandwidth` and `ops` keys are emitted only for buckets that are
/// set; a limiter with neither bucket serialises to an empty object.
fn rate_limiter_to_json(limiter: &RateLimiter) -> serde_json::Value {
    let fields: serde_json::Map<String, serde_json::Value> = [
        ("bandwidth", limiter.bandwidth.as_ref()),
        ("ops", limiter.ops.as_ref()),
    ]
    .into_iter()
    .filter_map(|(key, bucket)| bucket.map(|b| (key.to_owned(), token_bucket_to_json(b))))
    .collect();
    serde_json::Value::Object(fields)
}
/// Serialises a [`TokenBucket`] into a JSON object with the keys `size`,
/// `one_time_burst` and `refill_time`.
///
/// When `one_time_burst` is unset, the bucket's `size` is emitted in its
/// place. `refill_time` carries the value of `refill_time_ms`.
fn token_bucket_to_json(bucket: &TokenBucket) -> serde_json::Value {
    let size = bucket.size;
    let burst = bucket.one_time_burst.unwrap_or(size);
    serde_json::json!({
        "size": size,
        "one_time_burst": burst,
        "refill_time": bucket.refill_time_ms,
    })
}
// Unit tests for the Firecracker provider. Apart from the single `#[ignore]`d
// end-to-end test, none of them talk to a real Firecracker process: API calls
// are pointed at sockets nobody listens on, and the tests assert on the
// filesystem / in-memory state left behind.
#[cfg(test)]
mod tests {
use super::*;
use crate::composer::FirecrackerComposer;
use crate::jailer::{Jailer, JailerConfig};
use crate::model::{RateLimiter, TokenBucket};
// A response carrying Content-Length must be returned in full while the peer
// still holds the socket open (the server end is dropped only after the read).
#[test]
fn read_http_response_returns_without_server_close() {
let (mut client, mut server) = UnixStream::pair().expect("socketpair");
let response =
b"HTTP/1.1 200 \r\nServer: Firecracker API\r\nConnection: keep-alive\r\nContent-Type: application/json\r\nContent-Length: 13\r\n\r\n{\"state\":\"a\"}";
server.write_all(response).expect("write response");
server.flush().expect("flush");
client
.set_read_timeout(Some(Duration::from_secs(2)))
.expect("timeout");
let got = read_http_response(&mut client).expect("framed read completes without EOF");
assert_eq!(got, response.to_vec());
drop(server);
}
// A headers-only response (no Content-Length) must still be returned, ending
// at the blank line that terminates the headers.
#[test]
fn read_http_response_handles_missing_content_length() {
let (mut client, mut server) = UnixStream::pair().expect("socketpair");
server
.write_all(
b"HTTP/1.1 204 \r\nServer: Firecracker API\r\nConnection: keep-alive\r\n\r\n",
)
.expect("write response");
client
.set_read_timeout(Some(Duration::from_secs(2)))
.expect("timeout");
let got = read_http_response(&mut client).expect("headers-only response");
assert!(got.ends_with(b"\r\n\r\n"));
drop(server);
}
// Builds a config whose kernel, rootfs, socket and state paths all live under
// `root`, with 200 ms API / socket-ready timeouts and the File memory backend.
fn test_config(root: &Path) -> FirecrackerConfig {
FirecrackerConfig {
binary_path: PathBuf::from("/usr/local/bin/firecracker"),
kernel_path: root.join("vmlinux"),
rootfs_path: root.join("rootfs.ext4"),
boot_args: DEFAULT_BOOT_ARGS.to_string(),
socket_dir: root.join("sockets"),
state_dir: root.join("state"),
vcpu_count: 1,
mem_size_mib: 128,
rootfs_read_only: true,
api_timeout: Duration::from_millis(200),
socket_ready_timeout: Duration::from_millis(200),
mem_backend: MemBackend::File,
}
}
// Creates a provider and registers `vm_id` directly in its state map with the
// given status. On disk it lays down a `snapshots/warm.{vmstate,mem}` pair in
// the VM state dir and an empty regular file standing in for `api.sock` in the
// VM socket dir.
fn seeded_provider(root: &Path, vm_id: &str, status: VmStatus) -> FirecrackerVmProvider {
let provider = FirecrackerVmProvider::new(test_config(root));
let state_dir = provider.vm_state_path(vm_id);
let snap_dir = state_dir.join("snapshots");
fs::create_dir_all(&snap_dir).unwrap();
fs::write(snap_dir.join("warm.vmstate"), b"vmstate").unwrap();
fs::write(snap_dir.join("warm.mem"), b"guest memory").unwrap();
let socket_dir = provider.config.socket_dir.join(provider.safe_vm_id(vm_id));
fs::create_dir_all(&socket_dir).unwrap();
let socket_path = socket_dir.join("api.sock");
fs::write(&socket_path, b"").unwrap();
provider.state.write().unwrap().insert(
vm_id.to_owned(),
VmRecord {
status,
snapshots: vec!["warm".to_owned()],
socket_path,
state_dir,
},
);
provider
}
// True when both paths resolve to the same device and inode number.
fn same_inode(a: &Path, b: &Path) -> bool {
use std::os::unix::fs::MetadataExt;
let (ma, mb) = (fs::metadata(a).unwrap(), fs::metadata(b).unwrap());
ma.dev() == mb.dev() && ma.ino() == mb.ino()
}
// Parsing ignores case and surrounding whitespace; unknown names are errors.
#[test]
fn mem_backend_parses_file_and_uffd_case_insensitive() {
assert_eq!("file".parse::<MemBackend>().unwrap(), MemBackend::File);
assert_eq!("File".parse::<MemBackend>().unwrap(), MemBackend::File);
assert_eq!("uffd".parse::<MemBackend>().unwrap(), MemBackend::Uffd);
assert_eq!(" UFFD ".parse::<MemBackend>().unwrap(), MemBackend::Uffd);
assert!("mmap".parse::<MemBackend>().is_err());
}
// An absent env value selects the default (File) backend.
#[test]
fn mem_backend_env_absent_defaults_to_file() {
assert_eq!(mem_backend_from_env_value(None), MemBackend::File);
assert_eq!(mem_backend_from_env_value(Some("uffd")), MemBackend::Uffd);
}
// A present-but-invalid env value panics with a message naming the variable
// instead of silently falling back to the default.
#[test]
#[should_panic(expected = "MICROVM_MEM_BACKEND")]
fn mem_backend_env_invalid_value_fails_loud() {
mem_backend_from_env_value(Some("filee"));
}
// Without a chroot, the paths handed to Firecracker, the staged paths and the
// durable paths are all the same and nothing counts as staged.
#[test]
fn snapshot_paths_non_jailed_coincide_with_durable() {
let snap_dir = Path::new("/var/state/vm-1/snapshots");
let p = snapshot_artifact_paths(snap_dir, "warm", None).unwrap();
assert_eq!(p.durable_vmstate, snap_dir.join("warm.vmstate"));
assert_eq!(p.durable_mem, snap_dir.join("warm.mem"));
assert_eq!(p.fc_vmstate, p.durable_vmstate);
assert_eq!(p.fc_mem, p.durable_mem);
assert_eq!(p.staged_vmstate, p.durable_vmstate);
assert!(!p.is_staged());
}
// With a chroot, Firecracker-facing paths are rooted at `/`, staged paths sit
// directly inside the chroot on the host, and durable paths stay in the
// snapshot dir.
#[test]
fn snapshot_paths_jailed_reference_chroot_relative() {
let snap_dir = Path::new("/var/state/vm-1/snapshots");
let chroot = Path::new("/srv/jailer/firecracker/vm-1/root");
let p = snapshot_artifact_paths(snap_dir, "warm", Some(chroot)).unwrap();
assert_eq!(p.fc_vmstate, PathBuf::from("/warm.vmstate"));
assert_eq!(p.fc_mem, PathBuf::from("/warm.mem"));
assert_eq!(p.staged_vmstate, chroot.join("warm.vmstate"));
assert_eq!(p.staged_mem, chroot.join("warm.mem"));
assert_eq!(p.durable_vmstate, snap_dir.join("warm.vmstate"));
assert!(p.is_staged());
}
// Snapshot ids containing path traversal, separators, "..", whitespace or
// NUL, as well as the empty id, are rejected as `Unsupported`; an id made of
// letters, digits, `-`, `.` and `_` is accepted.
#[test]
fn snapshot_paths_reject_unsafe_ids() {
let snap_dir = Path::new("/var/state/vm-1/snapshots");
for bad in ["../warm", "a/b", "", "a..b", "wa rm", "warm\0"] {
let err = snapshot_artifact_paths(snap_dir, bad, None).unwrap_err();
assert!(
matches!(err, VmRuntimeError::Unsupported(_)),
"id {bad:?} must be rejected"
);
}
assert!(snapshot_artifact_paths(snap_dir, "ok-1.v2_x", None).is_ok());
}
// Moving between two paths in the same temp dir removes the source and
// leaves the contents intact at the destination.
#[test]
fn move_into_place_renames_within_same_fs() {
let tmp = tempfile::tempdir().unwrap();
let from = tmp.path().join("staged.mem");
let to = tmp.path().join("durable.mem");
fs::write(&from, b"pages").unwrap();
move_into_place(&from, &to).unwrap();
assert!(!from.exists());
assert_eq!(fs::read(&to).unwrap(), b"pages");
}
// Builds a reference to snapshot "warm" of VM "vm-src" with the given resume
// flag and network overrides.
fn snapshot_ref(resume: bool, overrides: Vec<NetworkInterface>) -> SnapshotRef {
SnapshotRef {
vm_id: "vm-src".into(),
snapshot_id: "warm".into(),
resume_immediately: resume,
network_overrides: overrides,
}
}
// File backend: the body carries the vmstate path, the memory backend object
// as given, the resume flag, diff snapshots disabled, and no
// `network_interfaces` key when there are no overrides.
#[test]
fn load_body_file_backend_shape() {
let body = build_snapshot_load_body(
&snapshot_ref(true, vec![]),
Path::new("/warm.vmstate"),
serde_json::json!({ "backend_type": "File", "backend_path": "/warm.mem" }),
);
assert_eq!(body["snapshot_path"], "/warm.vmstate");
assert_eq!(body["mem_backend"]["backend_type"], "File");
assert_eq!(body["mem_backend"]["backend_path"], "/warm.mem");
assert_eq!(body["resume_vm"], true);
assert_eq!(body["enable_diff_snapshots"], false);
assert!(body.get("network_interfaces").is_none());
}
// Uffd backend: the memory backend object names the uffd socket path.
#[test]
fn load_body_uffd_backend_shape() {
let body = build_snapshot_load_body(
&snapshot_ref(false, vec![]),
Path::new("/warm.vmstate"),
crate::uffd::snapshot_load_mem_backend_uffd(Path::new("/uffd.sock")),
);
assert_eq!(body["mem_backend"]["backend_type"], "Uffd");
assert_eq!(body["mem_backend"]["backend_path"], "/uffd.sock");
assert_eq!(body["resume_vm"], false);
}
// Network overrides on the snapshot reference show up as
// `network_interfaces` entries in the load body.
#[test]
fn load_body_includes_network_overrides() {
let overrides = vec![NetworkInterface {
iface_id: "eth0".into(),
host_dev_name: "tap-new".into(),
guest_mac: Some("AA:BB:CC:DD:EE:FF".into()),
rx_rate_limiter: None,
tx_rate_limiter: None,
}];
let body = build_snapshot_load_body(
&snapshot_ref(true, overrides),
Path::new("/warm.vmstate"),
serde_json::json!({ "backend_type": "File", "backend_path": "/warm.mem" }),
);
let ifaces = body["network_interfaces"].as_array().unwrap();
assert_eq!(ifaces.len(), 1);
assert_eq!(ifaces[0]["host_dev_name"], "tap-new");
assert_eq!(ifaces[0]["guest_mac"], "AA:BB:CC:DD:EE:FF");
}
// The load itself must fail at the connect step (nothing listens on the
// socket), but by then both snapshot files must already be present in the
// chroot as the same inode as the durable copies.
#[test]
fn jailed_restore_stages_snapshot_into_chroot() {
let tmp = tempfile::tempdir().unwrap();
let provider = FirecrackerVmProvider::new(test_config(tmp.path())).with_composer(
FirecrackerComposer {
jailer: Some(Arc::new(Jailer::new(JailerConfig {
chroot_base: tmp.path().join("jail"),
..JailerConfig::default()
}))),
..FirecrackerComposer::bare()
},
);
let snap_dir = provider.vm_state_path("vm-src").join("snapshots");
fs::create_dir_all(&snap_dir).unwrap();
fs::write(snap_dir.join("warm.vmstate"), b"vmstate").unwrap();
fs::write(snap_dir.join("warm.mem"), b"guest memory").unwrap();
let chroot = tmp.path().join("jail/firecracker/vm-new/root");
fs::create_dir_all(&chroot).unwrap();
let jail = VmJail {
chroot_path: chroot.clone(),
api_socket_in_chroot: PathBuf::from("/api.sock"),
api_socket_on_host: chroot.join("api.sock"),
};
let err = provider
.load_snapshot(
&jail.api_socket_on_host,
&snapshot_ref(true, vec![]),
"vm-new",
Some(&jail),
)
.expect_err("no live FC socket — the API call must fail");
assert!(
matches!(&err, VmRuntimeError::Unsupported(msg) if msg.contains("failed connecting")),
"unexpected error: {err}"
);
assert!(same_inode(
&snap_dir.join("warm.vmstate"),
&chroot.join("warm.vmstate")
));
assert!(same_inode(
&snap_dir.join("warm.mem"),
&chroot.join("warm.mem")
));
}
// Only the vmstate file exists; the missing memory file must surface as
// `SnapshotNotFound`.
#[test]
fn restore_missing_mem_file_is_snapshot_not_found() {
let tmp = tempfile::tempdir().unwrap();
let provider = FirecrackerVmProvider::new(test_config(tmp.path()));
let snap_dir = provider.vm_state_path("vm-src").join("snapshots");
fs::create_dir_all(&snap_dir).unwrap();
fs::write(snap_dir.join("warm.vmstate"), b"vmstate").unwrap();
let err = provider
.load_snapshot(
Path::new("/nonexistent.sock"),
&snapshot_ref(true, vec![]),
"vm-new",
None,
)
.unwrap_err();
assert!(matches!(err, VmRuntimeError::SnapshotNotFound { .. }));
}
// Pre-seeds partial snapshot files in the chroot, lets the snapshot call
// fail (nothing listens on the socket), and checks that neither the staged
// partials nor a durable vmstate file are left behind.
#[test]
fn jailed_snapshot_create_cleans_staged_partials_on_failure() {
let tmp = tempfile::tempdir().unwrap();
let provider = FirecrackerVmProvider::new(test_config(tmp.path()));
let state_dir = provider.vm_state_path("vm-1");
fs::create_dir_all(&state_dir).unwrap();
let chroot = tmp.path().join("jail/firecracker/vm-1/root");
fs::create_dir_all(&chroot).unwrap();
fs::write(chroot.join("warm.vmstate"), b"partial").unwrap();
fs::write(chroot.join("warm.mem"), b"partial").unwrap();
let jail = VmJail {
chroot_path: chroot.clone(),
api_socket_in_chroot: PathBuf::from("/api.sock"),
api_socket_on_host: chroot.join("api.sock"),
};
provider
.create_snapshot(&jail.api_socket_on_host, &state_dir, "warm", Some(&jail))
.expect_err("no live FC socket — must fail");
assert!(!chroot.join("warm.vmstate").exists());
assert!(!chroot.join("warm.mem").exists());
assert!(!state_dir.join("snapshots/warm.vmstate").exists());
}
// With the Uffd backend selected, a load that fails at the connect step must
// not leave a uffd socket file in the new VM's state dir.
#[test]
fn uffd_restore_failure_tears_down_handler_socket() {
let tmp = tempfile::tempdir().unwrap();
let mut config = test_config(tmp.path());
config.mem_backend = MemBackend::Uffd;
let provider = FirecrackerVmProvider::new(config);
let snap_dir = provider.vm_state_path("vm-src").join("snapshots");
fs::create_dir_all(&snap_dir).unwrap();
fs::write(snap_dir.join("warm.vmstate"), b"vmstate").unwrap();
fs::write(snap_dir.join("warm.mem"), b"guest memory").unwrap();
fs::create_dir_all(provider.vm_state_path("vm-new")).unwrap();
let err = provider
.load_snapshot(
Path::new("/nonexistent.sock"),
&snapshot_ref(true, vec![]),
"vm-new",
None,
)
.expect_err("no live FC socket — the API call must fail");
assert!(
matches!(&err, VmRuntimeError::Unsupported(msg) if msg.contains("failed connecting")),
"unexpected error: {err}"
);
assert!(
!provider
.vm_state_path("vm-new")
.join(UFFD_SOCKET_BASENAME)
.exists()
);
}
// Non-jailed rename: the old id and its directories disappear, and the record
// is reachable under the new id with status, snapshots, state dir and socket
// path all pointing at the new locations.
#[test]
fn rename_moves_state_and_socket_dirs_and_rekeys_record() {
let tmp = tempfile::tempdir().unwrap();
let provider = seeded_provider(tmp.path(), "pool-1", VmStatus::Running);
provider.rename_vm("pool-1", "sandbox-9").expect("rename");
assert!(provider.get_vm("pool-1").unwrap().is_none());
assert!(!provider.vm_state_path("pool-1").exists());
assert!(!tmp.path().join("sockets/pool-1").exists());
let view = provider.get_vm("sandbox-9").unwrap().expect("renamed vm");
assert_eq!(view.status, VmStatus::Running);
assert_eq!(view.snapshots, vec!["warm".to_owned()]);
let new_state_dir = provider.vm_state_path("sandbox-9");
assert!(new_state_dir.join("snapshots/warm.vmstate").exists());
let record = provider.state.read().unwrap();
let record = record.get("sandbox-9").unwrap();
assert_eq!(record.state_dir, new_state_dir);
assert_eq!(
record.socket_path,
tmp.path().join("sockets/sandbox-9/api.sock")
);
assert!(record.socket_path.exists());
}
// An untracked source id yields `VmNotFound`; a target id that is already
// tracked yields `VmAlreadyExists`.
#[test]
fn rename_rejects_unknown_missing_and_duplicate_ids() {
let tmp = tempfile::tempdir().unwrap();
let provider = seeded_provider(tmp.path(), "pool-1", VmStatus::Running);
assert!(matches!(
provider.rename_vm("ghost", "x").unwrap_err(),
VmRuntimeError::VmNotFound(_)
));
let state_dir = provider.vm_state_path("other");
fs::create_dir_all(&state_dir).unwrap();
provider.state.write().unwrap().insert(
"other".into(),
VmRecord {
status: VmStatus::Running,
snapshots: vec![],
socket_path: tmp.path().join("sockets/other/api.sock"),
state_dir,
},
);
assert!(matches!(
provider.rename_vm("pool-1", "other").unwrap_err(),
VmRuntimeError::VmAlreadyExists(_)
));
}
// A destroyed VM cannot be renamed.
#[test]
fn rename_rejects_destroyed_vm() {
let tmp = tempfile::tempdir().unwrap();
let provider = seeded_provider(tmp.path(), "pool-1", VmStatus::Destroyed);
assert!(matches!(
provider.rename_vm("pool-1", "sandbox-9").unwrap_err(),
VmRuntimeError::InvalidTransition { to: "renamed", .. }
));
}
// Renaming a VM to its own id succeeds and leaves it in place.
#[test]
fn rename_to_same_id_is_a_noop() {
let tmp = tempfile::tempdir().unwrap();
let provider = seeded_provider(tmp.path(), "pool-1", VmStatus::Running);
provider.rename_vm("pool-1", "pool-1").expect("noop");
assert!(provider.get_vm("pool-1").unwrap().is_some());
}
// A VM with a composed network attachment is refused with `Unsupported`, and
// both its record and its state dir stay where they were.
#[test]
fn rename_refuses_composed_network_attachments() {
let tmp = tempfile::tempdir().unwrap();
let provider = seeded_provider(tmp.path(), "pool-1", VmStatus::Running);
provider.composed.lock().unwrap().insert(
"pool-1".into(),
ComposedAttachments {
network_attached: true,
..ComposedAttachments::default()
},
);
let err = provider.rename_vm("pool-1", "sandbox-9").unwrap_err();
assert!(matches!(err, VmRuntimeError::Unsupported(_)), "{err}");
assert!(provider.get_vm("pool-1").unwrap().is_some());
assert!(provider.vm_state_path("pool-1").exists());
}
// Jailed rename: the per-VM jail directory moves to the new id, and the
// stored jail paths plus the record's socket path are rewritten to the new
// chroot while the in-chroot socket path stays unchanged.
#[test]
fn rename_rekeys_jailed_chroot_and_jail_paths() {
let tmp = tempfile::tempdir().unwrap();
let provider = seeded_provider(tmp.path(), "pool-1", VmStatus::Running);
let old_chroot = tmp.path().join("jail/firecracker/pool-1/root");
fs::create_dir_all(&old_chroot).unwrap();
let socket_path = old_chroot.join("api.sock");
fs::write(&socket_path, b"").unwrap();
provider
.state
.write()
.unwrap()
.get_mut("pool-1")
.unwrap()
.socket_path = socket_path;
provider.composed.lock().unwrap().insert(
"pool-1".into(),
ComposedAttachments {
jail: Some(VmJail {
chroot_path: old_chroot.clone(),
api_socket_in_chroot: PathBuf::from("/api.sock"),
api_socket_on_host: old_chroot.join("api.sock"),
}),
..ComposedAttachments::default()
},
);
provider.rename_vm("pool-1", "sandbox-9").expect("rename");
let new_chroot = tmp.path().join("jail/firecracker/sandbox-9/root");
assert!(!tmp.path().join("jail/firecracker/pool-1").exists());
assert!(new_chroot.join("api.sock").exists());
let composed = provider.composed.lock().unwrap();
assert!(composed.get("pool-1").is_none());
let jail = composed.get("sandbox-9").unwrap().jail.as_ref().unwrap();
assert_eq!(jail.chroot_path, new_chroot);
assert_eq!(jail.api_socket_on_host, new_chroot.join("api.sock"));
assert_eq!(jail.api_socket_in_chroot, PathBuf::from("/api.sock"));
let state = provider.state.read().unwrap();
assert_eq!(
state.get("sandbox-9").unwrap().socket_path,
new_chroot.join("api.sock")
);
}
// Removing the socket dir beforehand makes the second directory move fail;
// the state dir must then be back under the old id and the record untouched.
#[test]
fn rename_rolls_back_state_dir_when_socket_move_fails() {
let tmp = tempfile::tempdir().unwrap();
let provider = seeded_provider(tmp.path(), "pool-1", VmStatus::Running);
fs::remove_dir_all(tmp.path().join("sockets/pool-1")).unwrap();
let err = provider.rename_vm("pool-1", "sandbox-9").unwrap_err();
assert!(matches!(err, VmRuntimeError::Unsupported(_)), "{err}");
assert!(provider.vm_state_path("pool-1").exists());
assert!(!provider.vm_state_path("sandbox-9").exists());
assert!(provider.get_vm("pool-1").unwrap().is_some());
assert!(provider.get_vm("sandbox-9").unwrap().is_none());
}
// A spec that requests vsock is rejected with `Unsupported` when a jail is
// supplied.
#[test]
fn configure_vm_refuses_vsock_under_jailer() {
let tmp = tempfile::tempdir().unwrap();
let provider = FirecrackerVmProvider::new(test_config(tmp.path()));
let chroot = tmp.path().join("jail/firecracker/vm-1/root");
let jail = VmJail {
chroot_path: chroot.clone(),
api_socket_in_chroot: PathBuf::from("/api.sock"),
api_socket_on_host: chroot.join("api.sock"),
};
let spec = VmSpec {
vsock: Some(VsockSpec {
cid: 3,
uds_path: tmp.path().join("vsock.sock"),
}),
..VmSpec::default()
};
let err = provider
.configure_vm(Path::new("/nonexistent.sock"), &spec, Some(&jail))
.unwrap_err();
assert!(
matches!(&err, VmRuntimeError::Unsupported(msg) if msg.contains("vsock under the jailer")),
"unexpected error: {err}"
);
}
// End-to-end flow against real binaries (ignored by default): jailed cold
// boot -> start -> pause -> snapshot -> jailed restore into a second VM ->
// rename of the restored VM -> destroy both.
#[test]
#[ignore = "requires root, /dev/kvm, firecracker + jailer binaries, kernel + rootfs images"]
fn jailed_snapshot_restore_and_rename_e2e() {
let provider = FirecrackerVmProvider::from_env().with_composer(FirecrackerComposer {
jailer: Some(Arc::new(Jailer::from_env())),
..FirecrackerComposer::bare()
});
provider.create_vm("e2e-src").expect("jailed cold boot");
provider.start_vm("e2e-src").expect("start");
thread::sleep(Duration::from_secs(2));
provider.stop_vm("e2e-src").expect("pause");
provider
.snapshot_vm("e2e-src", "warm")
.expect("snapshot written in-chroot then moved to the durable dir");
let snap_dir = provider.vm_state_path("e2e-src").join("snapshots");
assert!(snap_dir.join("warm.vmstate").exists());
assert!(snap_dir.join("warm.mem").exists());
let spec = VmSpec {
restore_from: Some(SnapshotRef {
vm_id: "e2e-src".into(),
snapshot_id: "warm".into(),
resume_immediately: true,
network_overrides: vec![],
}),
..VmSpec::default()
};
provider
.create_vm_with_spec("e2e-pool", &spec)
.expect("jailed restore from the durable snapshot");
provider
.rename_vm("e2e-pool", "e2e-claimed")
.expect("warm-pool handoff rename");
assert!(provider.get_vm("e2e-claimed").unwrap().is_some());
assert!(provider.get_vm("e2e-pool").unwrap().is_none());
provider.destroy_vm("e2e-claimed").expect("destroy renamed");
provider.destroy_vm("e2e-src").expect("destroy source");
}
// With no explicit burst, `one_time_burst` is emitted equal to `size`.
#[test]
fn token_bucket_default_burst_equals_size() {
let json = token_bucket_to_json(&TokenBucket {
size: 1_048_576,
one_time_burst: None,
refill_time_ms: 1_000,
});
assert_eq!(json["size"], 1_048_576);
assert_eq!(json["one_time_burst"], 1_048_576);
assert_eq!(json["refill_time"], 1_000);
}
// An explicit burst is emitted as given.
#[test]
fn token_bucket_explicit_burst_respected() {
let json = token_bucket_to_json(&TokenBucket {
size: 1_048_576,
one_time_burst: Some(2_097_152),
refill_time_ms: 500,
});
assert_eq!(json["one_time_burst"], 2_097_152);
}
// Both buckets present: both keys are emitted with their own sizes.
#[test]
fn rate_limiter_serialises_both_buckets() {
let json = rate_limiter_to_json(&RateLimiter {
bandwidth: Some(TokenBucket {
size: 10_000,
one_time_burst: None,
refill_time_ms: 100,
}),
ops: Some(TokenBucket {
size: 50,
one_time_burst: None,
refill_time_ms: 100,
}),
});
assert!(json.get("bandwidth").is_some());
assert!(json.get("ops").is_some());
assert_eq!(json["bandwidth"]["size"], 10_000);
assert_eq!(json["ops"]["size"], 50);
}
// Neither bucket present: the result is an empty JSON object.
#[test]
fn rate_limiter_empty_serialises_to_empty_object() {
let json = rate_limiter_to_json(&RateLimiter {
bandwidth: None,
ops: None,
});
assert!(json.is_object());
assert!(json.as_object().unwrap().is_empty());
}
}