use std::collections::HashMap;
use std::num::NonZeroU64;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, RwLock};
use std::time::Duration;
use chrono::{DateTime, Utc};
use fc_sdk::VmBuilder;
use fc_sdk::types::{BootSource, Drive, NetworkInterface, Vsock};
use nix::unistd::{Gid, Uid, chown};
use tokio::sync::broadcast;
use tracing::{error, info, warn};
use uuid::Uuid;
use crate::config::VmmConfig;
use crate::error::{Result, VmmError};
use crate::network::{NetworkAllocation, NetworkManager};
use crate::snapshot::SnapshotCatalog;
use crate::spawn::{spawn_direct, spawn_jailer};
use crate::vsock::{self, ExecInputMsg, OutputChunk, StartCommand};
/// Identifier of a sandbox. A plain `String` alias; it is used as the key of
/// the manager's instance map and as a directory name under the data dir.
pub type SandboxId = String;
/// Lifecycle state of a sandbox as tracked by the manager.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SandboxState {
    /// Initial state of a freshly created instance.
    Starting,
    /// The VM is up and no command is currently tracked as executing.
    Ready,
    /// A command started through the manager is executing.
    Running,
    /// A stop has been requested and is in progress.
    Stopping,
    /// The stop sequence has finished.
    Stopped,
    /// Booting the sandbox failed.
    Failed,
}

/// Renders the state as its lowercase name (for example `Ready` -> `"ready"`).
impl std::fmt::Display for SandboxState {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            Self::Starting => "starting",
            Self::Ready => "ready",
            Self::Running => "running",
            Self::Stopping => "stopping",
            Self::Stopped => "stopped",
            Self::Failed => "failed",
        };
        f.write_str(label)
    }
}
/// Requested networking for a sandbox.
#[derive(Debug, Clone, Default)]
pub struct SandboxNetworkSpec {
/// Network mode. `create_sandbox` replaces an empty value with `"tap"` and
/// skips network allocation entirely when the value is `"none"`.
pub mode: String,
}
/// A mount requested for a sandbox.
///
/// NOTE(review): none of these fields are read in this file; the meanings
/// below are inferred from the names and should be confirmed against the consumer.
#[derive(Debug, Clone)]
pub struct SandboxMountSpec {
/// Presumably the mount source (host side) - confirm.
pub source: String,
/// Presumably the mount point inside the sandbox - confirm.
pub target: String,
/// Presumably whether the mount is read-only - confirm.
pub readonly: bool,
}
/// Caller-supplied description of a sandbox to create.
///
/// Empty / zero values for `kernel`, `rootfs`, `boot_args`, `vcpus` and
/// `memory_mib` are replaced with the configured defaults by `create_sandbox`.
#[derive(Debug, Clone, Default)]
pub struct SandboxSpec {
/// Requested sandbox ID; when `None` or empty a UUID v4 is generated.
pub id: Option<String>,
/// Labels copied onto the instance and matched by `list_sandboxes`.
pub labels: HashMap<String, String>,
/// Kernel image path; empty means "use the configured default".
pub kernel: String,
/// Root filesystem image path; empty means "use the configured default".
pub rootfs: String,
/// Kernel boot arguments; empty means "use the configured default".
pub boot_args: String,
/// Number of vCPUs; `0` means "use the configured default".
pub vcpus: u32,
/// Guest memory in MiB; `0` means "use the configured default".
pub memory_mib: u64,
/// NOTE(review): not read in this file - presumably an image reference; confirm.
pub image: String,
/// NOTE(review): not read in this file - presumably a default command; confirm.
pub cmd: Vec<String>,
/// NOTE(review): not read in this file - presumably default environment variables; confirm.
pub env: HashMap<String, String>,
/// NOTE(review): not read in this file - presumably a default working directory; confirm.
pub working_dir: String,
/// NOTE(review): not read in this file - presumably a default user; confirm.
pub user: String,
/// NOTE(review): not read in this file.
pub mounts: Vec<SandboxMountSpec>,
/// Requested networking (see `SandboxNetworkSpec::mode`).
pub network: SandboxNetworkSpec,
/// When greater than zero, a background task removes the sandbox after this many seconds.
pub ttl_seconds: u32,
/// NOTE(review): not read in this file.
pub ssh_public_key: Option<String>,
}
/// Caller-supplied description of a sandbox to restore from a checkpoint.
#[derive(Debug, Clone, Default)]
pub struct RestoreSandboxSpec {
/// Requested ID for the restored sandbox; when `None` or empty a UUID v4 is generated.
pub id: Option<String>,
/// ID of the snapshot to restore, looked up in the snapshot catalog.
pub snapshot_id: String,
/// Labels attached to the restored sandbox.
pub labels: HashMap<String, String>,
/// When `true`, a fresh network allocation is made and passed to Firecracker
/// as a network override for interface `eth0`; when `false` no network is allocated.
pub network_override: bool,
/// When greater than zero, a background task removes the sandbox after this many seconds.
pub ttl_seconds: u32,
}
/// Runtime record the manager keeps for each sandbox.
pub struct SandboxInstance {
/// Sandbox ID (also the key in the manager's instance map).
pub id: SandboxId,
/// Copy of `spec.labels` taken when the instance was constructed.
pub labels: HashMap<String, String>,
/// Specification the sandbox was created (or restored) with.
pub spec: SandboxSpec,
/// Current lifecycle state.
pub state: SandboxState,
/// Firecracker process handle; `None` until boot or restore has succeeded.
pub process: Option<fc_sdk::FirecrackerProcess>,
/// Firecracker API handle; `None` until boot or restore has succeeded.
pub vm: Option<Arc<fc_sdk::Vm>>,
/// Network allocation, if one was made for this sandbox.
pub network: Option<NetworkAllocation>,
/// Per-sandbox directory (`<data_dir>/sandboxes/<id>`).
pub vm_dir: PathBuf,
/// Host-side path of the vsock Unix socket; `None` until boot or restore has succeeded.
pub vsock_uds_path: Option<PathBuf>,
/// Time the instance record was constructed.
pub created_at: DateTime<Utc>,
/// Time the sandbox became `Ready`, if it has.
pub ready_at: Option<DateTime<Utc>>,
/// Time the most recent command reported its exit, if any.
pub last_exited_at: Option<DateTime<Utc>>,
/// Exit code of the most recent command, if any.
pub last_exit_code: Option<i32>,
/// Error text recorded when boot fails.
pub error: Option<String>,
}
impl SandboxInstance {
    /// Builds the record for a sandbox that has not booted yet.
    ///
    /// The instance starts in [`SandboxState::Starting`] with no process, VM
    /// handle or vsock path; `labels` is a copy of `spec.labels` and
    /// `created_at` is the current UTC time.
    fn new(
        id: SandboxId,
        spec: SandboxSpec,
        network: Option<NetworkAllocation>,
        vm_dir: PathBuf,
    ) -> Self {
        let labels = spec.labels.clone();
        let created_at = Utc::now();
        Self {
            id,
            labels,
            spec,
            state: SandboxState::Starting,
            process: None,
            vm: None,
            network,
            vm_dir,
            vsock_uds_path: None,
            created_at,
            ready_at: None,
            last_exited_at: None,
            last_exit_code: None,
            error: None,
        }
    }

    /// Path of the Firecracker API socket inside this sandbox's directory.
    pub fn socket_path(&self) -> PathBuf {
        let mut path = self.vm_dir.clone();
        path.push("firecracker.sock");
        path
    }
}
/// Short description of a sandbox, as returned by `list_sandboxes`.
pub struct SandboxSummary {
/// Sandbox ID.
pub id: SandboxId,
/// Lifecycle state at the time of listing.
pub state: SandboxState,
/// Labels attached to the sandbox.
pub labels: HashMap<String, String>,
/// IP address of the sandbox, or an empty string when it has no network allocation.
pub ip_address: String,
/// Time the instance record was created.
pub created_at: DateTime<Utc>,
}
/// Detailed description of a sandbox, as returned by `inspect_sandbox`.
pub struct SandboxInfo {
/// Sandbox ID.
pub id: SandboxId,
/// Lifecycle state at the time of inspection.
pub state: SandboxState,
/// Labels attached to the sandbox.
pub labels: HashMap<String, String>,
/// vCPU count taken from the sandbox spec.
pub vcpus: u32,
/// Memory size in MiB taken from the sandbox spec.
pub memory_mib: u64,
/// Network details, or `None` when the sandbox has no network allocation.
pub network: Option<SandboxNetworkInfo>,
/// Time the instance record was created.
pub created_at: DateTime<Utc>,
/// Time the sandbox became ready, if it has.
pub ready_at: Option<DateTime<Utc>>,
/// Time the most recent command reported its exit, if any.
pub last_exited_at: Option<DateTime<Utc>>,
/// Exit code of the most recent command, if any.
pub last_exit_code: Option<i32>,
/// Error text recorded when boot failed.
pub error: Option<String>,
}
/// Network details of a sandbox, rendered as strings from its `NetworkAllocation`.
pub struct SandboxNetworkInfo {
/// IP address of the allocation.
pub ip_address: String,
/// Gateway address of the allocation.
pub gateway: String,
/// Name of the TAP device of the allocation.
pub tap_name: String,
}
/// Lifecycle notification published on the manager's broadcast channel.
#[derive(Debug, Clone)]
pub struct SandboxEvent {
/// ID of the sandbox the event refers to.
pub sandbox_id: SandboxId,
/// Event name. This file emits `"created"`, `"ready"`, `"running"`, `"idle"`,
/// `"stopping"`, `"stopped"`, `"failed"` and `"removed"`.
pub action: String,
/// Timestamp taken from `Utc::now().timestamp_nanos_opt()`; `0` when that returns `None`.
pub timestamp_ns: i64,
/// Extra key/value data, e.g. `exit_code` on `"idle"` and `error` on `"failed"`.
pub attributes: HashMap<String, String>,
}
impl SandboxEvent {
    /// Creates an event for `sandbox_id` named `action`, stamped with the
    /// current time and carrying no attributes.
    ///
    /// The timestamp falls back to `0` when chrono cannot express the current
    /// time as nanoseconds.
    fn new(sandbox_id: &str, action: &str) -> Self {
        let timestamp_ns = match Utc::now().timestamp_nanos_opt() {
            Some(nanos) => nanos,
            None => 0,
        };
        Self {
            sandbox_id: String::from(sandbox_id),
            action: String::from(action),
            timestamp_ns,
            attributes: HashMap::new(),
        }
    }

    /// Builder-style helper: returns the event with `key` set to `value`,
    /// replacing any previous value stored under the same key.
    fn with_attr(mut self, key: &str, value: &str) -> Self {
        let (owned_key, owned_value) = (String::from(key), String::from(value));
        self.attributes.insert(owned_key, owned_value);
        self
    }
}
/// Result of `checkpoint_sandbox`.
pub struct CheckpointInfo {
/// ID under which the snapshot was registered in the catalog.
pub snapshot_id: String,
/// Directory containing the snapshot's vmstate file (empty string if it has no parent).
pub snapshot_dir: String,
/// Creation time of the snapshot, formatted as RFC 3339.
pub created_at: String,
}
/// One entry of the list returned by `list_checkpoints`.
pub struct CheckpointSummary {
/// Snapshot ID.
pub id: String,
/// ID of the sandbox the snapshot was taken from.
pub sandbox_id: String,
/// Snapshot name; empty string when the snapshot has no name.
pub name: String,
/// Labels; `list_checkpoints` always leaves this map empty.
pub labels: HashMap<String, String>,
/// Directory containing the snapshot's vmstate file (empty string if it has no parent).
pub snapshot_dir: String,
/// Creation time of the snapshot, formatted as RFC 3339.
pub created_at: String,
}
/// Capacity passed to `broadcast::channel` when the manager creates its event channel.
const EVENT_CHANNEL_CAPACITY: usize = 256;
/// Owns the set of sandboxes and the shared services used to manage them.
///
/// The shared fields are wrapped in `Arc` so they can be cloned into the
/// background tasks spawned for booting and TTL expiry.
pub struct SandboxManager {
/// All known sandboxes keyed by ID; each instance has its own mutex.
instances: Arc<RwLock<HashMap<SandboxId, Arc<Mutex<SandboxInstance>>>>>,
/// Allocates and releases per-sandbox network resources.
network: Arc<NetworkManager>,
/// Catalog used to prepare, register, look up and delete snapshots.
snapshots: Arc<SnapshotCatalog>,
/// Manager configuration (defaults, Firecracker and network settings).
config: Arc<VmmConfig>,
/// Sending half of the lifecycle event broadcast channel.
events_tx: broadcast::Sender<SandboxEvent>,
}
impl SandboxManager {
/// Creates a manager from `config`.
///
/// Builds the network manager from `config.network`, the snapshot catalog
/// rooted at `config.firecracker.data_dir`, and the event broadcast channel.
///
/// # Errors
/// Returns the error produced by `NetworkManager::new`.
pub fn new(config: VmmConfig) -> Result<Self> {
    let net_cfg = &config.network;
    let network_manager = NetworkManager::new(
        &net_cfg.bridge,
        &net_cfg.cidr,
        &net_cfg.gateway,
        net_cfg.dns.clone(),
    )?;
    let catalog = SnapshotCatalog::new(&config.firecracker.data_dir);
    // The initial receiver is discarded; consumers obtain one via `subscribe_events`.
    let (events_tx, _) = broadcast::channel(EVENT_CHANNEL_CAPACITY);

    let manager = Self {
        instances: Arc::new(RwLock::new(HashMap::new())),
        network: Arc::new(network_manager),
        snapshots: Arc::new(catalog),
        config: Arc::new(config),
        events_tx,
    };
    Ok(manager)
}
pub async fn create_sandbox(&self, mut spec: SandboxSpec) -> Result<(SandboxId, String)> {
let defaults = &self.config.defaults;
if spec.kernel.is_empty() {
spec.kernel.clone_from(&defaults.kernel);
}
if spec.rootfs.is_empty() {
spec.rootfs.clone_from(&defaults.rootfs);
}
if spec.boot_args.is_empty() {
spec.boot_args.clone_from(&defaults.boot_args);
}
if spec.vcpus == 0 {
spec.vcpus = defaults.vcpus as u32;
}
if spec.memory_mib == 0 {
spec.memory_mib = defaults.memory_mib;
}
if spec.network.mode.is_empty() {
spec.network.mode = "tap".into();
}
let id = spec
.id
.clone()
.filter(|s| !s.is_empty())
.unwrap_or_else(|| Uuid::new_v4().to_string());
if id.contains('/') || id.contains('\\') || id.contains('\0') || id == "." || id == ".." {
return Err(VmmError::Config(format!(
"invalid sandbox ID: {id:?} (must not contain path separators)"
)));
}
{
let instances = self.instances.read().unwrap();
if instances.contains_key(&id) {
return Err(VmmError::AlreadyExists(id));
}
}
let net_alloc = if spec.network.mode == "none" {
None
} else {
Some(self.network.allocate(&id)?)
};
let ip_address = net_alloc
.as_ref()
.map(|n| n.ip_address.to_string())
.unwrap_or_default();
let vm_dir = PathBuf::from(&self.config.firecracker.data_dir)
.join("sandboxes")
.join(&id);
std::fs::create_dir_all(&vm_dir).map_err(VmmError::Io)?;
let instance =
SandboxInstance::new(id.clone(), spec.clone(), net_alloc.clone(), vm_dir.clone());
{
let mut instances = self.instances.write().unwrap();
instances.insert(id.clone(), Arc::new(Mutex::new(instance)));
}
let _ = self.events_tx.send(SandboxEvent::new(&id, "created"));
{
let instances = Arc::clone(&self.instances);
let network = Arc::clone(&self.network);
let config = Arc::clone(&self.config);
let events_tx = self.events_tx.clone();
let id_clone = id.clone();
let spec_clone = spec.clone();
let net_alloc_clone = net_alloc;
tokio::spawn(async move {
boot_sandbox(
id_clone,
spec_clone,
net_alloc_clone,
vm_dir,
instances,
network,
config,
events_tx,
)
.await;
});
}
if spec.ttl_seconds > 0 {
let instances = Arc::clone(&self.instances);
let network = Arc::clone(&self.network);
let events_tx = self.events_tx.clone();
let config2 = Arc::clone(&self.config);
let id2 = id.clone();
let ttl = spec.ttl_seconds;
tokio::spawn(async move {
tokio::time::sleep(Duration::from_secs(ttl as u64)).await;
remove_sandbox_impl(&id2, true, &instances, &network, &events_tx, &config2).await;
});
}
info!(sandbox_id = %id, "sandbox create requested (async boot started)");
Ok((id, ip_address))
}
/// Stops a sandbox that is `Ready` or `Running`.
///
/// Marks the sandbox `Stopping`, sends Ctrl+Alt+Del through the VM handle
/// (the request itself is bounded by `timeout_seconds`, or 30 seconds when
/// that is `0`), then sends `SIGKILL` to the Firecracker process and marks
/// the sandbox `Stopped`. Emits `"stopping"` and `"stopped"` events.
///
/// # Errors
/// * `VmmError::NotFound` when the sandbox does not exist (at either lookup).
/// * `VmmError::WrongState` when the sandbox is neither `Ready` nor `Running`.
pub async fn stop_sandbox(&self, id: &SandboxId, timeout_seconds: u32) -> Result<()> {
    let vm_handle = {
        let handle = self.get_instance(id)?;
        let mut guard = handle.lock().unwrap();
        let current = guard.state;
        if !matches!(current, SandboxState::Ready | SandboxState::Running) {
            return Err(VmmError::WrongState {
                id: id.clone(),
                expected: "Ready or Running".into(),
                actual: current.to_string(),
            });
        }
        guard.state = SandboxState::Stopping;
        guard.vm.clone()
    };
    let _ = self.events_tx.send(SandboxEvent::new(id, "stopping"));

    if let Some(vm) = vm_handle {
        let grace_secs = match timeout_seconds {
            0 => 30,
            secs => secs,
        };
        let budget = Duration::from_secs(grace_secs as u64);
        // Best effort: a timeout or API error is ignored, the process is killed below anyway.
        let _ = tokio::time::timeout(budget, vm.send_ctrl_alt_del()).await;
    }

    {
        let handle = self.get_instance(id)?;
        let mut guard = handle.lock().unwrap();
        if let Some(ref mut fc_proc) = guard.process
            && let Some(raw_pid) = fc_proc.pid()
            && raw_pid > 0
        {
            let _ = nix::sys::signal::kill(
                #[allow(clippy::cast_possible_wrap)]
                nix::unistd::Pid::from_raw(raw_pid as i32),
                nix::sys::signal::Signal::SIGKILL,
            );
        }
        guard.state = SandboxState::Stopped;
    }
    let _ = self.events_tx.send(SandboxEvent::new(id, "stopped"));

    info!(sandbox_id = %id, "sandbox stopped");
    Ok(())
}
/// Removes a sandbox and its on-disk state.
///
/// A sandbox in the `Running` state is only removed when `force` is `true`.
///
/// # Errors
/// * `VmmError::NotFound` when the sandbox does not exist.
/// * `VmmError::WrongState` when it is `Running` and `force` is `false`.
pub async fn remove_sandbox(&self, id: &SandboxId, force: bool) -> Result<()> {
    let handle = self.get_instance(id)?;
    let state = handle.lock().unwrap().state;
    drop(handle);

    let blocked = state == SandboxState::Running && !force;
    if blocked {
        return Err(VmmError::WrongState {
            id: id.clone(),
            expected: "non-running (pass force=true to override)".into(),
            actual: state.to_string(),
        });
    }

    let (instances, network) = (&self.instances, &self.network);
    remove_sandbox_impl(id, force, instances, network, &self.events_tx, &self.config).await;
    info!(sandbox_id = %id, "sandbox removed");
    Ok(())
}
/// Returns a detailed snapshot of the sandbox's current record.
///
/// # Errors
/// `VmmError::NotFound` when no sandbox with this ID is registered.
pub fn inspect_sandbox(&self, id: &SandboxId) -> Result<SandboxInfo> {
    let handle = self.get_instance(id)?;
    let info = inst_to_info(&handle.lock().unwrap());
    Ok(info)
}
/// Lists sandboxes, optionally filtered by state and labels.
///
/// * `state_filter` - when `Some` and non-empty, only sandboxes whose state
///   renders (via `Display`) to exactly this string are returned.
/// * `label_filter` - every key/value pair must be present on the sandbox.
///
/// The order of the result follows the iteration order of the internal map.
pub fn list_sandboxes(
    &self,
    state_filter: Option<&str>,
    label_filter: &HashMap<String, String>,
) -> Vec<SandboxSummary> {
    let registry = self.instances.read().unwrap();
    let mut summaries = Vec::new();

    for handle in registry.values() {
        let inst = handle.lock().unwrap();

        let state_matches = match state_filter {
            Some(wanted) if !wanted.is_empty() => inst.state.to_string() == wanted,
            _ => true,
        };
        if !state_matches {
            continue;
        }

        let labels_match = label_filter
            .iter()
            .all(|(key, value)| inst.labels.get(key) == Some(value));
        if !labels_match {
            continue;
        }

        let ip_address = match inst.network.as_ref() {
            Some(net) => net.ip_address.to_string(),
            None => String::new(),
        };
        summaries.push(SandboxSummary {
            id: inst.id.clone(),
            state: inst.state,
            labels: inst.labels.clone(),
            ip_address,
            created_at: inst.created_at,
        });
    }

    summaries
}
/// Returns a new receiver for the manager's lifecycle events.
pub fn subscribe_events(&self) -> broadcast::Receiver<SandboxEvent> {
    broadcast::Sender::subscribe(&self.events_tx)
}
#[allow(clippy::too_many_arguments)]
pub async fn run_in_sandbox(
&self,
id: &SandboxId,
cmd: Vec<String>,
env: HashMap<String, String>,
working_dir: String,
user: String,
tty: bool,
tty_size: Option<(u16, u16)>,
timeout_seconds: u32,
) -> Result<tokio::sync::mpsc::Receiver<Result<OutputChunk>>> {
let uds_path = self.require_ready_vsock(id)?;
let start = StartCommand {
cmd,
env,
working_dir,
user,
tty,
tty_width: tty_size.map_or(80, |(w, _)| w),
tty_height: tty_size.map_or(24, |(_, h)| h),
timeout_seconds,
};
let inner_rx = vsock::run(&uds_path, start).await?;
{
let inst = self.get_instance(id)?;
inst.lock().unwrap().state = SandboxState::Running;
}
let _ = self.events_tx.send(SandboxEvent::new(id, "running"));
let (wrapped_tx, wrapped_rx) = tokio::sync::mpsc::channel(64);
let instances = Arc::clone(&self.instances);
let events_tx = self.events_tx.clone();
let sandbox_id = id.clone();
tokio::spawn(async move {
let mut inner_rx = inner_rx;
while let Some(result) = inner_rx.recv().await {
let send_result = match &result {
Ok(chunk) if chunk.stream == "exit" => {
let exit_code = chunk.exit_code;
let value = instances.read().unwrap().get(&sandbox_id).cloned();
if let Some(arc) = value {
let mut inst = arc.lock().unwrap();
inst.state = SandboxState::Ready;
inst.last_exit_code = Some(exit_code);
inst.last_exited_at = Some(Utc::now());
}
let _ = events_tx.send(
SandboxEvent::new(&sandbox_id, "idle")
.with_attr("exit_code", &exit_code.to_string()),
);
wrapped_tx.send(result).await
}
_ => wrapped_tx.send(result).await,
};
if send_result.is_err() {
break;
}
}
});
Ok(wrapped_rx)
}
#[allow(clippy::too_many_arguments)]
pub async fn exec_in_sandbox(
&self,
id: &SandboxId,
cmd: Vec<String>,
env: HashMap<String, String>,
working_dir: String,
user: String,
tty: bool,
tty_size: Option<(u16, u16)>,
timeout_seconds: u32,
) -> Result<(
tokio::sync::mpsc::Sender<ExecInputMsg>,
tokio::sync::mpsc::Receiver<Result<OutputChunk>>,
)> {
let uds_path = self.require_ready_vsock(id)?;
let start = StartCommand {
cmd,
env,
working_dir,
user,
tty,
tty_width: tty_size.map_or(80, |(w, _)| w),
tty_height: tty_size.map_or(24, |(_, h)| h),
timeout_seconds,
};
let (in_tx, inner_rx) = vsock::exec(&uds_path, start).await?;
{
let inst = self.get_instance(id)?;
inst.lock().unwrap().state = SandboxState::Running;
}
let _ = self.events_tx.send(SandboxEvent::new(id, "running"));
let (wrapped_tx, wrapped_rx) = tokio::sync::mpsc::channel(64);
let instances = Arc::clone(&self.instances);
let events_tx = self.events_tx.clone();
let sandbox_id = id.clone();
tokio::spawn(async move {
let mut inner_rx = inner_rx;
while let Some(result) = inner_rx.recv().await {
let send_result = match &result {
Ok(chunk) if chunk.stream == "exit" => {
let exit_code = chunk.exit_code;
let value = instances.read().unwrap().get(&sandbox_id).cloned();
if let Some(arc) = value {
let mut inst = arc.lock().unwrap();
inst.state = SandboxState::Ready;
inst.last_exit_code = Some(exit_code);
inst.last_exited_at = Some(Utc::now());
}
let _ = events_tx.send(
SandboxEvent::new(&sandbox_id, "idle")
.with_attr("exit_code", &exit_code.to_string()),
);
wrapped_tx.send(result).await
}
_ => wrapped_tx.send(result).await,
};
if send_result.is_err() {
break;
}
}
});
Ok((in_tx, wrapped_rx))
}
pub async fn checkpoint_sandbox(
&self,
sandbox_id: &SandboxId,
name: String,
) -> Result<CheckpointInfo> {
let (kernel_path, rootfs_path) = {
let instance = self.get_instance(sandbox_id)?;
let inst = instance.lock().unwrap();
if inst.state != SandboxState::Ready {
return Err(VmmError::WrongState {
id: sandbox_id.clone(),
expected: "Ready".into(),
actual: inst.state.to_string(),
});
}
(inst.spec.kernel.clone(), inst.spec.rootfs.clone())
};
let vm = self.get_vm_handle(sandbox_id)?;
vm.pause().await.map_err(VmmError::from)?;
let snapshot_id = Uuid::new_v4().to_string();
let (fc_vmstate_path, fc_mem_path, chroot_snap_dir_opt) =
if let Some(ref jc) = self.config.firecracker.jailer {
let base = jc.chroot_base_dir.as_deref().unwrap_or("/srv/jailer");
let cr = chroot_root(&self.config.firecracker.binary, base, sandbox_id);
let chroot_snap = cr.join("snapshots").join(&snapshot_id);
std::fs::create_dir_all(&chroot_snap).map_err(VmmError::Io)?;
let uid = nix::unistd::Uid::from_raw(jc.uid);
let gid = nix::unistd::Gid::from_raw(jc.gid);
nix::unistd::chown(&chroot_snap, Some(uid), Some(gid))
.map_err(|e| VmmError::Process(format!("chown snapshot dir: {e}")))?;
let fc_vmstate = format!("/snapshots/{snapshot_id}/vmstate");
let fc_mem = format!("/snapshots/{snapshot_id}/mem");
(fc_vmstate, fc_mem, Some(chroot_snap))
} else {
let snap_dir = self.snapshots.prepare_dir(sandbox_id, &snapshot_id)?;
(
snap_dir.join("vmstate").to_str().unwrap().to_owned(),
snap_dir.join("mem").to_str().unwrap().to_owned(),
None,
)
};
let snap_result = vm.create_snapshot(&fc_vmstate_path, &fc_mem_path).await;
let _ = vm.resume().await;
snap_result.map_err(VmmError::from)?;
let (vmstate_path, mem_path) = if let Some(chroot_snap) = chroot_snap_dir_opt {
let catalog_dir = self.snapshots.prepare_dir(sandbox_id, &snapshot_id)?;
let dst_vmstate = catalog_dir.join("vmstate");
let dst_mem = catalog_dir.join("mem");
tokio::fs::rename(chroot_snap.join("vmstate"), &dst_vmstate)
.await
.map_err(VmmError::Io)?;
if chroot_snap.join("mem").exists() {
tokio::fs::rename(chroot_snap.join("mem"), &dst_mem)
.await
.map_err(VmmError::Io)?;
}
let _ = tokio::fs::remove_dir_all(&chroot_snap).await;
(dst_vmstate, dst_mem)
} else {
let snap_dir = self.snapshots.prepare_dir(sandbox_id, &snapshot_id)?;
(snap_dir.join("vmstate"), snap_dir.join("mem"))
};
let (snap_kernel, snap_rootfs) = if self.config.firecracker.jailer.is_some() {
(Some(kernel_path), Some(rootfs_path))
} else {
(None, None)
};
let meta = self.snapshots.register(
sandbox_id,
Some(name),
crate::config::SnapshotType::Full,
vmstate_path,
Some(mem_path),
None,
snap_kernel,
snap_rootfs,
)?;
let snap_dir_path = meta
.vmstate_path
.parent()
.map(|p| p.to_string_lossy().into_owned())
.unwrap_or_default();
info!(sandbox_id, snapshot_id = %meta.id, "sandbox checkpointed");
Ok(CheckpointInfo {
snapshot_id: meta.id,
snapshot_dir: snap_dir_path,
created_at: meta.created_at.to_rfc3339(),
})
}
pub async fn restore_sandbox(&self, spec: RestoreSandboxSpec) -> Result<(SandboxId, String)> {
let new_id = spec
.id
.clone()
.filter(|s| !s.is_empty())
.unwrap_or_else(|| Uuid::new_v4().to_string());
if new_id.contains('/')
|| new_id.contains('\\')
|| new_id.contains('\0')
|| new_id == "."
|| new_id == ".."
{
return Err(VmmError::Config(format!(
"invalid sandbox ID: {new_id:?} (must not contain path separators)"
)));
}
{
let instances = self.instances.read().unwrap();
if instances.contains_key(&new_id) {
return Err(VmmError::AlreadyExists(new_id.clone()));
}
}
let net_alloc = if spec.network_override {
Some(self.network.allocate(&new_id)?)
} else {
None
};
let ip_address = net_alloc
.as_ref()
.map(|n| n.ip_address.to_string())
.unwrap_or_default();
let vm_dir = PathBuf::from(&self.config.firecracker.data_dir)
.join("sandboxes")
.join(&new_id);
std::fs::create_dir_all(&vm_dir).map_err(VmmError::Io)?;
let socket_path = vm_dir.join("firecracker.sock");
let snap_meta = self.snapshots.find_by_id(&spec.snapshot_id)?;
let vmstate_str = snap_meta.vmstate_path.to_str().unwrap().to_owned();
let mem_file = snap_meta.mem_path.as_ref().and_then(|p| {
if p.exists() {
Some(p.to_str().unwrap().to_owned())
} else {
None
}
});
let fc_cfg = &self.config.firecracker;
let (process, actual_vsock_path) = if let Some(ref jc) = fc_cfg.jailer {
let base = jc.chroot_base_dir.as_deref().unwrap_or("/srv/jailer");
let cr = chroot_root(&fc_cfg.binary, base, &new_id);
let run_dir = cr.join("run");
std::fs::create_dir_all(&run_dir).map_err(VmmError::Io)?;
let vsock_path = cr.join("run/firecracker.vsock");
let _ = std::fs::remove_file(&vsock_path);
let proc = spawn_jailer(jc, fc_cfg, &new_id).await?;
(proc, vsock_path)
} else {
let original_vm_dir = PathBuf::from(&fc_cfg.data_dir)
.join("sandboxes")
.join(&snap_meta.vm_id);
let original_vsock_path = original_vm_dir.join("firecracker.vsock");
if original_vsock_path.exists() {
if std::os::unix::net::UnixStream::connect(&original_vsock_path).is_ok() {
return Err(VmmError::Vsock(format!(
"vsock path {} is already in use by another sandbox; \
direct-mode restore does not support concurrent restores \
from the same checkpoint",
original_vsock_path.display(),
)));
}
let _ = std::fs::remove_file(&original_vsock_path);
}
if let Err(e) = std::fs::create_dir_all(&original_vm_dir)
&& e.kind() != std::io::ErrorKind::AlreadyExists
{
return Err(VmmError::Io(e));
}
let log_path = vm_dir.join("firecracker.log");
let metrics_path = vm_dir.join("firecracker.metrics");
let proc =
spawn_direct(fc_cfg, &new_id, &socket_path, &log_path, &metrics_path).await?;
(proc, original_vsock_path)
};
let (effective_vmstate, effective_mem) = if let Some(ref jc) = fc_cfg.jailer {
let base = jc.chroot_base_dir.as_deref().unwrap_or("/srv/jailer");
let cr = chroot_root(&fc_cfg.binary, base, &new_id);
let snap_in_chroot = cr.join("snapshots").join(&spec.snapshot_id);
std::fs::create_dir_all(&snap_in_chroot).map_err(VmmError::Io)?;
let uid = nix::unistd::Uid::from_raw(jc.uid);
let gid = nix::unistd::Gid::from_raw(jc.gid);
nix::unistd::chown(&snap_in_chroot, Some(uid), Some(gid))
.map_err(|e| VmmError::Process(format!("chown snap dir: {e}")))?;
if let (Some(k), Some(r)) = (
snap_meta.kernel_path.as_deref(),
snap_meta.rootfs_path.as_deref(),
) {
stage_files_for_jailer(&cr, k, r, jc.uid, jc.gid).await?;
}
let dst_vmstate = snap_in_chroot.join("vmstate");
tokio::fs::copy(&snap_meta.vmstate_path, &dst_vmstate)
.await
.map_err(VmmError::Io)?;
nix::unistd::chown(&dst_vmstate, Some(uid), Some(gid))
.map_err(|e| VmmError::Process(format!("chown vmstate: {e}")))?;
let effective_mem = if let Some(ref mf) = snap_meta.mem_path
&& mf.exists()
{
let dst_mem = snap_in_chroot.join("mem");
tokio::fs::copy(mf, &dst_mem).await.map_err(VmmError::Io)?;
nix::unistd::chown(&dst_mem, Some(uid), Some(gid))
.map_err(|e| VmmError::Process(format!("chown mem: {e}")))?;
Some(format!("/snapshots/{}/mem", spec.snapshot_id))
} else {
None
};
(
format!("/snapshots/{}/vmstate", spec.snapshot_id),
effective_mem,
)
} else {
(vmstate_str, mem_file)
};
let mut load_params = fc_sdk::types::SnapshotLoadParams {
snapshot_path: effective_vmstate,
mem_file_path: effective_mem,
mem_backend: None,
enable_diff_snapshots: None,
track_dirty_pages: None,
resume_vm: Some(true),
network_overrides: vec![],
};
if let Some(ref net) = net_alloc {
load_params.network_overrides = vec![fc_sdk::types::NetworkOverride {
iface_id: "eth0".into(),
host_dev_name: net.tap_name.clone(),
}];
}
let effective_socket = process.socket_path().to_owned();
let vm = Arc::new(
fc_sdk::restore(effective_socket.to_str().unwrap(), load_params)
.await
.map_err(VmmError::from)?,
);
let restore_spec = SandboxSpec {
id: Some(new_id.clone()),
labels: spec.labels,
ttl_seconds: spec.ttl_seconds,
..Default::default()
};
let mut instance =
SandboxInstance::new(new_id.clone(), restore_spec, net_alloc.clone(), vm_dir);
instance.process = Some(process);
instance.vm = Some(vm);
instance.vsock_uds_path = Some(actual_vsock_path);
instance.state = SandboxState::Ready;
instance.ready_at = Some(Utc::now());
{
let mut instances = self.instances.write().unwrap();
instances.insert(new_id.clone(), Arc::new(Mutex::new(instance)));
}
let _ = self.events_tx.send(SandboxEvent::new(&new_id, "ready"));
if spec.ttl_seconds > 0 {
let instances = Arc::clone(&self.instances);
let network = Arc::clone(&self.network);
let events_tx = self.events_tx.clone();
let config2 = Arc::clone(&self.config);
let id2 = new_id.clone();
let ttl = spec.ttl_seconds;
tokio::spawn(async move {
tokio::time::sleep(Duration::from_secs(ttl as u64)).await;
remove_sandbox_impl(&id2, true, &instances, &network, &events_tx, &config2).await;
});
}
info!(
sandbox_id = %new_id,
snapshot_id = %spec.snapshot_id,
"sandbox restored from checkpoint"
);
Ok((new_id, ip_address))
}
/// Lists checkpoints, either for one sandbox or for all of them.
///
/// Each summary's `labels` map is empty, `name` is empty when the snapshot
/// has none, and `snapshot_dir` is the directory holding the vmstate file.
///
/// # Errors
/// Returns the catalog's error when listing fails.
pub fn list_checkpoints(&self, sandbox_id: Option<&str>) -> Result<Vec<CheckpointSummary>> {
    let snapshots = if let Some(sid) = sandbox_id {
        self.snapshots.list(sid)?
    } else {
        self.snapshots.list_all()?
    };

    let mut summaries = Vec::new();
    for snap in snapshots {
        let snapshot_dir = match snap.vmstate_path.parent() {
            Some(dir) => dir.to_string_lossy().into_owned(),
            None => String::new(),
        };
        summaries.push(CheckpointSummary {
            id: snap.id,
            sandbox_id: snap.vm_id,
            name: snap.name.unwrap_or_default(),
            labels: HashMap::new(),
            snapshot_dir,
            created_at: snap.created_at.to_rfc3339(),
        });
    }
    Ok(summaries)
}
pub fn delete_checkpoint(&self, snapshot_id: &str) -> Result<()> {
self.snapshots.delete_by_id(snapshot_id)
}
/// Looks up the shared handle of a sandbox.
///
/// # Errors
/// `VmmError::NotFound` when no sandbox with this ID is registered.
fn get_instance(&self, id: &SandboxId) -> Result<Arc<Mutex<SandboxInstance>>> {
    let registry = self.instances.read().unwrap();
    match registry.get(id) {
        Some(handle) => Ok(Arc::clone(handle)),
        None => Err(VmmError::NotFound(id.clone())),
    }
}
fn require_ready_vsock(&self, id: &SandboxId) -> Result<PathBuf> {
let instance = self.get_instance(id)?;
let inst = instance.lock().unwrap();
match inst.state {
SandboxState::Ready => {}
s => {
return Err(VmmError::WrongState {
id: id.clone(),
expected: "Ready".into(),
actual: s.to_string(),
});
}
}
inst.vsock_uds_path
.clone()
.ok_or_else(|| VmmError::Vsock(format!("sandbox {id} has no vsock configured")))
}
/// Returns the Firecracker API handle of a sandbox.
///
/// # Errors
/// * `VmmError::NotFound` when the sandbox does not exist.
/// * `VmmError::WrongState` when no VM handle has been stored yet.
fn get_vm_handle(&self, id: &SandboxId) -> Result<Arc<fc_sdk::Vm>> {
    let handle = self.get_instance(id)?;
    let guard = handle.lock().unwrap();
    match guard.vm {
        Some(ref vm) => Ok(Arc::clone(vm)),
        None => Err(VmmError::WrongState {
            id: id.clone(),
            expected: "Ready or Running (VM handle not yet available)".into(),
            actual: guard.state.to_string(),
        }),
    }
}
}
#[allow(clippy::too_many_arguments)]
async fn boot_sandbox(
id: SandboxId,
spec: SandboxSpec,
net_alloc: Option<NetworkAllocation>,
vm_dir: PathBuf,
instances: Arc<RwLock<HashMap<SandboxId, Arc<Mutex<SandboxInstance>>>>>,
network: Arc<NetworkManager>,
config: Arc<VmmConfig>,
events_tx: broadcast::Sender<SandboxEvent>,
) {
match do_boot(&id, &spec, net_alloc.as_ref(), &vm_dir, &config).await {
Ok((process, vm, vsock_uds_path)) => {
let ready_at = Utc::now();
let value = instances.read().unwrap().get(&id).cloned();
if let Some(arc) = value {
let mut inst = arc.lock().unwrap();
if inst.state == SandboxState::Stopping || inst.state == SandboxState::Stopped {
info!(sandbox_id = %id, "sandbox boot completed but stop was requested; staying stopped");
return;
}
inst.process = Some(process);
inst.vm = Some(vm);
inst.vsock_uds_path = Some(vsock_uds_path);
inst.state = SandboxState::Ready;
inst.ready_at = Some(ready_at);
}
let _ = events_tx.send(SandboxEvent::new(&id, "ready"));
info!(sandbox_id = %id, "sandbox booted and ready");
}
Err(e) => {
let value = instances.read().unwrap().get(&id).cloned();
if let Some(arc) = value {
let mut inst = arc.lock().unwrap();
inst.state = SandboxState::Failed;
inst.error = Some(e.to_string());
}
if let Some(ref net) = net_alloc {
network.release(net);
}
let _ =
events_tx.send(SandboxEvent::new(&id, "failed").with_attr("error", &e.to_string()));
error!(sandbox_id = %id, error = %e, "sandbox boot failed");
}
}
}
/// Computes the jailer chroot root for a sandbox:
/// `<chroot_base_dir>/<file name of fc_binary>/<id>/root`.
///
/// # Panics
/// Panics when `fc_binary` has no final file-name component.
fn chroot_root(fc_binary: &str, chroot_base_dir: &str, id: &str) -> PathBuf {
    let Some(exec_name) = Path::new(fc_binary).file_name() else {
        panic!("fc_binary must have a filename");
    };

    let mut root = PathBuf::from(chroot_base_dir);
    root.push(exec_name.to_string_lossy().as_ref());
    root.push(id);
    root.push("root");
    root
}
/// Copies the kernel and root filesystem into a jailer chroot and hands
/// ownership of both copies to `uid` / `gid`.
///
/// The files are written as `vmlinux` and `rootfs.ext4` directly under
/// `chroot_root` (which is created if missing). Returns the two paths as
/// string literals relative to the chroot: `("/vmlinux", "/rootfs.ext4")`.
///
/// # Errors
/// * `VmmError::Io` when the directory cannot be created or a copy fails.
/// * `VmmError::Process` when changing ownership fails.
async fn stage_files_for_jailer(
    chroot_root: &Path,
    kernel_src: &str,
    rootfs_src: &str,
    uid: u32,
    gid: u32,
) -> Result<(String, String)> {
    if let Err(e) = tokio::fs::create_dir_all(chroot_root).await {
        return Err(VmmError::Io(e));
    }

    let staged_kernel = chroot_root.join("vmlinux");
    let staged_rootfs = chroot_root.join("rootfs.ext4");
    // Both copies happen before either ownership change.
    if let Err(e) = tokio::fs::copy(kernel_src, &staged_kernel).await {
        return Err(VmmError::Io(e));
    }
    if let Err(e) = tokio::fs::copy(rootfs_src, &staged_rootfs).await {
        return Err(VmmError::Io(e));
    }

    let owner = Some(Uid::from_raw(uid));
    let group = Some(Gid::from_raw(gid));
    if let Err(e) = chown(&staged_kernel, owner, group) {
        return Err(VmmError::Process(format!("chown kernel: {e}")));
    }
    if let Err(e) = chown(&staged_rootfs, owner, group) {
        return Err(VmmError::Process(format!("chown rootfs: {e}")));
    }

    Ok((String::from("/vmlinux"), String::from("/rootfs.ext4")))
}
async fn do_boot(
id: &str,
spec: &SandboxSpec,
net_alloc: Option<&NetworkAllocation>,
vm_dir: &Path,
config: &VmmConfig,
) -> Result<(fc_sdk::FirecrackerProcess, Arc<fc_sdk::Vm>, PathBuf)> {
let log_path = vm_dir.join("firecracker.log");
let metrics_path = vm_dir.join("firecracker.metrics");
let socket_path = vm_dir.join("firecracker.sock");
let fc_cfg = &config.firecracker;
if fc_cfg.jailer.is_none() {
if let Some(parent) = log_path.parent() {
std::fs::create_dir_all(parent).map_err(VmmError::Io)?;
}
std::fs::File::create(&log_path).map_err(VmmError::Io)?;
std::fs::File::create(&metrics_path).map_err(VmmError::Io)?;
}
let process = if let Some(ref jc) = fc_cfg.jailer {
spawn_jailer(jc, fc_cfg, id).await?
} else {
spawn_direct(fc_cfg, id, &socket_path, &log_path, &metrics_path).await?
};
let (kernel_path, rootfs_path, vsock_fc_path, vsock_host_path) =
if let Some(ref jc) = fc_cfg.jailer {
let base = jc.chroot_base_dir.as_deref().unwrap_or("/srv/jailer");
let cr = chroot_root(&fc_cfg.binary, base, id);
let (k, r) =
stage_files_for_jailer(&cr, &spec.kernel, &spec.rootfs, jc.uid, jc.gid).await?;
let vsock_host = cr.join("run/firecracker.vsock");
(k, r, "/run/firecracker.vsock".to_string(), vsock_host)
} else {
let vsock_path = vm_dir.join("firecracker.vsock");
(
spec.kernel.clone(),
spec.rootfs.clone(),
vsock_path.to_str().unwrap().to_owned(),
vsock_path,
)
};
let vcpu_count = NonZeroU64::new(spec.vcpus.max(1) as u64)
.ok_or_else(|| VmmError::Config("vcpus must be > 0".into()))?;
let mut builder = VmBuilder::new(process.socket_path())
.boot_source(BootSource {
kernel_image_path: kernel_path,
boot_args: Some(spec.boot_args.clone()),
initrd_path: None,
})
.machine_config(fc_sdk::types::MachineConfiguration {
vcpu_count,
#[allow(clippy::cast_possible_wrap)]
mem_size_mib: spec.memory_mib as i64,
smt: false,
track_dirty_pages: true,
cpu_template: None,
huge_pages: None,
})
.drive(Drive {
drive_id: "rootfs".into(),
path_on_host: Some(rootfs_path),
is_root_device: true,
is_read_only: Some(false),
partuuid: None,
cache_type: fc_sdk::types::DriveCacheType::Unsafe,
rate_limiter: None,
io_engine: fc_sdk::types::DriveIoEngine::Sync,
socket: None,
});
if let Some(net) = net_alloc {
builder = builder.network_interface(NetworkInterface {
iface_id: "eth0".into(),
guest_mac: Some(net.mac_address.clone()),
host_dev_name: net.tap_name.clone(),
rx_rate_limiter: None,
tx_rate_limiter: None,
});
}
builder = builder.vsock(Vsock {
guest_cid: 3,
uds_path: vsock_fc_path,
vsock_id: None,
});
let vm = Arc::new(builder.start().await.map_err(VmmError::from)?);
Ok((process, vm, vsock_host_path))
}
/// Tears down a sandbox: kills its process, releases its network allocation,
/// deletes its directories and unregisters it.
///
/// Does nothing when the ID is not registered. `_force` is accepted for call
/// compatibility but not consulted here; callers decide whether removal is
/// allowed before calling.
///
/// The function is safe to race with itself (for example TTL expiry and an
/// explicit removal):
/// * the network allocation is taken off the instance under its mutex, so it
///   is released exactly once;
/// * the map entry is removed, and the `"removed"` event emitted, only if the
///   entry is still the instance this call started with.
///
/// Directory removal is best effort; failures other than "not found" are
/// logged as warnings.
// The map type is the manager's own field type.
#[allow(clippy::type_complexity)]
async fn remove_sandbox_impl(
    id: &str,
    _force: bool,
    instances: &Arc<RwLock<HashMap<SandboxId, Arc<Mutex<SandboxInstance>>>>>,
    network: &Arc<NetworkManager>,
    events_tx: &broadcast::Sender<SandboxEvent>,
    config: &Arc<VmmConfig>,
) {
    let entry = instances.read().unwrap().get(id).cloned();
    let Some(arc) = entry else {
        return;
    };

    let released = {
        let mut inst = arc.lock().unwrap();
        if let Some(ref mut proc) = inst.process
            && let Some(pid) = proc.pid()
            && pid > 0
        {
            let _ = nix::sys::signal::kill(
                #[allow(clippy::cast_possible_wrap)]
                nix::unistd::Pid::from_raw(pid as i32),
                nix::sys::signal::Signal::SIGKILL,
            );
        }
        // Taking the allocation makes a second, concurrent removal a no-op here.
        inst.network.take()
    };
    if let Some(ref net) = released {
        network.release(net);
    }

    if let Some(ref jc) = config.firecracker.jailer {
        let base = jc.chroot_base_dir.as_deref().unwrap_or("/srv/jailer");
        let chroot_dir = chroot_root(&config.firecracker.binary, base, id);
        // Remove the per-sandbox directory that contains `root`.
        if let Some(parent) = chroot_dir.parent()
            && let Err(e) = tokio::fs::remove_dir_all(parent).await
            && e.kind() != std::io::ErrorKind::NotFound
        {
            warn!(sandbox_id = %id, err = %e, "failed to remove jailer chroot dir");
        }
    }

    let vm_dir = PathBuf::from(&config.firecracker.data_dir)
        .join("sandboxes")
        .join(id);
    if let Err(e) = tokio::fs::remove_dir_all(&vm_dir).await
        && e.kind() != std::io::ErrorKind::NotFound
    {
        warn!(sandbox_id = %id, err = %e, "failed to remove sandbox dir");
    }

    let unregistered = {
        let mut map = instances.write().unwrap();
        // Never evict a different sandbox that has since been registered under this ID.
        if map.get(id).is_some_and(|current| Arc::ptr_eq(current, &arc)) {
            map.remove(id);
            true
        } else {
            false
        }
    };
    if unregistered {
        let _ = events_tx.send(SandboxEvent::new(id, "removed"));
    }
}
/// Builds the public [`SandboxInfo`] view of an instance record.
///
/// vCPU and memory values come from the instance's spec; network details are
/// present only when the instance has a network allocation.
fn inst_to_info(inst: &SandboxInstance) -> SandboxInfo {
    let network = match inst.network.as_ref() {
        Some(alloc) => Some(SandboxNetworkInfo {
            ip_address: alloc.ip_address.to_string(),
            gateway: alloc.gateway.to_string(),
            tap_name: alloc.tap_name.clone(),
        }),
        None => None,
    };
    let spec = &inst.spec;

    SandboxInfo {
        id: inst.id.clone(),
        state: inst.state,
        labels: inst.labels.clone(),
        vcpus: spec.vcpus,
        memory_mib: spec.memory_mib,
        network,
        created_at: inst.created_at,
        ready_at: inst.ready_at,
        last_exited_at: inst.last_exited_at,
        last_exit_code: inst.last_exit_code,
        error: inst.error.clone(),
    }
}