Skip to main content

cellos_host_gvisor/
backend.rs

1//! Linux-only `runsc` shell-out implementation of [`CellBackend`].
2//!
3//! L2-06-5 skeleton. We do NOT depend on `runsc` being installed at build
4//! time; failure to locate or invoke the binary surfaces as a
5//! [`CellosError::Host`] at `create()` time and the supervisor degrades
6//! to whatever fallback the operator wired (typically the stub backend on
7//! non-prod hosts).
8//!
9//! Behaviour:
10//!
11//! - `create(spec)`:
12//!   1. Generate the bundle (`config.json` + empty `rootfs/`) via
13//!      [`generate_bundle_config`] into a temporary directory keyed by the
14//!      cell id.
15//!   2. Spawn `runsc run --bundle <dir> <cell-id>` and detach. The child
16//!      pid is tracked in an in-memory map keyed by cell id so
17//!      `wait_for_in_vm_exit` can join it later.
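//! - `wait_for_in_vm_exit(cell_id)`:
//!   - Stops tracking the cell, waits for its `runsc run` child to exit,
//!     and returns `Some(Ok(code))` (`-1` when the child reports no exit
//!     code) or `Some(Err(_))` if waiting on the child fails. Returns
//!     `None` if the cell id is not tracked by this backend (the
//!     supervisor's host-subprocess path takes over).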
22//! - `destroy(handle)`:
23//!   - Best-effort `runsc kill <cell-id> SIGKILL`, then `runsc delete
24//!     <cell-id>`. Errors are logged but do not block the teardown
25//!     report — gVisor's own state is the source of truth.
26
27use std::collections::HashMap;
28use std::path::PathBuf;
29use std::process::Stdio;
30use std::sync::Arc;
31
32use async_trait::async_trait;
33use tokio::process::{Child, Command};
34use tokio::sync::Mutex;
35use tracing::{instrument, warn};
36
37use cellos_core::ports::{CellBackend, CellHandle, TeardownReport};
38use cellos_core::{CellosError, ExecutionCellDocument};
39
40use crate::bundle::generate_bundle_config;
41
/// Environment variable that overrides the `runsc` binary path. When it is
/// not set, the bare name `runsc` is used and resolved via `$PATH` at spawn
/// time. The variable is re-read on every `create()` and `destroy()`, so
/// tests can inject a fake binary without rebuilding.
const RUNSC_BIN_ENV: &str = "CELLOS_GVISOR_RUNSC_BIN";

/// Environment variable that overrides the bundle staging directory root.
/// Defaults to `${TMPDIR:-/tmp}/cellos-gvisor`. `create()` stages each
/// cell's bundle in a subdirectory of this root named after the cell id.
const BUNDLE_ROOT_ENV: &str = "CELLOS_GVISOR_BUNDLE_ROOT";
51
/// Per-cell bookkeeping stored in the backend's in-memory map between
/// `create()` and the matching `wait_for_in_vm_exit()` / `destroy()`.
struct TrackedCell {
    /// Bundle directory we created in `create()`; removed by `destroy()`.
    bundle_dir: PathBuf,
    /// The `runsc run` child process. `wait_for_in_vm_exit` joins it.
    child: Child,
}
58
/// gVisor-backed [`CellBackend`].
///
/// All real `runsc` invocations live in `create()` / `destroy()` /
/// `wait_for_in_vm_exit()`. The struct holds only an in-memory id → child
/// map; no persistent state.
pub struct GVisorCellBackend {
    /// Cells spawned by `create()` that have not yet been joined by
    /// `wait_for_in_vm_exit()` or torn down by `destroy()`, keyed by cell id.
    tracked: Arc<Mutex<HashMap<String, TrackedCell>>>,
}
67
68impl Default for GVisorCellBackend {
69    fn default() -> Self {
70        Self::new()
71    }
72}
73
74impl GVisorCellBackend {
75    pub fn new() -> Self {
76        Self {
77            tracked: Arc::new(Mutex::new(HashMap::new())),
78        }
79    }
80
81    fn runsc_bin() -> String {
82        std::env::var(RUNSC_BIN_ENV).unwrap_or_else(|_| "runsc".to_string())
83    }
84
85    fn bundle_root() -> PathBuf {
86        if let Ok(s) = std::env::var(BUNDLE_ROOT_ENV) {
87            return PathBuf::from(s);
88        }
89        let tmp = std::env::var("TMPDIR").unwrap_or_else(|_| "/tmp".to_string());
90        PathBuf::from(tmp).join("cellos-gvisor")
91    }
92}
93
94#[async_trait]
95impl CellBackend for GVisorCellBackend {
96    #[instrument(skip(self, spec), fields(cell_id = %spec.spec.id))]
97    async fn create(&self, spec: &ExecutionCellDocument) -> Result<CellHandle, CellosError> {
98        let cfg = generate_bundle_config(spec)
99            .map_err(|e| CellosError::InvalidSpec(format!("gvisor bundle: {e}")))?;
100
101        let cell_id = spec.spec.id.clone();
102        let bundle_dir = Self::bundle_root().join(&cell_id);
103        let rootfs_dir = bundle_dir.join("rootfs");
104
105        // Bundle layout: <bundle_dir>/config.json + <bundle_dir>/rootfs/
106        std::fs::create_dir_all(&rootfs_dir).map_err(|e| {
107            CellosError::Host(format!("gvisor: create bundle dir {bundle_dir:?}: {e}"))
108        })?;
109        let config_path = bundle_dir.join("config.json");
110        let json = serde_json::to_vec_pretty(&cfg)
111            .map_err(|e| CellosError::Host(format!("gvisor: serialize config.json: {e}")))?;
112        std::fs::write(&config_path, json)
113            .map_err(|e| CellosError::Host(format!("gvisor: write {config_path:?}: {e}")))?;
114
115        let mut cmd = Command::new(Self::runsc_bin());
116        cmd.arg("run")
117            .arg("--bundle")
118            .arg(&bundle_dir)
119            .arg(&cell_id)
120            .stdin(Stdio::null())
121            .stdout(Stdio::null())
122            .stderr(Stdio::null());
123
124        let child = cmd.spawn().map_err(|e| {
125            CellosError::Host(format!(
126                "gvisor: spawn `runsc run --bundle {bundle_dir:?} {cell_id}` failed: {e}"
127            ))
128        })?;
129
130        self.tracked.lock().await.insert(
131            cell_id.clone(),
132            TrackedCell {
133                bundle_dir: bundle_dir.clone(),
134                child,
135            },
136        );
137
138        Ok(CellHandle {
139            cell_id,
140            cgroup_path: None,
141            // gVisor manages its own network namespace; we do not apply
142            // host-side nftables in this skeleton. The supervisor's
143            // host-subprocess fallback surfaces the signal when the spec
144            // declares egress.
145            nft_rules_applied: None,
146            kernel_digest_sha256: None,
147            rootfs_digest_sha256: None,
148            firecracker_digest_sha256: None,
149        })
150    }
151
152    #[instrument(skip(self))]
153    async fn wait_for_in_vm_exit(&self, cell_id: &str) -> Option<Result<i32, CellosError>> {
154        // Pull the tracked entry out so we can await the child without
155        // holding the map lock across the await point.
156        let mut tracked = self.tracked.lock().await;
157        let entry = tracked.remove(cell_id)?;
158        drop(tracked);
159
160        let TrackedCell {
161            bundle_dir,
162            mut child,
163        } = entry;
164
165        let status = match child.wait().await {
166            Ok(s) => s,
167            Err(e) => {
168                return Some(Err(CellosError::Host(format!(
169                    "gvisor: wait for runsc child of {cell_id}: {e}"
170                ))));
171            }
172        };
173        // Bundle dir cleanup is owned by destroy(); leaving it here so
174        // post-mortem inspection still works between wait and destroy.
175        let _ = bundle_dir;
176        Some(Ok(status.code().unwrap_or(-1)))
177    }
178
179    #[instrument(skip(self, handle), fields(cell_id = %handle.cell_id))]
180    async fn destroy(&self, handle: &CellHandle) -> Result<TeardownReport, CellosError> {
181        let mut tracked = self.tracked.lock().await;
182        let entry = tracked.remove(&handle.cell_id);
183        let still_tracked = tracked.len();
184        drop(tracked);
185
186        // Best-effort: kill the container, then delete it. We never error
187        // out the teardown report on these — gVisor's own state is the
188        // authority on whether the cell exists.
189        let runsc = Self::runsc_bin();
190        for (sub, args) in [
191            ("kill", vec!["kill", &handle.cell_id, "SIGKILL"]),
192            ("delete", vec!["delete", &handle.cell_id]),
193        ] {
194            let res = Command::new(&runsc)
195                .args(&args)
196                .stdin(Stdio::null())
197                .stdout(Stdio::null())
198                .stderr(Stdio::null())
199                .status()
200                .await;
201            if let Err(e) = res {
202                warn!(error = %e, "gvisor: `runsc {sub} {}` failed (continuing)", handle.cell_id);
203            }
204        }
205
206        // Best-effort bundle cleanup.
207        if let Some(t) = entry {
208            if let Err(e) = std::fs::remove_dir_all(&t.bundle_dir) {
209                warn!(error = %e, bundle = ?t.bundle_dir, "gvisor: bundle cleanup failed");
210            }
211        }
212
213        Ok(TeardownReport {
214            cell_id: handle.cell_id.clone(),
215            destroyed: true,
216            peers_tracked_after: still_tracked,
217        })
218    }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Sets an environment variable for the lifetime of the guard and puts
    /// the previous state back on drop — including when the test body
    /// panics on a failed assertion.
    ///
    /// The environment is process-wide and tests in one binary share a
    /// process, so each test here overrides a different key and they do
    /// not clobber each other's values.
    struct EnvOverride {
        key: &'static str,
        /// Value present before the override (`None` if the variable was
        /// unset). Captured with `var_os` so a non-UTF-8 value is restored
        /// rather than dropped.
        previous: Option<std::ffi::OsString>,
    }

    impl EnvOverride {
        fn set(key: &'static str, value: &str) -> Self {
            let previous = std::env::var_os(key);
            std::env::set_var(key, value);
            Self { key, previous }
        }
    }

    impl Drop for EnvOverride {
        fn drop(&mut self) {
            match self.previous.take() {
                Some(value) => std::env::set_var(self.key, value),
                None => std::env::remove_var(self.key),
            }
        }
    }

    #[test]
    fn runsc_bin_respects_env_override() {
        let _guard = EnvOverride::set(RUNSC_BIN_ENV, "/usr/local/bin/my-runsc");
        assert_eq!(GVisorCellBackend::runsc_bin(), "/usr/local/bin/my-runsc");
    }

    #[test]
    fn bundle_root_respects_env_override() {
        let _guard = EnvOverride::set(BUNDLE_ROOT_ENV, "/var/lib/cellos-gvisor-test");
        assert_eq!(
            GVisorCellBackend::bundle_root(),
            PathBuf::from("/var/lib/cellos-gvisor-test")
        );
    }
}