cellos-supervisor 0.5.1

CellOS execution-cell runner — boots cells in Firecracker microVMs or gVisor, enforces narrow typed authority, emits signed CloudEvents.
Documentation
//! FC-12 — `cellos-init` runs as PID 1 inside the Firecracker microVM.
//!
//! Acceptance gate (from `Plans/firecracker-release-readiness.md` §FC-12):
//!
//! > FC-12: `cellos-init` runs as PID 1. Acceptance: e2e workload reads
//! > `/proc/1/comm` and asserts the value is `cellos-init`.
//!
//! # Why this lives next to `firecracker_e2e.rs`
//!
//! The crate this slot's brief calls "host-firecracker" already has its
//! own `tests/` directory (FC-14, FC-19, FC-34 e2e harnesses), so per the
//! brief's conditional this file is placed under the supervisor's tests
//! directory instead — alongside `firecracker_e2e.rs`, whose launch
//! pattern (build a cell spec, set `CELLOS_CELL_BACKEND=firecracker`, spawn
//! the supervisor binary as a subprocess) we mirror here.
//!
//! # Encoding strategy: cell exit code → command.v1.completed event
//!
//! The brief offered two options for surfacing `/proc/1/comm` from inside
//! the guest back to the host test:
//!
//!   (a) Write the value to a result file under a `/shared` mount.
//!   (b) Encode PASS/FAIL as the workload's exit status.
//!
//! There is no `/shared` mount in the `ExecutionCell` spec format
//! (`contracts/schemas/execution-cell.schema.json` has no field for
//! host-shared paths), and the existing FC-14/FC-19 e2e harnesses follow
//! the same pattern of round-tripping evidence through a channel that
//! already exists in the production protocol. So this test takes
//! option (b): the workload is `test "$(cat /proc/1/comm)" = cellos-init`,
//! which exits 0 on match and 1 on mismatch. `cellos-init` forwards that
//! exit code over vsock; the supervisor records it as `data.exitCode` on
//! the `dev.cellos.events.cell.command.v1.completed` CloudEvent emitted
//! into the export directory.
//!
//! Asserting on the event payload (rather than just on the supervisor
//! process's exit status) is deliberate: the supervisor returns non-zero
//! for many failure shapes (spawn error, vsock timeout, teardown error),
//! so a bare `status.success()` check would conflate "PID 1 was wrong"
//! with "infrastructure broke before the workload ran". Reading the
//! recorded `exitCode` pins the assertion to the workload's evidence.
//!
//! # Skip-on-non-KVM: same env-gating as `firecracker_e2e.rs`
//!
//! The same five preconditions as `firecracker_e2e.rs` apply (KVM device,
//! the four `CELLOS_FIRECRACKER_*` env vars, file existence on disk,
//! socket dir, supervisor binary). Missing any of them is a SKIP, not a
//! failure — local dev machines won't have them, the firecracker-e2e CI
//! workflow does. We use the env-var skip pattern rather than `#[ignore]`
//! to match the sibling test exactly; an `#[ignore]`-gated test would
//! require an extra `--ignored` flag in the CI invocation that
//! `firecracker_e2e.rs` does not require.

#![cfg(target_os = "linux")]

use std::fs::{self, File};
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::time::{Duration, Instant};

/// Required Firecracker env vars. Missing any of them is a skip, not a
/// failure — kept in sync with `firecracker_e2e.rs`.
///
/// The test checks each name for presence with `std::env::var_os`. The
/// binary, kernel image and rootfs image values must additionally point at
/// paths that exist on disk; the socket dir must exist or be creatable.
const REQUIRED_ENV: &[&str] = &[
    "CELLOS_FIRECRACKER_BINARY",
    "CELLOS_FIRECRACKER_KERNEL_IMAGE",
    "CELLOS_FIRECRACKER_ROOTFS_IMAGE",
    "CELLOS_FIRECRACKER_SOCKET_DIR",
];

/// Locates the supervisor binary to spawn. Mirrors `firecracker_e2e.rs`.
///
/// Lookup order:
///
/// 1. `CELLOS_SUPERVISOR_BIN` — explicit override.
/// 2. `CARGO_BIN_EXE_cellos-supervisor`, then the underscore spelling
///    `CARGO_BIN_EXE_cellos_supervisor`.
/// 3. Fallback: `<workspace>/target/<profile>/cellos-supervisor`, where the
///    workspace root is two levels above `CARGO_MANIFEST_DIR` and the
///    profile comes from the `PROFILE` env var (default `release`).
///
/// The returned path is not checked for existence; the caller does that.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` has fewer than two ancestors.
fn supervisor_exe() -> PathBuf {
    // Ordered by priority: the explicit override wins over Cargo's variables.
    const ENV_CANDIDATES: [&str; 3] = [
        "CELLOS_SUPERVISOR_BIN",
        "CARGO_BIN_EXE_cellos-supervisor",
        "CARGO_BIN_EXE_cellos_supervisor",
    ];
    if let Some(configured) = ENV_CANDIDATES
        .iter()
        .find_map(|name| std::env::var_os(name))
    {
        return PathBuf::from(configured);
    }

    // `ancestors()` yields the path itself first, so index 2 is the
    // grandparent directory.
    let workspace_root = Path::new(env!("CARGO_MANIFEST_DIR"))
        .ancestors()
        .nth(2)
        .expect("cellos-supervisor sits two levels under workspace root");
    let profile = match std::env::var("PROFILE") {
        Ok(name) => name,
        Err(_) => String::from("release"),
    };

    let mut exe = workspace_root.join("target");
    exe.push(profile);
    exe.push("cellos-supervisor");
    exe
}

/// Bridges the short rootfs env var into the long one — the same shim
/// `firecracker_e2e.rs` uses.
///
/// Some host backends accept `CELLOS_FIRECRACKER_ROOTFS` as shorthand for
/// `CELLOS_FIRECRACKER_ROOTFS_IMAGE`. When only the short form is set, its
/// value is copied into the long form so the supervisor finds it. An
/// already-set long form is never overwritten, and nothing happens when
/// neither variable is set.
fn handle_rootfs_alias() {
    const LONG_FORM: &str = "CELLOS_FIRECRACKER_ROOTFS_IMAGE";
    const SHORT_FORM: &str = "CELLOS_FIRECRACKER_ROOTFS";

    // The long form always wins when present.
    if std::env::var_os(LONG_FORM).is_some() {
        return;
    }
    if let Some(alias_value) = std::env::var_os(SHORT_FORM) {
        std::env::set_var(LONG_FORM, alias_value);
    }
}

/// Reports on stderr that the test is being skipped and why. The caller is
/// responsible for returning afterwards.
fn skip(reason: &str) {
    eprintln!("fc12_pid1_assertion: skipping — {}", reason);
}

/// Walk `dir` recursively and collect every `*.jsonl` path, sorted.
///
/// Used to find the supervisor's emitted `command.v1.completed` event after
/// the cell exits. Bounded by directory depth in practice (the export dir
/// layout is shallow), so a vector-based DFS is sufficient.
///
/// Directories that cannot be read (including a missing `dir`) and entries
/// that fail to enumerate are skipped silently, so the result may be empty.
///
/// The result is sorted because `fs::read_dir` yields entries in an
/// unspecified order; callers that take the "first" match need that choice
/// to be reproducible from run to run.
fn collect_jsonl_files(dir: &Path) -> Vec<PathBuf> {
    let mut found = Vec::new();
    let mut pending = vec![dir.to_path_buf()];
    while let Some(current) = pending.pop() {
        let entries = match fs::read_dir(&current) {
            Ok(it) => it,
            // Best-effort walk: an unreadable directory is not an error here.
            Err(_) => continue,
        };
        for entry in entries.flatten() {
            let path = entry.path();
            if path.is_dir() {
                pending.push(path);
            } else if path.extension().and_then(|ext| ext.to_str()) == Some("jsonl") {
                found.push(path);
            }
        }
    }
    found.sort_unstable();
    found
}

/// Find the first `dev.cellos.events.cell.command.v1.completed` event in
/// the JSONL stream(s) under `export_dir` and return its `data.exitCode`.
///
/// Files come from `collect_jsonl_files`; each is scanned line by line.
/// Unreadable files, blank lines and lines that are not valid JSON are
/// logged to stderr (blank lines silently) and skipped, so unrelated noise
/// in the export dir cannot fail the lookup.
///
/// # Errors
///
/// Returns `Err` when:
///
/// * no completed event is present — the supervisor MUST emit one for every
///   cell that reaches the workload-spawn phase, so a missing event is
///   itself a regression;
/// * the first completed event has no integer `data.exitCode`;
/// * `data.exitCode` does not fit in `i32`. Truncating it could turn a
///   non-zero code into `0` and report a false PASS.
fn read_command_exit_code(export_dir: &Path) -> Result<i32, String> {
    const TYPE_NEEDLE: &str = "dev.cellos.events.cell.command.v1.completed";

    for path in collect_jsonl_files(export_dir) {
        let content = match fs::read_to_string(&path) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("fc12: skipping unreadable jsonl {}: {e}", path.display());
                continue;
            }
        };
        for line in content.lines() {
            let line = line.trim();
            if line.is_empty() {
                continue;
            }
            let value: serde_json::Value = match serde_json::from_str(line) {
                Ok(v) => v,
                Err(e) => {
                    eprintln!(
                        "fc12: skipping malformed jsonl line in {}: {e}",
                        path.display()
                    );
                    continue;
                }
            };
            // CloudEvents v1 puts the event type in the top-level `type` field.
            let ty = value.get("type").and_then(|v| v.as_str()).unwrap_or("");
            if ty != TYPE_NEEDLE {
                continue;
            }
            let raw_exit_code = value
                .get("data")
                .and_then(|d| d.get("exitCode"))
                .and_then(|v| v.as_i64())
                .ok_or_else(|| {
                    format!(
                        "found {TYPE_NEEDLE} event in {} but data.exitCode missing or not i64; \
                         payload: {line}",
                        path.display()
                    )
                })?;
            // Checked narrowing: `as i32` would wrap (e.g. 1 << 32 becomes 0).
            // The fully-qualified call keeps this independent of whether the
            // crate's edition has `TryFrom` in the prelude.
            return <i32 as std::convert::TryFrom<i64>>::try_from(raw_exit_code).map_err(|_| {
                format!(
                    "found {TYPE_NEEDLE} event in {} but data.exitCode {raw_exit_code} does not \
                     fit in i32; payload: {line}",
                    path.display()
                )
            });
        }
    }
    Err(format!(
        "no {TYPE_NEEDLE} event found under {}; either the supervisor never \
         spawned the workload (precondition failure) or the export sink did not \
         flush before the supervisor exited",
        export_dir.display()
    ))
}

/// Best-effort read of a captured supervisor output file for diagnostics.
///
/// Returns an empty string when the file cannot be read and replaces invalid
/// UTF-8 lossily, so a diagnostics problem never hides the real failure.
fn read_captured_output(path: &Path) -> String {
    fs::read(path)
        .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
        .unwrap_or_default()
}

/// FC-12 acceptance gate: spawn the supervisor with a workload that
/// asserts `/proc/1/comm == cellos-init` via shell exit status, then read
/// the recorded `command.v1.completed.data.exitCode` from the export dir
/// and assert it is zero.
///
/// Skipped (not failed) when KVM, env vars, files, socket dir, or the
/// supervisor binary are unavailable — same gating shape as
/// `firecracker_e2e.rs` so this test joins the same CI lane without
/// needing a separate enable flag.
///
/// The supervisor's stdout/stderr are redirected to files in the per-run
/// tempdir rather than to pipes. Pipes were only read after the child
/// exited, so a supervisor that filled a pipe buffer would block on write
/// and surface as a bogus 30s timeout.
///
/// # Panics
///
/// Panics (fails the test) when the supervisor does not exit within 30s,
/// when no usable `command.v1.completed` event is found, or when the
/// recorded exit code is non-zero. Every failure message includes the
/// captured supervisor output.
#[test]
fn cellos_init_runs_as_pid1_in_firecracker_microvm() {
    // Precondition 1: KVM device.
    if !Path::new("/dev/kvm").exists() {
        skip("/dev/kvm not present (no KVM on this host)");
        return;
    }

    handle_rootfs_alias();

    // Precondition 2: required env vars.
    let missing: Vec<&str> = REQUIRED_ENV
        .iter()
        .copied()
        .filter(|k| std::env::var_os(k).is_none())
        .collect();
    if !missing.is_empty() {
        skip(&format!("missing env: {}", missing.join(", ")));
        return;
    }

    // Precondition 3: required files exist on disk.
    //
    // `var_os` (not `var`) so a non-UTF-8 path, which passed the presence
    // check above, is handled as a path instead of panicking.
    for key in [
        "CELLOS_FIRECRACKER_BINARY",
        "CELLOS_FIRECRACKER_KERNEL_IMAGE",
        "CELLOS_FIRECRACKER_ROOTFS_IMAGE",
    ] {
        let path = PathBuf::from(std::env::var_os(key).expect("presence checked above"));
        if !path.exists() {
            skip(&format!("{key}={} does not exist on disk", path.display()));
            return;
        }
    }

    // Precondition 4: socket dir exists (or is creatable).
    let sock_dir = PathBuf::from(
        std::env::var_os("CELLOS_FIRECRACKER_SOCKET_DIR").expect("presence checked above"),
    );
    if !sock_dir.is_dir() && fs::create_dir_all(&sock_dir).is_err() {
        skip(&format!("socket dir {} not creatable", sock_dir.display()));
        return;
    }

    // Precondition 5: supervisor binary is built.
    let exe = supervisor_exe();
    if !exe.is_file() {
        skip(&format!(
            "supervisor binary missing at {} — run \
             `cargo build -p cellos-supervisor --release`",
            exe.display()
        ));
        return;
    }

    // Build the cell spec.
    //
    // argv encodes the FC-12 assertion: `test "$(cat /proc/1/comm)" =
    // cellos-init` exits 0 on match and 1 on mismatch. `/bin/sh` and
    // `cat` are present in the standard CellOS Alpine rootfs (the same
    // rootfs `firecracker_e2e.rs` runs `/bin/true` from). The `test`
    // builtin is shell-internal so no extra binary path needs to be
    // declared.
    let tmp = tempfile::tempdir().expect("tempdir");
    let spec_path = tmp.path().join("cell.json");
    let spec_json = r#"{
  "apiVersion": "cellos.io/v1",
  "kind": "ExecutionCell",
  "spec": {
    "id": "fc-12-pid1-assertion",
    "authority": { "secretRefs": [], "egressRules": [] },
    "lifetime": { "ttlSeconds": 30 },
    "run": {
      "secretDelivery": "env",
      "argv": ["/bin/sh", "-c", "test \"$(cat /proc/1/comm)\" = cellos-init"],
      "timeoutMs": 20000,
      "limits": { "memoryMaxBytes": 67108864 }
    }
  }
}"#;
    File::create(&spec_path)
        .and_then(|mut f| f.write_all(spec_json.as_bytes()))
        .expect("write cell spec");

    // Per-run export dir so we can read the command.v1.completed event.
    let export_dir = tmp.path().join("events");
    fs::create_dir_all(&export_dir).expect("mkdir export dir");

    // Capture supervisor output in files next to the spec. They live
    // outside `export_dir`, so the event scan never sees them.
    let stdout_path = tmp.path().join("supervisor.stdout");
    let stderr_path = tmp.path().join("supervisor.stderr");
    let stdout_file = File::create(&stdout_path).expect("create stdout capture file");
    let stderr_file = File::create(&stderr_path).expect("create stderr capture file");

    let mut cmd = Command::new(&exe);
    cmd.env("CELL_OS_USE_NOOP_SINK", "1") // disable NATS sink
        .env("CELLOS_CELL_BACKEND", "firecracker")
        .env("CELLOS_EXPORT_DIR", &export_dir)
        .env("RUST_BACKTRACE", "1")
        .arg(&spec_path)
        .stdout(Stdio::from(stdout_file))
        .stderr(Stdio::from(stderr_file));

    // Forward all CELLOS_FIRECRACKER_* vars the harness set up.
    for (k, v) in std::env::vars_os() {
        if k.to_string_lossy().starts_with("CELLOS_FIRECRACKER_") {
            cmd.env(&k, &v);
        }
    }

    eprintln!("fc12_pid1_assertion: spawning supervisor {}", exe.display());
    let mut child = cmd.spawn().expect("spawn supervisor");

    // 30s wait with a poll loop (same budget as firecracker_e2e.rs).
    let deadline = Instant::now() + Duration::from_secs(30);
    let status = loop {
        match child.try_wait().expect("try_wait") {
            Some(status) => break status,
            None if Instant::now() >= deadline => {
                let _ = child.kill();
                let _ = child.wait();
                panic!(
                    "supervisor did not exit within 30s\n\
                     --- stderr ---\n{}\n--- stdout ---\n{}",
                    read_captured_output(&stderr_path),
                    read_captured_output(&stdout_path)
                );
            }
            None => std::thread::sleep(Duration::from_millis(200)),
        }
    };

    // Load stdout/stderr for diagnostics regardless of pass/fail. The
    // supervisor's process exit status is INTENTIONALLY NOT asserted on
    // here — see the file header. We assert on the recorded
    // `command.v1.completed.data.exitCode` instead, which is the
    // workload's evidence rather than the supervisor's overall outcome.
    let stderr_buf = read_captured_output(&stderr_path);
    let stdout_buf = read_captured_output(&stdout_path);

    let exit_code = read_command_exit_code(&export_dir).unwrap_or_else(|e| {
        panic!(
            "FC-12 evidence missing: {e}\n\
             supervisor process status: {status:?}\n\
             --- stderr ---\n{stderr_buf}\n--- stdout ---\n{stdout_buf}"
        )
    });

    assert_eq!(
        exit_code, 0,
        "FC-12 violation: workload exited with code {exit_code} (expected 0). \
         The workload is `test \"$(cat /proc/1/comm)\" = cellos-init`; a non-zero \
         exit means PID 1's `comm` field was something other than `cellos-init`. \
         Re-check the Firecracker boot path's `/sbin/init` symlink and the \
         cellos-init binary's argv[0].\n\
         supervisor process status: {status:?}\n\
         --- stderr ---\n{stderr_buf}\n--- stdout ---\n{stdout_buf}"
    );

    // Keep the tempdir (spec, events, captured output) alive until all of
    // the evidence above has been read.
    drop(tmp);
}