cellos-host-firecracker 0.5.1

//! FC-22 — SIGKILL fallback regression after a SIGTERM-ignoring workload.
//!
//! Acceptance gate (from [Plans/firecracker-release-readiness.md]):
//!
//! > FC-22: graceful shutdown is bounded; SIGKILL is the fallback. A workload
//! > that traps SIGTERM/SIGINT and sleeps forever must NOT keep the supervisor
//! > pinned past `GRACEFUL_SHUTDOWN_TIMEOUT`. The test asserts the supervisor
//! > reaches a terminal state within `GRACEFUL_SHUTDOWN_TIMEOUT + 2 sec` of
//! > teardown start, and the resulting `cell.lifecycle.v1.destroyed` event
//! > carries `outcome: "failed"` + `terminalState: "forced"`.
//!
//! # What this file tests
//!
//! Wire-shape of the SIGKILL fallback at the host-backend layer, observed
//! through the supervisor:
//!
//!   1. Boot a Firecracker microVM running `cellos-init`.
//!   2. `cellos.argv` runs `sh -c "trap '' TERM INT; sleep 9999"` so neither
//!      SendCtrlAltDel (which the guest kernel translates to SIGINT for PID 1)
//!      nor a polite SIGTERM from the host can convince the workload to exit.
//!   3. `lifetime.ttlSeconds: 1` plus `CELLOS_FIRECRACKER_ALLOW_NO_VSOCK=1`
//!      with a short `CELLOS_FIRECRACKER_NO_VSOCK_TIMEOUT_SECS` bounds the
//!      in-VM exit wait so the supervisor enters teardown promptly. Without
//!      `ALLOW_NO_VSOCK` the supervisor would block on `wait_for_in_vm_exit`
//!      indefinitely — that's a separate failure mode (FC-19/SEAM-19), not
//!      what FC-22 covers.
//!   4. Inside `destroy()`, the host-firecracker backend issues
//!      `SendCtrlAltDel` over the Firecracker API; the guest's traps swallow
//!      the resulting signal. After [`GRACEFUL_SHUTDOWN_TIMEOUT`] the backend
//!      sends `SIGKILL` to the Firecracker VMM child process — that is the
//!      fallback this test regression-locks.
//!   5. The supervisor emits `cell.lifecycle.v1.destroyed` with
//!      `outcome: "failed"` (phase error from the bounded vsock wait) and
//!      `terminalState: "forced"` (in-VM bridge did not deliver an
//!      authenticated exit code).
//!
//! [`GRACEFUL_SHUTDOWN_TIMEOUT`]: ../../src/lib.rs (5 s as of authoring)
//!
//! # Skip-on-no-Firecracker gate
//!
//! Same gating as `crates/cellos-supervisor/tests/firecracker_e2e.rs`:
//!
//!   * `/dev/kvm` must exist;
//!   * `CELLOS_FIRECRACKER_BINARY`, `CELLOS_FIRECRACKER_KERNEL_IMAGE`,
//!     `CELLOS_FIRECRACKER_ROOTFS_IMAGE` (or the shorter `_ROOTFS` alias),
//!     and `CELLOS_FIRECRACKER_SOCKET_DIR` must be set;
//!   * the `cellos-supervisor` binary must be locatable, either via
//!     `CELLOS_SUPERVISOR_BIN` or under `<workspace>/target/{release,debug}`.
//!
//! Anything missing prints a single `firecracker_e2e: skipping — <reason>`
//! line and returns OK. CI runs the full path from
//! `.github/workflows/firecracker-e2e.yml`; local dev hosts skip cleanly.
//!
//! # Linux-only
//!
//! `cellos-host-firecracker` is Linux-only at the crate level (KVM, vsock,
//! TAP, nftables); the rest of the workspace stubs the type for non-Linux
//! composition. Mirroring the gate used by `fc14_capbnd_empty.rs` and
//! `fc19_vsock_exit_ack.rs` keeps Windows/macOS authoring legs green.

#![cfg(target_os = "linux")]

use std::fs::{self, File};
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::time::{Duration, Instant};

/// Required Firecracker env vars. Missing any of them is a skip, not a
/// failure — local dev machines won't have them, CI does.
///
/// `CELLOS_FIRECRACKER_ROOTFS_IMAGE` may also be satisfied through the
/// shorter `CELLOS_FIRECRACKER_ROOTFS` alias: `handle_rootfs_alias` copies
/// the alias into the long name before this list is checked.
const REQUIRED_ENV: &[&str] = &[
    "CELLOS_FIRECRACKER_BINARY",
    "CELLOS_FIRECRACKER_KERNEL_IMAGE",
    "CELLOS_FIRECRACKER_ROOTFS_IMAGE",
    "CELLOS_FIRECRACKER_SOCKET_DIR",
];

/// Mirror (in seconds) of the constant in `cellos-host-firecracker::lib`.
/// Replicated here (not re-exported from the crate) so the regression budget
/// is visible at the assertion call site — if the production constant moves,
/// this number must move with it; the duplication is a deliberate tripwire,
/// not a leak.
const GRACEFUL_SHUTDOWN_TIMEOUT_SECS: u64 = 5;

/// Slack budget, in seconds, on top of `GRACEFUL_SHUTDOWN_TIMEOUT`. The
/// brief (FC-22) names ε = 2 s; that covers the API round-trip for
/// `SendCtrlAltDel`, scheduling jitter on a busy CI runner, and the
/// post-SIGKILL `wait()` + JSONL flush. Going tighter than 2 s flakes; going
/// wider hides a real regression.
const TEARDOWN_SLACK_SECS: u64 = 2;

/// Bounded wait, in seconds, the supervisor uses to give up on the vsock
/// exit-code bridge so destroy() runs in finite time. The test passes this
/// value to the supervisor as `CELLOS_FIRECRACKER_NO_VSOCK_TIMEOUT_SECS`.
/// 2 s is generous compared to real (sub-millisecond) round-trips and tight
/// enough that the surrounding budget remains dominated by
/// `GRACEFUL_SHUTDOWN_TIMEOUT` rather than in-VM-wait latency.
const NO_VSOCK_TIMEOUT_SECS: u64 = 2;

/// Per-test wall-clock ceiling for the whole supervisor process. It has to
/// cover VM boot, the bounded vsock wait (`NO_VSOCK_TIMEOUT_SECS`),
/// `GRACEFUL_SHUTDOWN_TIMEOUT_SECS`, the teardown slack, and the JSONL
/// export. 30 s mirrors the timeout in `firecracker_e2e.rs`; the
/// fine-grained "≤7 s from teardown start" assertion is computed
/// separately from JSONL event timestamps below.
const SUPERVISOR_DEADLINE: Duration = Duration::from_secs(30);

/// Writes the one-line skip notice to stderr, using the same
/// `firecracker_e2e: skipping — <reason>` shape as the `firecracker_e2e`
/// helper. The caller is responsible for returning from the test afterwards.
fn skip(reason: &str) {
    eprintln!("firecracker_e2e: skipping — {}", reason);
}

/// Locate the `cellos-supervisor` executable this test should spawn.
///
/// The `cellos-host-firecracker` crate has no binary of its own, so
/// `CARGO_BIN_EXE_*` is not set inside this test. Lookup order:
///
///   1. `CELLOS_SUPERVISOR_BIN` (the documented override), accepted only if
///      it names an existing regular file;
///   2. `<workspace>/target/release/cellos-supervisor`, then
///      `<workspace>/target/debug/cellos-supervisor` — the same layout
///      `scripts/firecracker/ci-smoke-test.sh` relies on.
///
/// Returns `None` when no candidate exists, or when the workspace root
/// cannot be derived from `CARGO_MANIFEST_DIR`.
fn supervisor_exe() -> Option<PathBuf> {
    let from_override = std::env::var_os("CELLOS_SUPERVISOR_BIN")
        .map(PathBuf::from)
        .filter(|bin| bin.is_file());
    if from_override.is_some() {
        return from_override;
    }

    // CARGO_MANIFEST_DIR is this crate's directory; the workspace root sits
    // two levels above it.
    let target_dir = Path::new(env!("CARGO_MANIFEST_DIR"))
        .parent()?
        .parent()?
        .join("target");

    ["release", "debug"]
        .iter()
        .map(|profile| target_dir.join(profile).join("cellos-supervisor"))
        .find(|bin| bin.is_file())
}

/// Make the short `CELLOS_FIRECRACKER_ROOTFS` alias visible under the long
/// `CELLOS_FIRECRACKER_ROOTFS_IMAGE` name — same logic as
/// `firecracker_e2e.rs`. The runner docs use the long form; the lane-D task
/// spec used the short form. Both must work so this test survives runner
/// reshuffles.
///
/// The long form always wins: the alias is copied only when the long
/// variable is unset. When neither is set nothing changes.
fn handle_rootfs_alias() {
    if std::env::var_os("CELLOS_FIRECRACKER_ROOTFS_IMAGE").is_some() {
        return;
    }
    if let Some(alias) = std::env::var_os("CELLOS_FIRECRACKER_ROOTFS") {
        std::env::set_var("CELLOS_FIRECRACKER_ROOTFS_IMAGE", alias);
    }
}

/// Gather every `.jsonl` file found anywhere beneath `dir`.
///
/// The supervisor exports per-cell event files under
/// `<export_dir>/<cell_id>/...jsonl`; the exact subdirectory layout is an
/// implementation detail, so the whole tree is walked rather than encoding
/// it. Directories that cannot be read (including a missing `dir`) and
/// entries that fail to enumerate are silently skipped. The order of the
/// returned paths follows directory enumeration order and is not sorted.
fn collect_jsonl(dir: &Path) -> Vec<PathBuf> {
    let mut found = Vec::new();
    // Explicit work stack instead of recursion.
    let mut pending = vec![dir.to_path_buf()];
    while let Some(current) = pending.pop() {
        if let Ok(listing) = fs::read_dir(&current) {
            for child in listing.flatten().map(|entry| entry.path()) {
                if child.is_dir() {
                    pending.push(child);
                    continue;
                }
                let is_jsonl = matches!(
                    child.extension().and_then(|ext| ext.to_str()),
                    Some("jsonl")
                );
                if is_jsonl {
                    found.push(child);
                }
            }
        }
    }
    found
}

/// Load the JSONL files in `paths` and flatten them into
/// `(type, time, data)` tuples, keeping the order of `paths` and the line
/// order within each file.
///
/// Tolerance rules — a partial flush at SIGKILL is plausible, and the test
/// would rather succeed on the events that did land:
///
///   * files that cannot be read are skipped;
///   * blank lines and lines that are not valid JSON are skipped;
///   * a missing or non-string `type` / `time` becomes the empty string;
///   * a missing `data` becomes JSON `null`.
fn read_events(paths: &[PathBuf]) -> Vec<(String, String, serde_json::Value)> {
    /// String field of the envelope, or `""` when absent / not a string.
    fn text_field(envelope: &serde_json::Value, key: &str) -> String {
        envelope
            .get(key)
            .and_then(|field| field.as_str())
            .unwrap_or("")
            .to_string()
    }

    let mut parsed = Vec::new();
    for path in paths {
        if let Ok(contents) = fs::read_to_string(path) {
            let envelopes = contents
                .lines()
                .map(str::trim)
                .filter(|record| !record.is_empty())
                .filter_map(|record| serde_json::from_str::<serde_json::Value>(record).ok());
            for envelope in envelopes {
                let payload = envelope
                    .get("data")
                    .cloned()
                    .unwrap_or(serde_json::Value::Null);
                parsed.push((
                    text_field(&envelope, "type"),
                    text_field(&envelope, "time"),
                    payload,
                ));
            }
        }
    }
    parsed
}

/// Parse an RFC 3339 timestamp into nanoseconds since the Unix epoch (UTC).
///
/// The supervisor emits `chrono::Utc::now().to_rfc3339()`, which has the
/// shape `YYYY-MM-DDTHH:MM:SS.fff+00:00` (or a `Z` suffix). Pulling in a
/// date/time crate just to subtract two such strings would add a dependency
/// to this test, so this is a small hand-rolled parser for that emission
/// shape.
///
/// The result is an `i128` so callers can subtract two instants without
/// overflow or underflow concerns.
///
/// Returns `None` when the input is not of the expected shape, or when a
/// field is out of range: month outside 1–12, day not valid for that month
/// and year, hour above 23, minute above 59, second above 60 (60 is allowed
/// for a leap second), or a UTC offset beyond `±23:59`. Failing closed
/// matters here — a bogus instant would make the teardown-budget assertion
/// pass or fail for the wrong reason, whereas `None` sends the caller down
/// its explicit "could not derive teardown window" path.
///
/// Fractional seconds are honoured to nanosecond precision (extra digits
/// are truncated): a SIGKILL fallback that fires in 4.999 s must not be
/// rounded to 5 s and fail the bound by a hair.
fn parse_rfc3339_to_unix_nanos(ts: &str) -> Option<i128> {
    let (date_part, time_part_with_tz) = ts.split_once('T')?;

    // --- Calendar date -------------------------------------------------
    let mut dp = date_part.split('-');
    let y: i32 = dp.next()?.parse().ok()?;
    let mo: u32 = dp.next()?.parse().ok()?;
    let d: u32 = dp.next()?.parse().ok()?;

    let is_leap_year = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
    let days_in_month = match mo {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap_year => 29,
        2 => 28,
        _ => return None,
    };
    if d == 0 || d > days_in_month {
        return None;
    }

    // --- Time of day and timezone designator ----------------------------
    // The date was already split off at 'T', so any '+', '-' or 'Z' left in
    // this half can only belong to the timezone designator.
    let (hms_frac, tz) = match time_part_with_tz.rfind(['+', '-', 'Z']) {
        Some(idx) => time_part_with_tz.split_at(idx),
        None => (time_part_with_tz, ""),
    };

    let mut tp = hms_frac.split(':');
    let h: u32 = tp.next()?.parse().ok()?;
    let mi: u32 = tp.next()?.parse().ok()?;
    let s_with_frac = tp.next()?;
    let (sec_str, frac_str) = match s_with_frac.split_once('.') {
        Some((whole, frac)) => (whole, frac),
        None => (s_with_frac, ""),
    };
    let s: u32 = sec_str.parse().ok()?;
    if h > 23 || mi > 59 || s > 60 {
        return None;
    }

    // Pad/truncate fractional seconds to exactly 9 digits (nanoseconds).
    // Nine decimal digits max out at 999_999_999, which fits in a u32.
    let mut nanos: u32 = 0;
    let mut frac_digits = frac_str.chars();
    for _ in 0..9 {
        let digit = frac_digits.next().unwrap_or('0').to_digit(10)?;
        nanos = nanos * 10 + digit;
    }

    // --- Days since the Unix epoch ---------------------------------------
    // Howard Hinnant's `days_from_civil` (public domain). The year is
    // shifted so it starts on March 1, which puts the leap day at the end
    // of the shifted year. Done in i64 with Euclidean division so extreme
    // years cannot overflow.
    let yy = i64::from(y) - i64::from(mo <= 2);
    let era = yy.div_euclid(400);
    let yoe = yy.rem_euclid(400);
    let shifted_month = if mo > 2 {
        i64::from(mo) - 3
    } else {
        i64::from(mo) + 9
    };
    let doy = (153 * shifted_month + 2) / 5 + i64::from(d) - 1;
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    let days_since_epoch = era * 146_097 + doe - 719_468;

    let secs = i128::from(days_since_epoch) * 86_400
        + i128::from(h) * 3_600
        + i128::from(mi) * 60
        + i128::from(s);
    let mut total_nanos = secs * 1_000_000_000 + i128::from(nanos);

    // --- UTC offset -------------------------------------------------------
    // `Z` (or no designator at all) means the wall time is already UTC.
    if !tz.is_empty() && tz != "Z" {
        // Wall time at offset +HH:MM is *ahead* of UTC, so converting to UTC
        // subtracts the offset; a '-' offset adds it.
        let sign: i128 = if tz.starts_with('+') { -1 } else { 1 };
        let mut bp = tz[1..].split(':');
        let oh: u32 = bp.next()?.parse().ok()?;
        let om: u32 = bp.next().unwrap_or("0").parse().ok()?;
        // Also catches colon-less forms such as `+0530`, which would
        // otherwise be read as an offset of 530 hours.
        if oh > 23 || om > 59 {
            return None;
        }
        total_nanos += sign * (i128::from(oh) * 3_600 + i128::from(om) * 60) * 1_000_000_000;
    }

    Some(total_nanos)
}

/// Drain `pipe` to EOF on a background thread and hand back everything that
/// was read, decoded lossily as UTF-8.
///
/// The supervisor's stdout/stderr are OS pipes with a bounded kernel buffer.
/// If nobody reads them while the child runs, a chatty child blocks in
/// `write()` once the buffer fills and never exits — which this test would
/// then misreport as a SIGKILL-fallback hang. Lossy decoding keeps the
/// diagnostics usable even if the child emits a stray non-UTF-8 byte.
fn drain_pipe_in_background<R>(pipe: Option<R>) -> std::thread::JoinHandle<String>
where
    R: std::io::Read + Send + 'static,
{
    std::thread::spawn(move || {
        let mut bytes = Vec::new();
        if let Some(mut reader) = pipe {
            // Best-effort: keep whatever was read before an I/O error.
            let _ = reader.read_to_end(&mut bytes);
        }
        String::from_utf8_lossy(&bytes).into_owned()
    })
}

/// FC-22 regression: a workload that ignores SIGTERM/SIGINT must not pin the
/// supervisor past `GRACEFUL_SHUTDOWN_TIMEOUT`.
///
/// Skips (prints a `firecracker_e2e: skipping — …` line and returns) when
/// KVM, the Firecracker env vars / artifacts, or the supervisor binary are
/// unavailable. Otherwise it runs the supervisor against a cell whose
/// workload traps TERM/INT and asserts:
///
///   1. the `VM did not exit gracefully` marker appears on stderr;
///   2. the `cell.lifecycle.v1.destroyed` event carries `outcome: "failed"`
///      and `terminalState: "forced"`;
///   3. the teardown window derived from JSONL timestamps fits inside
///      `GRACEFUL_SHUTDOWN_TIMEOUT_SECS + TEARDOWN_SLACK_SECS`.
#[test]
fn fc22_sigkill_fallback_after_sigterm_ignoring_workload() {
    const DESTROYED_EVENT_TYPE: &str = "dev.cellos.events.cell.lifecycle.v1.destroyed";

    // Precondition 1: KVM device.
    if !Path::new("/dev/kvm").exists() {
        skip("/dev/kvm not present (no KVM on this host)");
        return;
    }

    // Bridge ROOTFS aliases before checking required vars.
    handle_rootfs_alias();

    // Precondition 2: required env vars.
    let missing: Vec<&str> = REQUIRED_ENV
        .iter()
        .copied()
        .filter(|k| std::env::var_os(k).is_none())
        .collect();
    if !missing.is_empty() {
        skip(&format!("missing env: {}", missing.join(", ")));
        return;
    }

    // Precondition 3: required files exist on disk. Values are read with
    // `var_os` (as in the presence check above) so a path that is not valid
    // UTF-8 is handled as a path rather than panicking the test.
    for key in [
        "CELLOS_FIRECRACKER_BINARY",
        "CELLOS_FIRECRACKER_KERNEL_IMAGE",
        "CELLOS_FIRECRACKER_ROOTFS_IMAGE",
    ] {
        let path = PathBuf::from(std::env::var_os(key).expect("checked above"));
        if !path.exists() {
            skip(&format!("{key}={} does not exist on disk", path.display()));
            return;
        }
    }

    // Precondition 4: socket dir exists (or is creatable).
    let sock_dir =
        PathBuf::from(std::env::var_os("CELLOS_FIRECRACKER_SOCKET_DIR").expect("checked above"));
    if !sock_dir.is_dir() && fs::create_dir_all(&sock_dir).is_err() {
        skip(&format!("socket dir {} not creatable", sock_dir.display()));
        return;
    }

    // Precondition 5: supervisor binary is reachable.
    let exe = match supervisor_exe() {
        Some(e) => e,
        None => {
            skip(
                "supervisor binary not found — set CELLOS_SUPERVISOR_BIN or build cellos-supervisor",
            );
            return;
        }
    };

    // Build the cell spec. The workload is a shell command that traps
    // SIGTERM/SIGINT with the empty action and then sleeps indefinitely.
    // BusyBox's `sleep` accepts `9999` seconds; `sleep infinity` is a
    // GNU-ism that does NOT round-trip on Alpine's /bin/sh.
    let tmp = tempfile::tempdir().expect("tempdir");
    let spec_path = tmp.path().join("cell.json");
    let spec_json = r#"{
  "apiVersion": "cellos.io/v1",
  "kind": "ExecutionCell",
  "spec": {
    "id": "fc-22-sigkill",
    "authority": { "secretRefs": [], "egressRules": [] },
    "lifetime": { "ttlSeconds": 1 },
    "run": {
      "argv": ["/bin/sh", "-c", "trap '' TERM INT; sleep 9999"],
      "limits": { "memoryMaxBytes": 67108864 }
    }
  }
}"#;
    File::create(&spec_path)
        .and_then(|mut f| f.write_all(spec_json.as_bytes()))
        .expect("write cell spec");

    // Per-run export dir so we can read and assert event JSONL.
    let export_dir = tmp.path().join("events");
    fs::create_dir_all(&export_dir).expect("mkdir export dir");

    // Build the supervisor command. Two FC-22-specific knobs on top of the
    // normal e2e env:
    //
    //  * CELLOS_FIRECRACKER_ALLOW_NO_VSOCK=1 + a short
    //    CELLOS_FIRECRACKER_NO_VSOCK_TIMEOUT_SECS bound the in-VM exit-code
    //    wait. Without this, the supervisor would block forever on
    //    `wait_for_in_vm_exit` because the workload never delivers an exit
    //    code over the vsock bridge — SIGKILL fallback inside `destroy()`
    //    is the path under test, not the in-VM-wait timeout itself.
    //
    //  * CELL_OS_USE_NOOP_SINK=1 disables the NATS sink so the test does
    //    not require an event-bus.
    let mut cmd = Command::new(&exe);
    cmd.env("CELL_OS_USE_NOOP_SINK", "1")
        .env("CELLOS_CELL_BACKEND", "firecracker")
        .env("CELLOS_EXPORT_DIR", &export_dir)
        .env("RUST_BACKTRACE", "1")
        .arg(&spec_path)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());

    // Inherit every CELLOS_FIRECRACKER_* var the harness set up so the
    // supervisor sees the same kernel / rootfs / socket layout the
    // upstream `firecracker_e2e` test does.
    for (k, v) in std::env::vars_os() {
        if k.to_string_lossy().starts_with("CELLOS_FIRECRACKER_") {
            cmd.env(&k, &v);
        }
    }

    // Set the FC-22-specific overrides AFTER the inherit loop so they
    // win even if the surrounding harness exported a stale value.
    cmd.env("CELLOS_FIRECRACKER_ALLOW_NO_VSOCK", "1");
    cmd.env(
        "CELLOS_FIRECRACKER_NO_VSOCK_TIMEOUT_SECS",
        NO_VSOCK_TIMEOUT_SECS.to_string(),
    );

    eprintln!(
        "fc22_sigkill_fallback: spawning supervisor {}",
        exe.display()
    );
    let supervisor_started = Instant::now();
    let mut child = cmd.spawn().expect("spawn supervisor");

    // Start draining both pipes immediately. Reading only after the child
    // has exited can deadlock: once the pipe buffer fills, the child blocks
    // on write and never reaches exit, and the deadline below would then
    // blame the SIGKILL fallback for a hang the test itself caused.
    let stderr_reader = drain_pipe_in_background(child.stderr.take());
    let stdout_reader = drain_pipe_in_background(child.stdout.take());

    // Poll-loop wait. The full path (boot → run → bounded vsock wait →
    // teardown SIGKILL fallback → exit) should complete well under
    // SUPERVISOR_DEADLINE on a healthy runner; that ceiling exists so a
    // wedged test fails loudly rather than hanging the whole CI leg.
    let deadline = supervisor_started + SUPERVISOR_DEADLINE;
    let status = loop {
        match child.try_wait().expect("try_wait") {
            Some(status) => break status,
            None if Instant::now() >= deadline => {
                let _ = child.kill();
                let _ = child.wait();
                // The reader threads are deliberately not joined here: a
                // surviving descendant could still hold the pipes open, and
                // this path must fail promptly.
                panic!(
                    "supervisor did not exit within {:?} — SIGKILL fallback regression \
                     (or a deeper hang in `destroy()`); FC-22 budget is \
                     GRACEFUL_SHUTDOWN_TIMEOUT ({GRACEFUL_SHUTDOWN_TIMEOUT_SECS}s) + \
                     {TEARDOWN_SLACK_SECS}s slack from teardown start",
                    SUPERVISOR_DEADLINE
                );
            }
            None => std::thread::sleep(Duration::from_millis(100)),
        }
    };

    // Collect stderr/stdout for diagnostics on failure AND for the
    // "VM did not exit gracefully — sending SIGKILL" warning check below.
    // A reader thread that panicked contributes an empty capture.
    let stderr_buf = stderr_reader.join().unwrap_or_default();
    let stdout_buf = stdout_reader.join().unwrap_or_default();

    // The supervisor is EXPECTED to exit non-zero — the run failed (in-VM
    // bridge timeout, then forced teardown). Don't assert success here;
    // assert the lifecycle event shape instead.
    eprintln!(
        "fc22_sigkill_fallback: supervisor exit status = {status:?} \
         (non-zero is expected for the failed-then-forced path)"
    );

    // Assertion 1: the SIGKILL fallback warning must appear in stderr.
    // This is the production marker that `destroy()` reached the
    // post-`GRACEFUL_SHUTDOWN_TIMEOUT` branch — without it, the test
    // proves nothing about the SIGKILL path.
    let sigkill_marker = "VM did not exit gracefully";
    assert!(
        stderr_buf.contains(sigkill_marker),
        "expected `{sigkill_marker}` in supervisor stderr — SIGKILL fallback did not fire \
         (FC-22 regression). \n--- stderr ---\n{stderr_buf}\n--- stdout ---\n{stdout_buf}"
    );

    // Assertion 2: locate the lifecycle.v1.destroyed event in the JSONL
    // export and check its outcome + terminalState fields.
    let jsonl_paths = collect_jsonl(&export_dir);
    assert!(
        !jsonl_paths.is_empty(),
        "no JSONL files under {} — the supervisor did not export any events. \
         \n--- stderr ---\n{stderr_buf}",
        export_dir.display()
    );

    let events = read_events(&jsonl_paths);
    let destroyed = events.iter().find(|(ty, _, _)| ty == DESTROYED_EVENT_TYPE);
    let (_, destroyed_time, destroyed_data) = destroyed.unwrap_or_else(|| {
        panic!(
            "no `cell.lifecycle.v1.destroyed` event found in JSONL export. \
             Got types: {:?}\n--- stderr ---\n{}",
            events.iter().map(|(t, _, _)| t).collect::<Vec<_>>(),
            stderr_buf
        )
    });

    let outcome = destroyed_data
        .get("outcome")
        .and_then(|v| v.as_str())
        .unwrap_or("");
    assert_eq!(
        outcome, "failed",
        "destroyed.outcome must be `failed` (workload trapped TERM/INT and was force-killed); \
         got {outcome:?}. data = {destroyed_data}"
    );

    let terminal_state = destroyed_data
        .get("terminalState")
        .and_then(|v| v.as_str())
        .unwrap_or("");
    assert_eq!(
        terminal_state, "forced",
        "destroyed.terminalState must be `forced` (in-VM bridge did not deliver an \
         authenticated exit code); got {terminal_state:?}. data = {destroyed_data}"
    );

    // Assertion 3: teardown-window budget. The supervisor calls
    // `host.destroy()` immediately after the run loop ends (no event is
    // emitted between the last in-run event and destroy itself), so we
    // approximate "teardown start" as the timestamp of the latest
    // event emitted BEFORE the destroyed event. The wall-clock from
    // there to `destroyed` MUST fit inside `GRACEFUL_SHUTDOWN_TIMEOUT +
    // TEARDOWN_SLACK_SECS`. Using "latest-non-destroyed" rather than a
    // hard-coded event name keeps the test robust if the run-phase
    // event order shifts (e.g. when phase_err short-circuits
    // `command.completed`).
    //
    // Events stamped AFTER `destroyed` are excluded explicitly: they are
    // not part of the teardown window, and letting one win the `max()`
    // would produce a negative duration and a spurious failure.
    let teardown_end_ns = parse_rfc3339_to_unix_nanos(destroyed_time);
    let teardown_start_ns = teardown_end_ns.and_then(|end_ns| {
        events
            .iter()
            .filter(|(ty, _, _)| ty != DESTROYED_EVENT_TYPE)
            .filter_map(|(_, t, _)| parse_rfc3339_to_unix_nanos(t))
            .filter(|&start_ns| start_ns <= end_ns)
            .max()
    });
    match (teardown_start_ns, teardown_end_ns) {
        (Some(start_ns), Some(end_ns)) => {
            let teardown_ns = end_ns - start_ns;
            let budget_ns: i128 =
                (GRACEFUL_SHUTDOWN_TIMEOUT_SECS + TEARDOWN_SLACK_SECS) as i128 * 1_000_000_000;
            assert!(
                (0..=budget_ns).contains(&teardown_ns),
                "teardown took {} ms — must be ≤ {} ms (= GRACEFUL_SHUTDOWN_TIMEOUT \
                 {GRACEFUL_SHUTDOWN_TIMEOUT_SECS}s + slack {TEARDOWN_SLACK_SECS}s). \
                 destroyed time = {destroyed_time:?}",
                teardown_ns / 1_000_000,
                budget_ns / 1_000_000
            );
        }
        _ => {
            // Best-effort fallback: the supervisor-deadline ceiling
            // panic above already protects against an unbounded SIGKILL
            // hang. The two primary proofs (SIGKILL stderr warning +
            // outcome=failed/terminalState=forced) still hold.
            eprintln!(
                "fc22_sigkill_fallback: could not derive teardown window from JSONL \
                 timestamps; relying on the supervisor-deadline ceiling and the SIGKILL \
                 warning + destroyed-event shape assertions"
            );
        }
    }

    // Drop tmpdir last so artifacts remain readable through the asserts.
    drop(tmp);
}