use std::collections::BTreeSet;
/// Ways a lifecycle event payload can fail the FC-59 boot-failure check.
///
/// Every `Display` rendering begins with `FC-59 violation:`.
#[derive(Debug, PartialEq, Eq)]
pub enum BootFailedValidationError {
    /// The payload carries no `data` object.
    MissingData,
    /// `data.terminalState` could not be read as a string.
    MissingTerminalState,
    /// `data.terminalState` was a string other than `"forced"`.
    WrongTerminalState {
        /// The value found in the payload.
        actual: String,
    },
    /// `data.reason` could not be read as a string.
    MissingReason,
    /// `data.reason` was a string other than `"boot_failed"`.
    WrongReason {
        /// The value found in the payload.
        actual: String,
    },
}

impl std::fmt::Display for BootFailedValidationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Variants that echo the offending value are formatted and returned
        // immediately; the remaining variants resolve to a fixed message.
        let fixed_message = match self {
            Self::WrongTerminalState { actual } => {
                return write!(
                    f,
                    "FC-59 violation: data.terminalState=\"{actual}\" (expected \"forced\"). \
                     Either the supervisor mis-classified a forced teardown as clean, or the \
                     LifecycleTerminalState enum's serde representation drifted."
                );
            }
            Self::WrongReason { actual } => {
                return write!(
                    f,
                    "FC-59 violation: data.reason=\"{actual}\" (expected \"boot_failed\"). \
                     FC-50 GAP — when reason becomes a typed enum, change this validator to \
                     compare against TerminalReason::BootFailed and update the runbook \
                     cross-reference in docs/operator-runbooks.md §VM hung at /sbin/init."
                );
            }
            Self::MissingData => {
                "FC-59 violation: event payload has no `data` object — schema violation"
            }
            Self::MissingTerminalState => {
                "FC-59 violation: data.terminalState missing; the failed-boot path must \
                 emit terminalState=\"forced\" because no authenticated exit code arrived"
            }
            Self::MissingReason => {
                "FC-59 violation: data.reason missing. FC-50 GAP — when the typed reason \
                 enum lands, this check must use the typed variant; today reason is a \
                 free-form Option<&str> so absence is itself a regression."
            }
        };
        f.write_str(fixed_message)
    }
}

impl std::error::Error for BootFailedValidationError {}
/// Failure reported when firecracker PIDs that were not present before a
/// boot-failure teardown are still present afterwards.
#[derive(Debug, PartialEq, Eq)]
pub enum OrphanError {
    /// One or more unexpected PIDs remain.
    OrphansRemain {
        /// The PIDs seen after teardown that were not seen before it.
        orphan_pids: Vec<u32>,
    },
}

impl std::fmt::Display for OrphanError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The enum has a single variant, so the destructuring is irrefutable;
        // adding a variant later turns this line into a compile error.
        let Self::OrphansRemain { orphan_pids } = self;
        let n = orphan_pids.len();
        write!(
            f,
            "FC-59 violation: {n} orphan firecracker PID(s) remain after boot-failure \
             teardown: {orphan_pids:?}. The supervisor reached `complete-forced` but \
             did not reap the VMM. Re-check `FirecrackerCellBackend::destroy` and the \
             SIGKILL path in `crates/cellos-host-firecracker/src/lib.rs`."
        )
    }
}

impl std::error::Error for OrphanError {}
/// Checks that a lifecycle event payload describes a forced, boot-failed teardown.
///
/// The payload must contain a `data` object whose `terminalState` is the string
/// `"forced"` and whose `reason` is the string `"boot_failed"`. Any other fields
/// in `data` are ignored.
///
/// `_max_boot_timeout_seconds` is accepted for interface compatibility but is
/// not consulted by this function.
///
/// # Errors
///
/// * [`BootFailedValidationError::MissingData`] — `data` is absent or is not a
///   JSON object.
/// * [`BootFailedValidationError::MissingTerminalState`] — `data.terminalState`
///   is absent or is not a string.
/// * [`BootFailedValidationError::WrongTerminalState`] — `data.terminalState`
///   is a string other than `"forced"`.
/// * [`BootFailedValidationError::MissingReason`] — `data.reason` is absent or
///   is not a string.
/// * [`BootFailedValidationError::WrongReason`] — `data.reason` is a string
///   other than `"boot_failed"`.
pub fn validate_boot_failed_event(
    event_payload: &serde_json::Value,
    _max_boot_timeout_seconds: u64,
) -> Result<(), BootFailedValidationError> {
    // A `data` member that is `null`, a scalar, or an array is not a `data`
    // object. Reject it here so the diagnostic names the real problem instead
    // of falling through to a misleading "terminalState missing".
    let data = event_payload
        .get("data")
        .filter(|candidate| candidate.is_object())
        .ok_or(BootFailedValidationError::MissingData)?;

    let terminal_state = data
        .get("terminalState")
        .and_then(|v| v.as_str())
        .ok_or(BootFailedValidationError::MissingTerminalState)?;
    if terminal_state != "forced" {
        return Err(BootFailedValidationError::WrongTerminalState {
            actual: terminal_state.to_string(),
        });
    }

    let reason = data
        .get("reason")
        .and_then(|v| v.as_str())
        .ok_or(BootFailedValidationError::MissingReason)?;
    if reason != "boot_failed" {
        return Err(BootFailedValidationError::WrongReason {
            actual: reason.to_string(),
        });
    }

    Ok(())
}
/// Verifies that no PID appears in `child_pids_after` unless it was already in
/// `child_pids_before`.
///
/// Both inputs are treated as sets: ordering and duplicates are irrelevant.
///
/// # Errors
///
/// Returns [`OrphanError::OrphansRemain`] listing, in ascending order and
/// without duplicates, every PID present only in `child_pids_after`.
pub fn validate_no_orphan_firecracker_after_boot_failure(
    child_pids_before: &[u32],
    child_pids_after: &[u32],
) -> Result<(), OrphanError> {
    let known: BTreeSet<u32> = child_pids_before.iter().copied().collect();

    // Routing the "after" PIDs through a BTreeSet sorts and de-duplicates them
    // before the ones that were already known are dropped.
    let orphan_pids: Vec<u32> = child_pids_after
        .iter()
        .copied()
        .collect::<BTreeSet<u32>>()
        .into_iter()
        .filter(|pid| !known.contains(pid))
        .collect();

    if orphan_pids.is_empty() {
        Ok(())
    } else {
        Err(OrphanError::OrphansRemain { orphan_pids })
    }
}
/// A payload with `terminalState="forced"` and `reason="boot_failed"` is accepted.
#[test]
fn validate_boot_failed_event_passes_on_canonical_payload() {
    let canonical_data = serde_json::json!({
        "cellId": "fc59-canonical",
        "specId": "fc59",
        "outcome": "failed",
        "terminalState": "forced",
        "reason": "boot_failed",
    });
    let canonical_event = serde_json::json!({ "data": canonical_data });

    let verdict = validate_boot_failed_event(&canonical_event, 30);
    verdict.expect("canonical FC-59 payload must satisfy the validator");
}
/// The wire string of `LifecycleReason::BootFailed` is `"boot_failed"`, and a
/// payload built from that wire string is accepted by the validator.
#[test]
fn validate_boot_failed_event_passes_on_fc50_typed_lifecycle_reason_boot_failed() {
    let reason_variant = cellos_core::LifecycleReason::BootFailed;

    // Pin the wire form first, so a drift in the enum's string form fails here
    // with a dedicated message rather than inside the validator.
    assert_eq!(
        reason_variant.as_wire_str(),
        "boot_failed",
        "FC-50 typed enum's wire form for BootFailed must equal the FC-59 contract"
    );

    let typed_event = serde_json::json!({
        "data": {
            "cellId": "fc59-typed",
            "specId": "fc59",
            "outcome": "failed",
            "terminalState": "forced",
            "reason": reason_variant.as_wire_str(),
        }
    });
    let verdict = validate_boot_failed_event(&typed_event, 30);
    verdict.expect("typed-variant emission of LifecycleReason::BootFailed must pass FC-59");
}
/// A payload without a `data` member is rejected as `MissingData`.
#[test]
fn validate_boot_failed_event_fails_when_data_missing() {
    let envelope_only =
        serde_json::json!({"type": "dev.cellos.events.cell.lifecycle.v1.destroyed"});

    let outcome = validate_boot_failed_event(&envelope_only, 30);
    let err = outcome.expect_err("payload without `data` must fail");

    assert_eq!(err, BootFailedValidationError::MissingData);
    let rendered = err.to_string();
    assert!(rendered.contains("schema violation"));
}
/// A `data` object without `terminalState` is rejected as `MissingTerminalState`.
#[test]
fn validate_boot_failed_event_fails_when_terminal_state_missing() {
    let data_without_state = serde_json::json!({
        "cellId": "fc59-no-terminal",
        "outcome": "failed",
        "reason": "boot_failed",
    });
    let event = serde_json::json!({ "data": data_without_state });

    let failure = validate_boot_failed_event(&event, 30).expect_err("missing terminalState");
    assert_eq!(failure, BootFailedValidationError::MissingTerminalState);
}
/// `terminalState="clean"` is rejected, and the message names both the
/// observed and the expected value.
#[test]
fn validate_boot_failed_event_fails_when_terminal_state_is_clean() {
    let event = serde_json::json!({
        "data": {
            "cellId": "fc59-mis-clean",
            "outcome": "failed",
            "terminalState": "clean",
            "reason": "boot_failed",
        }
    });

    let failure = validate_boot_failed_event(&event, 30).expect_err("clean must trip the gate");

    let expected = BootFailedValidationError::WrongTerminalState {
        actual: String::from("clean"),
    };
    assert_eq!(failure, expected);

    let rendered = failure.to_string();
    assert!(rendered.contains("\"clean\""));
    assert!(rendered.contains("forced"));
}
/// An arbitrary `terminalState` string is rejected and echoed back in the error.
#[test]
fn validate_boot_failed_event_fails_when_terminal_state_is_unrecognised() {
    let data = serde_json::json!({
        "cellId": "fc59-unknown-state",
        "terminalState": "panicked",
        "reason": "boot_failed",
    });
    let event = serde_json::json!({ "data": data });

    let failure = validate_boot_failed_event(&event, 30).expect_err("unknown state");

    let expected = BootFailedValidationError::WrongTerminalState {
        actual: String::from("panicked"),
    };
    assert_eq!(failure, expected);
}
/// A `data` object without `reason` is rejected as `MissingReason`, and the
/// message carries the `FC-50 GAP` marker.
#[test]
fn validate_boot_failed_event_fails_when_reason_missing() {
    let data_without_reason = serde_json::json!({
        "cellId": "fc59-no-reason",
        "terminalState": "forced",
    });
    let event = serde_json::json!({ "data": data_without_reason });

    let outcome = validate_boot_failed_event(&event, 30);
    let err = outcome.expect_err("missing reason");

    assert_eq!(err, BootFailedValidationError::MissingReason);
    let mentions_gap = err.to_string().contains("FC-50 GAP");
    assert!(
        mentions_gap,
        "error must surface the FC-50 dependency for grep-ability; got: {err}"
    );
}
/// `reason="oom"` is rejected as `WrongReason`; the message names the observed
/// value, the expected value, and the `FC-50 GAP` marker.
#[test]
fn validate_boot_failed_event_fails_when_reason_is_oom() {
    let event = serde_json::json!({
        "data": {
            "cellId": "fc59-mis-oom",
            "terminalState": "forced",
            "reason": "oom",
        }
    });

    let outcome = validate_boot_failed_event(&event, 30);
    let err = outcome.expect_err("oom must trip the gate");

    let expected = BootFailedValidationError::WrongReason {
        actual: String::from("oom"),
    };
    assert_eq!(err, expected);

    let rendered = err.to_string();
    assert!(rendered.contains("\"oom\""));
    assert!(rendered.contains("boot_failed"));
    assert!(
        rendered.contains("FC-50 GAP"),
        "wrong-reason error must surface the FC-50 dependency; got: {err}"
    );
}
/// A free-form `reason` string is rejected as `WrongReason` and preserved verbatim.
#[test]
fn validate_boot_failed_event_fails_when_reason_is_freeform_string() {
    let event = serde_json::json!({
        "data": {
            "cellId": "fc59-freeform",
            "terminalState": "forced",
            "reason": "kernel ran out of memory or something",
        }
    });

    let failure = validate_boot_failed_event(&event, 30).expect_err("free-form reason");

    // Extract the echoed value first; any other variant is a test failure.
    let actual = match failure {
        BootFailedValidationError::WrongReason { actual } => actual,
        other => panic!("expected WrongReason, got {other:?}"),
    };
    assert_eq!(actual, "kernel ran out of memory or something");
}
/// Fields in `data` beyond `terminalState` and `reason` do not affect the verdict.
#[test]
fn validate_boot_failed_event_passes_with_extra_fields() {
    let data_with_extras = serde_json::json!({
        "cellId": "fc59-with-extras",
        "specId": "fc59",
        "ttlSeconds": 60,
        "runId": "urn:cellos:run:abc123",
        "correlation": {"traceId": "trace-xyz"},
        "outcome": "failed",
        "terminalState": "forced",
        "reason": "boot_failed",
        "futureFieldX": "ignored",
    });
    let event = serde_json::json!({ "data": data_with_extras });

    let verdict = validate_boot_failed_event(&event, 30);
    verdict.expect("schema growth must not trip the FC-59 validator");
}
/// PIDs that disappear between the two snapshots are not orphans.
#[test]
fn validate_no_orphan_firecracker_passes_when_after_is_subset_of_before() {
    let pids_before = [100, 200, 300, 999];
    let pids_after = [100, 200, 300];

    let verdict = validate_no_orphan_firecracker_after_boot_failure(&pids_before, &pids_after);
    verdict.expect("subset-after must pass");
}
/// The same PIDs in a different order produce no orphans.
#[test]
fn validate_no_orphan_firecracker_passes_when_sets_match() {
    let pids_before = [1, 2, 3];
    let pids_after = [3, 2, 1];

    let verdict = validate_no_orphan_firecracker_after_boot_failure(&pids_before, &pids_after);
    verdict.expect("equal sets must pass regardless of order");
}
/// Two empty snapshots produce no orphans.
#[test]
fn validate_no_orphan_firecracker_passes_on_empty_inputs() {
    let no_pids: &[u32] = &[];

    let verdict = validate_no_orphan_firecracker_after_boot_failure(no_pids, no_pids);
    verdict.expect("empty/empty must pass");
}
/// Repeated PIDs in either snapshot do not create false orphans.
#[test]
fn validate_no_orphan_firecracker_tolerates_duplicates() {
    let pids_before = [10, 10, 20];
    let pids_after = [10, 20, 20, 10];

    let verdict = validate_no_orphan_firecracker_after_boot_failure(&pids_before, &pids_after);
    verdict.expect("duplicates must not produce false orphans");
}
/// One new PID is reported as an orphan, and the message names the PID, the
/// `FC-59 violation` phrase, and the `complete-forced` phase.
#[test]
fn validate_no_orphan_firecracker_fails_when_one_orphan_remains() {
    let pids_before = [100, 200];
    let pids_after = [100, 200, 999];

    let failure = validate_no_orphan_firecracker_after_boot_failure(&pids_before, &pids_after)
        .expect_err("orphan must trip the gate");

    let expected = OrphanError::OrphansRemain {
        orphan_pids: vec![999],
    };
    assert_eq!(failure, expected);

    let msg = failure.to_string();
    let echoes_pid = msg.contains("999");
    assert!(echoes_pid, "error must echo the orphan PID; got: {msg}");
    let uses_canonical_phrase = msg.contains("FC-59 violation");
    assert!(
        uses_canonical_phrase,
        "error must use the canonical phrase; got: {msg}"
    );
    let names_phase = msg.contains("complete-forced");
    assert!(
        names_phase,
        "error must reference the lifecycle phase; got: {msg}"
    );
}
/// Several new PIDs are all reported, in ascending order.
#[test]
fn validate_no_orphan_firecracker_fails_with_multiple_orphans() {
    let pids_before = [100];
    let pids_after = [100, 555, 666];

    let failure = validate_no_orphan_firecracker_after_boot_failure(&pids_before, &pids_after)
        .expect_err("multiple orphans must trip the gate");

    // `OrphanError` has a single variant, so this pattern cannot fail to match.
    let OrphanError::OrphansRemain { orphan_pids } = failure;
    assert_eq!(orphan_pids, vec![555, 666]);
}
/// With an empty "before" snapshot, any PID seen afterwards is an orphan.
#[test]
fn validate_no_orphan_firecracker_fails_when_pid_appears_only_after() {
    let pids_before: [u32; 0] = [];
    let pids_after = [42];

    let failure = validate_no_orphan_firecracker_after_boot_failure(&pids_before, &pids_after)
        .expect_err("solo-after must trip the gate");

    let expected = OrphanError::OrphansRemain {
        orphan_pids: vec![42],
    };
    assert_eq!(failure, expected);
}
/// Writes a 4096-byte file of `0xFF` bytes named `fc59-corrupt-kernel` inside
/// `dir` and returns its path.
///
/// # Panics
///
/// Panics if the file cannot be written.
#[cfg(target_os = "linux")]
fn write_corrupt_kernel(dir: &std::path::Path) -> std::path::PathBuf {
    const CORRUPT_KERNEL_LEN: usize = 4096;

    let target = dir.join("fc59-corrupt-kernel");
    std::fs::write(&target, [0xFFu8; CORRUPT_KERNEL_LEN])
        .expect("write_corrupt_kernel: failed to write 4 KiB tempfile");
    target
}
/// Lists the PIDs under `/proc` whose `comm` file, once trimmed, reads
/// `firecracker`.
///
/// This is best-effort: if `/proc` cannot be listed the result is empty, and
/// any entry that is unreadable, not named with a `u32`, or whose `comm` file
/// cannot be read is skipped.
#[cfg(target_os = "linux")]
fn snapshot_firecracker_pids() -> Vec<u32> {
    let Ok(proc_entries) = std::fs::read_dir("/proc") else {
        return Vec::new();
    };

    proc_entries
        .flatten()
        .filter_map(|entry| {
            // Only directory names that parse as a number are candidate PIDs.
            let pid = entry.file_name().to_str()?.parse::<u32>().ok()?;
            // A failed read is skipped (e.g. the process may have exited
            // between the listing and the read).
            let comm = std::fs::read_to_string(entry.path().join("comm")).ok()?;
            if comm.trim() == "firecracker" {
                Some(pid)
            } else {
                None
            }
        })
        .collect()
}
/// End-to-end FC-59 gate; ignored by default and Linux-only.
///
/// As written, this test does not launch firecracker itself. It:
/// 1. snapshots the firecracker PIDs currently visible under `/proc`,
/// 2. writes a 4 KiB all-`0xFF` kernel image into a temp dir and re-reads it
///    to confirm its contents,
/// 3. loads a captured event JSON from the file named by
///    `CELLOS_FIRECRACKER_FC59_EVENT_FIXTURE` and runs
///    `validate_boot_failed_event` on it,
/// 4. snapshots the firecracker PIDs again and runs
///    `validate_no_orphan_firecracker_after_boot_failure`.
///
/// Every failure is reported by panicking with the relevant context.
#[cfg(target_os = "linux")]
#[test]
#[ignore = "requires firecracker; deliberately uses corrupt kernel — run via firecracker-e2e workflow"]
fn fc59_corrupt_kernel_e2e() {
// NOTE(review): `set_var` mutates process-wide state and the test harness may
// run other tests on parallel threads — confirm the workflow runs this test in
// isolation (or single-threaded) so nothing reads the environment concurrently.
unsafe {
std::env::set_var("CELLOS_FIRECRACKER_ALLOW_NO_MANIFEST", "1");
std::env::set_var("CELLOS_FIRECRACKER_ALLOW_NO_MANIFEST_REALLY", "1");
}
// Boot timeout comes from the environment; an unset or unparsable value
// falls back to 30.
let max_boot_timeout_seconds: u64 = std::env::var("CELLOS_FIRECRACKER_FC59_BOOT_TIMEOUT_SECS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(30);
// Baseline PID set: anything already present here is not counted as an orphan later.
let pids_before = snapshot_firecracker_pids();
let dir = tempfile::tempdir().expect("fc59 tempdir");
let kernel_path = write_corrupt_kernel(dir.path());
// Sanity-check the fixture kernel itself before relying on it.
assert!(
kernel_path.exists(),
"corrupt kernel was not written to {kernel_path:?}"
);
let kernel_bytes = std::fs::read(&kernel_path).expect("read corrupt kernel");
assert_eq!(kernel_bytes.len(), 4096, "corrupt kernel must be 4 KiB");
assert!(
kernel_bytes.iter().all(|b| *b == 0xFF),
"corrupt kernel must be all 0xFF bytes (no valid magic number)"
);
// The event under test is read from a fixture file rather than produced here.
let fixture_path =
std::env::var("CELLOS_FIRECRACKER_FC59_EVENT_FIXTURE").unwrap_or_else(|_| {
panic!(
"CELLOS_FIRECRACKER_FC59_EVENT_FIXTURE must point to the captured \
cell.lifecycle.v1.destroyed event JSON when running this test \
with --ignored. The firecracker-e2e workflow drops this file."
)
});
let body = std::fs::read_to_string(&fixture_path)
.unwrap_or_else(|e| panic!("failed to read FC-59 fixture at {fixture_path:?}: {e}"));
// On a parse failure the raw body is included in the panic message.
let payload: serde_json::Value = serde_json::from_str(&body).unwrap_or_else(|e| {
panic!(
"failed to parse FC-59 fixture at {fixture_path:?} as JSON: {e}\n\
----- body -----\n{body}\n----- end -----"
)
});
// Gate 1: the event must report terminalState="forced" and reason="boot_failed".
validate_boot_failed_event(&payload, max_boot_timeout_seconds).unwrap_or_else(|e| {
panic!(
"FC-59 event validator failed: {e}\n\
----- payload -----\n{payload:#}\n----- end -----"
)
});
// Gate 2: no firecracker PID may exist now that was absent from the baseline.
let pids_after = snapshot_firecracker_pids();
validate_no_orphan_firecracker_after_boot_failure(&pids_before, &pids_after).unwrap_or_else(
|e| {
panic!(
"FC-59 orphan-process validator failed: {e}\n\
before: {pids_before:?}\nafter: {pids_after:?}\n\
(this is the 'no orphan firecracker process remains' clause \
of the FC-59 acceptance gate)"
)
},
);
}
/// The corrupt-kernel helper produces exactly 4096 bytes, all `0xFF`.
#[cfg(target_os = "linux")]
#[test]
#[ignore = "requires firecracker; deliberately uses corrupt kernel"]
fn fc59_corrupt_kernel_helper_writes_4kib_of_0xff() {
    let scratch = tempfile::tempdir().expect("tempdir");
    let kernel_file = write_corrupt_kernel(scratch.path());

    let contents = std::fs::read(&kernel_file).expect("read corrupt kernel");
    assert_eq!(contents.len(), 4096);
    assert!(contents.iter().all(|byte| *byte == 0xFF));
}