cellos-host-firecracker 0.5.0

Firecracker microVM backend for CellOS — jailer integration, warm pool with snapshot/restore, KVM nested-virtualisation aware.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
//! E6 / FC-59 — kernel panic on boot is handled.
//!
//! Acceptance gate (from [Plans/firecracker-release-readiness.md] line 124):
//!
//! > FC-59: Kernel panic on boot is handled. Acceptance: e2e variant boots
//! > with a deliberately corrupted kernel image (manifest verification
//! > disabled), supervisor reaches `complete-forced` with reason `boot_failed`
//! > within the boot timeout, no orphan firecracker process remains.
//!
//! # FC-50 dependency (typed `reason` enum) — STATUS
//!
//! FC-50 typed `LifecycleReason` landed at commit 4b78aca; the gap-marker
//! tests in this file flip to a typed-variant match once the supervisor's
//! emission path is wired through `lifecycle_destroyed_data_v1_typed`.
//!
//! The events surface in `cellos_core::events` now exposes a typed
//! [`cellos_core::LifecycleReason`] enum with a `BootFailed` variant
//! whose `as_wire_str()` returns the canonical string `"boot_failed"`.
//! The `lifecycle_destroyed_data_v1_typed` constructor delegates the
//! typed value into the existing string-bearing builder, so today's
//! `data.reason: Option<&str>` slot keeps the same wire shape.
//!
//! This file's pure-Rust validators continue to assert against the
//! string `"boot_failed"` (the wire contract) and a sibling test below
//! pins that contract against `LifecycleReason::BootFailed.as_wire_str()`
//! so a rename of the enum or a serde-shape drift surfaces immediately.
//! What remains a gap (and keeps the integration tier `#[ignore]`'d) is
//! the **supervisor-side emission classifier** that maps a Firecracker
//! kernel-panic-on-boot to `LifecycleReason::BootFailed` rather than the
//! current generic forced-terminate emission. Search for `FC-50 GAP` in
//! this file to find every site that previously depended on the typed
//! enum landing — those callouts now read as historical context for the
//! supervisor-wiring follow-up rather than a missing-enum gap.
//!
//! # Why these tests live as a two-tier file
//!
//! Same pattern as `fc14_capbnd_empty.rs` and `fc34_nftables_drops_undeclared.rs`:
//!
//!   1. **Pure-Rust tier** — runnable on every CI leg (Windows, macOS, Linux):
//!      validates the *shape* of the captured event payload (forced terminal
//!      state, `boot_failed` reason) and the *invariant* that no orphan
//!      firecracker process leaks across the boot-failure transition. These
//!      predicates are the FC-59 acceptance gate's portable core; they catch
//!      regressions in the validator logic itself.
//!
//!   2. **Linux-gated integration tier** (`#[ignore]`'d) — when explicitly
//!      run on a firecracker-capable runner, builds a deliberately corrupt
//!      kernel image (4 KiB of `0xFF` — definitely not a valid ELF/bzImage),
//!      sets `CELLOS_FIRECRACKER_ALLOW_NO_MANIFEST=1` plus the paired
//!      `_REALLY=1` flag (the two-flag handshake `FirecrackerConfig::from_env`
//!      requires — see `crates/cellos-host-firecracker/src/lib.rs:333-360`),
//!      spawns a cell, and asserts the resulting CloudEvent satisfies the
//!      pure-Rust validator and no orphan firecracker process is left behind.
//!      Deliberately uses an uncertified kernel — that's the FC-59 contract.
//!      Marked `#[ignore]` because it requires a real firecracker binary +
//!      KVM and is run only by the firecracker-e2e CI workflow.

use std::collections::BTreeSet;

// ── Shape of the validator errors ────────────────────────────────────────────
//
// Pulled out as named types so a downstream regression surfaces with a
// distinguishable error variant in the panic message rather than a string
// match against `Display`. These types are crate-test-private; they have
// no dependants outside this file.

/// Why a captured `cell.lifecycle.v1.destroyed` event payload failed the
/// FC-59 acceptance check.
///
/// Produced by `validate_boot_failed_event`. Variants are declared in the
/// same order the validator checks the payload (`data`, then
/// `terminalState`, then `reason`), so the first failing field is the one
/// reported.
#[derive(Debug, PartialEq, Eq)]
pub enum BootFailedValidationError {
    /// The event payload had no `data` object at all — schema violation.
    MissingData,
    /// `data.terminalState` was absent. Per FC-59 the failed-boot path must
    /// emit a non-None terminal state because the supervisor *did* run
    /// teardown (it just never received an authenticated exit code).
    MissingTerminalState,
    /// `data.terminalState` was present but not `"forced"`. Anything else
    /// (`"clean"`, an unrecognised string) means the supervisor either
    /// observed an authenticated exit (impossible — no kernel ever booted)
    /// or the `LifecycleTerminalState` wire representation drifted without
    /// this test being updated. `actual` echoes the offending value.
    WrongTerminalState { actual: String },
    /// `data.reason` was absent. FC-59 requires the canonical reason
    /// `"boot_failed"`; absence means the supervisor did not categorise the
    /// failure mode and the operator runbook has nothing to key on.
    MissingReason,
    /// `data.reason` was present but not `"boot_failed"`. Other strings
    /// (`"oom"`, `"vmm_crashed"`, free-form) mean the supervisor's
    /// classification disagrees with the gate. `actual` echoes the offending
    /// value. **FC-50 GAP**: the wire slot is still a string; if it is ever
    /// promoted to a typed value, this variant must compare against
    /// `LifecycleReason::BootFailed`, not the literal string.
    WrongReason { actual: String },
}

/// Human-readable rendering of each FC-59 event-validation failure.
///
/// Every message starts with the canonical `FC-59 violation:` prefix. The two
/// reason-related messages also carry the `FC-50 GAP` marker that the tests in
/// this file grep for, and the `WrongTerminalState` / `WrongReason` messages
/// echo the offending value in double quotes so it can be correlated with
/// supervisor logs.
impl std::fmt::Display for BootFailedValidationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::MissingData => f.write_str(
                "FC-59 violation: event payload has no `data` object — schema violation",
            ),
            Self::MissingTerminalState => f.write_str(
                "FC-59 violation: data.terminalState missing; the failed-boot path must \
                 emit terminalState=\"forced\" because no authenticated exit code arrived",
            ),
            Self::WrongTerminalState { actual } => write!(
                f,
                "FC-59 violation: data.terminalState=\"{actual}\" (expected \"forced\"). \
                 Either the supervisor mis-classified a forced teardown as clean, or the \
                 LifecycleTerminalState enum's serde representation drifted."
            ),
            Self::MissingReason => f.write_str(
                "FC-59 violation: data.reason missing. FC-50 GAP — when the typed reason \
                 enum lands, this check must use the typed variant; today reason is a \
                 free-form Option<&str> so absence is itself a regression.",
            ),
            // The typed enum exposed by `cellos_core` is `LifecycleReason`
            // (see the typed-variant test in this file); the message names that
            // type so a reader grepping for it finds a real item.
            Self::WrongReason { actual } => write!(
                f,
                "FC-59 violation: data.reason=\"{actual}\" (expected \"boot_failed\"). \
                 FC-50 GAP — when reason becomes a typed enum, change this validator to \
                 compare against LifecycleReason::BootFailed and update the runbook \
                 cross-reference in docs/operator-runbooks.md §VM hung at /sbin/init."
            ),
        }
    }
}

// Marker impl: the derived `Debug` and the hand-written `Display` are all
// `std::error::Error` requires, so the trait's default methods are used as-is.
impl std::error::Error for BootFailedValidationError {}

/// Why an orphan-firecracker check failed.
///
/// Produced by `validate_no_orphan_firecracker_after_boot_failure`.
#[derive(Debug, PartialEq, Eq)]
pub enum OrphanError {
    /// One or more PIDs that were not present before the boot-failure
    /// transition are still alive after teardown completed. Each such PID
    /// is an orphan firecracker process — the FC-59 acceptance gate's
    /// "no orphan firecracker process remains" clause.
    ///
    /// `orphan_pids` holds the offending PIDs; the validator fills it from a
    /// `BTreeSet` difference, so the list is de-duplicated and ascending.
    OrphansRemain { orphan_pids: Vec<u32> },
}

/// Renders the orphan report: how many PIDs leaked, which ones, and where in
/// the backend to start looking.
impl std::fmt::Display for OrphanError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `OrphanError` has a single variant, so this destructuring is
        // irrefutable; adding a variant later becomes a compile error here
        // instead of a silently unhandled case.
        let Self::OrphansRemain { orphan_pids } = self;
        let n = orphan_pids.len();
        write!(
            f,
            "FC-59 violation: {n} orphan firecracker PID(s) remain after boot-failure \
             teardown: {orphan_pids:?}. The supervisor reached `complete-forced` but \
             did not reap the VMM. Re-check `FirecrackerCellBackend::destroy` and the \
             SIGKILL path in `crates/cellos-host-firecracker/src/lib.rs`."
        )
    }
}

// Marker impl: the derived `Debug` and the hand-written `Display` are all
// `std::error::Error` requires, so the trait's default methods are used as-is.
impl std::error::Error for OrphanError {}

// ── Pure-Rust validators (the FC-59 acceptance gate's portable core) ────────

/// FC-59 boot-failure event validator.
///
/// Asserts the captured `cell.lifecycle.v1.destroyed` payload has:
///
///   * `data.terminalState == "forced"` — the supervisor never received an
///     authenticated exit (no kernel ever ran), so teardown proceeded via
///     SIGKILL of the VMM. See [`LifecycleTerminalState`] in
///     `crates/cellos-core/src/events.rs:38`.
///
///   * `data.reason == "boot_failed"` — the canonical category from the E6
///     runbook (`docs/operator-runbooks.md` §VM hung at /sbin/init). **FC-50
///     GAP**: the wire slot is a string, so this is a string comparison; the
///     typed counterpart is `LifecycleReason::BootFailed`, whose wire form a
///     sibling test in this file pins to the same literal.
///
/// Fields are checked in the order `data`, `terminalState`, `reason`, and the
/// first failure is returned.
///
/// # Parameters
///
/// * `event_payload` — the whole event as JSON; only the `data` object is
///   inspected, and unknown fields inside it are ignored.
/// * `_max_boot_timeout_seconds` — captured for cross-checking the
///   supervisor's boot-timeout budget against the integration-tier capture
///   time. It is **not** enforced here (the validator has no `now()`
///   reference into the captured event); the parameter keeps the signature
///   aligned with the integration-tier caller and lets a future revision pin
///   the budget once `data.bootTimeoutSeconds` is added to the events
///   surface.
///
/// # Errors
///
/// * [`BootFailedValidationError::MissingData`] — no `data` key, or `data`
///   is not a JSON object (for example `null`).
/// * [`BootFailedValidationError::MissingTerminalState`] /
///   [`BootFailedValidationError::MissingReason`] — the field is absent or
///   JSON `null`.
/// * [`BootFailedValidationError::WrongTerminalState`] /
///   [`BootFailedValidationError::WrongReason`] — the field is present with
///   any other value. String values are echoed verbatim in `actual`;
///   non-string values (numbers, booleans, arrays, objects) are echoed as
///   their compact JSON rendering so a *present-but-mistyped* field is never
///   misreported as missing.
pub fn validate_boot_failed_event(
    event_payload: &serde_json::Value,
    _max_boot_timeout_seconds: u64,
) -> Result<(), BootFailedValidationError> {
    // A `data` key that holds anything other than an object carries no
    // fields to inspect, so report it as missing data rather than letting it
    // surface later as a misleading "terminalState missing".
    let data = event_payload
        .get("data")
        .filter(|candidate| candidate.is_object())
        .ok_or(BootFailedValidationError::MissingData)?;

    let terminal_state = match data.get("terminalState") {
        // `null` is treated like absence: no usable value was emitted.
        None | Some(serde_json::Value::Null) => {
            return Err(BootFailedValidationError::MissingTerminalState);
        }
        Some(serde_json::Value::String(text)) => text.as_str(),
        // Present but not a string: the field exists and is wrong.
        Some(other) => {
            return Err(BootFailedValidationError::WrongTerminalState {
                actual: other.to_string(),
            });
        }
    };
    if terminal_state != "forced" {
        return Err(BootFailedValidationError::WrongTerminalState {
            actual: terminal_state.to_string(),
        });
    }

    // FC-50 GAP: reason travels as a string on the wire; the typed
    // counterpart is LifecycleReason::BootFailed.
    let reason = match data.get("reason") {
        None | Some(serde_json::Value::Null) => {
            return Err(BootFailedValidationError::MissingReason);
        }
        Some(serde_json::Value::String(text)) => text.as_str(),
        Some(other) => {
            return Err(BootFailedValidationError::WrongReason {
                actual: other.to_string(),
            });
        }
    };
    if reason != "boot_failed" {
        return Err(BootFailedValidationError::WrongReason {
            actual: reason.to_string(),
        });
    }

    Ok(())
}

/// FC-59 orphan-process invariant.
///
/// A PID counts as an orphan when it shows up in `child_pids_after` but not
/// in `child_pids_before`: the cell's lifecycle introduced it and teardown
/// did not reap it.
///
/// Both slices are caller-captured snapshots of candidate firecracker PIDs;
/// this function only pins the set-difference contract. Input order is
/// irrelevant and repeated PIDs are collapsed.
///
/// # Errors
///
/// Returns [`OrphanError::OrphansRemain`] carrying every orphan PID exactly
/// once, in ascending order, when at least one exists.
pub fn validate_no_orphan_firecracker_after_boot_failure(
    child_pids_before: &[u32],
    child_pids_after: &[u32],
) -> Result<(), OrphanError> {
    let baseline: BTreeSet<u32> = child_pids_before.iter().copied().collect();

    // Keep only PIDs unknown to the baseline, then normalise to the sorted,
    // duplicate-free form callers compare against.
    let mut leaked: Vec<u32> = child_pids_after
        .iter()
        .copied()
        .filter(|pid| !baseline.contains(pid))
        .collect();
    leaked.sort_unstable();
    leaked.dedup();

    if leaked.is_empty() {
        Ok(())
    } else {
        Err(OrphanError::OrphansRemain {
            orphan_pids: leaked,
        })
    }
}

// ── Pure-Rust unit-style coverage of the validators ─────────────────────────
//
// Every test in this section runs on every CI leg (Windows, macOS, Linux).
// They lock down the validator's contract independently of whether a
// firecracker-capable runner has captured a real failed-boot event.

#[test]
fn validate_boot_failed_event_passes_on_canonical_payload() {
    // The reference event: forced terminal state plus the canonical reason.
    let canonical_event = serde_json::json!({
        "data": {
            "cellId": "fc59-canonical",
            "specId": "fc59",
            "outcome": "failed",
            "terminalState": "forced",
            "reason": "boot_failed",
        }
    });
    let verdict = validate_boot_failed_event(&canonical_event, 30);
    verdict.expect("canonical FC-59 payload must satisfy the validator");
}

#[test]
fn validate_boot_failed_event_passes_on_fc50_typed_lifecycle_reason_boot_failed() {
    // FC-50-FOLLOWUP: companion to the canonical-string test. Ties the wire
    // literal to the typed enum so that renaming `LifecycleReason::BootFailed`
    // or changing its wire form fails here, on every CI leg, instead of only
    // on an integration runner.
    let wire_reason = cellos_core::LifecycleReason::BootFailed.as_wire_str();
    assert_eq!(
        wire_reason, "boot_failed",
        "FC-50 typed enum's wire form for BootFailed must equal the FC-59 contract"
    );

    // Feed the typed variant's wire form straight into an otherwise
    // canonical payload.
    let typed_event = serde_json::json!({
        "data": {
            "cellId": "fc59-typed",
            "specId": "fc59",
            "outcome": "failed",
            "terminalState": "forced",
            "reason": wire_reason,
        }
    });
    let verdict = validate_boot_failed_event(&typed_event, 30);
    verdict.expect("typed-variant emission of LifecycleReason::BootFailed must pass FC-59");
}

#[test]
fn validate_boot_failed_event_fails_when_data_missing() {
    // An envelope with a `type` but no `data` member at all.
    let envelope_only =
        serde_json::json!({"type": "dev.cellos.events.cell.lifecycle.v1.destroyed"});
    let failure = validate_boot_failed_event(&envelope_only, 30)
        .expect_err("payload without `data` must fail");
    assert_eq!(failure, BootFailedValidationError::MissingData);
    let rendered = failure.to_string();
    assert!(rendered.contains("schema violation"));
}

#[test]
fn validate_boot_failed_event_fails_when_terminal_state_missing() {
    // Without `terminalState` there is no forced/clean signal at all, which
    // on the failed-boot path counts as a regression in its own right.
    let event_without_state = serde_json::json!({
        "data": {
            "cellId": "fc59-no-terminal",
            "outcome": "failed",
            "reason": "boot_failed",
        }
    });
    let outcome = validate_boot_failed_event(&event_without_state, 30);
    assert_eq!(
        outcome.expect_err("missing terminalState"),
        BootFailedValidationError::MissingTerminalState
    );
}

#[test]
fn validate_boot_failed_event_fails_when_terminal_state_is_clean() {
    // Worst-case mis-classification: a "clean" exit reported for a cell whose
    // kernel never booted. The error must echo the observed value so it can
    // be matched against supervisor logs.
    let mislabelled_event = serde_json::json!({
        "data": {
            "cellId": "fc59-mis-clean",
            "outcome": "failed",
            "terminalState": "clean",
            "reason": "boot_failed",
        }
    });
    let expected = BootFailedValidationError::WrongTerminalState {
        actual: String::from("clean"),
    };

    let failure =
        validate_boot_failed_event(&mislabelled_event, 30).expect_err("clean must trip the gate");
    assert_eq!(failure, expected);

    let rendered = failure.to_string();
    assert!(rendered.contains("\"clean\""));
    assert!(rendered.contains("forced"));
}

#[test]
fn validate_boot_failed_event_fails_when_terminal_state_is_unrecognised() {
    // Defence-in-depth: any terminal-state string other than `forced` —
    // including one the validator has never heard of — must be rejected.
    let unknown_state_event = serde_json::json!({
        "data": {
            "cellId": "fc59-unknown-state",
            "terminalState": "panicked",
            "reason": "boot_failed",
        }
    });
    let expected = BootFailedValidationError::WrongTerminalState {
        actual: String::from("panicked"),
    };
    let failure = validate_boot_failed_event(&unknown_state_event, 30).expect_err("unknown state");
    assert_eq!(failure, expected);
}

#[test]
fn validate_boot_failed_event_fails_when_reason_missing() {
    // FC-50 GAP regression: terminal state is `forced` but no reason was
    // emitted, leaving the operator runbook with nothing to key on.
    let event_without_reason = serde_json::json!({
        "data": {
            "cellId": "fc59-no-reason",
            "terminalState": "forced",
        }
    });
    let err = validate_boot_failed_event(&event_without_reason, 30).expect_err("missing reason");
    assert_eq!(err, BootFailedValidationError::MissingReason);

    let rendered = err.to_string();
    assert!(
        rendered.contains("FC-50 GAP"),
        "error must surface the FC-50 dependency for grep-ability; got: {err}"
    );
}

#[test]
fn validate_boot_failed_event_fails_when_reason_is_oom() {
    // The supervisor picked a different category than the gate expects —
    // the shape a wrong classifier arm would produce.
    let oom_event = serde_json::json!({
        "data": {
            "cellId": "fc59-mis-oom",
            "terminalState": "forced",
            "reason": "oom",
        }
    });
    let expected = BootFailedValidationError::WrongReason {
        actual: String::from("oom"),
    };

    let err = validate_boot_failed_event(&oom_event, 30).expect_err("oom must trip the gate");
    assert_eq!(err, expected);

    // The rendered message must echo the observed value, name the expected
    // one, and carry the grep marker.
    let rendered = err.to_string();
    assert!(rendered.contains("\"oom\""));
    assert!(rendered.contains("boot_failed"));
    assert!(
        rendered.contains("FC-50 GAP"),
        "wrong-reason error must surface the FC-50 dependency; got: {err}"
    );
}

#[test]
fn validate_boot_failed_event_fails_when_reason_is_freeform_string() {
    // The wire slot is a plain string, so arbitrary prose can arrive; only
    // the canonical token may pass, and the prose must be echoed back intact.
    let freeform_event = serde_json::json!({
        "data": {
            "cellId": "fc59-freeform",
            "terminalState": "forced",
            "reason": "kernel ran out of memory or something",
        }
    });
    let failure = validate_boot_failed_event(&freeform_event, 30).expect_err("free-form reason");
    let echoed = match failure {
        BootFailedValidationError::WrongReason { actual } => actual,
        other => panic!("expected WrongReason, got {other:?}"),
    };
    assert_eq!(echoed, "kernel ran out of memory or something");
}

#[test]
fn validate_boot_failed_event_passes_with_extra_fields() {
    // Only the FC-59 fields are pinned. Everything else in `data` —
    // correlation, runId, ttlSeconds, fields that do not exist yet — must be
    // ignored so schema growth never trips the gate.
    let enriched_event = serde_json::json!({
        "data": {
            "cellId": "fc59-with-extras",
            "specId": "fc59",
            "ttlSeconds": 60,
            "runId": "urn:cellos:run:abc123",
            "correlation": {"traceId": "trace-xyz"},
            "outcome": "failed",
            "terminalState": "forced",
            "reason": "boot_failed",
            "futureFieldX": "ignored",
        }
    });
    let verdict = validate_boot_failed_event(&enriched_event, 30);
    verdict.expect("schema growth must not trip the FC-59 validator");
}

#[test]
fn validate_no_orphan_firecracker_passes_when_after_is_subset_of_before() {
    // Healthy teardown: PID 999 (the cell's) has gone; only the PIDs that
    // were already on the host remain.
    let baseline = [100u32, 200, 300, 999];
    let survivors = [100u32, 200, 300];
    let verdict = validate_no_orphan_firecracker_after_boot_failure(&baseline, &survivors);
    verdict.expect("subset-after must pass");
}

#[test]
fn validate_no_orphan_firecracker_passes_when_sets_match() {
    // Same PIDs, reversed order: the comparison is set-based.
    let baseline = [1u32, 2, 3];
    let survivors = [3u32, 2, 1];
    let verdict = validate_no_orphan_firecracker_after_boot_failure(&baseline, &survivors);
    verdict.expect("equal sets must pass regardless of order");
}

#[test]
fn validate_no_orphan_firecracker_passes_on_empty_inputs() {
    // Trivial baseline: no firecracker PIDs were seen before or after.
    let no_pids: [u32; 0] = [];
    let verdict = validate_no_orphan_firecracker_after_boot_failure(&no_pids, &no_pids);
    verdict.expect("empty/empty must pass");
}

#[test]
fn validate_no_orphan_firecracker_tolerates_duplicates() {
    // A `/proc` scrape may list the same PID more than once. Because the
    // contract is set-based, repeats on either side must never be counted
    // as orphans.
    let baseline = [10u32, 10, 20];
    let survivors = [10u32, 20, 20, 10];
    let verdict = validate_no_orphan_firecracker_after_boot_failure(&baseline, &survivors);
    verdict.expect("duplicates must not produce false orphans");
}

#[test]
fn validate_no_orphan_firecracker_fails_when_one_orphan_remains() {
    // The FC-59 failure mode itself: PID 999 appeared during the cell's
    // lifetime and is still alive after teardown.
    let baseline = [100u32, 200];
    let survivors = [100u32, 200, 999];
    let expected = OrphanError::OrphansRemain {
        orphan_pids: vec![999],
    };

    let err = validate_no_orphan_firecracker_after_boot_failure(&baseline, &survivors)
        .expect_err("orphan must trip the gate");
    assert_eq!(err, expected);

    // The rendered report must be self-sufficient for an operator.
    let msg = err.to_string();
    assert!(
        msg.contains("999"),
        "error must echo the orphan PID; got: {msg}"
    );
    assert!(
        msg.contains("FC-59 violation"),
        "error must use the canonical phrase; got: {msg}"
    );
    assert!(
        msg.contains("complete-forced"),
        "error must reference the lifecycle phase; got: {msg}"
    );
}

#[test]
fn validate_no_orphan_firecracker_fails_with_multiple_orphans() {
    // Two leaked PIDs at once, e.g. two cells torn down in parallel.
    let baseline = [100u32];
    let survivors = [100u32, 555, 666];
    let failure = validate_no_orphan_firecracker_after_boot_failure(&baseline, &survivors)
        .expect_err("multiple orphans must trip the gate");

    // Single-variant enum, so the destructuring cannot fail.
    let OrphanError::OrphansRemain {
        orphan_pids: leaked,
    } = failure;
    // The validator reports orphans in ascending order.
    assert_eq!(leaked, vec![555, 666]);
}

#[test]
fn validate_no_orphan_firecracker_fails_when_pid_appears_only_after() {
    // Empty-baseline corner case: no firecracker process existed before the
    // cell was spawned, yet one is present afterwards. That is an orphan even
    // on a fresh host.
    let baseline: [u32; 0] = [];
    let survivors = [42u32];
    let expected = OrphanError::OrphansRemain {
        orphan_pids: vec![42],
    };
    let failure = validate_no_orphan_firecracker_after_boot_failure(&baseline, &survivors)
        .expect_err("solo-after must trip the gate");
    assert_eq!(failure, expected);
}

// ── Linux-only integration tier (`#[ignore]`'d) ──────────────────────────────
//
// Boots a real firecracker microVM with a deliberately corrupted kernel image,
// asserts the supervisor reaches `complete-forced` with reason `boot_failed`,
// and asserts no orphan firecracker process remains. Marked `#[ignore]`
// because:
//
//   * It requires a real `firecracker` binary on `$PATH` (or
//     `CELLOS_FIRECRACKER_BINARY` pointing at one).
//   * It requires `/dev/kvm` (KVM-capable host).
//   * It deliberately uses an uncertified kernel via the
//     `CELLOS_FIRECRACKER_ALLOW_NO_MANIFEST=1` +
//     `CELLOS_FIRECRACKER_ALLOW_NO_MANIFEST_REALLY=1` two-flag handshake (see
//     `crates/cellos-host-firecracker/src/lib.rs` lines 173-199 for why this
//     handshake exists). This is the FC-59 contract — uncertified kernel is
//     the test's input, not a misconfiguration.
//
// Run with:
//   cargo test -p cellos-host-firecracker --test fc59_kernel_panic_handled \
//       fc59_corrupt_kernel_e2e -- --ignored

/// Writes the deliberately corrupt "kernel" used by the FC-59 integration
/// tier: a file named `fc59-corrupt-kernel` inside `dir`, containing exactly
/// 4096 bytes of `0xFF`.
///
/// Returns the full path of the written file. `dir` must already exist; the
/// caller owns it (typically a tempdir guard) and must keep it alive for as
/// long as the file is needed.
///
/// Rationale carried over from the original author: 4 KiB is meant to be
/// large enough that firecracker's kernel-header probe runs and rejects the
/// magic number rather than bailing out with a "file too small" class of
/// error, and an all-`0xFF` image is not a valid bzImage, ELF, or PE-COFF
/// header, so it should be rejected before any guest code runs.
///
/// # Panics
///
/// Panics if the file cannot be written.
#[cfg(target_os = "linux")]
fn write_corrupt_kernel(dir: &std::path::Path) -> std::path::PathBuf {
    let image_path = dir.join("fc59-corrupt-kernel");
    std::fs::write(&image_path, [0xFFu8; 4096])
        .expect("write_corrupt_kernel: failed to write 4 KiB tempfile");
    image_path
}

/// Snapshots the PIDs of processes whose `/proc/<pid>/comm` reads exactly
/// `firecracker` (after trimming surrounding whitespace).
///
/// The scan is best-effort and never fails:
///
///   * if `/proc` itself cannot be listed, an empty vec is returned;
///   * directory entries that cannot be read, whose name is not valid UTF-8
///     or not a `u32`, or whose `comm` file cannot be read (for example the
///     process exited mid-scan, or permission was denied) are skipped.
///
/// Because the orphan check is a set difference of two snapshots, a dropped
/// entry is not harmless in both directions: missing a PID in the *after*
/// snapshot can hide a real orphan, while missing a PID in the *before*
/// snapshot can make a pre-existing process look like an orphan.
///
/// `pgrep`/`ps` are deliberately not used because those binaries may be
/// absent from minimal CI rootfs images.
#[cfg(target_os = "linux")]
fn snapshot_firecracker_pids() -> Vec<u32> {
    std::fs::read_dir("/proc")
        .map(|proc_entries| {
            proc_entries
                .flatten()
                .filter_map(|entry| {
                    // Only purely numeric directory names are PIDs.
                    let pid = entry.file_name().to_str()?.parse::<u32>().ok()?;
                    let comm = std::fs::read_to_string(entry.path().join("comm")).ok()?;
                    if comm.trim() == "firecracker" {
                        Some(pid)
                    } else {
                        None
                    }
                })
                .collect()
        })
        .unwrap_or_default()
}

/// FC-59 integration acceptance gate (Linux-only, opt-in via `--ignored`).
///
/// What the body does, in order:
///
///   1. Exports the two-flag manifest opt-out into this process's
///      environment.
///   2. Reads the optional boot-timeout budget from
///      `CELLOS_FIRECRACKER_FC59_BOOT_TIMEOUT_SECS` (default 30).
///   3. Snapshots the firecracker PIDs, writes the corrupt kernel into a
///      tempdir and self-checks its size and contents.
///   4. Loads the captured event JSON named by
///      `CELLOS_FIRECRACKER_FC59_EVENT_FIXTURE` and requires
///      `validate_boot_failed_event` to accept it (forced terminal,
///      `boot_failed` reason).
///   5. Snapshots the firecracker PIDs again and requires
///      `validate_no_orphan_firecracker_after_boot_failure` to accept the
///      before/after pair.
///
/// The `#[ignore]` gate is deliberate (see the section comment above). The
/// firecracker-e2e CI workflow runs this test explicitly with
/// `cargo test -- --ignored`.
///
/// The test body only exercises the harness primitives (corrupt-kernel
/// builder, PID snapshotter, validators) plus the env-var handshake; it does
/// **not** call into `FirecrackerCellBackend::create` from this file. The
/// actual VM spawn is owned by the firecracker-e2e workflow's bash driver,
/// which produces an event-payload fixture this test consumes via env var.
/// Decoupling the spawn from the assertion mirrors the FC-14 / FC-34 pattern
/// in this directory and lets the same validator code run against captured
/// fixtures without needing privileges to call the host backend directly.
///
/// NOTE(review): because the spawn happens outside this function, both PID
/// snapshots are taken by this process with no spawn in between. If the
/// fixture is produced before this test starts (as the description above
/// suggests), a VMM leaked by that spawn would already be in `pids_before`
/// and the set difference could not flag it — confirm against the workflow
/// driver whether the baseline should instead be captured there.
///
/// # Panics
///
/// Panics (fails the test) if the tempdir or corrupt kernel cannot be
/// created, if `CELLOS_FIRECRACKER_FC59_EVENT_FIXTURE` is unset or names a
/// file that cannot be read or parsed as JSON, or if either validator
/// rejects its input.
#[cfg(target_os = "linux")]
#[test]
#[ignore = "requires firecracker; deliberately uses corrupt kernel — run via firecracker-e2e workflow"]
fn fc59_corrupt_kernel_e2e() {
    // Pre-conditions documented for the operator running --ignored:
    //
    //   * `CELLOS_FIRECRACKER_ALLOW_NO_MANIFEST=1` and
    //     `CELLOS_FIRECRACKER_ALLOW_NO_MANIFEST_REALLY=1` must both be set.
    //     This test deliberately uses an uncertified kernel — that is the
    //     FC-59 contract. (The test also sets both itself, just below.)
    //   * `CELLOS_FIRECRACKER_FC59_EVENT_FIXTURE` points at a JSON file
    //     containing the captured `cell.lifecycle.v1.destroyed` event payload.
    //   * `CELLOS_FIRECRACKER_FC59_BOOT_TIMEOUT_SECS` (optional, defaults to
    //     30) is the boot-timeout budget the supervisor was configured with.

    // Set the two-flag handshake on this process's environment so any
    // sub-spawn driven from inside this test inherits the opt-out. The
    // firecracker-e2e workflow already exports these globally; we set them
    // again here as a belt-and-braces measure for local --ignored runs.
    //
    // SAFETY: `set_var` is unsound only when other threads read or write the
    // process environment concurrently. Rust's test harness runs each
    // #[test] on its own thread, so this relies on the integration tier
    // being `#[ignore]`-gated and run single-threaded by convention —
    // nothing in this file enforces that.
    unsafe {
        std::env::set_var("CELLOS_FIRECRACKER_ALLOW_NO_MANIFEST", "1");
        std::env::set_var("CELLOS_FIRECRACKER_ALLOW_NO_MANIFEST_REALLY", "1");
    }

    // An unset or unparsable budget silently falls back to 30 seconds.
    let max_boot_timeout_seconds: u64 = std::env::var("CELLOS_FIRECRACKER_FC59_BOOT_TIMEOUT_SECS")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(30);

    // Capture the firecracker-PID baseline. See the NOTE(review) on the
    // function docs: no spawn happens between this snapshot and the later one.
    let pids_before = snapshot_firecracker_pids();

    // Materialise the corrupt kernel. The tempdir guard keeps the file alive
    // until the test exits. In this function the file is only written and
    // read back as a self-check of the helper.
    // NOTE(review): `kernel_path` is never handed to a VMM here; the spawn
    // that is expected to fail on the bad magic number is the workflow's.
    let dir = tempfile::tempdir().expect("fc59 tempdir");
    let kernel_path = write_corrupt_kernel(dir.path());
    assert!(
        kernel_path.exists(),
        "corrupt kernel was not written to {kernel_path:?}"
    );
    let kernel_bytes = std::fs::read(&kernel_path).expect("read corrupt kernel");
    assert_eq!(kernel_bytes.len(), 4096, "corrupt kernel must be 4 KiB");
    assert!(
        kernel_bytes.iter().all(|b| *b == 0xFF),
        "corrupt kernel must be all 0xFF bytes (no valid magic number)"
    );

    // Read the captured event payload produced by the firecracker-e2e
    // workflow. The workflow is responsible for actually invoking
    // `FirecrackerCellBackend::create` with `kernel_path` set to a path
    // built by the same `write_corrupt_kernel` helper this test uses, so
    // the corruption profile matches.
    let fixture_path =
        std::env::var("CELLOS_FIRECRACKER_FC59_EVENT_FIXTURE").unwrap_or_else(|_| {
            panic!(
                "CELLOS_FIRECRACKER_FC59_EVENT_FIXTURE must point to the captured \
             cell.lifecycle.v1.destroyed event JSON when running this test \
             with --ignored. The firecracker-e2e workflow drops this file."
            )
        });
    let body = std::fs::read_to_string(&fixture_path)
        .unwrap_or_else(|e| panic!("failed to read FC-59 fixture at {fixture_path:?}: {e}"));
    // On a parse failure the raw body is dumped so the CI log shows exactly
    // what the workflow captured.
    let payload: serde_json::Value = serde_json::from_str(&body).unwrap_or_else(|e| {
        panic!(
            "failed to parse FC-59 fixture at {fixture_path:?} as JSON: {e}\n\
             ----- body -----\n{body}\n----- end -----"
        )
    });

    // Gate 1: event shape. The timeout budget is passed through but the
    // validator does not enforce it (its parameter is `_`-prefixed).
    validate_boot_failed_event(&payload, max_boot_timeout_seconds).unwrap_or_else(|e| {
        panic!(
            "FC-59 event validator failed: {e}\n\
             ----- payload -----\n{payload:#}\n----- end -----"
        )
    });

    // Gate 2: orphan check. Snapshot the firecracker-PID set again; the
    // supervisor is expected to have reaped the VMM as part of the
    // `complete-forced` lifecycle transition.
    let pids_after = snapshot_firecracker_pids();
    validate_no_orphan_firecracker_after_boot_failure(&pids_before, &pids_after).unwrap_or_else(
        |e| {
            panic!(
                "FC-59 orphan-process validator failed: {e}\n\
                 before: {pids_before:?}\nafter:  {pids_after:?}\n\
                 (this is the 'no orphan firecracker process remains' clause \
                  of the FC-59 acceptance gate)"
            )
        },
    );
}

#[cfg(target_os = "linux")]
#[test]
#[ignore = "requires firecracker; deliberately uses corrupt kernel"]
fn fc59_corrupt_kernel_helper_writes_4kib_of_0xff() {
    // Checks the corrupt-kernel helper on its own, so a regression in the
    // helper is not mistaken for an e2e-capture failure. It stays
    // `#[ignore]`'d purely to remain batched with the rest of the integration
    // tier, the only place the helper matters.
    let scratch = tempfile::tempdir().expect("tempdir");
    let image_path = write_corrupt_kernel(scratch.path());
    let image = std::fs::read(&image_path).expect("read corrupt kernel");
    assert_eq!(image.len(), 4096);
    assert!(image.iter().all(|byte| *byte == 0xFF));
}