ktstr 0.4.14

Test harness for Linux process schedulers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
//! Panic-hook shim for vCPU worker threads.
//!
//! The crate runs with `panic = "abort"` in release (Cargo.toml), so
//! a panic on any thread tears down the entire VM process without
//! unwinding — Drop impls do not run, and `std::panic::catch_unwind`
//! cannot observe the failure. That leaves a single window between
//! "thread panics" and "libc::abort" during which the user-registered
//! panic hook (`std::panic::set_hook`) runs synchronously on the
//! panicking thread. This module uses that window to flip the
//! per-VM `kill` flag and the per-thread `exited` flag so the
//! watchdog/monitor threads observe a classified shutdown instead of
//! an opaque abort (panic=abort calls `libc::abort`, which raises
//! SIGABRT — not SIGKILL — but the outward signal an observer sees
//! is "process terminated with no cleanup").
//!
//! Primary benefit is *ordering* — the `kill` / `exited` flip
//! happens before `libc::abort`, so any observer that polls those
//! atomics (watchdog, parent join loop) sees a classified shutdown
//! rather than an unexplained abort. User-facing diagnostics
//! (panic message, backtrace) come from the preserved previous-hook
//! chain, not from this module.
//!
//! Scope of work done inside the hook:
//! - Atomic `store(true)` on `kill` and `exited` — non-blocking,
//!   allocation-free, and correct under the panicking-thread
//!   constraint (any lock acquisition here risks deadlocking against
//!   the same thread if it held the lock at the point of panic; any
//!   allocation risks triggering a nested panic).
//! - Nothing else. Serial-buffer flush is *not* performed here: the
//!   serial state lives behind a `PiMutex` and `PiMutex::lock`'s
//!   non-try path would assert-fail if the panic struck mid-`lock`.
//!   On a normal exit path the VM cleanup code drains serial; on
//!   panic=abort, final serial bytes are intentionally sacrificed for
//!   hook correctness.
//!
//! Registration model: `install_once` sets the process-wide hook
//! exactly once (a single test process can spawn multiple VMs
//! sequentially — the hook is installed on the first call and
//! reused). Per-thread context (`VcpuPanicCtx`) is stashed in a
//! `thread_local` from inside the vCPU thread body; the hook reads
//! the thread-local to decide what to signal. Threads that never
//! register leave the thread-local at `None` and the hook falls
//! through to the default.
//!
//! The previous (already-installed) hook is captured at install time
//! and called after our hook runs so standard panic messages /
//! backtraces still reach stderr.
//!
//! Limitation: `std::panic::set_hook` is process-wide. If a future
//! caller of this crate installs their own hook after ours, the
//! previous-hook chain is broken and our signaling is bypassed for
//! any vCPU panic that happens after that. Callers that embed ktstr
//! must install their own hook before spawning vCPU threads, or
//! accept the fall-through.

use std::cell::RefCell;
use std::sync::Arc;
use std::sync::Once;
use std::sync::atomic::{AtomicBool, Ordering};

use vmm_sys_util::eventfd::EventFd;

#[cfg(test)]
use std::sync::atomic::AtomicUsize;

/// Panic-hook callable type. Matches the signature accepted by
/// [`std::panic::set_hook`] and returned by [`std::panic::take_hook`]
/// (`Send + Sync + 'static`), so a captured previous hook can be
/// stored and re-chained without conversion.
type PanicHook = dyn Fn(&std::panic::PanicHookInfo<'_>) + Send + Sync + 'static;

/// Build the chained panic hook: flip per-thread kill/exited flags (if
/// this thread registered a [`VcpuPanicCtx`]) and then delegate to
/// `prev`. Factored out of [`install_once`] so tests can install a
/// custom `prev` and observe that the chain is not silently dropped.
///
/// # RefCell borrow invariant
///
/// The hook takes a shared `slot.borrow()` on `VCPU_PANIC_CTX`, the
/// per-thread `RefCell<Option<VcpuPanicCtx>>`. That borrow is safe
/// only because the hook is never re-entered while another borrow is
/// active on the same thread's RefCell:
///
/// - `with_vcpu_panic_ctx` scopes its two `borrow_mut()` windows
///   strictly to the set / clear statements and drops them before
///   running `body()`, per that function's documented INVARIANT. Any
///   panic raised inside `body()` therefore finds the RefCell
///   unborrowed, and the hook's `borrow()` cannot conflict.
/// - A panic is delivered to `std`'s hook machinery synchronously on
///   the panicking thread. `std::panic::set_hook` serializes hook
///   registration, and `catch_unwind` / runtime unwinding calls the
///   hook exactly once per `panic!` site before unwinding continues.
///   There is no concurrent second entry into this closure on the
///   same thread to hold a conflicting borrow.
/// - `prev(info)` is the previously-installed process-wide hook
///   captured at `install_once` time. By construction it does not
///   re-enter this module's thread-local (no ktstr code path inside a
///   `prev` hook touches `VCPU_PANIC_CTX`), so the delegation tail
///   cannot recursively panic into our hook.
///
/// If any of those preconditions breaks — a caller holds a `borrow`
/// across a panic site, a runtime gains re-entrant hook dispatch, or
/// a downstream `prev` hook calls back into this module — the
/// `borrow()` here panics, the panic hook double-panics, and under
/// `panic = "abort"` the process aborts without emitting the classified
/// shutdown signal `VcpuPanicCtx` exists to produce. Preserve the
/// invariant.
fn make_hook(prev: Box<PanicHook>) -> Box<PanicHook> {
    Box::new(move |info| {
        VCPU_PANIC_CTX.with(|slot| {
            if let Some(ctx) = slot.borrow().as_ref() {
                // Flip `alive` first so any cross-thread reader
                // observing the unwind has a chance to see
                // `alive == false` BEFORE this thread's `vcpu`
                // local drops during stack unwinding (under
                // `panic = "unwind"` test profile). The hook runs
                // synchronously on the panicking thread before
                // unwinding starts; every store here is
                // happens-before the Drop of `vcpu` that frees the
                // `kvm_run` mmap backing this thread's
                // `ImmediateExitHandle` ptr. A coordinator that
                // captures Copy clones of the handle gates each
                // `ie.set` on `alive.load(Acquire)` — Release here
                // pairs with that Acquire so the gate observes the
                // flip ahead of the freed mmap.
                if let Some(ref alive) = ctx.alive {
                    alive.store(false, Ordering::Release);
                }
                ctx.kill.store(true, Ordering::Release);
                ctx.exited.store(true, Ordering::Release);
                // Wake the freeze coordinator's epoll loop. EventFd
                // writes are not async-signal-safe per spec, but a
                // panic hook is not a signal handler — it runs on
                // the panicking thread under normal Rust runtime
                // context. WouldBlock (counter overflow) is
                // swallowed: the coordinator has at least one wake
                // pending in that case, so missing this one is
                // harmless. Any other error is logged but non-fatal
                // — under panic=abort the kill flag itself is
                // observed via the watchdog's atomic-load fallback.
                if let Some(ref evt) = ctx.kill_evt {
                    let _ = evt.write(1);
                }
                if let Some(ref evt) = ctx.exited_evt {
                    let _ = evt.write(1);
                }
            }
        });
        prev(info);
    })
}

/// Count of times the `HOOK_ONCE` body executed. The `Once` contract
/// guarantees this reaches 1 and stays there regardless of how many
/// callers invoke [`install_once`], giving tests a stronger assertion
/// than "no panic / no deadlock" for install idempotency.
#[cfg(test)]
static INSTALL_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Serialize tests that install a custom panic hook via
/// [`install_hook_with_prev_for_test`]. The hook is process-wide, so
/// concurrent manipulation would race. Tests that only rely on the
/// standard `install_once` hook do NOT take this lock; their ctx
/// signaling still fires because every hook installed in this module
/// wraps [`make_hook`]. NOTE(review): lock-holding tests do have a
/// brief `take_hook` → `set_hook` window where the default hook is
/// active — a concurrent panic landing exactly there would skip the
/// ctx flip; believed benign, confirm if flakes appear.
#[cfg(test)]
static HOOK_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

thread_local! {
    /// Per-thread context consulted by the panic hook. `Some` only
    /// for threads that passed through [`with_vcpu_panic_ctx`]; the
    /// hook treats `None` as "not a vCPU thread" and falls straight
    /// through to the prev-hook chain.
    static VCPU_PANIC_CTX: RefCell<Option<VcpuPanicCtx>> = const { RefCell::new(None) };
}

/// Flags the panic hook will flip on behalf of a panicking vCPU
/// thread. Clone-cheap — each field is an `Arc` shared with the main
/// VM thread and the monitor/watchdog. Fields are `pub(crate)` to
/// match the container's `pub(crate)` visibility; nothing outside the
/// `vmm` module observes this struct directly.
///
/// INVARIANT: Every field's `Drop` must be panic-free. `VcpuPanicCtx`
/// is owned by the `VCPU_PANIC_CTX` thread-local slot and dropped
/// when that slot is cleared — potentially during unwinding of a
/// `body()` panic in `with_vcpu_panic_ctx`. A panicking `Drop` in
/// that window produces a double-panic: the unwind-in-progress plus
/// the Drop panic → `std` aborts the process (even under the
/// test-profile `panic = "unwind"` setting), and the classified
/// shutdown signal this type exists to produce never reaches the
/// watchdog. `Arc<AtomicBool>` satisfies the invariant — its Drop is
/// an atomic decrement + optional `Box` deallocation, neither of
/// which panics. NOTE(review): `Arc<EventFd>` is assumed to as well —
/// `EventFd`'s Drop closes the fd and is believed panic-free per
/// vmm-sys-util; confirm. Any new field must uphold the same
/// guarantee.
#[derive(Clone)]
pub(crate) struct VcpuPanicCtx {
    /// VM-wide kill signal. Flipping this unblocks the monitor loop
    /// and lets the parent thread observe a clean shutdown path
    /// instead of treating the abort as an unexplained termination.
    pub(crate) kill: Arc<AtomicBool>,
    /// Per-thread exited marker. The parent's
    /// `VcpuThread::exited.load()` polling sees this flip before the
    /// `libc::abort` call returns to the kernel, so the parent can
    /// record "vcpu-N exited" in its failure ledger.
    pub(crate) exited: Arc<AtomicBool>,
    /// Companion eventfd for `kill`. When `Some`, the panic hook
    /// writes `1` after the `kill.store(true)` so any thread
    /// blocked in `epoll_wait` (notably the freeze coordinator)
    /// wakes immediately rather than waiting for its next epoll
    /// timeout. None on paths that have no epoll listener (the
    /// interactive shell wires kill via signal-based kicks instead).
    /// EventFd::write is async-signal-unsafe by spec, but a panic
    /// hook is not a signal handler — it runs synchronously on the
    /// panicking thread before `libc::abort`, in normal Rust
    /// runtime context. The write is allocation-free and panic-free
    /// (`EventFd::write` on a non-blocking eventfd either succeeds,
    /// or returns `WouldBlock`, which the hook swallows because the
    /// coordinator already has at least one wake pending).
    pub(crate) kill_evt: Option<Arc<EventFd>>,
    /// Companion eventfd for `exited`. Same rationale as `kill_evt`
    /// but bound to the per-thread `exited` flag. For the BSP
    /// thread `exited` IS `bsp_done`, so this is the bsp_done_evt
    /// the freeze coordinator polls; for AP threads the freeze
    /// coordinator does not poll per-AP exited via epoll (joins
    /// happen after `run.kill = true` covers all APs), so AP
    /// callers leave this `None`.
    pub(crate) exited_evt: Option<Arc<EventFd>>,
    /// kvm_run-mmap-liveness flag. `true` means the thread's
    /// `VcpuFd` (and its `MAP_SHARED` `kvm_run` mapping that
    /// backs every cross-thread [`ImmediateExitHandle`] copy) is
    /// still mapped. The hook flips it to `false` BEFORE the
    /// stack unwind drops `vcpu` so a coordinator iterating a
    /// captured `Vec<ImmediateExitHandle>` can gate each
    /// `ie.set` on `alive.load(Acquire)` and skip the index
    /// whose mmap is about to disappear. Mirrors the BSP-side
    /// `bsp_alive` belt-and-braces gate.
    ///
    /// `None` for callers that do not share an
    /// `ImmediateExitHandle` cross-thread (the interactive shell
    /// path, where the BSP runs alone and no coordinator is
    /// spawned). The hook treats `None` as "no liveness gate
    /// participates" — equivalent to the pre-existing behaviour.
    pub(crate) alive: Option<Arc<AtomicBool>>,
}

/// One-shot gate for process-wide hook registration; consumed
/// exclusively by [`install_once`].
static HOOK_ONCE: Once = Once::new();

/// Install the vCPU panic hook if it has not already been installed.
/// Idempotent — safe to call from every `spawn_ap_threads`
/// invocation; [`HOOK_ONCE`] gates the actual registration.
///
/// Install convention: callers MUST ensure no other panic hook is
/// installed process-wide AFTER this call. A later
/// [`std::panic::set_hook`] replaces ours as the active hook; the
/// replacement sees ours as its `prev`, but for vCPU-thread panics
/// our hook no longer runs first — the classified-shutdown signaling
/// documented on [`VcpuPanicCtx`] is then bypassed unless the
/// replacement chains to us. Embedders with their own panic hook
/// must install it BEFORE `install_once` so ours sits on top of
/// theirs in the chain.
pub(crate) fn install_once() {
    HOOK_ONCE.call_once(|| {
        #[cfg(test)]
        INSTALL_COUNT.fetch_add(1, Ordering::Relaxed);
        // Capture whatever hook is current and wrap it, so standard
        // panic messages / backtraces still reach stderr through the
        // preserved chain.
        std::panic::set_hook(make_hook(std::panic::take_hook()));
    });
}

/// Install a custom `prev` hook wrapped by [`make_hook`] directly,
/// bypassing [`HOOK_ONCE`] (and therefore [`INSTALL_COUNT`]).
/// Test-only helper for verifying the prev-hook chain fires on a
/// panic that enters the vCPU hook.
/// Callers must hold [`HOOK_TEST_LOCK`] and restore the previous hook
/// via [`std::panic::set_hook`] before releasing the lock — the hook
/// is process-wide state shared with every other test.
#[cfg(test)]
fn install_hook_with_prev_for_test(prev: Box<PanicHook>) {
    std::panic::set_hook(make_hook(prev));
}

/// Register `ctx` for the current thread, run `body`, then clear the
/// registration. Any panic inside `body` is observed by the hook
/// installed by [`install_once`]. Whether `body` returns normally or
/// unwinds, a Drop guard resets the thread-local before this frame
/// exits, so a future reuse of this OS thread (unusual but possible
/// if a runtime recycles threads) cannot carry stale context into an
/// unrelated panic.
///
/// INVARIANT: `body()` must not hold a `borrow` or `borrow_mut` on
/// `VCPU_PANIC_CTX` across a potential panic — the hook needs a
/// `borrow()` to read the context, and an outstanding borrow would
/// make that `borrow()` panic (a nested panic, which under
/// panic=abort aborts without signaling). The set and clear sites
/// below each confine their exclusive access to a single statement:
/// the set releases before `body()` runs, and the guard's clear runs
/// only after the hook has fired and released its shared borrow. The
/// panic window inside `body()` never overlaps a mutable borrow.
/// Callers inside `body` must not re-enter this module's
/// thread-local.
///
/// Why RAII: an earlier formulation cleared the slot with a plain
/// statement after `body()`, which the unwinding path (test profile,
/// `panic = "unwind"`) skipped entirely — stale context survived in
/// the thread-local and a recycled thread could fire flags meant for
/// a different VM. Drop runs on both the normal-return and unwinding
/// paths, closing that window.
pub(crate) fn with_vcpu_panic_ctx<R>(ctx: VcpuPanicCtx, body: impl FnOnce() -> R) -> R {
    /// Resets the `VCPU_PANIC_CTX` slot on scope exit. Drop is
    /// panic-free: by the time any unwinding Drop runs, the panic
    /// hook has already fired and released its shared borrow, so the
    /// exclusive access here cannot conflict; the `VcpuPanicCtx`
    /// moved out of the slot drops via `Arc` fields whose Drops are
    /// panic-free per the type's INVARIANT.
    struct ResetOnDrop;
    impl Drop for ResetOnDrop {
        fn drop(&mut self) {
            VCPU_PANIC_CTX.with(|cell| {
                cell.borrow_mut().take();
            });
        }
    }

    VCPU_PANIC_CTX.with(|cell| {
        cell.replace(Some(ctx));
    });
    let _reset = ResetOnDrop;
    body()
}

#[cfg(test)]
mod tests {
    //! The `[profile.test]` profile inherits `[profile.dev]` and does
    //! NOT set `panic = "abort"`, so the default unwind behavior is
    //! active inside cargo test / nextest runs. That is what allows
    //! `catch_unwind` below to observe the panic without tearing down
    //! the test process — the release-profile panic=abort semantic
    //! this module targets is itself NOT under test here (it's
    //! outside rustc's testable surface).
    //!
    //! Tests run on freshly `std::thread::spawn`ed threads so the
    //! `VCPU_PANIC_CTX` thread-local begins at its `None` init for
    //! every case. That isolates state between tests even under the
    //! parallel test runner (nextest).
    use super::*;
    use std::panic::{AssertUnwindSafe, catch_unwind};

    /// Ten `install_once` calls must neither panic nor deadlock;
    /// `Once` guarantees the body runs exactly once.
    #[test]
    fn install_once_is_idempotent() {
        for _ in 0..10 {
            install_once();
        }
    }

    /// After `with_vcpu_panic_ctx` returns normally, the thread-local
    /// must be reset to `None` so a later unrelated panic on the
    /// same OS thread does not surface stale kill/exited atomics.
    #[test]
    fn with_vcpu_panic_ctx_clears_thread_local_on_return() {
        install_once();
        let ctx = VcpuPanicCtx {
            kill: Arc::new(AtomicBool::new(false)),
            exited: Arc::new(AtomicBool::new(false)),
            kill_evt: None,
            exited_evt: None,
            alive: None,
        };
        // Fresh thread → thread-local guaranteed to start at None.
        std::thread::spawn(move || {
            with_vcpu_panic_ctx(ctx, || {});
            VCPU_PANIC_CTX.with(|slot| {
                assert!(
                    slot.borrow().is_none(),
                    "thread-local must be None after normal return",
                );
            });
        })
        .join()
        .unwrap();
    }

    /// RAII guard in `with_vcpu_panic_ctx` must clear the thread-
    /// local on the unwind path too, not just normal return. Before
    /// the guard landed, the clear statement ran AFTER `body()`, so
    /// a panicking `body()` skipped it entirely — leaving stale ctx
    /// in the slot for the next reuse of this OS thread. Under the
    /// test profile (`panic = "unwind"`), `catch_unwind` here
    /// observes the panic; the slot-is-None assertion that follows
    /// proves the guard's Drop ran during unwind.
    #[test]
    fn with_vcpu_panic_ctx_clears_thread_local_on_unwind() {
        install_once();
        let ctx = VcpuPanicCtx {
            kill: Arc::new(AtomicBool::new(false)),
            exited: Arc::new(AtomicBool::new(false)),
            kill_evt: None,
            exited_evt: None,
            alive: None,
        };
        std::thread::spawn(move || {
            let _ = catch_unwind(AssertUnwindSafe(|| {
                with_vcpu_panic_ctx(ctx, || panic!("test: intended panic"));
            }));
            VCPU_PANIC_CTX.with(|slot| {
                assert!(
                    slot.borrow().is_none(),
                    "thread-local must be None after unwind — RAII guard must clear on drop, not just after body()",
                );
            });
        })
        .join()
        .unwrap();
    }

    /// A panic inside the `with_vcpu_panic_ctx` body must flip both
    /// `kill` and `exited` via the installed hook. `catch_unwind`
    /// observes the unwind (test profile); under release panic=abort
    /// the same hook would run immediately before `libc::abort`.
    #[test]
    fn panic_inside_ctx_flips_flags() {
        install_once();
        let kill = Arc::new(AtomicBool::new(false));
        let exited = Arc::new(AtomicBool::new(false));
        let ctx = VcpuPanicCtx {
            kill: kill.clone(),
            exited: exited.clone(),
            kill_evt: None,
            exited_evt: None,
            alive: None,
        };
        // Clones observed from inside the spawned thread; the
        // originals stay on this side purely to keep the Arcs alive.
        let kill_c = kill.clone();
        let exited_c = exited.clone();
        let (kill_r, exited_r) = std::thread::spawn(move || {
            let _ = catch_unwind(AssertUnwindSafe(|| {
                with_vcpu_panic_ctx(ctx, || panic!("test: intended panic"));
            }));
            (
                kill_c.load(Ordering::Acquire),
                exited_c.load(Ordering::Acquire),
            )
        })
        .join()
        .unwrap();
        assert!(kill_r, "kill must be flipped by the panic hook");
        assert!(exited_r, "exited must be flipped by the panic hook");
    }

    /// Stronger idempotency check: [`HOOK_ONCE`]'s body must run at
    /// most once regardless of how many [`install_once`] calls land,
    /// including across concurrent tests. Asserts the underlying
    /// [`INSTALL_COUNT`] counter is 1 after repeated calls — the
    /// original `install_once_is_idempotent` only proved absence of
    /// panic/deadlock, which does not distinguish "body ran once"
    /// from "body ran every call".
    #[test]
    fn install_once_body_runs_exactly_once() {
        install_once();
        let after_first = INSTALL_COUNT.load(Ordering::Relaxed);
        for _ in 0..20 {
            install_once();
        }
        let after_many = INSTALL_COUNT.load(Ordering::Relaxed);
        assert_eq!(
            after_many, after_first,
            "HOOK_ONCE body ran more than once under repeated install_once calls",
        );
        assert!(
            after_many >= 1,
            "INSTALL_COUNT must reach 1 after install_once",
        );
    }

    /// A panic inside a registered context must still chain to the
    /// previously-installed panic hook. Guards against a regression
    /// where the tail `prev(info)` call is removed or skipped — the
    /// classified-shutdown signaling is harmless on its own, but the
    /// user-facing panic message / backtrace only reach stderr via
    /// the preserved prev chain.
    #[test]
    fn panic_inside_ctx_still_runs_prev_hook() {
        // `into_inner` on poison: a prior test's assertion failure
        // while holding the lock must not cascade here.
        let _guard = HOOK_TEST_LOCK.lock().unwrap_or_else(|e| e.into_inner());
        let saved = std::panic::take_hook();

        let prev_ran = Arc::new(AtomicBool::new(false));
        let prev_ran_c = prev_ran.clone();
        install_hook_with_prev_for_test(Box::new(move |_info| {
            prev_ran_c.store(true, Ordering::Release);
        }));

        let kill = Arc::new(AtomicBool::new(false));
        let exited = Arc::new(AtomicBool::new(false));
        let ctx = VcpuPanicCtx {
            kill: kill.clone(),
            exited: exited.clone(),
            kill_evt: None,
            exited_evt: None,
            alive: None,
        };
        std::thread::spawn(move || {
            let _ = catch_unwind(AssertUnwindSafe(|| {
                with_vcpu_panic_ctx(ctx, || panic!("test: prev-hook chain"));
            }));
        })
        .join()
        .unwrap();

        // Restore the pre-test hook before HOOK_TEST_LOCK releases.
        std::panic::set_hook(saved);

        assert!(
            prev_ran.load(Ordering::Acquire),
            "prev hook must run after our hook in the chain",
        );
        assert!(
            kill.load(Ordering::Acquire),
            "our hook must flip kill before delegating to prev",
        );
        assert!(
            exited.load(Ordering::Acquire),
            "our hook must flip exited before delegating to prev",
        );
    }

    /// A panic on a thread that never registered a context must NOT
    /// touch any external flags — the hook's thread-local read
    /// returns `None`.
    #[test]
    fn panic_outside_ctx_leaves_flags_alone() {
        install_once();
        let kill = Arc::new(AtomicBool::new(false));
        let exited = Arc::new(AtomicBool::new(false));
        let kill_c = kill.clone();
        let exited_c = exited.clone();
        let (kill_r, exited_r) = std::thread::spawn(move || {
            let _ = catch_unwind(AssertUnwindSafe(|| {
                panic!("test: intended panic without registered ctx");
            }));
            (
                kill_c.load(Ordering::Acquire),
                exited_c.load(Ordering::Acquire),
            )
        })
        .join()
        .unwrap();
        assert!(!kill_r, "kill must stay false when no ctx registered");
        assert!(!exited_r, "exited must stay false when no ctx registered");
    }

    /// The panic hook must flip `alive` to `false` BEFORE the
    /// prev-hook chain runs — the coordinator's pass-1 kick loop
    /// reads each AP's `alive` Acquire-bool to gate `ie.set` on a
    /// kvm_run mmap that's about to disappear under
    /// `panic = "unwind"` stack drop. Capture the value the prev
    /// hook observes via the `alive` Arc, then assert it was
    /// already false at the moment the prev hook ran. Together with
    /// the existing `panic_inside_ctx_still_runs_prev_hook` test,
    /// this pins the cross-thread visibility ordering: every
    /// liveness flip happens inside the hook (synchronously,
    /// before unwinding), not as a side effect of the unwind
    /// itself.
    #[test]
    fn panic_inside_ctx_flips_alive_before_prev() {
        let _guard = HOOK_TEST_LOCK.lock().unwrap_or_else(|e| e.into_inner());
        let saved = std::panic::take_hook();

        let alive = Arc::new(AtomicBool::new(true));
        let alive_seen_by_prev = Arc::new(AtomicBool::new(true));
        let alive_for_prev = alive.clone();
        let alive_seen_clone = alive_seen_by_prev.clone();
        install_hook_with_prev_for_test(Box::new(move |_info| {
            // Sample the flag at prev-hook time: our hook ran
            // already, so this load must observe the Release flip.
            alive_seen_clone.store(alive_for_prev.load(Ordering::Acquire), Ordering::Release);
        }));

        let ctx = VcpuPanicCtx {
            kill: Arc::new(AtomicBool::new(false)),
            exited: Arc::new(AtomicBool::new(false)),
            kill_evt: None,
            exited_evt: None,
            alive: Some(alive.clone()),
        };
        std::thread::spawn(move || {
            let _ = catch_unwind(AssertUnwindSafe(|| {
                with_vcpu_panic_ctx(ctx, || panic!("test: alive flip"));
            }));
        })
        .join()
        .unwrap();

        // Restore the pre-test hook before HOOK_TEST_LOCK releases.
        std::panic::set_hook(saved);

        assert!(
            !alive_seen_by_prev.load(Ordering::Acquire),
            "prev hook must observe alive == false — our hook \
             must flip alive synchronously before delegating",
        );
        assert!(
            !alive.load(Ordering::Acquire),
            "alive must remain false post-panic",
        );
    }
}