// ktstr 0.15.0 — Test harness for Linux process schedulers
//! Host-side post_vm plumbing: the LlmExtract model-load skip sentinel,
//! the post_vm error marker types (ScxBpfErrorMatcherMismatch,
//! PostVmAssertionFailure, HostSkipRequest, ExpectAutoReproSatisfied),
//! the conditional/unconditional callback combiner + dispatch, the
//! post_vm_skip helper, and skip-sidecar recording. Split out of
//! eval/mod.rs to keep the module under the size ceiling.

use super::*;

/// Sentinel prefix marking a host-side LLM model-load failure.
///
/// [`host_side_llm_extract`] puts this prefix at the start of the
/// `AssertDetail` message it emits when the host-side LLM model could
/// not load (cold-cache offline, or a cached GGUF incompatible with the
/// linked llama.cpp). The `run_ktstr_test` caller routes on this prefix
/// to SKIP the test rather than fail it — an unloadable model is an
/// unmet prerequisite (the extraction cannot run), not a test failure.
///
/// Single source of truth shared by the emit site and the caller's
/// skip check: both sides must reference this constant rather than
/// repeating the literal, otherwise the prefix match silently stops
/// firing.
pub(crate) const LLM_MODEL_LOAD_FAILED_PREFIX: &str = "LlmExtract model load failed: ";

/// Skip-vs-fail decision for a host LLM model that could not load.
///
/// Yields `Some(skip_reason)` — the full message of the first detail
/// whose text begins with [`LLM_MODEL_LOAD_FAILED_PREFIX`] — when the
/// host-side extraction reported a model-load failure AND no host-side
/// `post_vm` callback failed. An unloadable model is an unmet
/// prerequisite, not a test failure, so the caller skips.
///
/// Yields `None` (the caller falls through to the normal verdict) when:
/// - `post_vm_failed` is `true`: a real host-side regression DOMINATES
///   a missing-prereq skip and must never be masked by it; or
/// - no entry in `host_extract_failures` carries the prefix.
///
/// Kept pure so the precedence can be unit-tested without the full eval
/// pipeline.
pub(crate) fn should_skip_on_llm_model_load_failure(
    host_extract_failures: &[crate::assert::AssertDetail],
    post_vm_failed: bool,
) -> Option<String> {
    // A host-side regression wins outright; do not even look for the
    // sentinel.
    if post_vm_failed {
        return None;
    }
    // The first prefix-bearing detail supplies the skip reason.
    for detail in host_extract_failures {
        if detail.message.starts_with(LLM_MODEL_LOAD_FAILED_PREFIX) {
            return Some(detail.message.clone());
        }
    }
    None
}

#[cfg(test)]
mod should_skip_on_llm_model_load_failure_tests {
    //! Truth table for the LLM-model-load skip-vs-fail precedence:
    //! an unloadable model skips; a host-side post_vm failure dominates
    //! (no skip — a real regression is never masked); a failure that is
    //! not a model-load failure never skips. Reverting any arm of the
    //! function under test flips one of these cells.
    use super::{LLM_MODEL_LOAD_FAILED_PREFIX, should_skip_on_llm_model_load_failure};
    use crate::assert::{AssertDetail, DetailKind};

    /// Detail whose message starts with the model-load sentinel prefix.
    fn model_load_failure() -> AssertDetail {
        let message = format!("{LLM_MODEL_LOAD_FAILED_PREFIX}cold-cache offline");
        AssertDetail::new(DetailKind::Other, message)
    }

    /// Ordinary assertion failure that does NOT carry the sentinel.
    fn unrelated_failure() -> AssertDetail {
        AssertDetail::new(
            DetailKind::Other,
            "metric out of declared range".to_string(),
        )
    }

    #[test]
    fn model_load_failure_no_post_vm_skips() {
        // Unmet prerequisite and no host-side regression → SKIP.
        let verdict = should_skip_on_llm_model_load_failure(&[model_load_failure()], false);
        assert!(verdict.is_some());
    }

    #[test]
    fn model_load_failure_with_post_vm_does_not_skip() {
        // The post_vm regression outranks the missing-prereq skip → FAIL
        // (`None` means "fall through to the verdict").
        let verdict = should_skip_on_llm_model_load_failure(&[model_load_failure()], true);
        assert!(verdict.is_none());
    }

    #[test]
    fn non_model_failure_does_not_skip() {
        // A real assertion failure must never be disguised as a skip.
        let verdict = should_skip_on_llm_model_load_failure(&[unrelated_failure()], false);
        assert!(verdict.is_none());
    }

    #[test]
    fn no_failures_does_not_skip() {
        let verdict = should_skip_on_llm_model_load_failure(&[], false);
        assert!(verdict.is_none());
    }

    #[test]
    fn model_load_failure_among_others_skips() {
        // Every entry is scanned: a model-load failure that is not the
        // first detail still triggers the skip, and the reason returned
        // is the prefix-bearing one (pins the iteration, not just `[0]`).
        let failures = vec![unrelated_failure(), model_load_failure()];
        let skip = should_skip_on_llm_model_load_failure(&failures, false);
        assert!(
            matches!(skip.as_deref(), Some(m) if m.starts_with(LLM_MODEL_LOAD_FAILED_PREFIX)),
            "the model-load detail (2nd in the vec) must be found + returned; got {skip:?}",
        );
    }
}

/// Zero-sized marker carried as `anyhow::Context` on the failure `Err`
/// raised when an scx_bpf_error matcher
/// ([`crate::assert::Assert::expect_scx_bpf_error_contains`] or
/// [`crate::assert::Assert::expect_scx_bpf_error_matches`]) did not
/// match the captured scheduler log / sched_ext dump corpus.
///
/// `crate::test_support::dispatch::result_to_exit_code` looks for this
/// marker in the error chain while handling the `expect_err = true`
/// branch and, when it is present, refuses to invert the verdict to a
/// pass: a reproducer that tripped the WRONG bug has to fail loudly
/// instead of being turned into "test passed" by `expect_err`. Absent
/// the marker, the matcher diagnostic still reaches stderr, but the
/// exit code takes the ordinary expect_err inversion path.
#[derive(Clone, Copy, Debug)]
pub(crate) struct ScxBpfErrorMatcherMismatch;

impl std::error::Error for ScxBpfErrorMatcherMismatch {}

impl std::fmt::Display for ScxBpfErrorMatcherMismatch {
    /// Emits the fixed operator-facing explanation of the marker.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str(
            "scx_bpf_error matcher mismatch — the reproducer matcher rejected \
             this failure mode; expect_err inversion bypassed",
        )
    }
}

/// Zero-sized marker carried as `anyhow::Context` on the failure `Err`
/// that `run_ktstr_test_inner_impl` produces when a host-side
/// `post_vm` / `post_vm_unconditional` callback returned `Err`. By that
/// point `evaluate_vm_result` has already folded the callback error
/// into the verdict — as an `Other` detail in the parse-success arm, as
/// a message prefix in the parse-fail arms.
///
/// `crate::test_support::dispatch::result_to_exit_code` looks for this
/// marker in the error chain and, when it is present, refuses to invert
/// the verdict to a pass — even under `expect_err = true`. The semantic
/// boundary: `expect_err` inverts a GUEST-side expected failure (the
/// scheduler stalled, the workload bailed), whereas a HOST-side
/// `post_vm` assertion is always honored. Consider a failure-dump
/// render test that provokes an expected stall to PRODUCE the dump and
/// then checks the dump's contents in `post_vm`: if the dump renders
/// wrong it must fail loudly, not be inverted to "passed" because the
/// stall it depended on was "expected". Absent the marker, the post_vm
/// diagnostic still reaches stderr, but the exit code takes the
/// ordinary expect_err inversion path (a false PASS).
///
/// Same mechanism as [`ScxBpfErrorMatcherMismatch`]: attached via
/// `anyhow::Context`, found via a `downcast_ref` chain-walk in the
/// dispatch arm. That arm sits AFTER the resource-contention / topology
/// skip arms (a skip means the test never ran) and BEFORE the
/// [`ExpectAutoReproSatisfied`] and `expect_err` inversion arms, so a
/// real host-side regression beats every inversion.
#[derive(Clone, Copy, Debug)]
pub(crate) struct PostVmAssertionFailure;

impl std::error::Error for PostVmAssertionFailure {}

impl std::fmt::Display for PostVmAssertionFailure {
    /// Emits the fixed operator-facing explanation of the marker.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str(
            "host-side post_vm assertion failed — expect_err inversion bypassed \
             (a host-side check is honored even when the accompanying guest-side \
             failure is expected)",
        )
    }
}

/// Zero-sized marker attached as `anyhow::Context` to a `post_vm` /
/// `post_vm_unconditional` `Err` in order to request a test SKIP
/// instead of a failure. The host-side callback uses it to say the run
/// is INCONCLUSIVE — the VM could not produce the artifact the
/// assertion needs (e.g. a load-starved VM whose BPF probe never
/// attached, so the failure dump is a placeholder) — rather than a real
/// regression. The eval fn detects the marker (context-aware
/// `downcast_ref`, near the LLM-model skip gate) and returns
/// [`crate::assert::AssertResult::skip`] instead of folding the `Err`
/// into the verdict.
///
/// A real [`PostVmAssertionFailure`] in a sibling callback DOMINATES:
/// [`combine_post_vm_errs`] keeps the skip marker only when BOTH
/// callbacks request skip (or only one callback ran). A genuine failure
/// next to a skip request collapses to a failure, so a skip request can
/// never mask a regression.
#[derive(Clone, Copy, Debug)]
pub(crate) struct HostSkipRequest;

impl std::error::Error for HostSkipRequest {}

impl std::fmt::Display for HostSkipRequest {
    /// Emits the fixed operator-facing explanation of the marker.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str(
            "host-side post_vm requested skip — the run is inconclusive \
             (the VM could not produce the artifact the assertion needs)",
        )
    }
}

/// Zero-sized marker carried as `anyhow::Context` on the failure `Err`
/// that `evaluate_vm_result` produces once
/// [`apply_expect_auto_repro_inversion`] has set
/// `result.expect_auto_repro_satisfied = true`, i.e. the primary VM
/// produced a Fail AND a shape-valid `.repro.wprof.pb` artifact from
/// the auto-repro VM landed on disk.
///
/// `crate::test_support::dispatch::result_to_exit_code` looks for this
/// marker in the error chain and routes the verdict to `EXIT_PASS`. The
/// underlying `AssertResult` is NOT mutated: the original failure
/// detail keeps surfacing in stderr/dump rendering, so an operator
/// chasing why `expect_auto_repro` fired sees the original failure
/// trail next to the inversion notice.
#[derive(Clone, Copy, Debug)]
pub(crate) struct ExpectAutoReproSatisfied;

impl std::error::Error for ExpectAutoReproSatisfied {}

impl std::fmt::Display for ExpectAutoReproSatisfied {
    /// Emits the fixed operator-facing explanation of the marker.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str(
            "expect_auto_repro satisfied — the primary test failed and the \
             auto-repro VM produced a shape-valid .repro.wprof.pb artifact; \
             verdict inverted to PASS",
        )
    }
}

/// Combine the conditional and unconditional `post_vm` failure
/// signals. When both callbacks fail in the same run, surface
/// BOTH errors in a single chained message so a debugging
/// operator sees both regressions on the first pass — a `.or()`
/// would silently drop the unconditional signal whenever the
/// conditional also fired, defeating the whole point of the
/// unconditional callback.
pub(crate) fn combine_post_vm_errs(
    conditional: Option<anyhow::Error>,
    unconditional: Option<anyhow::Error>,
) -> Option<anyhow::Error> {
    match (conditional, unconditional) {
        (Some(c), Some(u)) => {
            // A genuine failure dominates a skip request: collapse to a
            // skip only when BOTH callbacks requested skip (both
            // inconclusive). Otherwise a real PostVmAssertionFailure
            // must surface, so the chained message wins and the
            // HostSkipRequest marker is intentionally dropped.
            let both_skip = c.downcast_ref::<HostSkipRequest>().is_some()
                && u.downcast_ref::<HostSkipRequest>().is_some();
            let combined = anyhow::anyhow!("post_vm: {c:#}; post_vm_unconditional: {u:#}");
            Some(if both_skip {
                combined.context(HostSkipRequest)
            } else {
                combined
            })
        }
        (Some(c), None) => Some(c),
        (None, Some(u)) => Some(u),
        (None, None) => None,
    }
}

/// Build the error a `post_vm` / `post_vm_unconditional` callback
/// returns to request a test SKIP: `return Err(post_vm_skip(reason))`.
///
/// Use it when the run is INCONCLUSIVE — the VM could not produce the
/// artifact the assertion needs (e.g. a load-starved VM whose BPF probe
/// never attached, leaving a placeholder failure dump) — as distinct
/// from a real regression. The returned error has `reason` as its
/// message and the `HostSkipRequest` marker attached as context; the
/// framework detects that marker and converts the run to
/// [`crate::assert::AssertResult::skip`] instead of a failure.
///
/// A genuine `Err` from a sibling callback dominates (see
/// `combine_post_vm_errs`): a skip request never masks a regression.
pub fn post_vm_skip(reason: impl Into<String>) -> anyhow::Error {
    let message: String = reason.into();
    let base = anyhow::anyhow!("{}", message);
    base.context(HostSkipRequest)
}

#[cfg(test)]
mod post_vm_skip_tests {
    //! Pins the post_vm→skip mechanism. `post_vm_skip` must attach the
    //! [`HostSkipRequest`] marker (located with the same context-aware
    //! `downcast_ref` the eval gate uses), and `combine_post_vm_errs`
    //! must keep a lone skip request while letting a genuine sibling
    //! failure DOMINATE — a skip request may never mask a real
    //! regression. Reverting either the marker attach or the both-skip
    //! gate flips a cell here.
    use super::{HostSkipRequest, PostVmAssertionFailure, combine_post_vm_errs, post_vm_skip};

    /// A genuine host-side failure, marked the way the eval gate marks it.
    fn real_fail() -> anyhow::Error {
        anyhow::anyhow!("real host-side regression").context(PostVmAssertionFailure)
    }

    /// Whether `err` carries the skip marker.
    fn requests_skip(err: &anyhow::Error) -> bool {
        err.downcast_ref::<HostSkipRequest>().is_some()
    }

    #[test]
    fn post_vm_skip_carries_marker() {
        let err = post_vm_skip("inconclusive: placeholder dump");
        assert!(requests_skip(&err));
    }

    #[test]
    fn combine_lone_unconditional_skip_preserved() {
        let merged = combine_post_vm_errs(None, Some(post_vm_skip("ph"))).unwrap();
        assert!(requests_skip(&merged));
    }

    #[test]
    fn combine_lone_conditional_skip_preserved() {
        let merged = combine_post_vm_errs(Some(post_vm_skip("ph")), None).unwrap();
        assert!(requests_skip(&merged));
    }

    #[test]
    fn combine_both_skip_yields_skip() {
        let merged =
            combine_post_vm_errs(Some(post_vm_skip("a")), Some(post_vm_skip("b"))).unwrap();
        assert!(requests_skip(&merged));
    }

    #[test]
    fn combine_skip_plus_real_fail_does_not_skip() {
        // A genuine failure next to a skip request collapses to a
        // failure: the merged Err must NOT carry HostSkipRequest, so the
        // eval gate folds it as a failure (re-attaching
        // PostVmAssertionFailure) rather than skipping — a regression is
        // never masked.
        let merged = combine_post_vm_errs(Some(post_vm_skip("ph")), Some(real_fail())).unwrap();
        assert!(!requests_skip(&merged));
    }

    #[test]
    fn combine_real_fail_plus_skip_does_not_skip() {
        // Same rule with the argument positions swapped.
        let merged = combine_post_vm_errs(Some(real_fail()), Some(post_vm_skip("ph"))).unwrap();
        assert!(!requests_skip(&merged));
    }
}

/// Dispatch the entry's `post_vm` + `post_vm_unconditional`
/// callbacks and combine their failure signals.
///
/// - `post_vm` runs only when the guest reported a non-Fail
///   `AssertResult` (Skip / Inconclusive / Pass) — the
///   `guest_already_failed` parameter folds the
///   `parse_assert_result_from_drain` lookup the call site does.
///   The skip mirrors the suppression contract documented on
///   `KtstrTestEntry::post_vm`.
///
/// - `post_vm_unconditional` ALWAYS runs — bypasses the
///   guest-fail suppression that gates `post_vm`. The callback
///   owns its own skip-on-crash logic (or doesn't, when the
///   intent is "assert on host-side artifact regardless of
///   guest-side outcome").
///
/// Both callbacks route through [`invoke_post_vm_callback`] so a
/// panic in either body becomes an `anyhow::Error` rather than
/// unwinding past the call site (which would leak VM resources;
/// see the helper doc).
///
/// Returns the combined `Option<anyhow::Error>` via
/// [`combine_post_vm_errs`]: when both callbacks fail, the
/// chained message names both errors so the operator sees both
/// regressions on the first pass instead of a two-pass debug
/// cycle. `.or()` would silently drop the unconditional fail
/// when the conditional also fired.
pub(crate) fn run_post_vm_callbacks(
    entry: &KtstrTestEntry,
    result: &crate::vmm::VmResult,
    guest_already_failed: bool,
) -> Option<anyhow::Error> {
    // Conditional callback: suppressed entirely once the guest has
    // already reported a failure.
    let conditional = if guest_already_failed {
        None
    } else {
        entry
            .post_vm
            .and_then(|cb| invoke_post_vm_callback(cb, result, "post_vm"))
    };
    // Unconditional callback: runs whenever one is registered.
    let unconditional = entry
        .post_vm_unconditional
        .and_then(|cb| invoke_post_vm_callback(cb, result, "post_vm_unconditional"));
    combine_post_vm_errs(conditional, unconditional)
}

/// Invoke a `post_vm` / `post_vm_unconditional` callback with panic
/// catch. Converts a panic to `anyhow::Error` so the panic message
/// surfaces in the test failure output AND the rest of the
/// post-VM teardown (`write_placeholder_failure_dump_if_missing`,
/// `drop(vm)` releasing CPU/LLC flocks + guest memory + kernel-cache
/// reader flock) still runs.
///
/// Without the catch, a panicking callback would unwind past the
/// placeholder-dump emission and past `drop(vm)`, leaking VM
/// resources (flocks, guest memory) until process exit or the next
/// test's drop reclaims them. Same hazard for `Ok` returns from
/// callbacks that subsequently panic in their inner state — both
/// paths fold into this single guard.
///
/// `label` is woven into the error message so the operator sees
/// which callback panicked (`post_vm` vs `post_vm_unconditional`)
/// when both are wired and both fire.
///
/// Returns `Some(err)` when the callback returns `Err` OR panics;
/// returns `None` when the callback returns `Ok(())`. Mirrors the
/// shape `.err()` produces from `Result` so the caller's
/// `.and_then(|cb| ...)` flows unchanged. An `Err` returned by the
/// callback is passed through unmodified, so any context marker it
/// carries (e.g. `HostSkipRequest`) is preserved; a panic yields a
/// fresh error with no marker attached.
///
/// Under `panic = "abort"` (release builds — see `Cargo.toml
/// [profile.release]`), `catch_unwind` is a no-op: a panic aborts
/// the process before this function returns. The wrap is still
/// safe — `catch_unwind` is always defined, just inert — and the
/// debug builds get the leak protection that exposes regressions
/// before they ship.
pub(crate) fn invoke_post_vm_callback(
    cb: super::super::PostVmCallback,
    result: &crate::vmm::VmResult,
    label: &'static str,
) -> Option<anyhow::Error> {
    // `AssertUnwindSafe`: the closure only borrows `result`, and after
    // a panic nothing in here reads state the callback may have left
    // half-updated — the panic is only turned into an error.
    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| cb(result))) {
        Ok(Ok(())) => None,
        Ok(Err(e)) => Some(e),
        Err(payload) => {
            // `panic!("literal")` carries a `&'static str`, a formatted
            // `panic!("{x}")` carries a `String`; any other payload type
            // cannot be rendered and gets a placeholder.
            let msg = payload
                .downcast_ref::<&'static str>()
                .map(|s| (*s).to_string())
                .or_else(|| payload.downcast_ref::<String>().cloned())
                .unwrap_or_else(|| "<non-string panic payload>".to_string());
            Some(anyhow::anyhow!("{label} callback panicked: {msg}"))
        }
    }
}

/// Best-effort skip-sidecar writer for `entry`: a write failure is
/// logged and then swallowed, never propagated.
///
/// Used at six sites — the four in [`run_ktstr_test_inner`] (the
/// wrapper's catch-all that fires for any pre-VM-build
/// ResourceContention, the performance_mode gate, and the two
/// `ResourceContention` arms at VM build + VM run) and the two in
/// `super::dispatch` (performance_mode gates at the plain-run entry
/// points). Every one of them has to record the skip for stats tooling
/// yet has no meaningful way to handle a sidecar-write failure other
/// than logging it. The skip itself remains valid; only post-run stats
/// tooling loses visibility.
pub(crate) fn record_skip_sidecar(entry: &KtstrTestEntry) {
    // Success is the common case and needs no further work.
    let Err(e) = write_skip_sidecar(entry) else {
        return;
    };

    // Report on two channels, both at warn level. A missing skip
    // sidecar does not affect correctness — the test still skipped —
    // but it silently removes the skip from post-run stats tooling, so
    // an operator chasing a missing row in a gauntlet report needs a
    // log loud enough to notice. `eprintln!` covers direct nextest /
    // cargo-ktstr invocations where no tracing subscriber is
    // installed; `tracing::warn!` reaches every structured-log consumer
    // (cargo-ktstr, downstream pipelines) at warn level rather than the
    // previous implicit debug visibility.
    let entry_name = entry.name;
    let rendered = format!("{e:#}");
    eprintln!("ktstr_test: warn: skip-sidecar write failed for {entry_name}: {rendered}");
    tracing::warn!(
        test = %entry_name,
        err = %rendered,
        "skip-sidecar write failed — stats tooling will not see this skip",
    );
}