ktstr 0.17.0

Test harness for Linux process schedulers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
//! Test-only macros shared across the crate.
//!
//! Hoisted to crate root via `#[macro_use]` on the module declaration
//! in `lib.rs`, so `skip!`, `skip_on_contention!`,
//! `require_capability!`, and `assert_not_impl_default!` are reachable
//! from any `#[cfg(test)]` code without an explicit `use`.

/// Print the canonical `ktstr: SKIP: ...` message and bail out of the
/// enclosing function.
///
/// The reason is handed to [`crate::report::test_skip`] as a
/// `format_args!` value, so the banner prefix is owned by that one
/// function instead of being re-typed at every skip site (open-coded
/// `eprintln!` calls drift into inconsistent casings that break
/// grep-based test-summary tools).
///
/// Takes the same arguments as `format!`. With no arguments at all the
/// reason is the empty string.
///
/// The expansion ends in a bare `return;`, so the macro is only usable
/// inside a function returning `()`. Code that must return a value
/// (dispatcher fns returning `i32`, helpers returning `Option<T>`) or
/// `continue` a loop calls [`crate::report::test_skip`] directly and
/// drives its own control flow.
macro_rules! skip {
    // One or more tokens: forward them verbatim to `format_args!`.
    ($($fmt:tt)+) => {{
        $crate::report::test_skip(format_args!($($fmt)+));
        return;
    }};
    // No tokens: `format_args!()` requires at least a format string, so
    // re-enter the arm above with an explicit empty one.
    () => {
        skip!("")
    };
}

/// Evaluate a `Result`-returning builder (or any `anyhow::Result`
/// expression) and either unwrap the value or skip gracefully on a
/// skip-class host-insufficiency error.
///
/// The error is routed through the shared
/// `crate::test_support::classify_host_error` — the single source of
/// truth also used by `err_to_exit_code` and the `#[ktstr_test]` macro
/// body — so this helper can never drift from them:
///
/// - `HostClass::Skip` (no kernel resolved, resource contention,
///   topology insufficient, or perf-mode unavailable — chain-aware, so a
///   `.context(...)`-wrapped instance still skips) emits the canonical
///   SKIP banner carrying the classifier's `reason` and early-returns.
/// - `HostClass::Fail` (the hard-error `CpuBudgetUnsatisfiable` /
///   `TopologyUnrepresentable`) and `HostClass::NotHostClass` (any
///   non-host-class error) are real failures, not skips: both panic
///   with `{e:#}`.
///
/// `no_skip` is passed `false` — this helper always skips the
/// skip-class errors and has no `KTSTR_NO_SKIP_MODE` promotion.
///
/// The inner `match` names every `HostClass` variant instead of ending
/// in a `_` arm, so adding a variant to the enum is a compile error at
/// each call site until its routing is decided here.
///
/// Replaces the recurring `match ... { Ok => v, Err(e) if
/// ResourceContention => skip!(...), Err(e) => panic!(...) }`
/// boilerplate. Inherits `skip!`'s early-return behavior, so callers
/// must return `()`.
macro_rules! skip_on_contention {
    ($expr:expr) => {
        match $expr {
            Ok(v) => v,
            Err(e) => match $crate::test_support::classify_host_error(&e, false) {
                $crate::test_support::HostClass::Skip { reason } => {
                    skip!("{reason}");
                }
                // Fail (cpu-budget / topology-unrepresentable) and any
                // non-host-class error are real failures, not skips, so
                // both panic with the full error chain. no_skip is false
                // above, so the skip-class errors classify as Skip here,
                // never Fail.
                $crate::test_support::HostClass::Fail { .. }
                | $crate::test_support::HostClass::NotHostClass => panic!("{e:#}"),
            },
        }
    };
}

/// Skip the calling test unless a Linux capability is present in the
/// capability bounding set.
///
/// Probes with `prctl(PR_CAPBSET_READ, cap)`, which returns 1 when the
/// cap is in the bounding set, 0 when it is absent, and -1 on EINVAL
/// (unknown cap number). Any result other than 1 emits the canonical
/// SKIP banner — naming the capability expression and the raw return
/// value — and early-returns via `skip!`, so the caller must return
/// `()`.
///
/// NOTE(review): per capabilities(7) the bounding set limits which
/// capabilities can be gained; it is not the effective set. This probe
/// therefore detects environments that dropped the capability from the
/// bounding set, but a 1 does not by itself show the process currently
/// holds the capability — confirm that is the intended check.
///
/// Typical use: `require_capability!(libc::CAP_SYS_RESOURCE);` at the
/// top of a test that calls `setrlimit` to raise a hard limit.
#[allow(unused_macros)]
macro_rules! require_capability {
    ($cap:expr) => {{
        // Evaluate the caller's expression before entering `unsafe`, so
        // it is not silently granted an unsafe context by this macro.
        let cap = $cap;
        // SAFETY: only integer arguments are passed (no pointers), and
        // PR_CAPBSET_READ is a read-only query of the bounding set.
        let ret = unsafe { libc::prctl(libc::PR_CAPBSET_READ, cap, 0, 0, 0) };
        if ret != 1 {
            skip!(
                "missing capability {} (prctl PR_CAPBSET_READ returned {})",
                stringify!($cap),
                ret
            );
        }
    }};
}

/// Compile-time pin that a type does NOT impl [`Default`]. Mirrors
/// `static_assertions::assert_not_impl_any!(T, Default)` without
/// taking the dep — the `AmbiguousIfImpl<_>` blanket-vs-specialized
/// impl trick produces a compile error when both impls match for the
/// target type, which only happens if `T: Default`.
///
/// Use when a type's docs forbid `Default` because the zero / unset
/// state is semantically invalid. Existing call sites: `CgroupDef`
/// (name="cg_0" footgun), `Migration` (zeroed migration = self-
/// migration is contradictory), `WorkerExitInfo` (default-pick of
/// TimedOut variant masquerades as a real outcome), and
/// `DualFailureDumpReport` (default-empty `late` report would
/// silently lie about a successful capture). When adding a new
/// call site, append it here so a future debugger of an
/// AmbiguousIfImpl compile error sees the existing pattern.
///
/// Expands to a `const _: fn() = ...` block; safe to invoke at module
/// scope inside `#[cfg(test)] mod tests` or anywhere a `const` item
/// is valid.
#[allow(unused_macros)]
macro_rules! assert_not_impl_default {
    ($t:ty) => {
        // Unnamed const holding a closure that nothing calls: the body
        // is type-checked at compile time, which is all the assertion
        // needs.
        const _: fn() = || {
            // Helper trait, generic over a marker type `A`, so a single
            // type can implement it once per marker.
            trait AmbiguousIfImpl<A> {
                fn some_item() {}
            }
            // Impl 1: every type, under the marker `()`.
            impl<T: ?Sized> AmbiguousIfImpl<()> for T {}
            // Marker for impl 2. It is only ever named as a type
            // argument, hence the dead_code allow.
            #[allow(dead_code)]
            struct InvalidDefault;
            // Impl 2: only types that are `Default`, under the marker
            // `InvalidDefault`.
            impl<T: ?Sized + Default> AmbiguousIfImpl<InvalidDefault> for T {}
            // The marker is left as `_` for inference. With only impl 1
            // applicable it resolves to `()`; if `$t: Default` both impls
            // apply, inference is ambiguous, and compilation fails.
            let _ = <$t as AmbiguousIfImpl<_>>::some_item;
        };
    };
}

#[cfg(test)]
mod tests {
    // Unit tests for `skip!` and `skip_on_contention!`, plus a
    // hand-written stand-in for the skip routing of the `#[ktstr_test]`
    // macro body.
    use crate::vmm::host_topology::{
        CpuBudgetUnsatisfiable, PerfModeUnavailable, ResourceContention, TopologyInsufficient,
    };

    /// Regression for the error-chain fix: a ResourceContention wrapped
    /// in `.context(...)` must still be recognized by the macro and
    /// trigger the `skip!` branch instead of the `panic!` branch.
    ///
    /// `#[cfg(panic = "unwind")]`: this test uses `std::panic::catch_unwind`
    /// to assert the macro does NOT panic. Under `panic = "abort"` (the
    /// release profile's setting — see `Cargo.toml [profile.release]`)
    /// panics cannot be caught; the panic aborts the whole test binary
    /// instead of returning an `Err` from `catch_unwind`. Gating the
    /// test on the panic strategy lets `cargo ktstr test --release`
    /// skip it without false-failing the binary.
    #[test]
    #[cfg(panic = "unwind")]
    fn skip_on_contention_walks_context_chain() {
        let result = std::panic::catch_unwind(|| {
            fn skip_fn() {
                let err: anyhow::Error = anyhow::Error::new(ResourceContention {
                    reason: "simulated contention".into(),
                })
                .context("wrapping context layer 1")
                .context("wrapping context layer 2");
                let _: () = skip_on_contention!(Err::<(), _>(err));
                unreachable!("skip_on_contention! should have early-returned");
            }
            skip_fn();
        });
        assert!(
            result.is_ok(),
            "context-wrapped ResourceContention must skip, not panic"
        );
    }

    /// Unwrapped ResourceContention keeps working (no regression on the
    /// simple path).
    ///
    /// `#[cfg(panic = "unwind")]`: same rationale as the sibling
    /// context-chain test — `catch_unwind` is unusable under
    /// `panic = "abort"`.
    #[test]
    #[cfg(panic = "unwind")]
    fn skip_on_contention_recognizes_direct_error() {
        let result = std::panic::catch_unwind(|| {
            fn skip_fn() {
                let err: anyhow::Error = anyhow::Error::new(ResourceContention {
                    reason: "direct contention".into(),
                });
                let _: () = skip_on_contention!(Err::<(), _>(err));
                unreachable!("skip_on_contention! should have early-returned");
            }
            skip_fn();
        });
        assert!(
            result.is_ok(),
            "direct ResourceContention must skip, not panic"
        );
    }

    /// Non-contention errors still panic (negative case): a plain
    /// `anyhow!` message with no typed host error behind it must surface
    /// as a panic carrying that message.
    #[test]
    #[should_panic(expected = "unrelated error")]
    fn skip_on_contention_panics_on_non_contention_error() {
        fn skip_fn() {
            let err = anyhow::anyhow!("unrelated error");
            let _: () = skip_on_contention!(Err::<(), _>(err));
        }
        skip_fn();
    }

    /// A [`TopologyInsufficient`] (the VM cannot boot on this host — a kvm
    /// hardware cap) routes to skip, including when wrapped in
    /// `.context(...)`.
    ///
    /// `#[cfg(panic = "unwind")]`: same rationale as the
    /// ResourceContention skip tests — `catch_unwind` is unusable
    /// under `panic = "abort"`.
    #[test]
    #[cfg(panic = "unwind")]
    fn skip_on_contention_skips_topology_insufficient() {
        let result = std::panic::catch_unwind(|| {
            fn skip_fn() {
                let err: anyhow::Error = anyhow::Error::new(TopologyInsufficient {
                    reason: "vCPU count 600 exceeds KVM_CAP_MAX_VCPUS 512; cannot boot a VM \
                             this wide"
                        .into(),
                })
                .context("build ktstr_test VM");
                let _: () = skip_on_contention!(Err::<(), _>(err));
                unreachable!("skip_on_contention! should have early-returned");
            }
            skip_fn();
        });
        assert!(
            result.is_ok(),
            "context-wrapped TopologyInsufficient must skip, not panic",
        );
    }

    /// A [`PerfModeUnavailable`] (the host fundamentally cannot honor
    /// perf-mode — too few CPUs for an exclusive LLC + a service CPU)
    /// routes to skip, including when wrapped in `.context(...)`. Pins
    /// that `skip_on_contention!` lands in its `HostClass::Skip` arm for
    /// this error rather than in its panicking arm: a regression that
    /// routed it to the panic would compile but panic real
    /// perf-incapable hosts.
    ///
    /// `#[cfg(panic = "unwind")]`: same rationale as the
    /// ResourceContention / TopologyInsufficient skip tests —
    /// `catch_unwind` is unusable under `panic = "abort"`.
    #[test]
    #[cfg(panic = "unwind")]
    fn skip_on_contention_skips_perf_mode_unavailable() {
        let result = std::panic::catch_unwind(|| {
            fn skip_fn() {
                let err: anyhow::Error = anyhow::Error::new(PerfModeUnavailable {
                    reason: "host too small for perf topology".into(),
                })
                .context("build ktstr_test VM");
                let _: () = skip_on_contention!(Err::<(), _>(err));
                unreachable!("skip_on_contention! should have early-returned");
            }
            skip_fn();
        });
        assert!(
            result.is_ok(),
            "context-wrapped PerfModeUnavailable must skip, not panic",
        );
    }

    /// Anti-fragility: a plain error whose message HAPPENS to contain
    /// "need" + "CPU" but carries no typed skip-class error must PANIC
    /// (it is a real failure), not skip. The replaced string-match
    /// (`"need"` + `"CPU"`/`"LLC"`) would have wrongly skipped this.
    #[test]
    #[should_panic(expected = "did not get the CPU")]
    fn skip_on_contention_panics_on_unrelated_need_cpu_message() {
        fn skip_fn() {
            let err =
                anyhow::anyhow!("scheduler regression: workload did not get the CPU time it needs");
            let _: () = skip_on_contention!(Err::<(), _>(err));
        }
        skip_fn();
    }

    /// A typed HARD-FAIL host error is NOT in skip_on_contention!'s skip
    /// set: classify_host_error returns HostClass::Fail for a
    /// CpuBudgetUnsatisfiable (an explicit cpu budget the host cannot
    /// satisfy), which the macro routes to its panicking arm — a typed
    /// hard-fail must never be swallowed as a skip. Pins the Fail->panic
    /// boundary the classify_host_error routing depends on; the skip
    /// tests cover the Skip set (RC/TI/perf) and the plain-NotHostClass
    /// panics, but not this typed-Fail edge.
    #[test]
    #[should_panic(expected = "exceeds the allowed cpuset")]
    fn skip_on_contention_panics_on_typed_hard_fail() {
        fn skip_fn() {
            let err: anyhow::Error = anyhow::Error::new(CpuBudgetUnsatisfiable {
                reason: "cpu_budget = 999 exceeds the allowed cpuset".into(),
            });
            let _: () = skip_on_contention!(Err::<(), _>(err));
        }
        skip_fn();
    }

    /// The `skip!` macro must emit the canonical `ktstr: SKIP:
    /// <reason>` banner to stderr AND early-return from the calling
    /// function. Prior tests exercise `test_skip` (the lower-level
    /// emitter) and `skip_on_contention!` (the wrapper macro) but
    /// the bare `skip!` macro was left uncovered — a regression that
    /// silently broke the format_args expansion or the `return;`
    /// tail would slip through until a downstream consumer
    /// parsed the wrong line.
    ///
    /// This test uses the crate-shared stderr-capture helper and
    /// verifies BOTH invariants: the captured bytes carry the
    /// canonical banner, and a post-`skip!` line in the helper fn
    /// is never reached (pinned via a sentinel flag).
    #[test]
    fn skip_macro_emits_banner_and_early_returns() {
        use crate::test_support::test_helpers::capture_stderr;
        use std::sync::atomic::{AtomicBool, Ordering};

        let reached_tail = AtomicBool::new(false);
        let (_, bytes) = capture_stderr(|| {
            // Helper fn returning `()` so `skip!` can emit its
            // `return;` tail. The AtomicBool is set only if the
            // line AFTER `skip!` executes — a regression that
            // dropped the `return;` tail would trip it. The two
            // lints in the `#[allow(...)]` attribute are
            // load-bearing: when `skip!` correctly returns,
            // `reached.store` is dead code AND `reached` falls out
            // of the live set — which is exactly what this test is
            // designed to pin. Without the allow, compilation warns
            // about the very invariant the test verifies.
            #[allow(unused_variables, unreachable_code)]
            fn helper(reached: &AtomicBool) {
                skip!("macro-level reason with {} substitution", "format-args");
                reached.store(true, Ordering::SeqCst);
            }
            helper(&reached_tail);
        });
        let text = std::str::from_utf8(&bytes).expect("stderr is UTF-8");
        assert_eq!(
            text, "ktstr: SKIP: macro-level reason with format-args substitution\n",
            "expected canonical banner with format-args substitution",
        );
        assert!(
            !reached_tail.load(Ordering::SeqCst),
            "skip! must early-return; lines after the macro must not execute",
        );
    }

    /// `skip!` with a literal (no format args) still emits the
    /// banner. Pairs with the substitution test above to cover the
    /// literal-only case (a format string with no substitution
    /// arguments) of the `format_args!($($arg)*)` expansion.
    #[test]
    fn skip_macro_literal_reason_emits_banner() {
        use crate::test_support::test_helpers::capture_stderr;
        let (_, bytes) = capture_stderr(|| {
            fn helper() {
                skip!("literal skip reason");
            }
            helper();
        });
        let text = std::str::from_utf8(&bytes).unwrap();
        assert_eq!(text, "ktstr: SKIP: literal skip reason\n");
    }

    /// `skip!()` with ZERO arguments is handled by the macro's
    /// dedicated zero-args arm, which supplies an empty format
    /// string (a bare `format_args!()` would not compile) — an
    /// empty reason. The banner still fires with the canonical
    /// prefix + colon + empty tail + newline.
    /// Pins the degenerate-input behavior so a regression that
    /// rejected zero-argument expansion (e.g. a macro arm
    /// requiring at least one token tree) fails here instead of at
    /// some downstream call site that happens to call `skip!()`
    /// for "I don't care why, just skip" semantics.
    #[test]
    fn skip_macro_zero_args_emits_banner_with_empty_reason() {
        use crate::test_support::test_helpers::capture_stderr;
        let (_, bytes) = capture_stderr(|| {
            fn helper() {
                skip!();
            }
            helper();
        });
        let text = std::str::from_utf8(&bytes).unwrap();
        assert_eq!(text, "ktstr: SKIP: \n");
    }

    /// Pin the contract that the `#[ktstr_test]` macro's generated
    /// expect_ok body relies on: when `run_ktstr_test` returns
    /// `Err(ResourceContention)` (possibly wrapped in `.context(...)`),
    /// the macro must NOT panic — it must emit the canonical
    /// `ktstr: SKIP: resource contention: ...` banner and return. The
    /// macro lives in `ktstr-macros` and expands to a `match` whose
    /// catch-all `Err(e)` arm routes through the REAL
    /// [`crate::test_support::classify_host_error`] (the shared
    /// single-source-of-truth classifier, also used by
    /// `err_to_exit_code`) and maps a [`HostClass::Skip`] to
    /// `eprintln! + return`. We can't invoke the proc-macro from a unit
    /// test, but we CAN exercise the real classifier + the same
    /// control-flow shape and assert the observable behaviour: the
    /// canonical banner is emitted (the extracted reason, NOT the noisy
    /// `.context(...)` chain), the post-arm sentinel never executes, and
    /// the function never panics.
    ///
    /// `no_skip` is passed `false` directly (rather than read from the
    /// env) so the test deterministically exercises the skip-default
    /// path regardless of ambient `KTSTR_NO_SKIP_MODE` — the env read is
    /// the macro's concern, not the classifier's (its env-independence is
    /// the whole testability win).
    ///
    /// `#[cfg(panic = "unwind")]`: same rationale as the sibling
    /// `skip_on_contention_walks_context_chain` test —
    /// `catch_unwind` is unusable under `panic = "abort"`.
    #[test]
    #[cfg(panic = "unwind")]
    fn ktstr_test_macro_body_skips_on_resource_contention() {
        use crate::test_support::test_helpers::capture_stderr;
        use crate::test_support::{HostClass, classify_host_error};
        use crate::vmm::host_topology::ResourceContention;
        use std::sync::atomic::{AtomicBool, Ordering};

        let reached_tail = AtomicBool::new(false);
        let result = std::panic::catch_unwind(|| {
            let (_, bytes) = capture_stderr(|| {
                // Simulates the catch-all `Err(e)` arm of the body that
                // `ktstr-macros::ktstr_test` expands into for a
                // non-`expect_err` test: classify via the real shared fn,
                // map Skip -> SKIP banner + return. The trailing store
                // must not execute.
                #[allow(unused_variables, unreachable_code)]
                fn helper(reached: &AtomicBool) {
                    let result: Result<(), anyhow::Error> =
                        Err(anyhow::Error::new(ResourceContention {
                            reason: "all 3 LLC slots busy".into(),
                        })
                        .context("build ktstr_test VM"));
                    match result {
                        Ok(_) => {}
                        Err(e) => match classify_host_error(&e, false) {
                            HostClass::Skip { reason } => {
                                eprintln!("ktstr: SKIP: {reason}");
                                return;
                            }
                            HostClass::Fail { reason } => panic!("ktstr: FAIL: {reason}"),
                            HostClass::NotHostClass => panic!("{e:#}"),
                        },
                    }
                    reached.store(true, Ordering::SeqCst);
                }
                helper(&reached_tail);
            });
            let text = std::str::from_utf8(&bytes).expect("stderr is UTF-8");
            assert_eq!(
                text, "ktstr: SKIP: resource contention: all 3 LLC slots busy\n",
                "expected the canonical SKIP banner with the extracted reason \
                 (no .context(...) chain noise); got: {text:?}",
            );
        });
        assert!(
            result.is_ok(),
            "macro body must NOT panic on ResourceContention",
        );
        assert!(
            !reached_tail.load(Ordering::SeqCst),
            "macro body must early-return after emitting the SKIP banner",
        );
    }
}