ktstr 0.10.0

Test harness for Linux process schedulers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
//! Runtime support for `#[ktstr_test]` integration tests.
//!
//! Provides the registration type, distributed slice, VM launcher,
//! and result evaluation. Includes guest-side profraw flush for
//! coverage-instrumented builds.
//!
//! The entry point for test authors is the [`macro@crate::ktstr_test`]
//! attribute macro; see the user-facing Writing Tests guide shipped
//! with the crate's mdbook for end-to-end examples and the full
//! attribute grammar.
//!
//! # Consumer API
//!
//! Test authors interact primarily with the `#[ktstr_test]` proc
//! macro; programmatic test generation can instead populate
//! [`KtstrTestEntry`] values into the [`KTSTR_TESTS`]
//! `linkme` distributed slice. The remaining items in this module
//! are runtime glue invoked by the macro-generated code and the
//! `ktstr` / `cargo-ktstr` binaries.
//!
//! # Module layout
//!
//! Implementation is split across 17 production submodules
//! re-exported at `test_support::*` for a flat public API: `args`
//! (CLI argument extraction), `dispatch` (ktstr / cargo-ktstr CLI
//! entry points), `entry` (scheduler + test-entry types), `eval`
//! (host-side VM result evaluation), `metrics` (payload stdout →
//! `Metric` list), `model` (LLM backend + model cache), `output`
//! (guest-output and console parsing), `payload` (`Payload` /
//! `MetricCheck` / `Metric` / `Polarity`), `probe` (auto-repro and
//! BPF probe pipeline), `probe_metrics` (host-side BPF map
//! introspection), `profraw` (coverage flush), `runtime` (`pub mod`
//! — neutral home for verbose/shm-size/config-file-parts shared by
//! eval and probe so they don't circularly depend on each other),
//! `shell_descriptor` (wire-format struct shared between the test
//! binary's `--ktstr-shell-test=<NAME>` producer and cargo-ktstr's
//! shell-mode consumer), `sidecar` (per-run JSON records), `staged`
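//! (`pub(crate) mod` — staged-payload writer), `timefmt`
//! (ISO-8601 + run-id helpers), and `topo` (topology override parsing).
//! `model` is compiled only with the `llm` feature; an additional
//! feature-gated `wprof` module (`pub mod`, not counted above) is
//! compiled only with the `wprof` feature.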
//!
//! A `#[cfg(test)] pub(crate) mod test_helpers` exists for cross-file
//! test wiring; it is not part of the production surface.

#[cfg(test)]
use crate::assert::AssertResult;
#[cfg(test)]
use crate::scenario::Ctx;
#[cfg(test)]
use anyhow::Result;

mod args;
mod dispatch;
mod entry;
mod eval;
mod metrics;
#[cfg(feature = "llm")]
mod model;
mod output;
mod payload;
mod probe;
mod probe_metrics;
mod profraw;
pub use eval::post_vm_skip;
pub use profraw::current_binary_is_coverage_instrumented;
pub mod runtime;
mod shell_descriptor;
pub use shell_descriptor::{SchedulerKind, ShellTestDescriptor};
#[cfg(feature = "wprof")]
pub mod wprof;
#[cfg(feature = "wprof")]
pub use wprof::{PERFETTO_TRACE_PACKETS_TAG, WPROF_PB_MIN_BYTES, assert_wprof_pb_shape};
mod sidecar;
pub(crate) mod staged;
#[cfg(test)]
pub(crate) mod test_helpers;
mod timefmt;
mod topo;

/// Function-pointer type for host-side post-VM hooks: borrows a
/// [`crate::vmm::VmResult`] and returns `anyhow::Result<()>`.
///
/// Shared callback signature for the
/// [`KtstrTestEntry::post_vm`](entry::KtstrTestEntry::post_vm) and
/// [`KtstrTestEntry::post_vm_unconditional`](entry::KtstrTestEntry::post_vm_unconditional)
/// host-side hooks. Both fields wrap this same shape in `Option<_>`;
/// the alias collapses the open-coded `fn(&crate::vmm::VmResult)
/// -> anyhow::Result<()>` repetition at the field declarations and
/// at the matching `with_post_vm{,_unconditional}` builder
/// parameters. Future post-VM hooks (e.g. an `expect_auto_repro`
/// artifact-existence checker) plug into the same shape without
/// triplicating the signature.
pub type PostVmCallback = fn(&crate::vmm::VmResult) -> anyhow::Result<()>;

// extract_probe_stack_arg and extract_work_type_arg are reached in
// production via `super::args::` (probe.rs, eval.rs); the re-export here
// preserves the flat-namespace invariant so `test_support::X` resolves
// uniformly across all CLI arg extractors.
#[cfg(feature = "export")]
pub(crate) use args::extract_export_output_arg;
#[allow(unused_imports)]
pub(crate) use args::{
    CellParentCgroupArg, cell_parent_path_is_valid, extract_export_test_arg,
    extract_probe_stack_arg, extract_shell_test_arg, extract_test_fn_arg, extract_topo_arg,
    extract_work_type_arg, parse_cell_parent_cgroup,
};
#[allow(unused_imports)]
pub(crate) use runtime::{append_base_sched_args, content_hash, scratch_dir, sys_rdy_budget_ms};
#[cfg(test)]
pub(crate) use sidecar::enriched_parse_error_message_for_test;
pub(crate) use sidecar::{
    SidecarIoError, SidecarParseError, apply_archive_source_override, collect_sidecars,
    collect_sidecars_with_errors, format_callback_profile, format_kvm_stats, format_verifier_stats,
    is_run_directory, is_sidecar_filename,
};
pub use sidecar::{
    SidecarResult, collect_pool, newest_run_dir, repo_is_dirty, runs_root, sidecar_dir,
};

pub use dispatch::{
    DEFAULT_HOST_CGROUP_PARENT, EXIT_FAIL, EXIT_INCONCLUSIVE, EXIT_PASS, analyze_sidecars,
    is_kernel_unavailable, is_resource_contention, ktstr_main, ktstr_test_early_dispatch,
    resolve_host_cgroup_parent, run_ktstr_test, sanitize_kernel_label,
};
pub use entry::{
    BinaryKindJson, BpfMapWrite, CgroupPath, KTSTR_SCHEDULERS, KTSTR_TESTS, KtstrTestEntry,
    MemSideCache, NumaDistance, NumaNode, Scheduler, SchedulerJson, SchedulerSpec, Sysctl,
    Topology, TopologyConstraints, TopologyConstraintsJson, TopologyJson,
    default_post_vm_periodic_fired, find_scheduler, find_test,
};
pub use eval::{KernelUnavailable, ResolveSource, resolve_scheduler, resolve_test_kernel};
pub(crate) use eval::{record_skip_sidecar, run_ktstr_test_inner};
pub use metrics::{
    MAX_WALK_DEPTH, WALK_TRUNCATION_SENTINEL_NAME, extract_metrics, is_truncation_sentinel_name,
    walk_json_leaves,
};
#[cfg(feature = "llm")]
pub use model::{
    CleanReport, DEFAULT_MODEL, LLM_DEBUG_RESPONSES_ENV, ModelSpec, ModelStatus, OFFLINE_ENV,
    ShaVerdict, clean, ensure, status,
};
pub(crate) use output::extract_panic_message;
pub use payload::{
    Metric, MetricBounds, MetricCheck, MetricHint, MetricSource, MetricStream, OutputFormat,
    Payload, PayloadKind, PayloadMetrics, Polarity,
};
pub(crate) use payload::{RawPayloadOutput, WireMetricHint};
pub(crate) use probe::maybe_dispatch_vm_test;
pub(crate) use probe::{
    finalize_probe_after_unwind, maybe_dispatch_vm_test_with_args,
    maybe_dispatch_vm_test_with_phase_a, propagate_rust_env_from_cmdline, start_probe_phase_a,
};
pub use probe_metrics::{
    MAX_SCAN_INDEX, ThreadLookup, count_indexed_metrics, find_metric, find_metric_u64,
    flat_metrics_dump, has_metric, lookup_thread, snapshot_count, snapshot_worker_allocated,
    thread_count,
};
pub use profraw::target_dir as profraw_target_dir;
pub(crate) use profraw::try_flush_profraw;
pub(crate) use timefmt::now_iso8601;
pub(crate) use topo::{TopoOverride, parse_topo_string};

/// Host capacity triple `(cpus, llcs, max_cpus_per_llc)` used to
/// filter gauntlet topology presets against what the host can actually
/// schedule. Both `dispatch::list_tests_*` (gauntlet variant filter)
/// and `dispatch::list_verifier_cells_all` (verifier sweep filter)
/// share this single source of truth so the two filters never drift.
///
/// Components of the returned tuple:
///
/// * `cpus` — `std::thread::available_parallelism()`, or `1` when
///   that query fails.
/// * `llcs` — number of `llc_groups` reported by
///   `HostTopology::from_sysfs()`, or `1` when the sysfs read fails.
/// * `max_cpus_per_llc` — `HostTopology::max_cores_per_llc()`, or
///   `cpus` when the sysfs read fails (i.e. the fallback models a
///   single LLC that holds every CPU).
pub fn host_capacity() -> (u32, u32, u32) {
    let cpus = match std::thread::available_parallelism() {
        Ok(n) => n.get() as u32,
        Err(_) => 1,
    };
    // The sysfs error is deliberately discarded: an unreadable
    // topology degrades to the single-LLC fallback.
    let (llcs, max_cpus_per_llc) = match crate::vmm::host_topology::HostTopology::from_sysfs() {
        Ok(topo) => (
            topo.llc_groups.len() as u32,
            topo.max_cores_per_llc() as u32,
        ),
        Err(_) => (1, cpus),
    };
    (cpus, llcs, max_cpus_per_llc)
}

// ---------------------------------------------------------------------------
// Test infrastructure requirements
// ---------------------------------------------------------------------------
//
// `require_*` helpers turn missing test infrastructure into a panic with
// an actionable message instead of a silent skip. Use them when a test
// is meaningless without the resource -- a missing kernel, vmlinux,
// scheduler binary, or kernel-symbol resolution means the harness is
// misconfigured, not that the test should pass quietly. CI silently
// passing 100 "tests" that all early-returned because no kernel was
// findable is the failure mode these helpers exist to prevent.
//
// For genuine skips (raw BTF at /sys/kernel/btf/vmlinux, host without
// the architectural dependency the test exercises), call the crate's
// `skip!("reason: {detail}")` macro (see `src/test_macros.rs`). It
// emits the canonical `ktstr: SKIP: ...` line and returns from the
// test.

/// Reports whether this test process was started by a cargo-ktstr
/// orchestration path (`cargo ktstr test`, `cargo ktstr verifier`)
/// rather than by a raw `cargo nextest run` / `cargo test`.
///
/// The marker is [`crate::KTSTR_ORCHESTRATED_ENV`]. Its value is
/// never inspected (cargo-ktstr always sets it to `"1"`, but the
/// marker semantics are presence-only). The result is `false` when
/// the variable is unset or cannot be read as a string.
///
/// Tests that boot real KVM VMs use this to skip when running
/// under raw nextest, where the 7000+-test concurrency starves
/// per-VM resource budgets and produces a misleading "kill set by
/// AP" failure that looks like a real bug. cargo-ktstr's
/// orchestrator constrains the VM-test concurrency so the budgets
/// hold; skipping under raw nextest surfaces the operator-error
/// (wrong runner) without masking real failures during proper
/// orchestrated runs.
///
/// `pub(crate)` — only callers are integration-test helpers under
/// `src/vmm/mod.rs`'s `#[cfg(test)]` mod. The env-var name itself
/// is `pub` via [`crate::KTSTR_ORCHESTRATED_ENV`] for
/// documentation purposes.
#[cfg(test)]
#[allow(dead_code)] // called from x86_64-only tests in vmm/mod.rs
pub(crate) fn cargo_ktstr_orchestrated() -> bool {
    // Any `Err` from the lookup counts as "not orchestrated".
    let marker = std::env::var(crate::KTSTR_ORCHESTRATED_ENV);
    marker.is_ok()
}

/// Shared skip-message body for vmm-boot tests that bail out when
/// the test process was not launched by cargo-ktstr orchestration.
///
/// The full rationale lives inline at the `boot_kernel_with_monitor`
/// site; the sibling sites point back to it through this const so a
/// future wording change lands in one place instead of four. The 4
/// sibling sites previously carried byte-for-byte-identical copies of
/// this string — per the no-mega-no-dupes policy the 3+-site
/// threshold mandates a shared const.
#[cfg(test)]
#[allow(dead_code)] // referenced from x86_64-only vmm/mod.rs tests
pub(crate) const SKIP_NOT_ORCHESTRATED_MSG: &str = concat!(
    "raw nextest fan-out starves KVM resource budgets — see ",
    "boot_kernel_with_monitor for the shared rationale. Run via ",
    "`cargo ktstr test --kernel ../linux`.",
);

#[cfg(test)]
mod cargo_ktstr_orchestrated_tests {
    //! Pins the two-arm presence contract of
    //! [`cargo_ktstr_orchestrated`](super::cargo_ktstr_orchestrated):
    //! [`crate::KTSTR_ORCHESTRATED_ENV`] set → `true`, unset → `false`.
    //!
    //! A detector stuck on `false` would make every vmm test skip
    //! even under cargo-ktstr orchestration (where the env IS set),
    //! turning the VM-boot test suite into an always-green no-op; a
    //! detector stuck on `true` would let raw nextest invocations run
    //! the vmm tests.
    use super::cargo_ktstr_orchestrated;
    use crate::test_support::test_helpers::{EnvVarGuard, lock_env};

    #[test]
    fn cargo_ktstr_orchestrated_true_when_env_set() {
        // Take the env lock before touching the variable; the
        // variable guard is declared second so it is dropped first.
        let _env_lock = lock_env();
        let _marker_set = EnvVarGuard::set(crate::KTSTR_ORCHESTRATED_ENV, "1");
        let orchestrated = cargo_ktstr_orchestrated();
        assert!(
            orchestrated,
            "KTSTR_ORCHESTRATED set → orchestrated check must return true"
        );
    }

    #[test]
    fn cargo_ktstr_orchestrated_false_when_env_unset() {
        let _env_lock = lock_env();
        let _marker_removed = EnvVarGuard::remove(crate::KTSTR_ORCHESTRATED_ENV);
        let orchestrated = cargo_ktstr_orchestrated();
        assert!(
            !orchestrated,
            "KTSTR_ORCHESTRATED absent → orchestrated check must return false \
             (otherwise raw nextest invocations would run vmm tests and \
             starve KVM resource budgets)"
        );
    }
}

/// Returns the path of a kernel image, panicking with an actionable
/// message when none can be resolved.
///
/// Thin wrapper over [`crate::find_kernel`]. Both failure shapes
/// panic: an `Err` (KTSTR_KERNEL points at a path with no kernel
/// image, cache lookup failed) and a successful `Ok(None)` (no kernel
/// discoverable). Tests that boot a VM cannot proceed without a
/// kernel; silently skipping turns CI breakage into a green run.
///
/// # Panics
///
/// Panics when [`crate::find_kernel`] returns `Err` or `Ok(None)`.
#[cfg(test)]
#[allow(dead_code)] // called from x86_64-only tests in vmm/mod.rs
pub(crate) fn require_kernel() -> std::path::PathBuf {
    // First peel off the resolution error, then the "nothing found" case.
    let discovered = crate::find_kernel()
        .unwrap_or_else(|e| panic!("ktstr_test: kernel resolution failed: {e:#}"));
    discovered.unwrap_or_else(|| {
        panic!(
            "ktstr_test: test requires a kernel but none was found. {}",
            crate::KTSTR_KERNEL_HINT
        )
    })
}

/// Returns the vmlinux path that accompanies a kernel image, or
/// panics when there is none.
///
/// `kernel_path` is the value returned by [`require_kernel`]. The
/// vmlinux is required for symbol address lookup, BTF, and probe
/// source resolution -- a kernel image without vmlinux means the
/// cache entry is corrupt or the build was incomplete, which is an
/// infrastructure failure rather than a legitimate skip.
///
/// # Panics
///
/// Panics when `crate::vmm::find_vmlinux` yields `None` for
/// `kernel_path`.
#[cfg(test)]
#[allow(dead_code)] // called from x86_64-only tests in vmm/mod.rs
pub(crate) fn require_vmlinux(kernel_path: &std::path::Path) -> std::path::PathBuf {
    match crate::vmm::find_vmlinux(kernel_path) {
        Some(vmlinux) => vmlinux,
        None => panic!(
            "ktstr_test: no vmlinux found alongside {}. The cache entry or \
             kernel build is incomplete. Rebuild with `cargo ktstr kernel \
             build --force`; the specified kernel must include `vmlinux` \
             alongside the boot image. {}",
            kernel_path.display(),
            crate::KTSTR_KERNEL_HINT,
        ),
    }
}

/// Builds the workspace package `package` and returns the path of
/// its binary, or panics.
///
/// Thin wrapper over [`crate::build_and_find_binary`]. A failed build
/// or missing artifact for a required scheduler binary (e.g.
/// `scx-ktstr`) is an infrastructure failure -- the workspace is
/// broken, not the test.
///
/// # Panics
///
/// Panics when [`crate::build_and_find_binary`] returns `Err`; the
/// message names the package and the command to reproduce the build.
#[cfg(test)]
pub(crate) fn require_binary(package: &str) -> std::path::PathBuf {
    match crate::build_and_find_binary(package) {
        Ok(binary) => binary,
        Err(e) => panic!(
            "ktstr_test: build of `{package}` failed: {e:#}. \
             Run `cargo build -p {package}` to reproduce and diagnose."
        ),
    }
}

/// Loads [`crate::monitor::symbols::KernelSymbols`] from the vmlinux
/// at `vmlinux_path`, or panics.
///
/// The symbol table is required for any host-side memory
/// introspection; an unparseable vmlinux is an infrastructure failure.
///
/// # Panics
///
/// Panics when `KernelSymbols::from_vmlinux` returns `Err`.
#[cfg(test)]
#[allow(dead_code)] // called from x86_64-only tests in vmm/mod.rs
pub(crate) fn require_kernel_symbols(
    vmlinux_path: &std::path::Path,
) -> crate::monitor::symbols::KernelSymbols {
    match crate::monitor::symbols::KernelSymbols::from_vmlinux(vmlinux_path) {
        Ok(symbols) => symbols,
        Err(e) => panic!(
            "ktstr_test: kernel symbol resolution from {} failed: {e:#}",
            vmlinux_path.display(),
        ),
    }
}

/// Loads [`crate::monitor::btf_offsets::KernelOffsets`] from the
/// vmlinux at `vmlinux_path`, or panics.
///
/// BTF resolution is required for any host-side kernel struct
/// introspection; a vmlinux whose BTF fails to parse is an
/// infrastructure failure, not a test-skip condition.
///
/// # Panics
///
/// Panics when `KernelOffsets::from_vmlinux` returns `Err`; the
/// message points at `CONFIG_DEBUG_INFO_BTF=y` and a forced rebuild.
#[cfg(test)]
pub(crate) fn require_kernel_offsets(
    vmlinux_path: &std::path::Path,
) -> crate::monitor::btf_offsets::KernelOffsets {
    match crate::monitor::btf_offsets::KernelOffsets::from_vmlinux(vmlinux_path) {
        Ok(offsets) => offsets,
        Err(e) => panic!(
            "ktstr_test: kernel BTF resolution from {} failed: {e:#}. \
             The kernel must be built with CONFIG_DEBUG_INFO_BTF=y; \
             rebuild with `cargo ktstr kernel build --force` if the \
             cache entry was produced without BTF.",
            vmlinux_path.display(),
        ),
    }
}

/// Loads [`crate::monitor::btf_offsets::BpfMapOffsets`] from the
/// vmlinux at `vmlinux_path`, or panics.
///
/// A vmlinux whose BTF fails to yield BPF map offsets is an
/// infrastructure failure, not a test-skip condition.
///
/// # Panics
///
/// Panics when `BpfMapOffsets::from_vmlinux` returns `Err`; the
/// message points at `CONFIG_DEBUG_INFO_BTF=y` and a forced rebuild.
#[cfg(test)]
pub(crate) fn require_bpf_map_offsets(
    vmlinux_path: &std::path::Path,
) -> crate::monitor::btf_offsets::BpfMapOffsets {
    match crate::monitor::btf_offsets::BpfMapOffsets::from_vmlinux(vmlinux_path) {
        Ok(offsets) => offsets,
        Err(e) => panic!(
            "ktstr_test: BpfMapOffsets resolution from {} failed: {e:#}. \
             The kernel must be built with CONFIG_DEBUG_INFO_BTF=y; \
             rebuild with `cargo ktstr kernel build --force` if the \
             cache entry was produced without BTF.",
            vmlinux_path.display(),
        ),
    }
}

/// Loads [`crate::monitor::btf_offsets::BpfProgOffsets`] from the
/// vmlinux at `vmlinux_path`, or panics.
///
/// A vmlinux whose BTF fails to yield BPF program offsets is an
/// infrastructure failure, not a test-skip condition.
///
/// # Panics
///
/// Panics when `BpfProgOffsets::from_vmlinux` returns `Err`; the
/// message points at `CONFIG_DEBUG_INFO_BTF=y` and a forced rebuild.
#[cfg(test)]
pub(crate) fn require_bpf_prog_offsets(
    vmlinux_path: &std::path::Path,
) -> crate::monitor::btf_offsets::BpfProgOffsets {
    match crate::monitor::btf_offsets::BpfProgOffsets::from_vmlinux(vmlinux_path) {
        Ok(offsets) => offsets,
        Err(e) => panic!(
            "ktstr_test: BpfProgOffsets resolution from {} failed: {e:#}. \
             The kernel must be built with CONFIG_DEBUG_INFO_BTF=y; \
             rebuild with `cargo ktstr kernel build --force` if the \
             cache entry was produced without BTF.",
            vmlinux_path.display(),
        ),
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for `find_test` against the `KTSTR_TESTS`
    //! distributed slice, using one dummy entry registered below.
    use super::*;
    use linkme::distributed_slice;

    /// Name under which the dummy entry is registered and looked up.
    const DUMMY_NAME: &str = "__unit_test_dummy__";

    /// Always-passing test body backing the dummy entry.
    fn __ktstr_inner_unit_test_dummy(_ctx: &Ctx) -> Result<AssertResult> {
        Ok(AssertResult::pass())
    }

    // Every field other than `name` and `func` comes from
    // `KtstrTestEntry::DEFAULT`.
    #[distributed_slice(KTSTR_TESTS)]
    static __KTSTR_ENTRY_UNIT_TEST_DUMMY: KtstrTestEntry = KtstrTestEntry {
        name: DUMMY_NAME,
        func: __ktstr_inner_unit_test_dummy,
        ..KtstrTestEntry::DEFAULT
    };

    #[test]
    fn find_test_registered_entry() {
        let found = find_test(DUMMY_NAME).expect("registered entry should be found");
        assert_eq!(found.name, DUMMY_NAME);
        // The topology is inherited from `KtstrTestEntry::DEFAULT`.
        assert_eq!(found.topology.llcs, 1);
        assert_eq!(found.topology.cores_per_llc, 2);
    }

    #[test]
    fn find_test_nonexistent() {
        let missing = find_test("__nonexistent_test_xyz__");
        assert!(missing.is_none());
    }

    #[test]
    fn find_test_from_distributed_slice() {
        // The slice must hold at least the dummy entry registered above.
        assert!(!KTSTR_TESTS.is_empty());
    }
}