1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/// Rust init (PID 1) for the VM guest.
///
/// When the test binary is
/// packed as `/init` in the initramfs, `ktstr_guest_init()` is called
/// from the ctor when PID 1 is detected.
/// It never returns — it mounts filesystems, then either dispatches
/// a test (start scheduler, run test, reboot) or drops into an
/// interactive shell (when `KTSTR_MODE=shell` is on the kernel
/// cmdline).
pub use fs;
pub use ;
pub use OpenOptionsExt;
pub use ;
pub use CommandExt;
pub use Path;
pub use ;
pub use Arc;
pub use OnceLock;
pub use ;
pub use crateLatch;
pub use ;
pub use ;
pub use openpty;
pub use ;
pub use ;
/// Device node of the COM1 serial port, which carries the kernel console
/// and trace output.
const COM1: &str = "/dev/ttyS0";
/// Device node of the COM2 serial port, which carries sentinel and
/// diagnostic output.
const COM2: &str = "/dev/ttyS1";
/// Device node of the virtio console. Shell I/O goes through it when the
/// device is available.
const HVC0: &str = "/dev/hvc0";
/// Per-event tracefs switch for the `sched_ext_dump` tracepoint: write
/// `"1"` to activate the event and `"0"` to deactivate it.
const TRACE_SCHED_EXT_DUMP_ENABLE: &str =
    "/sys/kernel/tracing/events/sched_ext/sched_ext_dump/enable";
/// Global tracefs recording switch.
///
/// Writing `"0"` stops new events from being recorded into the ring
/// buffer (`ring_buffer_record_off`). Events that are already buffered
/// stay there, so the userspace trace_pipe reader still has to drain
/// them before reboot. Turning the producer side off first is what lets
/// that drain window end: with no new events arriving, poll eventually
/// returns 0 and the drain_deadline elapses.
const TRACE_TRACING_ON: &str = "/sys/kernel/tracing/tracing_on";
/// Streaming tracefs endpoint for the active trace. The trace_pipe
/// reader opens it once per boot and forwards every line to COM1.
const TRACE_PIPE: &str = "/sys/kernel/tracing/trace_pipe";
/// sysfs attribute that reports the name of the active sched_ext root
/// scheduler.
///
/// While no scheduler is registered the attribute is empty or absent.
/// Once registration has completed it holds the scheduler name followed
/// by a trailing newline.
///
/// Kernel-side owner: `kernel/sched/ext.c`, which creates the attribute
/// via `kobject_init_and_add` under the `sched_ext` kset after
/// `sch->ops.name` is set.
const SYSFS_SCHED_EXT_ROOT_OPS: &str = "/sys/kernel/sched_ext/root/ops";
/// Side channel for the scheduler PID published by [`start_scheduler`]
/// once `Command::spawn` returns. The guest test-dispatch path
/// (e.g. [`crate::test_support`] consumers that need the scheduler's
/// pid for cgroup attach / kill / probe) reads it via [`sched_pid`].
///
/// Replaces a previous `std::env::set_var("SCHED_PID", ...)` write.
/// Mutating glibc's global `__environ` array while another thread is
/// live (the Phase A probe thread spawned in `start_probe_phase_a`
/// runs concurrently with `start_scheduler`) is documented UB on
/// Linux — see
/// [`crate::test_support::propagate_rust_env_from_cmdline`] for the
/// mirroring rationale. An atomic side channel is the
/// data-race-free alternative.
///
/// Sentinel: `0` means "no scheduler started". `pid_t` is a signed
/// integer in glibc; the kernel never returns `0` from `fork(2)` to
/// the parent, so `0` is a safe "unset" marker for the producer to
/// initialise with and the consumer to filter on.
///
/// The static therefore starts out holding that `0` sentinel, so a
/// reader that runs before any scheduler is spawned sees "unset".
static SCHED_PID: AtomicI32 = AtomicI32::new(0);
/// Upper limit, in bytes, on a single `MsgType::Stdout` /
/// `MsgType::Stderr` TLV chunk emitted by the pipe forwarder threads.
///
/// The value is 4 KiB, which matches a page-size pipe read. It sits well
/// under the host-side per-frame cap
/// [`crate::vmm::bulk::MAX_BULK_FRAME_PAYLOAD`], so a chunk fits
/// comfortably in one frame even with the 16-byte header.
const STDIO_CHUNK_BYTES: usize = 4096;
/// Upper bound on how long [`reap_child_bounded`] lets teardown wait for a
/// SIGKILL'd scheduler to exit.
///
/// When the bound elapses, teardown gives up and lets the VM reboot reap
/// the process instead. A SIGKILL'd scheduler normally exits <<1s —
/// post-crash bypass keeps it CFS-schedulable, and it is NOT held in the
/// kernel scx disable: its `struct_ops` detach (`bpf_scx_unreg`) only
/// `kthread_flush_work`s the `scx_root_disable` the crash irq_work already
/// kicked, which is ms-scale (bypass + per-task reclass + one
/// `synchronize_rcu` + the BPF `ops.exit`, all fast for these schedulers).
///
/// The bound is a defensive cap — only a pathological multi-second
/// `ops.exit` or RCU stall could approach it — so teardown caps the wait
/// rather than risk adding such a stall to every crashed-scheduler
/// teardown.
// NOTE(review): the initializer below is a bare `from_secs` with no
// `Duration::` path and no argument — it looks truncated. Confirm the
// intended number of seconds against the original file.
const SCHED_REAP_TIMEOUT: Duration = from_secs;
/// Grace period granted to a CRASHED scheduler before the hard SIGKILL.
///
/// The grace lets the scheduler finish flushing its userspace diagnostics
/// to stderr and exit on its own, so the kill doesn't truncate that output
/// (`dump_sched_output` reads it). It is bounded so a userspace hang can't
/// wedge teardown, and the wait returns early the moment the scheduler
/// exits. Only applied on a crash (dump_started).
///
/// Sized for the USERSPACE flush, not the kernel dump: the kernel's scx
/// exit dump is bounded and truncated in-kernel, but the scheduler's
/// userspace flush of it to stderr (plus libbpf teardown) can run past a
/// shorter window, and the SIGKILL then truncates the tail of THAT output.
// NOTE(review): the initializer below is a bare `from_millis` with no
// `Duration::` path and no argument — it looks truncated. Confirm the
// intended number of milliseconds against the original file.
const SCHED_KILL_GRACE: Duration = from_millis;
/// Upper bound on how long teardown waits for the exit dump's end-marker
/// to be forwarded to COM1 before disabling the `sched_ext_dump`
/// tracepoint.
///
/// The kernel builds+emits the whole dump synchronously at crash time
/// (the `scx_disable_irq_workfn` irq path), so a SMALL dump's marker is
/// forwarded well before teardown and the wait returns at once. A LARGE
/// dump (many runnable tasks → scx_dump_state(dump_all_tasks) builds a
/// per-task dump) can take tens of seconds to forward byte-by-byte over
/// the slow PIO COM1 UART; this bound caps that so a big crash dump
/// cannot wedge teardown.
///
/// When the bound is hit the ftrace copy is truncated; the authoritative
/// full dump is the scheduler stderr log (`dump_sched_output`) over the
/// fast bulk port.
// NOTE(review): the initializer below is a bare `from_secs` with no
// `Duration::` path and no argument — it looks truncated. Confirm the
// intended number of seconds against the original file.
const SCX_DUMP_CAPTURE_TIMEOUT: Duration = from_secs;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;