ktstr 0.17.0

Test harness for Linux process schedulers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
//! SCX walker capture builder for the failure-dump freeze path.
//!
//! Owns the per-CPU rq KVA/PA arrays the
//! [`crate::monitor::dump::ScxWalkerCapture`] borrows from. The
//! freeze coordinator calls [`build`] after the vCPU rendezvous;
//! when every prerequisite resolves, the returned
//! [`ScxWalkerOwned`] holds the data the coordinator stack-borrows
//! into the dump's [`crate::monitor::dump::ScxWalkerCapture`].
//!
//! The walker passes themselves (per-CPU rq->scx scalars +
//! runnable_list, per-CPU local DSQ, per-CPU bypass DSQ, per-NUMA-node
//! global DSQ, user dsq_hash, top-level scx_sched scalars) live in
//! [`crate::monitor::scx_walker`]. This builder only produces the owned
//! data the borrow-only capture needs; `dump_state` invokes the
//! walker functions on each capture and surfaces partial-degradation
//! (a single sub-group `None` blinds only that pass) via
//! `ScxWalkerOffsets::missing_groups()`.
//!
//! See [`crate::monitor::scx_walker`] for the kernel-source-grounded
//! walker semantics this capture feeds.

use crate::monitor::bpf_map::GuestMemMapAccessorOwned;
use crate::monitor::btf_offsets::ScxWalkerOffsets;
use crate::monitor::symbols::KernelSymbols;

/// Owned data the freeze coordinator stack-allocates to back a
/// [`crate::monitor::dump::ScxWalkerCapture`]. Holds every value the
/// borrow-only capture needs that does not already live on a
/// longer-lived owner (`GuestKernel`, `ScxWalkerOffsets`).
///
/// Instances are produced by [`build`]: either the full set (via
/// `compute_owned`) or a degraded set with empty rq arrays when
/// `per_cpu_offsets` was not available.
pub(crate) struct ScxWalkerOwned {
    /// Per-CPU rq kernel virtual addresses. Same shape the
    /// runnable_at scanner uses; index = CPU id. `compute_owned`
    /// pushes one entry per element of `per_cpu_offsets`, in order.
    /// Empty when `per_cpu_offsets` was unavailable at build time
    /// (early-boot freeze before secondary CPUs populated
    /// `__per_cpu_offset[]`) — downstream rq->scx walk and per-CPU
    /// local DSQ pass yield no entries in that case, but
    /// `scx_tasks_kva` / `scx_root_kva` still drive their own walks.
    pub(crate) rq_kvas: Vec<u64>,
    /// Per-CPU rq guest physical addresses (parallel to `rq_kvas`).
    /// `compute_owned` derives each `rq_kvas[i]` as
    /// `rq_pas[i].wrapping_add(page_offset)`, so the two vectors are
    /// always the same length and index-aligned. Empty in the same
    /// degraded-capture case described on [`Self::rq_kvas`].
    pub(crate) rq_pas: Vec<u64>,
    /// `scx_root` symbol KVA — the walker reads `*scx_root` to find
    /// the active `scx_sched`. Zero when the vmlinux had no
    /// `scx_root` symbol (kernel without sched_ext; [`build`] maps an
    /// absent symbol to `0`); the per-CPU rq->scx walks still run,
    /// but the DSQ-via-sched and scx_sched passes surface no state
    /// via the walker's own NULL checks.
    pub(crate) scx_root_kva: u64,
    /// `scx_tasks` symbol KVA — the kernel global LIST_HEAD that
    /// every scx-managed task is linked into via
    /// `task_struct.scx.tasks_node`. `0` when the vmlinux had no
    /// `scx_tasks` symbol (kernel without sched_ext or stripped
    /// vmlinux; [`build`] maps an absent symbol to `0`). The
    /// task-enrichment capture walks this list as the primary task
    /// source — it survives the per-rq runnable_list drain that
    /// scheduler teardown triggers
    /// (`scx_bypass`, kernel/sched/ext.c:5304-5404).
    pub(crate) scx_tasks_kva: u64,
    /// Runtime virtual KASLR slide, stored verbatim from the
    /// `kaslr_offset` argument of [`build`]. `scx_tasks_kva` is the
    /// LINK-TIME symbol value; the global list's `.next` pointers are
    /// runtime addresses (link + this slide), so the scx_tasks walk
    /// slides the head before its terminator comparison
    /// ([`crate::monitor::symbols::slid_kernel_kva`]). `0` when KASLR is
    /// off / not yet derived. Same slide the per-CPU `rq_kvas` already
    /// carry via [`crate::monitor::symbols::per_cpu_kva`].
    pub(crate) kaslr_offset: u64,
}

/// Assemble the [`ScxWalkerOwned`] data set that backs an SCX walker
/// capture, provided the hard prerequisites are present.
///
/// Two inputs are hard prerequisites; if either is `None` the
/// function returns `None`:
/// - `offsets`: no BTF sub-group resolved at all (the freeze coord's
///   `dump_scx_walker_offsets`); without it `dump_state` cannot
///   construct a [`crate::monitor::dump::ScxWalkerCapture`].
/// - `symbols`: no [`KernelSymbols`] (vmlinux ELF parse failed); the
///   `runqueues` per-CPU template KVA and the `scx_root`/`scx_tasks`
///   symbol KVAs are unavailable.
///
/// `per_cpu_offsets` is deliberately a SOFT prerequisite. When it is
/// `None` (secondary CPUs still booting so `__per_cpu_offset[]` is
/// not fully populated, or the accessor was not constructed yet) the
/// result is a degraded `ScxWalkerOwned`: `rq_kvas`/`rq_pas` are
/// empty while `scx_root_kva` and `scx_tasks_kva` are still filled
/// in. The global `scx_tasks` walk can therefore still surface
/// enrichments; the per-CPU rq->scx walk and per-CPU local DSQ pass
/// just have nothing to iterate. If per_cpu_offsets gated the whole
/// capture, an early-boot freeze would lose ALL task-enrichment
/// signal.
///
/// A missing `scx_root` symbol does not block the capture either —
/// it becomes a `0` `scx_root_kva`. The per-CPU rq->scx walks remain
/// useful on a kernel without sched_ext, and the DSQ-via-sched and
/// scx_sched passes degrade through the walker's own NULL checks (a
/// `0` `scx_root_kva` translates to an out-of-bounds PA that
/// `GuestMem::read_u64` returns zero for, then
/// `read_scx_sched_state` returns `None` on `sched_kva == 0`). A
/// missing `scx_tasks` symbol is handled the same way (`0`).
///
/// Likewise, individual `None` sub-groups inside
/// [`ScxWalkerOffsets`] never block the capture: each walker pass
/// gates on the sub-groups it needs and skips itself when they are
/// missing. A 6.12 kernel that lacks `scx_sched_pcpu` (the per-CPU
/// bypass DSQ struct didn't land until v6.18) still yields a useful
/// capture — only the bypass-DSQ pass is blinded; the per-CPU local
/// DSQ pass and the rq->scx scalar capture run normally.
///
/// A `None` return propagates to
/// [`crate::monitor::dump::DumpContext::scx_walker_capture`]
/// being `None`, which leaves `rq_scx_states` / `dsq_states` /
/// `scx_sched_state` empty in the report and stamps
/// `scx_walker_unavailable` with
/// [`crate::monitor::dump::REASON_NO_SCX_WALKER`].
///
/// Parameters:
/// - `owned_accessor` carries the
///   [`crate::monitor::guest::GuestKernel`] the walker reads through;
///   this function only asks it for `page_offset`.
/// - `offsets` carries the BTF sub-group offsets the walker needs;
///   only its presence is checked here.
/// - `symbols` supplies the `runqueues` per-CPU template KVA and the
///   optional `scx_root` / `scx_tasks` symbol KVAs.
/// - `per_cpu_offsets` is the `__per_cpu_offset[]` array; each entry
///   yields one rq KVA/PA pair, and the walker also uses it to
///   address each CPU's `scx_sched_pcpu.bypass_dsq`.
/// - `kaslr_offset` is the runtime virtual KASLR slide; it is applied
///   to the per-CPU rq addresses and stored unchanged in the result.
// NOTE(review): the reason for `allow(dead_code)` is not visible in
// this file — presumably some build configuration has no caller;
// confirm before removing.
#[allow(dead_code)]
pub(crate) fn build(
    owned_accessor: &GuestMemMapAccessorOwned,
    offsets: Option<&ScxWalkerOffsets>,
    symbols: Option<&KernelSymbols>,
    per_cpu_offsets: Option<&[u64]>,
    kaslr_offset: u64,
) -> Option<ScxWalkerOwned> {
    // Both hard prerequisites bail out with `None`, which the dump
    // reports as `REASON_NO_SCX_WALKER`. Only the presence of the
    // offsets matters here; their contents are consumed by the walker.
    let _resolved_offsets = offsets?;
    let kernel_symbols = symbols?;

    let page_offset = owned_accessor.guest_kernel().page_offset();

    // Symbol KVAs come straight from symbol metadata, so they are
    // available regardless of whether per_cpu_offsets has been cached
    // (secondary CPUs may still be booting). An absent symbol becomes
    // 0, the graceful-degradation value the walker functions expect.
    let scx_root_kva = kernel_symbols.scx_root.unwrap_or(0);
    let scx_tasks_kva = kernel_symbols.scx_tasks.unwrap_or(0);

    // Full capture: per-CPU offsets are known, so derive the rq
    // KVA/PA arrays alongside the symbol KVAs.
    if let Some(cpu_offsets) = per_cpu_offsets {
        return Some(compute_owned(
            page_offset,
            kernel_symbols.runqueues,
            scx_root_kva,
            scx_tasks_kva,
            cpu_offsets,
            kaslr_offset,
        ));
    }

    // Degraded capture: without per-CPU offsets the rq arrays stay
    // empty, so the rq->scx walk and per-CPU local DSQ pass iterate
    // over nothing. `walk_scx_tasks_global` and
    // `read_scx_sched_state` do not rely on per-CPU addressing and
    // still get their symbol KVAs.
    tracing::debug!(
        "capture_scx::build: per_cpu_offsets absent — degraded \
         capture with no rq arrays; global scx_tasks walk and \
         *scx_root read still active",
    );
    Some(ScxWalkerOwned {
        rq_kvas: Vec::new(),
        rq_pas: Vec::new(),
        scx_root_kva,
        scx_tasks_kva,
        kaslr_offset,
    })
}

/// Deterministic core of [`build`]: turns already-resolved scalars
/// and the per-CPU offset table into a [`ScxWalkerOwned`].
///
/// Keeping the per-CPU address math separate from the
/// `&GuestMemMapAccessorOwned`-touching wrapper lets it be exercised
/// in tests without a real owned accessor (the type has no
/// `new_for_test` constructor).
///
/// `runqueues_kva` is the link-time KVA of the per-CPU `runqueues`
/// template — readelf on a real vmlinux shows
/// `runqueues.st_value = 0xffffffff83bcb280` against `.data..percpu`
/// `sh_addr = 0xffffffff83ba0000` (NON-zero), so `st_value` IS a
/// KVA. For CPU `n` the rq KVA is
/// `runqueues_kva + per_cpu_offsets[n]` (plus `virt_kaslr_offset`
/// under KASLR), and the PA is that KVA minus `page_offset` (direct
/// mapping).
///
/// The returned `rq_pas` / `rq_kvas` have one entry per element of
/// `per_cpu_offsets`, in the same order; `scx_root_kva`,
/// `scx_tasks_kva` and `kaslr_offset` are stored unchanged.
fn compute_owned(
    page_offset: u64,
    runqueues_kva: u64,
    scx_root_kva: u64,
    scx_tasks_kva: u64,
    per_cpu_offsets: &[u64],
    kaslr_offset: u64,
) -> ScxWalkerOwned {
    // One traversal of per_cpu_offsets yields both vectors, instead
    // of running compute_rq_pas and then mapping its output a second
    // time to recover the KVAs. That avoids an extra Vec and an extra
    // full iteration per freeze, which matters for large nr_cpus
    // because every vCPU is paused while this runs.
    //
    // Production callers in `crate::vmm::freeze_coord` pass the
    // virt-KASLR slide in through the shared `kern_virt_kaslr` Arc
    // (populated by either the BSP MSR_LSTAR derive on x86_64 or the
    // guest-channel KERN_ADDRS `_text` subtraction on both arches).
    // A 0 slide collapses to the no-slide formula, matching
    // KASLR-off / nokaslr-karg semantics. The canonical formula
    // shared by every per-CPU walker lives in
    // [`crate::monitor::symbols::per_cpu_kva`].
    let (rq_pas, rq_kvas): (Vec<u64>, Vec<u64>) = per_cpu_offsets
        .iter()
        .map(|&cpu_offset| {
            let slid_kva =
                crate::monitor::symbols::per_cpu_kva(runqueues_kva, kaslr_offset, cpu_offset);
            let rq_pa = crate::monitor::symbols::kva_to_pa(slid_kva, page_offset);
            // The stored KVA is rebuilt from the PA so each pair
            // satisfies `rq_kva == rq_pa + page_offset` by construction.
            (rq_pa, rq_pa.wrapping_add(page_offset))
        })
        .unzip();

    ScxWalkerOwned {
        rq_kvas,
        rq_pas,
        scx_root_kva,
        scx_tasks_kva,
        kaslr_offset,
    }
}

// Unit tests for `build` (hard-prerequisite and degraded paths) and
// for the pure `compute_owned` address math.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::monitor::guest::GuestKernel;
    use crate::monitor::reader::GuestMem;
    use crate::monitor::symbols::DEFAULT_PAGE_OFFSET;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// All sub-groups `None`. `build`'s degraded path only needs
    /// `offsets` to be `Some(&_)` (it binds `let _offs = offsets?`)
    /// — the sub-group contents are never read on the
    /// `per_cpu_offsets == None` arm, so an all-`None` value drives
    /// the early-return without resolving real BTF.
    fn empty_scx_walker_offsets() -> ScxWalkerOffsets {
        ScxWalkerOffsets {
            rq: None,
            scx_rq: None,
            task: None,
            see: None,
            dsq_lnode: None,
            dsq: None,
            sched: None,
            sched_pnode: None,
            sched_pcpu: None,
            rht: None,
        }
    }

    /// `KernelSymbols` with `scx_root` / `scx_tasks` set to the given
    /// values and every other field absent (or zero for the
    /// non-optional ones). `build` reads only `scx_root` and
    /// `scx_tasks` from the symbol set (plus `runqueues` on the
    /// non-degraded path), so this is sufficient to drive the
    /// degraded `build()` path.
    fn symbols_with_scx(scx_root: Option<u64>, scx_tasks: Option<u64>) -> KernelSymbols {
        KernelSymbols {
            runqueues: 0,
            per_cpu_offset: 0,
            page_offset_base_kva: None,
            phys_base_kva: None,
            scx_root,
            scx_tasks,
            init_top_pgt: None,
            pgtable_l5_enabled: None,
            prog_idr: None,
            scx_watchdog_timeout: None,
            scx_watchdog_timestamp: None,
            scx_watchdog_interval: None,
            jiffies_64: None,
            kernel_cpustat: None,
            kstat: None,
            tick_cpu_sched: None,
            node_data: None,
            entry_syscall_64_kva: None,
            kernel_text_kva: None,
        }
    }

    /// Build an owned accessor whose `GuestKernel` reports the
    /// default test `PAGE_OFFSET`. The only thing `build` asks the
    /// accessor for is `guest_kernel().page_offset()`; that value is
    /// read up front and goes unused on the degraded arm, so an
    /// empty synthetic memory + symbol set suffices.
    fn test_accessor() -> GuestMemMapAccessorOwned {
        // SAFETY: `Box::leak` gives a `'static` backing buffer, so the
        // pointer GuestMem holds stays valid for the returned
        // accessor's whole lifetime. The degraded `build()` arm never
        // dereferences guest memory through this accessor (it reads
        // only the stored `page_offset`), so the bytes are never read.
        let buf = Box::leak(Box::new([0u8; 64]));
        let mem = unsafe { GuestMem::new(buf.as_mut_ptr(), buf.len() as u64) };
        let kernel =
            GuestKernel::new_for_test(Arc::new(mem), HashMap::new(), DEFAULT_PAGE_OFFSET, 0, false);
        GuestMemMapAccessorOwned::new_for_test(kernel)
    }

    /// Happy path: every prereq resolves. The pure builder produces
    /// per-CPU rq KVA/PA pairs that match
    /// `runqueues_kva + per_cpu_offset[cpu]` and passes `scx_root_kva`
    /// / `scx_tasks_kva` through unchanged. Mirrors the runnable
    /// scanner's address derivation in `crate::vmm::freeze_coord` so
    /// both code paths agree on the per-CPU rq base.
    #[test]
    fn compute_owned_happy_path() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let runqueues_kva: u64 = 0x20_0000;
        let per_cpu = [0x10_0000u64, 0x14_0000u64, 0x18_0000u64];
        let scx_root_kva = 0xffff_ffff_8230_0000;
        let scx_tasks_kva = 0xffff_ffff_8240_0000;
        let owned = compute_owned(
            page_offset,
            runqueues_kva,
            scx_root_kva,
            scx_tasks_kva,
            &per_cpu,
            0,
        );

        assert_eq!(owned.scx_root_kva, scx_root_kva);
        assert_eq!(owned.scx_tasks_kva, scx_tasks_kva);
        assert_eq!(owned.rq_kvas.len(), 3);
        assert_eq!(owned.rq_pas.len(), 3);
        // Cross-check against the same `compute_rq_pas` the runnable
        // scanner uses — any drift between the two would surface
        // here as different per-CPU PAs.
        let expected_pas =
            crate::monitor::symbols::compute_rq_pas(runqueues_kva, &per_cpu, page_offset, 0);
        assert_eq!(owned.rq_pas, expected_pas);
        // Every rq_kva is the recovered KVA for the same CPU's PA.
        for (cpu, expected_pa) in expected_pas.iter().enumerate() {
            assert_eq!(owned.rq_kvas[cpu], expected_pa.wrapping_add(page_offset),);
        }
    }

    /// scx_root absent (kernel without sched_ext): builder still
    /// produces the per-CPU rq arrays. The walker's own NULL checks
    /// on the zero `scx_root_kva` degrade the DSQ / scx_sched passes
    /// to "no state" without invalidating the rq->scx pass.
    #[test]
    fn compute_owned_partial_scx_root_zero() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let runqueues_kva: u64 = 0x20_0000;
        let per_cpu = [0x10_0000u64, 0x14_0000u64];
        let owned = compute_owned(page_offset, runqueues_kva, 0, 0, &per_cpu, 0);

        assert_eq!(owned.scx_root_kva, 0);
        assert_eq!(owned.scx_tasks_kva, 0);
        assert_eq!(owned.rq_kvas.len(), 2);
        assert_eq!(owned.rq_pas.len(), 2);
        // Same cross-check as the happy path: PAs must match the
        // shared `compute_rq_pas` helper.
        let expected_pas =
            crate::monitor::symbols::compute_rq_pas(runqueues_kva, &per_cpu, page_offset, 0);
        assert_eq!(owned.rq_pas, expected_pas);
    }

    /// scx_tasks absent (stripped vmlinux) but scx_root present:
    /// the global-list walk degrades to "no tasks" without affecting
    /// the rq->scx walk or the DSQ/scx_sched passes that key off
    /// scx_root.
    #[test]
    fn compute_owned_partial_scx_tasks_zero() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let runqueues_kva: u64 = 0x20_0000;
        let per_cpu = [0x10_0000u64];
        let scx_root_kva = 0xffff_ffff_8230_0000;
        let owned = compute_owned(page_offset, runqueues_kva, scx_root_kva, 0, &per_cpu, 0);

        assert_eq!(owned.scx_root_kva, scx_root_kva);
        assert_eq!(owned.scx_tasks_kva, 0);
        assert_eq!(owned.rq_kvas.len(), 1);
        assert_eq!(owned.rq_pas.len(), 1);
    }

    /// Degraded build (per_cpu_offsets absent, secondary CPUs still
    /// booting): drives the real [`build`] with `per_cpu_offsets ==
    /// None` and asserts it returns `Some` with empty rq arrays but
    /// the passed-through `scx_root_kva` / `scx_tasks_kva`. This is
    /// the early-return arm in `build()` (the `per_cpu_offsets`
    /// match's `None` branch) — without driving `build` directly a
    /// struct literal would only re-assert its own initializers. The
    /// global-task walk and *scx_root sched-state read still surface
    /// signal even when per-CPU addressing isn't ready; the
    /// downstream scx_walker functions iterate over
    /// `rq_kvas`/`rq_pas` (yielding nothing when empty) but consume
    /// `scx_tasks_kva` and `scx_root_kva` independently.
    #[test]
    fn degraded_build_shape_empty_rq_with_symbol_kvas() {
        let scx_root_kva = 0xffff_ffff_8230_0000;
        let scx_tasks_kva = 0xffff_ffff_8240_0000;
        let accessor = test_accessor();
        let offsets = empty_scx_walker_offsets();
        let symbols = symbols_with_scx(Some(scx_root_kva), Some(scx_tasks_kva));

        // per_cpu_offsets = None drives the degraded early-return.
        let owned = build(&accessor, Some(&offsets), Some(&symbols), None, 0)
            .expect("offsets + symbols present → degraded build returns Some");

        // The early-return arm sets empty rq arrays but passes the
        // symbol KVAs and the KASLR slide through.
        assert!(owned.rq_kvas.is_empty());
        assert!(owned.rq_pas.is_empty());
        assert_eq!(owned.scx_root_kva, scx_root_kva);
        assert_eq!(owned.scx_tasks_kva, scx_tasks_kva);
        assert_eq!(owned.kaslr_offset, 0);
    }

    /// `build`'s hard prereqs: `offsets` and `symbols` each gate the
    /// capture. With `per_cpu_offsets == None` but a hard prereq
    /// absent, `build` returns `None` (the `?` short-circuits) rather
    /// than a degraded `Some`. Pins that the degraded arm is reached
    /// ONLY after both hard prereqs resolve.
    #[test]
    fn build_returns_none_when_hard_prereq_absent() {
        let accessor = test_accessor();
        let offsets = empty_scx_walker_offsets();
        let symbols = symbols_with_scx(Some(0xffff_ffff_8230_0000), Some(0xffff_ffff_8240_0000));

        // offsets absent → None even though symbols + the degraded
        // per_cpu_offsets path would otherwise apply.
        assert!(build(&accessor, None, Some(&symbols), None, 0).is_none());
        // symbols absent → None.
        assert!(build(&accessor, Some(&offsets), None, None, 0).is_none());
    }

    /// Degraded build with a kernel that has `scx_tasks` but no
    /// `scx_root` (no sched_ext-root symbol). Drives the real
    /// [`build`] degraded path and asserts the absent `scx_root`
    /// defaults to `0` while `scx_tasks_kva` passes through —
    /// `scx_tasks_kva` is independent of both `scx_root` and the
    /// (empty) rq arrays, so the global `walk_scx_tasks_global`
    /// still has its anchor. A refactor that tied `scx_tasks_kva`
    /// to `rq_kvas` length or to `scx_root` presence would surface
    /// here as a zeroed / dropped `scx_tasks_kva`.
    #[test]
    fn degraded_build_scx_tasks_kva_independent_of_rq_arrays() {
        let scx_tasks_kva = 0xffff_ffff_82e5_e840;
        let accessor = test_accessor();
        let offsets = empty_scx_walker_offsets();
        // scx_root absent, scx_tasks present.
        let symbols = symbols_with_scx(None, Some(scx_tasks_kva));

        let owned = build(&accessor, Some(&offsets), Some(&symbols), None, 0)
            .expect("offsets + symbols present → degraded build returns Some");

        assert!(owned.rq_kvas.is_empty());
        assert!(owned.rq_pas.is_empty());
        // scx_tasks_kva survives even though rq arrays are empty and
        // scx_root is absent.
        assert_eq!(owned.scx_tasks_kva, scx_tasks_kva);
        // Absent scx_root symbol defaults to 0 (build()'s
        // `unwrap_or(0)`), the second independent-degradation axis.
        assert_eq!(owned.scx_root_kva, 0);
    }

    /// Empty per_cpu_offsets: builder produces an empty owned set.
    /// Mirrors a freeze before any vCPU has come up — the freeze
    /// coordinator's `per_cpu_offsets.contains(&0)` retry guard
    /// normally rejects this state, but the math itself must stay
    /// well-defined (no panic, empty output) so the pre-retry path
    /// cannot misbehave.
    #[test]
    fn compute_owned_empty_per_cpu_offsets() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let runqueues_kva: u64 = 0x20_0000;
        let scx_root_kva = 0xffff_ffff_8230_0000;
        let scx_tasks_kva = 0xffff_ffff_8240_0000;
        let owned = compute_owned(
            page_offset,
            runqueues_kva,
            scx_root_kva,
            scx_tasks_kva,
            &[],
            0,
        );

        assert!(owned.rq_kvas.is_empty());
        assert!(owned.rq_pas.is_empty());
        assert_eq!(owned.scx_root_kva, scx_root_kva);
        assert_eq!(owned.scx_tasks_kva, scx_tasks_kva);
    }

    /// rq_pa wraps when the runqueues section offset + per_cpu offset
    /// straddles `page_offset`. Every step of the math uses
    /// `wrapping_*` so the result is well-defined; this test pins the
    /// behavior so a future refactor can't silently introduce a
    /// `checked_sub` that panics on the boundary case.
    #[test]
    fn compute_owned_wrapping_arithmetic() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        // Pick a per_cpu offset that, combined with the section-
        // relative runqueues_kva, lands at exactly page_offset — the
        // resulting rq_pa is 0 and the recovered rq_kva is page_offset.
        let runqueues_kva: u64 = 0x1000;
        let per_cpu = [page_offset.wrapping_sub(runqueues_kva)];
        let owned = compute_owned(page_offset, runqueues_kva, 0, 0, &per_cpu, 0);

        assert_eq!(owned.rq_pas, vec![0u64]);
        assert_eq!(owned.rq_kvas, vec![page_offset]);
    }

    /// rq_kvas and rq_pas remain index-aligned: for every CPU `i`,
    /// `rq_kvas[i] == rq_pas[i] + page_offset`. This invariant is what
    /// lets the runnable_list walker use `rq_kva` as the loop
    /// terminator and `rq_pa` as the read-base on the same CPU.
    #[test]
    fn compute_owned_kva_pa_pairwise_consistent() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let runqueues_kva: u64 = 0x4_0000;
        let per_cpu = [
            0x10_0000u64,
            0x14_0000u64,
            0x18_0000u64,
            0x1c_0000u64,
            0x20_0000u64,
        ];
        let owned = compute_owned(
            page_offset,
            runqueues_kva,
            0xffff_ffff_8000_0000,
            0xffff_ffff_8001_0000,
            &per_cpu,
            0,
        );

        assert_eq!(owned.rq_kvas.len(), per_cpu.len());
        assert_eq!(owned.rq_pas.len(), per_cpu.len());
        for cpu in 0..per_cpu.len() {
            assert_eq!(
                owned.rq_kvas[cpu],
                owned.rq_pas[cpu].wrapping_add(page_offset),
                "kva/pa pair mismatch on cpu {cpu}",
            );
        }
    }

    /// Cross-check that `compute_owned` and `compute_rq_pas` agree
    /// under a non-zero `kaslr_offset`. Before the KASLR slide was
    /// threaded into both helpers, they silently omitted it and
    /// produced identical (wrong) PAs — the cross-checks in
    /// `compute_owned_happy_path` and
    /// `compute_owned_partial_scx_root_zero` pass `kaslr_offset = 0`
    /// to both sides, so they would not detect an asymmetry where one
    /// side picks up the new arg and the other doesn't. The shared
    /// helper + this test together pin the contract: a future change
    /// that drops the slide on one path but not the other surfaces
    /// here as divergent PAs.
    #[test]
    fn compute_owned_matches_compute_rq_pas_under_kaslr() {
        let page_offset = DEFAULT_PAGE_OFFSET;
        let runqueues_kva: u64 = 0x20_0000;
        let per_cpu = [0x10_0000u64, 0x14_0000u64, 0x18_0000u64];
        let kaslr = 0x1_0000_0000u64; // 4 GiB virt slide
        let owned = compute_owned(
            page_offset,
            runqueues_kva,
            0xffff_ffff_8230_0000,
            0xffff_ffff_8240_0000,
            &per_cpu,
            kaslr,
        );
        let expected_pas =
            crate::monitor::symbols::compute_rq_pas(runqueues_kva, &per_cpu, page_offset, kaslr);
        assert_eq!(
            owned.rq_pas, expected_pas,
            "compute_owned and compute_rq_pas must agree on non-zero \
             kaslr — asymmetric kaslr threading would surface here",
        );
    }
}