1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
//! Observe that `cpu_budget` overcommit produces guest-visible steal time.
//!
//! A `no_perf_mode` VM with `cpu_budget` < vCPUs masks every vCPU thread to
//! a smaller host-CPU pool (`set_thread_cpumask`). With each vCPU kept
//! runnable by a SpinWait worker, the host scheduler's time-sharing of the
//! oversubscribed vCPUs onto the budget's CPUs surfaces as steal time in the
//! guest's `/proc/stat` — the direct, guest-visible symptom of the overcommit.
//!
//! The topology uses 8 vCPUs, which is <= the host CPU count on the CI
//! runners, so the accrued steal is attributable to the `cpu_budget` mask
//! (2 host CPUs, 4x overcommit) rather than host-CPU-count oversubscription.
//! This makes the assertion a transitive check that the mask is actually
//! enforced: if `cpu_budget` did NOT limit the vCPU threads to the budget's
//! CPUs, 8 vCPUs <= host CPUs would run without contention and accrue no
//! steal. (On a host with fewer than 8 CPUs the steal is also host-count
//! driven, so the assertion still holds but no longer isolates the mask.)
use anyhow::{Result, ensure};
use ktstr::assert::AssertResult;
use ktstr::ktstr_test;
use ktstr::prelude::WorkType;
use ktstr::scenario::Ctx;
use ktstr::scenario::ops::{HoldSpec, Step, execute_steps};
/// Steal time (USER_HZ ticks) from the aggregate `cpu` line of
/// `/proc/stat`. Field layout after the `cpu` label: user nice system idle
/// iowait irq softirq STEAL guest guest_nice — so steal is the 8th
/// whitespace token counting the label as token 0.
fn read_steal_ticks() -> Result<u64> {
let stat = std::fs::read_to_string("/proc/stat")?;
let cpu = stat
.lines()
.next()
.filter(|l| l.starts_with("cpu "))
.ok_or_else(|| anyhow::anyhow!("/proc/stat missing aggregate `cpu ` line"))?;
cpu.split_whitespace()
.nth(8)
.ok_or_else(|| anyhow::anyhow!("/proc/stat `cpu` line has no steal field: {cpu:?}"))?
.parse::<u64>()
.map_err(|e| anyhow::anyhow!("parse steal field from {cpu:?}: {e}"))
}
/// 8 vCPUs (2 LLCs x 4 cores x 1 thread) masked to `cpu_budget = 2` host
/// CPUs (4x overcommit). SpinWait on all 8 keeps them runnable; the steal
/// the guest accrues while the host time-shares them onto 2 CPUs is the
/// observable overcommit symptom — and, since 8 vCPUs <= host CPUs on CI,
/// evidence the `cpu_budget` mask is enforced.
#[ktstr_test(
llcs = 2,
cores = 4,
threads = 1,
no_perf_mode,
cpu_budget = 2,
duration_s = 5
)]
fn cpu_budget_overcommit_accrues_guest_steal(ctx: &Ctx) -> Result<AssertResult> {
let total = ctx.topo.total_cpus();
ensure!(total == 8, "test assumes 8 vCPUs (2 x 4 x 1); got {total}");
let before = read_steal_ticks()?;
let steps = vec![Step {
setup: vec![
ctx.cgroup_def("load")
.workers(8)
.work_type(WorkType::SpinWait),
]
.into(),
ops: vec![],
hold: HoldSpec::FULL,
}];
let result = execute_steps(ctx, steps)?;
let after = read_steal_ticks()?;
ensure!(
after > before,
"expected guest steal to accrue under 4x cpu_budget overcommit \
(8 vCPUs masked to 2 host CPUs) but it did not advance: \
before={before} after={after} ticks. Possible causes: (1) the \
cpu_budget mask is not limiting the vCPU threads to the budget's \
host CPUs; (2) guest steal-time accounting (CONFIG_PARAVIRT_TIME_ACCOUNTING, \
a required ktstr guest config) is unavailable; (3) on aarch64, the \
host did not wire KVM PV stolen-time (KVM_ARM_VCPU_PVTIME_IPA — see \
vmm::aarch64::kvm setup_pvtime; it is skipped when the host kernel \
lacks CONFIG_SCHED_INFO, in which case steal cannot advance)"
);
Ok(result)
}