pub mod bpf_map;
pub mod bpf_prog;
pub mod btf_offsets;
pub mod guest;
pub mod idr;
pub mod reader;
pub mod symbols;
/// Upper bound on a plausible per-CPU local DSQ depth. `sample_looks_valid`
/// rejects any sample that contains a reading above this value.
pub const DSQ_PLAUSIBILITY_CEILING: u32 = 10_000;
/// Number of scheduler ticks that make up the vCPU preemption threshold.
const PREEMPTION_TICK_MULTIPLE: u64 = 10;
/// HZ value assumed when no kernel configuration source yields `CONFIG_HZ`.
const DEFAULT_HZ: u64 = 250;
pub(crate) fn vcpu_preemption_threshold_ns(kernel_path: Option<&std::path::Path>) -> u64 {
let hz = guest_kernel_hz(kernel_path);
let tick_ns = 1_000_000_000u64 / hz;
tick_ns * PREEMPTION_TICK_MULTIPLE
}
pub(crate) fn guest_kernel_hz(kernel_path: Option<&std::path::Path>) -> u64 {
if let Some(kp) = kernel_path {
if let Some(vmlinux) = find_vmlinux(kp)
&& let Some(hz) = read_hz_from_ikconfig(&vmlinux)
{
return hz;
}
if let Some(hz) = read_hz_from_kernel_dir(kp) {
return hz;
}
}
if let Some(hz) = read_hz_from_boot_config() {
return hz;
}
DEFAULT_HZ
}
use crate::vmm::find_vmlinux;
/// Byte marker searched for in the vmlinux image; the gzip-compressed kernel
/// config is decoded from the bytes immediately following it.
const IKCONFIG_MAGIC: &[u8] = b"IKCFG_ST";
fn read_hz_from_ikconfig(vmlinux_path: &std::path::Path) -> Option<u64> {
let data = std::fs::read(vmlinux_path).ok()?;
let pos = data
.windows(IKCONFIG_MAGIC.len())
.position(|w| w == IKCONFIG_MAGIC)?;
let gz_start = pos + IKCONFIG_MAGIC.len();
if gz_start >= data.len() {
return None;
}
let cursor = std::io::Cursor::new(&data[gz_start..]);
let mut decoder = flate2::read::GzDecoder::new(cursor);
let mut config = String::new();
std::io::Read::read_to_string(&mut decoder, &mut config).ok()?;
parse_config_hz(&config)
}
/// Looks for a `.config` file in the directories above `kernel_path` and
/// parses `CONFIG_HZ` from the first one found.
///
/// At most four ancestor directories are inspected, starting with the parent
/// of `kernel_path`. The search stops at the first existing `.config`: if that
/// file cannot be read or has no `CONFIG_HZ=` value the result is `None`.
fn read_hz_from_kernel_dir(kernel_path: &std::path::Path) -> Option<u64> {
    let config = kernel_path
        .ancestors()
        .skip(1) // `ancestors` yields the path itself first; start at its parent
        .take(4)
        .map(|dir| dir.join(".config"))
        .find(|candidate| candidate.exists())?;
    let contents = std::fs::read_to_string(&config).ok()?;
    parse_config_hz(&contents)
}
/// Reads `CONFIG_HZ` from `/boot/config-<release>`, where `<release>` is the
/// release string reported by `uname` for the running kernel.
///
/// Returns `None` if `uname` fails, the release string is not valid UTF-8,
/// the file cannot be read, or it contains no parsable `CONFIG_HZ=` value.
fn read_hz_from_boot_config() -> Option<u64> {
// SAFETY: `utsname` is a plain C struct of character arrays, so the all-zero
// bit pattern is a valid value for it.
let mut uname: libc::utsname = unsafe { std::mem::zeroed() };
// SAFETY: `uname` is passed a valid, exclusive pointer to a `utsname` buffer.
if unsafe { libc::uname(&mut uname) } != 0 {
return None;
}
// SAFETY: relies on `release` being NUL-terminated — it was zero-initialised
// above and `uname` succeeded; the pointer stays valid while `uname` is alive.
let release = unsafe { std::ffi::CStr::from_ptr(uname.release.as_ptr()) }
.to_str()
.ok()?;
let path = format!("/boot/config-{release}");
let contents = std::fs::read_to_string(path).ok()?;
parse_config_hz(&contents)
}
/// Parses the `CONFIG_HZ=<n>` value out of kernel config text.
///
/// Only the first line that (after trimming whitespace) starts with
/// `CONFIG_HZ=` is considered; lines such as `CONFIG_HZ_1000=y` do not match.
/// Returns `None` when no such line exists, when its value is not an unsigned
/// integer, or when the value is `0` — a zero tick rate is meaningless and
/// would make callers that compute `1_000_000_000 / hz` divide by zero.
fn parse_config_hz(config: &str) -> Option<u64> {
    config
        .lines()
        .find_map(|line| line.trim().strip_prefix("CONFIG_HZ="))
        .and_then(|val| val.parse::<u64>().ok())
        .filter(|&hz| hz > 0)
}
/// Returns `true` when no CPU in `sample` reports a local DSQ depth above
/// `DSQ_PLAUSIBILITY_CEILING`. A sample without CPUs is considered valid.
pub fn sample_looks_valid(sample: &MonitorSample) -> bool {
    let implausible = sample
        .cpus
        .iter()
        .any(|cpu| cpu.local_dsq_depth > DSQ_PLAUSIBILITY_CEILING);
    !implausible
}
/// Test helper: resolves a kernel image via `crate::kernel_path::resolve_btf`,
/// passing along the `KTSTR_KERNEL` environment variable when it is set and
/// valid Unicode.
#[cfg(test)]
pub fn find_test_vmlinux() -> Option<std::path::PathBuf> {
    let from_env: Option<String> = std::env::var("KTSTR_KERNEL").ok();
    let hint: Option<&str> = from_env.as_deref();
    crate::kernel_path::resolve_btf(hint)
}
/// A full monitor run: the raw samples plus the summary derived from them.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct MonitorReport {
/// Samples in the order they were collected.
pub samples: Vec<MonitorSample>,
/// Aggregate statistics for `samples`.
pub summary: MonitorSummary,
/// vCPU preemption threshold in nanoseconds used for stall detection.
/// Defaults to 0 when absent from serialized data; `MonitorThresholds::evaluate`
/// treats 0 as "unset" and recomputes the threshold itself.
#[serde(default)]
pub preemption_threshold_ns: u64,
}
/// Tracks runs of consecutive threshold violations and remembers the longest
/// run seen so far.
#[derive(Debug, Clone, Default)]
struct SustainedViolationTracker {
    /// Length of the run currently in progress (0 when the last record was clean).
    consecutive: usize,
    /// Length of the longest run observed.
    worst_run: usize,
    /// Value recorded at the moment the longest run reached its current length.
    worst_value: f64,
    /// Position recorded at the moment the longest run reached its current length.
    worst_at: usize,
}
impl SustainedViolationTracker {
    /// Feeds one observation into the tracker.
    ///
    /// A non-violating observation resets the current run. A violating one
    /// extends it, and when the run becomes strictly longer than any earlier
    /// run, `value` and `at` are stored alongside the new record length.
    fn record(&mut self, violated: bool, value: f64, at: usize) {
        if !violated {
            self.consecutive = 0;
            return;
        }
        self.consecutive += 1;
        if self.consecutive <= self.worst_run {
            return;
        }
        self.worst_run = self.consecutive;
        self.worst_value = value;
        self.worst_at = at;
    }

    /// Returns `true` when the longest run is at least `threshold` long.
    fn sustained(&self, threshold: usize) -> bool {
        threshold <= self.worst_run
    }
}
/// One monitor observation: a snapshot of every CPU taken at `elapsed_ms`.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct MonitorSample {
/// Timestamp of the sample in milliseconds; differences between samples are
/// used as interval lengths when computing rates.
pub elapsed_ms: u64,
/// Per-CPU readings, indexed by position.
pub cpus: Vec<CpuSnapshot>,
/// Optional BPF program runtime statistics; omitted from serialized output
/// when `None` and defaulted to `None` when missing on input.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub prog_stats: Option<Vec<bpf_prog::ProgRuntimeStats>>,
}
impl MonitorSample {
    /// Builds a sample from a timestamp and per-CPU snapshots, with no BPF
    /// program statistics attached.
    pub fn new(elapsed_ms: u64, cpus: Vec<CpuSnapshot>) -> Self {
        Self {
            prog_stats: None,
            cpus,
            elapsed_ms,
        }
    }

    /// Ratio of the largest to the smallest `nr_running` across CPUs.
    ///
    /// The smallest value is clamped to at least 1 to avoid dividing by zero.
    /// Returns `1.0` for a sample without CPUs.
    pub fn imbalance_ratio(&self) -> f64 {
        let mut counts = self.cpus.iter().map(|cpu| cpu.nr_running);
        let Some(first) = counts.next() else {
            return 1.0;
        };
        let (lowest, highest) =
            counts.fold((first, first), |(lo, hi), nr| (lo.min(nr), hi.max(nr)));
        highest as f64 / lowest.max(1) as f64
    }

    /// Sums `f` over the event counters of every CPU that has them.
    ///
    /// Returns `None` when no CPU in the sample carries event counters.
    pub fn sum_event_field(&self, f: fn(&ScxEventCounters) -> i64) -> Option<i64> {
        self.cpus
            .iter()
            .filter_map(|cpu| cpu.event_counters.as_ref())
            .fold(None, |running: Option<i64>, counters| {
                Some(running.unwrap_or(0) + f(counters))
            })
    }
}
/// Readings for a single CPU within one `MonitorSample`.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct CpuSnapshot {
/// Runnable-task count; input to `MonitorSample::imbalance_ratio` and the
/// idle check in stall detection.
pub nr_running: u32,
/// NOTE(review): presumably the sched_ext share of `nr_running`; it is not
/// read anywhere in this part of the file — confirm against the reader.
pub scx_nr_running: u32,
/// Local DSQ depth; checked against `DSQ_PLAUSIBILITY_CEILING` and the
/// configured DSQ threshold.
pub local_dsq_depth: u32,
/// Runqueue clock; an unchanged, non-zero value between consecutive samples
/// is the basis of stall detection.
pub rq_clock: u64,
/// NOTE(review): not interpreted in this part of the file.
pub scx_flags: u32,
/// Event counters for this CPU, when available.
#[serde(skip_serializing_if = "Option::is_none")]
pub event_counters: Option<ScxEventCounters>,
/// Schedstat counters for this CPU, when available.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub schedstat: Option<RqSchedstat>,
/// vCPU CPU time in nanoseconds. When present in two consecutive samples,
/// a stall is only counted if it advanced by at least the preemption threshold.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub vcpu_cpu_time_ns: Option<u64>,
/// Scheduling-domain snapshots for this CPU, when available.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub sched_domains: Option<Vec<SchedDomainSnapshot>>,
}
/// Per-CPU schedstat counters. `MonitorSummary` sums each field across CPUs
/// and reports the difference between the first and last sample that carry them.
/// NOTE(review): field names presumably mirror the kernel's per-runqueue
/// schedstat counters — confirm against the code that fills this struct.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct RqSchedstat {
pub run_delay: u64,
pub pcount: u64,
pub yld_count: u32,
pub sched_count: u32,
pub sched_goidle: u32,
pub ttwu_count: u32,
pub ttwu_local: u32,
}
/// Snapshot of one scheduling domain attached to a CPU.
/// NOTE(review): the fields are not interpreted in this part of the file;
/// they presumably mirror the kernel's `sched_domain` members — confirm
/// against the code that fills this struct.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct SchedDomainSnapshot {
pub level: i32,
pub name: String,
pub flags: i32,
pub span_weight: u32,
pub balance_interval: u32,
pub nr_balance_failed: u32,
pub newidle_call: u32,
pub newidle_success: u32,
pub newidle_ratio: u32,
pub max_newidle_lb_cost: u64,
/// Optional load-balancing statistics; omitted from serialized output when
/// `None` and defaulted to `None` when missing on input.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub stats: Option<SchedDomainStats>,
}
/// Load-balancing statistics for one scheduling domain.
///
/// The `lb_*` fields are arrays of `btf_offsets::CPU_MAX_IDLE_TYPES` entries
/// (presumably one slot per kernel idle type — TODO confirm); the remaining
/// fields are scalar counters.
/// NOTE(review): none of these fields are interpreted in this part of the file.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct SchedDomainStats {
pub lb_count: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_failed: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_balanced: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_imbalance_load: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_imbalance_util: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_imbalance_task: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_imbalance_misfit: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_gained: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_hot_gained: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_nobusyg: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub lb_nobusyq: [u32; btf_offsets::CPU_MAX_IDLE_TYPES],
pub alb_count: u32,
pub alb_failed: u32,
pub alb_pushed: u32,
pub sbe_count: u32,
pub sbe_balanced: u32,
pub sbe_pushed: u32,
pub sbf_count: u32,
pub sbf_balanced: u32,
pub sbf_pushed: u32,
pub ttwu_wake_remote: u32,
pub ttwu_move_affine: u32,
pub ttwu_move_balance: u32,
}
/// Per-CPU event counters. Individual fields are summed across CPUs with
/// `MonitorSample::sum_event_field` and differenced between samples to obtain
/// totals and rates.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct ScxEventCounters {
/// Source of the fallback total, rate and burst figures.
pub select_cpu_fallback: i64,
pub dispatch_local_dsq_offline: i64,
/// Source of the keep-last total and rate figures.
pub dispatch_keep_last: i64,
pub enq_skip_exiting: i64,
pub enq_skip_migration_disabled: i64,
}
/// Aggregate statistics over a series of `MonitorSample`s, produced by
/// `MonitorSummary::from_samples_with_threshold`.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct MonitorSummary {
/// Number of samples supplied, including ones skipped as empty or implausible.
pub total_samples: usize,
/// Largest imbalance ratio among usable samples; never below 1.0 for
/// non-empty input (0.0 only in the `Default` value).
pub max_imbalance_ratio: f64,
/// Largest local DSQ depth seen on any CPU of a usable sample.
pub max_local_dsq_depth: u32,
/// Whether any CPU showed a stalled runqueue clock between two consecutive
/// usable samples.
pub stall_detected: bool,
/// Mean imbalance ratio over usable samples (0.0 when there are none).
#[serde(default)]
pub avg_imbalance_ratio: f64,
/// Mean `nr_running` over all CPU readings of usable samples.
#[serde(default)]
pub avg_nr_running: f64,
/// Mean local DSQ depth over all CPU readings of usable samples.
#[serde(default)]
pub avg_local_dsq_depth: f64,
/// Event counter deltas; `None` when no sample carries event counters.
#[serde(skip_serializing_if = "Option::is_none")]
pub event_deltas: Option<ScxEventDeltas>,
/// Schedstat deltas; `None` when no sample carries schedstat data.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub schedstat_deltas: Option<SchedstatDeltas>,
/// Per-program BPF runtime deltas; `None` when no sample carries program stats.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub prog_stats_deltas: Option<Vec<ProgStatsDelta>>,
}
/// Change in one BPF program's runtime statistics between the first and last
/// sample that carry program stats.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct ProgStatsDelta {
/// Program name, used to match entries between the two samples.
pub name: String,
/// Increase in `cnt` (saturating at 0).
pub cnt: u64,
/// Increase in `nsecs` (saturating at 0).
pub nsecs: u64,
/// `nsecs / cnt`, or 0.0 when `cnt` is 0.
pub nsecs_per_call: f64,
}
/// Schedstat counters summed across CPUs and differenced (saturating at 0)
/// between the first and last sample that carry schedstat data.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct SchedstatDeltas {
pub total_run_delay: u64,
/// `total_run_delay` per second of elapsed time between the two samples
/// (0.0 when no time elapsed).
pub run_delay_rate: f64,
pub total_pcount: u64,
pub total_sched_count: u64,
/// `total_sched_count` per second of elapsed time between the two samples
/// (0.0 when no time elapsed).
pub sched_count_rate: f64,
pub total_yld_count: u64,
pub total_sched_goidle: u64,
pub total_ttwu_count: u64,
pub total_ttwu_local: u64,
}
/// Event counters summed across CPUs and differenced between the first and
/// last sample that carry event counters.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct ScxEventDeltas {
/// Change in `select_cpu_fallback`.
pub total_fallback: i64,
/// `total_fallback` per second of elapsed time (0.0 when no time elapsed).
pub fallback_rate: f64,
/// Largest increase in `select_cpu_fallback` between two adjacent samples.
pub max_fallback_burst: i64,
/// Change in `dispatch_local_dsq_offline`.
pub total_dispatch_offline: i64,
/// Change in `dispatch_keep_last`.
pub total_dispatch_keep_last: i64,
/// `total_dispatch_keep_last` per second of elapsed time (0.0 when no time elapsed).
pub keep_last_rate: f64,
/// Change in `enq_skip_exiting`.
pub total_enq_skip_exiting: i64,
/// Change in `enq_skip_migration_disabled`.
pub total_enq_skip_migration_disabled: i64,
}
impl MonitorSummary {
/// Summarises `samples` without an explicit preemption threshold.
///
/// Passes a threshold of 0 to `from_samples_with_threshold`, which then
/// derives the threshold itself.
pub fn from_samples(samples: &[MonitorSample]) -> Self {
Self::from_samples_with_threshold(samples, 0)
}
/// Summarises `samples`, using `preemption_threshold_ns` for stall detection.
///
/// Samples with no CPUs, or failing `sample_looks_valid`, are excluded from
/// the max/average figures and from stall detection, but still count towards
/// `total_samples` and the delta computations. A `preemption_threshold_ns` of
/// 0 means "unset": the threshold is then derived via
/// `vcpu_preemption_threshold_ns(None)`. Empty input yields `Self::default()`.
pub fn from_samples_with_threshold(
    samples: &[MonitorSample],
    preemption_threshold_ns: u64,
) -> Self {
    if samples.is_empty() {
        return Self::default();
    }

    // Only non-empty, plausible samples feed the statistics below.
    let usable: Vec<&MonitorSample> = samples
        .iter()
        .filter(|s| !s.cpus.is_empty() && sample_looks_valid(s))
        .collect();

    let mut max_imbalance_ratio = 1.0f64;
    let mut max_local_dsq_depth = 0u32;
    let mut ratio_total = 0.0f64;
    let mut nr_running_total = 0.0f64;
    let mut dsq_depth_total = 0.0f64;
    let mut readings = 0usize;
    for sample in &usable {
        for cpu in &sample.cpus {
            max_local_dsq_depth = max_local_dsq_depth.max(cpu.local_dsq_depth);
            nr_running_total += cpu.nr_running as f64;
            dsq_depth_total += cpu.local_dsq_depth as f64;
        }
        readings += sample.cpus.len();

        let ratio = sample.imbalance_ratio();
        ratio_total += ratio;
        if ratio > max_imbalance_ratio {
            max_imbalance_ratio = ratio;
        }
    }

    // Averages fall back to 0.0 when there is nothing to average over.
    let mean = |total: f64, count: usize| -> f64 {
        if count > 0 {
            total / count as f64
        } else {
            0.0
        }
    };
    let avg_imbalance_ratio = mean(ratio_total, usable.len());
    let avg_nr_running = mean(nr_running_total, readings);
    let avg_local_dsq_depth = mean(dsq_depth_total, readings);

    let threshold = match preemption_threshold_ns {
        0 => vcpu_preemption_threshold_ns(None),
        explicit => explicit,
    };

    // A CPU counts as stalled between two consecutive usable samples when its
    // rq_clock is non-zero and unchanged, it was not idle in both samples, and
    // (if vCPU time is known on both sides) the vCPU ran for at least
    // `threshold` nanoseconds in between.
    let stall_detected = usable.windows(2).any(|pair| {
        let (prev, curr) = (pair[0], pair[1]);
        prev.cpus.iter().zip(curr.cpus.iter()).any(|(before, now)| {
            let busy = now.nr_running != 0 || before.nr_running != 0;
            let cpu_time_advanced = match (now.vcpu_cpu_time_ns, before.vcpu_cpu_time_ns) {
                (Some(now_t), Some(before_t)) => now_t.saturating_sub(before_t) >= threshold,
                _ => true,
            };
            now.rq_clock != 0 && now.rq_clock == before.rq_clock && busy && cpu_time_advanced
        })
    });

    Self {
        total_samples: samples.len(),
        max_imbalance_ratio,
        max_local_dsq_depth,
        stall_detected,
        avg_imbalance_ratio,
        avg_nr_running,
        avg_local_dsq_depth,
        event_deltas: Self::compute_event_deltas(samples),
        schedstat_deltas: Self::compute_schedstat_deltas(samples),
        prog_stats_deltas: Self::compute_prog_stats_deltas(samples),
    }
}
/// Computes event-counter deltas between the first and last sample that carry
/// event counters, or `None` when no sample has any.
///
/// Totals are the cross-CPU sum in the last sample minus that of the first.
/// Rates divide the total by the elapsed seconds between those two samples
/// (0.0 when no time elapsed). `max_fallback_burst` is the largest increase of
/// `select_cpu_fallback` between two adjacent samples.
fn compute_event_deltas(samples: &[MonitorSample]) -> Option<ScxEventDeltas> {
    let has_events = |s: &MonitorSample| s.cpus.iter().any(|c| c.event_counters.is_some());
    let first = samples.iter().find(|s| has_events(s))?;
    let last = samples.iter().rev().find(|s| has_events(s))?;

    // Cross-CPU sum in `last` minus cross-CPU sum in `first` for one counter.
    let delta = |f: fn(&ScxEventCounters) -> i64| -> i64 {
        last.sum_event_field(f).unwrap_or(0) - first.sum_event_field(f).unwrap_or(0)
    };
    let total_fallback = delta(|e| e.select_cpu_fallback);
    let total_keep_last = delta(|e| e.dispatch_keep_last);

    let duration_secs = last.elapsed_ms.saturating_sub(first.elapsed_ms) as f64 / 1000.0;
    let per_second = |total: i64| -> f64 {
        if duration_secs > 0.0 {
            total as f64 / duration_secs
        } else {
            0.0
        }
    };

    // Only compare adjacent samples that BOTH carry counters. Treating a
    // counter-less sample as 0 would report the whole cumulative count of the
    // next sample as a single burst. This matches how
    // `MonitorThresholds::evaluate` computes its per-interval rates.
    let max_fallback_burst = samples
        .windows(2)
        .filter_map(|pair| {
            let prev = pair[0].sum_event_field(|e| e.select_cpu_fallback)?;
            let curr = pair[1].sum_event_field(|e| e.select_cpu_fallback)?;
            Some(curr - prev)
        })
        .fold(0i64, i64::max);

    Some(ScxEventDeltas {
        total_fallback,
        fallback_rate: per_second(total_fallback),
        max_fallback_burst,
        total_dispatch_offline: delta(|e| e.dispatch_local_dsq_offline),
        total_dispatch_keep_last: total_keep_last,
        keep_last_rate: per_second(total_keep_last),
        total_enq_skip_exiting: delta(|e| e.enq_skip_exiting),
        total_enq_skip_migration_disabled: delta(|e| e.enq_skip_migration_disabled),
    })
}
/// Computes schedstat deltas between the first and last sample that carry
/// schedstat data, or `None` when no sample has any.
///
/// Each counter is summed across the CPUs that report it, and the delta is the
/// last sum minus the first, saturating at 0. Rates divide by the elapsed
/// seconds between the two samples (0.0 when no time elapsed).
fn compute_schedstat_deltas(samples: &[MonitorSample]) -> Option<SchedstatDeltas> {
    /// Sum of one schedstat field (widened to u64) over all CPUs that have schedstat.
    fn cpu_total(sample: &MonitorSample, field: fn(&RqSchedstat) -> u64) -> u64 {
        sample
            .cpus
            .iter()
            .filter_map(|cpu| cpu.schedstat.as_ref())
            .map(|ss| field(ss))
            .sum()
    }

    let carries_schedstat =
        |s: &MonitorSample| s.cpus.iter().any(|cpu| cpu.schedstat.is_some());
    let first = samples.iter().find(|s| carries_schedstat(s))?;
    let last = samples.iter().rev().find(|s| carries_schedstat(s))?;

    let growth = |field: fn(&RqSchedstat) -> u64| -> u64 {
        cpu_total(last, field).saturating_sub(cpu_total(first, field))
    };

    let total_run_delay = growth(|ss| ss.run_delay);
    let total_pcount = growth(|ss| ss.pcount);
    let total_sched_count = growth(|ss| u64::from(ss.sched_count));
    let total_yld_count = growth(|ss| u64::from(ss.yld_count));
    let total_sched_goidle = growth(|ss| u64::from(ss.sched_goidle));
    let total_ttwu_count = growth(|ss| u64::from(ss.ttwu_count));
    let total_ttwu_local = growth(|ss| u64::from(ss.ttwu_local));

    let duration_secs = last.elapsed_ms.saturating_sub(first.elapsed_ms) as f64 / 1000.0;
    let per_second = |total: u64| -> f64 {
        if duration_secs > 0.0 {
            total as f64 / duration_secs
        } else {
            0.0
        }
    };

    Some(SchedstatDeltas {
        total_run_delay,
        run_delay_rate: per_second(total_run_delay),
        total_pcount,
        total_sched_count,
        sched_count_rate: per_second(total_sched_count),
        total_yld_count,
        total_sched_goidle,
        total_ttwu_count,
        total_ttwu_local,
    })
}
/// Computes per-program runtime deltas between the first and last sample that
/// carry BPF program stats.
///
/// Programs are matched by name; a program absent from the first sample is
/// measured against zero. Returns `None` when no sample has program stats or
/// when the last such sample lists no programs.
fn compute_prog_stats_deltas(samples: &[MonitorSample]) -> Option<Vec<ProgStatsDelta>> {
    let earliest = samples.iter().find_map(|s| s.prog_stats.as_ref())?;
    let latest = samples.iter().rev().find_map(|s| s.prog_stats.as_ref())?;

    let mut deltas: Vec<ProgStatsDelta> = Vec::with_capacity(latest.len());
    for prog in latest {
        let (base_cnt, base_nsecs) = match earliest.iter().find(|p| p.name == prog.name) {
            Some(base) => (base.cnt, base.nsecs),
            None => (0, 0),
        };
        let cnt = prog.cnt.saturating_sub(base_cnt);
        let nsecs = prog.nsecs.saturating_sub(base_nsecs);
        // Avoid dividing by zero when the program never ran in the interval.
        let nsecs_per_call = if cnt > 0 {
            nsecs as f64 / cnt as f64
        } else {
            0.0
        };
        deltas.push(ProgStatsDelta {
            name: prog.name.clone(),
            cnt,
            nsecs,
            nsecs_per_call,
        });
    }

    (!deltas.is_empty()).then_some(deltas)
}
}
/// Limits used to judge a monitor report.
#[derive(Debug, Clone, Copy)]
pub struct MonitorThresholds {
    /// Imbalance ratio above which a sample counts as a violation.
    pub max_imbalance_ratio: f64,
    /// Local DSQ depth above which a CPU reading counts as a violation.
    pub max_local_dsq_depth: u32,
    /// Whether runqueue-clock stalls are evaluated at all.
    pub fail_on_stall: bool,
    /// Number of consecutive violations required before a check fails.
    pub sustained_samples: usize,
    /// Fallback events per second above which an interval counts as a violation.
    pub max_fallback_rate: f64,
    /// Keep-last events per second above which an interval counts as a violation.
    pub max_keep_last_rate: f64,
}
impl MonitorThresholds {
    /// The default limits, usable in `const` contexts.
    pub const DEFAULT: Self = Self {
        sustained_samples: 5,
        fail_on_stall: true,
        max_imbalance_ratio: 4.0,
        max_local_dsq_depth: 50,
        max_fallback_rate: 200.0,
        max_keep_last_rate: 100.0,
    };
}
impl Default for MonitorThresholds {
    /// Returns [`MonitorThresholds::DEFAULT`].
    fn default() -> Self {
        MonitorThresholds::DEFAULT
    }
}
/// Outcome of `MonitorThresholds::evaluate`.
#[derive(Debug, Clone)]
pub struct MonitorVerdict {
/// `true` when no sustained violation was found (or there was nothing to judge).
pub passed: bool,
/// One human-readable message per sustained violation.
pub details: Vec<String>,
/// One-line overall result.
pub summary: String,
}
impl MonitorThresholds {
/// Judges `report` against these thresholds.
///
/// Five kinds of violation are tracked: imbalance ratio, local DSQ depth,
/// per-CPU rq_clock stalls (only when `fail_on_stall` is set), fallback rate
/// and keep-last rate. A check fails only when its longest run of consecutive
/// violations reaches `sustained_samples`. Reports with no samples, or whose
/// data fails `data_looks_valid`, pass with an explanatory summary.
pub fn evaluate(&self, report: &MonitorReport) -> MonitorVerdict {
let mut details = Vec::new();
// Nothing to judge: pass.
if report.samples.is_empty() {
return MonitorVerdict {
passed: true,
details: vec![],
summary: "no monitor samples".into(),
};
}
// Data that looks uninitialised is not held against the run.
if !Self::data_looks_valid(&report.samples) {
return MonitorVerdict {
passed: true,
details: vec![],
summary: "monitor data not yet initialized".into(),
};
}
// --- Per-sample checks: imbalance ratio and local DSQ depth ---
let mut imbalance = SustainedViolationTracker::default();
let mut dsq = SustainedViolationTracker::default();
let mut worst_dsq_cpu = 0usize;
for (i, sample) in report.samples.iter().enumerate() {
// A sample without CPUs breaks any run in progress.
if sample.cpus.is_empty() {
imbalance.record(false, 0.0, i);
dsq.record(false, 0.0, i);
continue;
}
let ratio = sample.imbalance_ratio();
imbalance.record(ratio > self.max_imbalance_ratio, ratio, i);
// Find the deepest over-threshold DSQ in this sample (first CPU wins ties).
let mut dsq_violated = false;
let mut sample_worst_depth = 0u32;
let mut sample_worst_cpu = 0usize;
for (cpu_idx, cpu) in sample.cpus.iter().enumerate() {
if cpu.local_dsq_depth > self.max_local_dsq_depth
&& cpu.local_dsq_depth > sample_worst_depth
{
dsq_violated = true;
sample_worst_depth = cpu.local_dsq_depth;
sample_worst_cpu = cpu_idx;
}
}
dsq.record(dsq_violated, sample_worst_depth as f64, i);
// Remember the CPU whenever this sample's depth equals the tracker's
// recorded worst value.
if dsq_violated && dsq.worst_value == sample_worst_depth as f64 {
worst_dsq_cpu = sample_worst_cpu;
}
}
let mut failed = false;
if imbalance.sustained(self.sustained_samples) {
failed = true;
details.push(format!(
"imbalance ratio {:.1} exceeded threshold {:.1} for {} consecutive samples (ending at sample {})",
imbalance.worst_value,
self.max_imbalance_ratio,
imbalance.worst_run,
imbalance.worst_at,
));
}
if dsq.sustained(self.sustained_samples) {
failed = true;
details.push(format!(
"local DSQ depth {} on cpu{} exceeded threshold {} for {} consecutive samples (ending at sample {})",
dsq.worst_value as u32,
worst_dsq_cpu,
self.max_local_dsq_depth,
dsq.worst_run,
dsq.worst_at,
));
}
// --- Per-CPU rq_clock stall check between adjacent samples ---
if self.fail_on_stall {
// A report threshold of 0 means "unset": derive it instead.
let threshold = if report.preemption_threshold_ns > 0 {
report.preemption_threshold_ns
} else {
vcpu_preemption_threshold_ns(None)
};
// One tracker per CPU index, sized for the widest sample.
let num_cpus = report
.samples
.iter()
.map(|s| s.cpus.len())
.max()
.unwrap_or(0);
let mut stall: Vec<SustainedViolationTracker> =
vec![SustainedViolationTracker::default(); num_cpus];
for i in 1..report.samples.len() {
let prev = &report.samples[i - 1];
let curr = &report.samples[i];
// Only CPUs present in both samples can be compared.
let cpu_count = prev.cpus.len().min(curr.cpus.len());
#[allow(clippy::needless_range_loop)]
for cpu in 0..cpu_count {
let idle = curr.cpus[cpu].nr_running == 0 && prev.cpus[cpu].nr_running == 0;
// When vCPU time is missing on either side, it is assumed to have advanced.
let cpu_time_advanced = match (
curr.cpus[cpu].vcpu_cpu_time_ns,
prev.cpus[cpu].vcpu_cpu_time_ns,
) {
(Some(curr_t), Some(prev_t)) => curr_t.saturating_sub(prev_t) >= threshold,
_ => true,
};
// Stall: non-zero clock that did not move, CPU not idle, vCPU had time to run.
let is_stall = curr.cpus[cpu].rq_clock != 0
&& curr.cpus[cpu].rq_clock == prev.cpus[cpu].rq_clock
&& !idle
&& cpu_time_advanced;
stall[cpu].record(is_stall, curr.cpus[cpu].rq_clock as f64, i);
}
}
#[allow(clippy::needless_range_loop)] for cpu in 0..num_cpus {
if stall[cpu].sustained(self.sustained_samples) {
failed = true;
details.push(format!(
"rq_clock stall on cpu{} for {} consecutive samples (ending at sample {}, clock={})",
cpu,
stall[cpu].worst_run,
stall[cpu].worst_at,
stall[cpu].worst_value as u64,
));
}
}
}
// --- Per-interval event rate checks ---
let mut fallback_rate = SustainedViolationTracker::default();
let mut keep_last_rate = SustainedViolationTracker::default();
for i in 1..report.samples.len() {
let prev = &report.samples[i - 1];
let curr = &report.samples[i];
let interval_s = curr.elapsed_ms.saturating_sub(prev.elapsed_ms) as f64 / 1000.0;
// No elapsed time: no rate can be computed, so the interval is not a violation.
if interval_s <= 0.0 {
fallback_rate.record(false, 0.0, i);
keep_last_rate.record(false, 0.0, i);
continue;
}
// Rates are only computed when both samples carry event counters.
if let (Some(prev_fb), Some(curr_fb)) = (
prev.sum_event_field(|e| e.select_cpu_fallback),
curr.sum_event_field(|e| e.select_cpu_fallback),
) {
let rate = (curr_fb - prev_fb) as f64 / interval_s;
fallback_rate.record(rate > self.max_fallback_rate, rate, i);
} else {
fallback_rate.record(false, 0.0, i);
}
if let (Some(prev_kl), Some(curr_kl)) = (
prev.sum_event_field(|e| e.dispatch_keep_last),
curr.sum_event_field(|e| e.dispatch_keep_last),
) {
let rate = (curr_kl - prev_kl) as f64 / interval_s;
keep_last_rate.record(rate > self.max_keep_last_rate, rate, i);
} else {
keep_last_rate.record(false, 0.0, i);
}
}
if fallback_rate.sustained(self.sustained_samples) {
failed = true;
details.push(format!(
"fallback rate {:.1}/s exceeded threshold {:.1}/s for {} consecutive intervals (ending at sample {})",
fallback_rate.worst_value,
self.max_fallback_rate,
fallback_rate.worst_run,
fallback_rate.worst_at,
));
}
if keep_last_rate.sustained(self.sustained_samples) {
failed = true;
details.push(format!(
"keep_last rate {:.1}/s exceeded threshold {:.1}/s for {} consecutive intervals (ending at sample {})",
keep_last_rate.worst_value,
self.max_keep_last_rate,
keep_last_rate.worst_run,
keep_last_rate.worst_at,
));
}
let summary = if failed {
format!("monitor FAILED: {} violation(s)", details.len())
} else {
"monitor OK".into()
};
MonitorVerdict {
passed: !failed,
details,
summary,
}
}
fn data_looks_valid(samples: &[MonitorSample]) -> bool {
let mut first_clock: Option<u64> = None;
let mut all_clocks_same = true;
for sample in samples {
if !sample_looks_valid(sample) {
return false;
}
for cpu in &sample.cpus {
match first_clock {
None => first_clock = Some(cpu.rq_clock),
Some(fc) => {
if cpu.rq_clock != fc {
all_clocks_same = false;
}
}
}
}
}
if first_clock.is_some() && all_clocks_same {
let total_readings: usize = samples.iter().map(|s| s.cpus.len()).sum();
if total_readings > 1 {
return false;
}
}
true
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_config_hz_standard() {
    // `CONFIG_HZ_1000=y` must not be mistaken for the `CONFIG_HZ=` line.
    let parsed = parse_config_hz("# comment\nCONFIG_HZ_1000=y\nCONFIG_HZ=1000\n");
    assert_eq!(parsed, Some(1000));
}
#[test]
fn parse_config_hz_250() {
    // A lone `CONFIG_HZ=` line is parsed as-is.
    let parsed = parse_config_hz("CONFIG_HZ=250\n");
    assert_eq!(parsed, Some(250));
}
#[test]
fn parse_config_hz_100() {
    // Same as above with a different tick rate.
    let parsed = parse_config_hz("CONFIG_HZ=100\n");
    assert_eq!(parsed, Some(100));
}
#[test]
fn parse_config_hz_missing() {
    // Without a `CONFIG_HZ=` line there is nothing to parse.
    let parsed = parse_config_hz("CONFIG_PREEMPT=y\nCONFIG_HZ_1000=y\n");
    assert_eq!(parsed, None);
}
#[test]
fn parse_config_hz_garbage_value() {
    // A non-numeric value yields `None` rather than a panic.
    let parsed = parse_config_hz("CONFIG_HZ=abc\n");
    assert_eq!(parsed, None);
}
#[test]
fn parse_config_hz_whitespace() {
    // Leading and trailing whitespace around the line is ignored.
    let parsed = parse_config_hz("  CONFIG_HZ=1000  \n");
    assert_eq!(parsed, Some(1000));
}
#[test]
fn vcpu_threshold_reasonable_range() {
    // Whatever HZ source is picked up, the threshold must land in 10ms-100ms.
    let plausible = 10_000_000u64..=100_000_000u64;
    let t = vcpu_preemption_threshold_ns(None);
    assert!(
        plausible.contains(&t),
        "threshold {t} ns outside expected range 10ms-100ms"
    );
}
#[test]
fn vcpu_threshold_default_hz_fallback() {
    // A kernel path that does not exist must fall through to later HZ sources.
    let missing_kernel = std::path::Path::new("/nonexistent/bzImage");
    let plausible = 10_000_000u64..=100_000_000u64;
    let t = vcpu_preemption_threshold_ns(Some(missing_kernel));
    assert!(
        plausible.contains(&t),
        "fallback threshold {t} ns outside expected range"
    );
}
/// Builds a fake vmlinux payload: 64 zero bytes, the start marker, the
/// gzip-compressed `config_text`, then the `IKCFG_ED` end marker.
fn make_ikconfig_blob(config_text: &str) -> Vec<u8> {
    use std::io::Write;

    let mut gz = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
    gz.write_all(config_text.as_bytes()).unwrap();
    let compressed = gz.finish().unwrap();

    let padding = [0u8; 64];
    [
        &padding[..],
        IKCONFIG_MAGIC,
        &compressed[..],
        &b"IKCFG_ED"[..],
    ]
    .concat()
}
#[test]
fn ikconfig_extracts_hz_1000() {
    let blob = make_ikconfig_blob("CONFIG_HZ=1000\nCONFIG_PREEMPT=y\n");
    // Per-process directory so concurrent test runs on the same host cannot
    // clobber each other's fixture.
    let dir = std::env::temp_dir().join(format!(
        "ktstr-ikconfig-test-1000-{}",
        std::process::id()
    ));
    std::fs::create_dir_all(&dir).unwrap();
    let path = dir.join("vmlinux");
    std::fs::write(&path, &blob).unwrap();
    let hz = read_hz_from_ikconfig(&path);
    // Clean up before asserting so a failed assertion does not leak the directory.
    let _ = std::fs::remove_dir_all(&dir);
    assert_eq!(hz, Some(1000));
}
#[test]
fn ikconfig_extracts_hz_250() {
    let blob = make_ikconfig_blob("CONFIG_HZ=250\n");
    // Per-process directory so concurrent test runs on the same host cannot
    // clobber each other's fixture.
    let dir = std::env::temp_dir().join(format!(
        "ktstr-ikconfig-test-250-{}",
        std::process::id()
    ));
    std::fs::create_dir_all(&dir).unwrap();
    let path = dir.join("vmlinux");
    std::fs::write(&path, &blob).unwrap();
    let hz = read_hz_from_ikconfig(&path);
    // Clean up before asserting so a failed assertion does not leak the directory.
    let _ = std::fs::remove_dir_all(&dir);
    assert_eq!(hz, Some(250));
}
#[test]
fn ikconfig_no_marker_returns_none() {
    // Per-process directory so concurrent test runs on the same host cannot
    // clobber each other's fixture.
    let dir = std::env::temp_dir().join(format!(
        "ktstr-ikconfig-test-none-{}",
        std::process::id()
    ));
    std::fs::create_dir_all(&dir).unwrap();
    let path = dir.join("vmlinux");
    std::fs::write(&path, b"no marker here").unwrap();
    let hz = read_hz_from_ikconfig(&path);
    // Clean up before asserting so a failed assertion does not leak the directory.
    let _ = std::fs::remove_dir_all(&dir);
    assert_eq!(hz, None);
}
#[test]
fn ikconfig_missing_config_hz_returns_none() {
    let blob = make_ikconfig_blob("CONFIG_PREEMPT=y\n");
    // Per-process directory so concurrent test runs on the same host cannot
    // clobber each other's fixture.
    let dir = std::env::temp_dir().join(format!(
        "ktstr-ikconfig-test-nohz-{}",
        std::process::id()
    ));
    std::fs::create_dir_all(&dir).unwrap();
    let path = dir.join("vmlinux");
    std::fs::write(&path, &blob).unwrap();
    let hz = read_hz_from_ikconfig(&path);
    // Clean up before asserting so a failed assertion does not leak the directory.
    let _ = std::fs::remove_dir_all(&dir);
    assert_eq!(hz, None);
}
#[test]
fn empty_samples_default_summary() {
    // No samples at all: every figure is the `Default` value.
    let MonitorSummary {
        total_samples,
        max_imbalance_ratio,
        max_local_dsq_depth,
        stall_detected,
        avg_imbalance_ratio,
        avg_nr_running,
        avg_local_dsq_depth,
        ..
    } = MonitorSummary::from_samples(&[]);
    assert_eq!(total_samples, 0);
    assert_eq!(max_imbalance_ratio, 0.0);
    assert_eq!(max_local_dsq_depth, 0);
    assert!(!stall_detected);
    assert_eq!(avg_imbalance_ratio, 0.0);
    assert_eq!(avg_nr_running, 0.0);
    assert_eq!(avg_local_dsq_depth, 0.0);
}
#[test]
fn single_sample_imbalanced_cpus() {
    // Two CPUs with nr_running 1 and 4: ratio 4, mean nr_running 2.5, mean depth 2.
    let snap = |nr_running: u32, local_dsq_depth: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        local_dsq_depth,
        rq_clock,
        ..Default::default()
    };
    let close = |actual: f64, expected: f64| (actual - expected).abs() < f64::EPSILON;

    let sample = MonitorSample::new(100, vec![snap(1, 3, 1000), snap(4, 1, 2000)]);
    let summary = MonitorSummary::from_samples(&[sample]);

    assert_eq!(summary.total_samples, 1);
    assert!(close(summary.max_imbalance_ratio, 4.0));
    assert_eq!(summary.max_local_dsq_depth, 3);
    assert!(!summary.stall_detected);
    assert!(close(summary.avg_imbalance_ratio, 4.0));
    assert!(close(summary.avg_nr_running, 2.5));
    assert!(close(summary.avg_local_dsq_depth, 2.0));
}
#[test]
fn stall_detected_when_clock_stuck() {
    // cpu0's rq_clock stays at 5000 across both samples while it has runnable
    // work; cpu1 advances normally.
    let snap = |rq_clock: u64| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        ..Default::default()
    };
    let before = MonitorSample::new(100, vec![snap(5000), snap(6000)]);
    let after = MonitorSample::new(200, vec![snap(5000), snap(7000)]);

    let summary = MonitorSummary::from_samples(&[before, after]);
    assert!(summary.stall_detected);
}
#[test]
fn balanced_cpus_ratio_one() {
    // Equal nr_running on both CPUs gives an imbalance ratio of exactly 1.
    let snap = |rq_clock: u64| CpuSnapshot {
        nr_running: 3,
        rq_clock,
        ..Default::default()
    };
    let close = |actual: f64, expected: f64| (actual - expected).abs() < f64::EPSILON;

    let sample = MonitorSample::new(50, vec![snap(100), snap(200)]);
    let summary = MonitorSummary::from_samples(&[sample]);

    assert!(close(summary.max_imbalance_ratio, 1.0));
    assert!(!summary.stall_detected);
    assert!(close(summary.avg_imbalance_ratio, 1.0));
    assert!(close(summary.avg_nr_running, 3.0));
    assert!(close(summary.avg_local_dsq_depth, 0.0));
}
#[test]
fn single_cpu_no_division_by_zero() {
    // With one CPU, min and max coincide and the ratio is 1.
    let only_cpu = CpuSnapshot {
        nr_running: 5,
        local_dsq_depth: 2,
        rq_clock: 1000,
        ..Default::default()
    };
    let summary = MonitorSummary::from_samples(&[MonitorSample::new(10, vec![only_cpu])]);

    assert_eq!(summary.total_samples, 1);
    assert!((summary.max_imbalance_ratio - 1.0).abs() < f64::EPSILON);
    assert_eq!(summary.max_local_dsq_depth, 2);
    assert!(!summary.stall_detected);
}
#[test]
fn all_zero_snapshots() {
    // All-default CPU snapshots: the max ratio stays at its floor of 1.0 while
    // the averages are 0.
    let cpus = vec![CpuSnapshot::default(), CpuSnapshot::default()];
    let summary = MonitorSummary::from_samples(&[MonitorSample::new(0, cpus)]);

    assert_eq!(summary.total_samples, 1);
    assert!((summary.max_imbalance_ratio - 1.0).abs() < f64::EPSILON);
    assert_eq!(summary.max_local_dsq_depth, 0);
    assert!(!summary.stall_detected);
    assert_eq!(summary.avg_imbalance_ratio, 0.0);
    assert_eq!(summary.avg_nr_running, 0.0);
    assert_eq!(summary.avg_local_dsq_depth, 0.0);
}
#[test]
fn empty_cpus_in_sample() {
    // A sample without CPUs is counted but contributes no statistics.
    let summary = MonitorSummary::from_samples(&[MonitorSample::new(10, Vec::new())]);

    assert_eq!(summary.total_samples, 1);
    assert!((summary.max_imbalance_ratio - 1.0).abs() < f64::EPSILON);
    assert_eq!(summary.avg_imbalance_ratio, 0.0);
    assert_eq!(summary.avg_nr_running, 0.0);
    assert_eq!(summary.avg_local_dsq_depth, 0.0);
}
#[test]
fn min_nr_zero_division_guard() {
    // Both CPUs idle: the divisor is clamped to 1 and the max ratio stays 1.0.
    let idle = |rq_clock: u64| CpuSnapshot {
        nr_running: 0,
        rq_clock,
        ..Default::default()
    };
    let sample = MonitorSample::new(10, vec![idle(100), idle(200)]);
    let summary = MonitorSummary::from_samples(&[sample]);
    assert!((summary.max_imbalance_ratio - 1.0).abs() < f64::EPSILON);
}
#[test]
fn min_nr_zero_max_nr_nonzero() {
    // One idle CPU and one with 5 runnable tasks: 5 / max(0, 1) = 5.
    let snap = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    let sample = MonitorSample::new(10, vec![snap(0, 100), snap(5, 200)]);
    let summary = MonitorSummary::from_samples(&[sample]);
    assert!((summary.max_imbalance_ratio - 5.0).abs() < f64::EPSILON);
}
#[test]
fn advancing_clocks_no_stall() {
    // Three samples 100ms apart; both CPUs' clocks advance by 500 each step.
    let samples: Vec<MonitorSample> = (0..3u64)
        .map(|step| {
            let snap = |rq_clock: u64| CpuSnapshot {
                nr_running: 1,
                rq_clock,
                ..Default::default()
            };
            MonitorSample::new(
                100 * (step + 1),
                vec![snap(1000 + 500 * step), snap(2000 + 500 * step)],
            )
        })
        .collect();

    let summary = MonitorSummary::from_samples(&samples);
    assert!(!summary.stall_detected);
    assert_eq!(summary.total_samples, 3);
}
#[test]
fn different_length_cpu_vecs() {
    // The second sample has one more CPU than the first; only the CPUs present
    // in both are compared for stalls.
    let snap = |rq_clock: u64| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        ..Default::default()
    };
    let two_cpus = MonitorSample::new(100, vec![snap(1000), snap(2000)]);
    let three_cpus = MonitorSample::new(200, vec![snap(1500), snap(2500), snap(3000)]);

    let summary = MonitorSummary::from_samples(&[two_cpus, three_cpus]);
    assert!(!summary.stall_detected);
    assert_eq!(summary.total_samples, 2);
    assert_eq!(summary.max_local_dsq_depth, 0);
}
fn balanced_sample(elapsed_ms: u64, clock_base: u64) -> MonitorSample {
MonitorSample {
prog_stats: None,
elapsed_ms,
cpus: vec![
CpuSnapshot {
nr_running: 2,
rq_clock: clock_base,
local_dsq_depth: 3,
..Default::default()
},
CpuSnapshot {
nr_running: 2,
rq_clock: clock_base + 100,
local_dsq_depth: 2,
..Default::default()
},
],
}
}
#[test]
fn thresholds_default_values() {
    // Pins the default limits.
    let MonitorThresholds {
        max_imbalance_ratio,
        max_local_dsq_depth,
        fail_on_stall,
        sustained_samples,
        ..
    } = MonitorThresholds::default();
    assert!((max_imbalance_ratio - 4.0).abs() < f64::EPSILON);
    assert_eq!(max_local_dsq_depth, 50);
    assert!(fail_on_stall);
    assert_eq!(sustained_samples, 5);
}
#[test]
fn thresholds_empty_report_passes() {
    // A default report has no samples and a default summary.
    let report = MonitorReport::default();
    let verdict = MonitorThresholds::default().evaluate(&report);
    assert!(verdict.passed);
    assert!(verdict.details.is_empty());
}
#[test]
fn thresholds_balanced_samples_pass() {
    // Ten balanced samples, 100ms apart, with clocks advancing by 500 each.
    let mut samples = Vec::with_capacity(10);
    for step in 0..10u64 {
        samples.push(balanced_sample(step * 100, 1000 + step * 500));
    }
    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };

    let verdict = MonitorThresholds::default().evaluate(&report);
    assert!(
        verdict.passed,
        "balanced samples should pass: {:?}",
        verdict.details
    );
}
#[test]
fn thresholds_imbalance_below_sustained_passes() {
    let limits = MonitorThresholds {
        sustained_samples: 5,
        max_imbalance_ratio: 4.0,
        ..Default::default()
    };

    // Four samples with a 10:1 imbalance, then one balanced sample that breaks
    // the run before it reaches five.
    let snap = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    let mut samples: Vec<MonitorSample> = (0..4u64)
        .map(|i| {
            MonitorSample::new(
                i * 100,
                vec![snap(1, 1000 + i * 500), snap(10, 1100 + i * 500)],
            )
        })
        .collect();
    samples.push(balanced_sample(400, 3000));

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = limits.evaluate(&report);
    assert!(
        verdict.passed,
        "4 imbalanced < sustained_samples=5: {:?}",
        verdict.details
    );
}
/// Five consecutive samples at a 1-vs-10 `nr_running` split reach
/// `sustained_samples = 5` and must produce an "imbalance" failure.
#[test]
fn thresholds_imbalance_at_sustained_fails() {
    let thresholds = MonitorThresholds {
        sustained_samples: 5,
        max_imbalance_ratio: 4.0,
        ..Default::default()
    };
    let cpu = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    let samples: Vec<MonitorSample> = (0..5u64)
        .map(|step| MonitorSample {
            prog_stats: None,
            elapsed_ms: step * 100,
            cpus: vec![cpu(1, 1000 + step * 500), cpu(10, 1100 + step * 500)],
        })
        .collect();

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(!verdict.passed);
    assert!(verdict.details.iter().any(|d| d.contains("imbalance")));
}
/// Three consecutive samples with one CPU at depth 20 (threshold 10) reach
/// `sustained_samples = 3` and must produce a "DSQ depth" failure.
#[test]
fn thresholds_dsq_depth_sustained_fails() {
    let thresholds = MonitorThresholds {
        sustained_samples: 3,
        max_local_dsq_depth: 10,
        fail_on_stall: false,
        ..Default::default()
    };
    let cpu = |local_dsq_depth: u32, rq_clock: u64| CpuSnapshot {
        nr_running: 2,
        local_dsq_depth,
        rq_clock,
        ..Default::default()
    };
    let samples: Vec<MonitorSample> = (0..3u64)
        .map(|step| MonitorSample {
            prog_stats: None,
            elapsed_ms: step * 100,
            cpus: vec![cpu(20, 1000 + step * 500), cpu(5, 1100 + step * 500)],
        })
        .collect();

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(!verdict.passed);
    assert!(verdict.details.iter().any(|d| d.contains("DSQ depth")));
}
/// Two deep-DSQ samples followed by a balanced one stay below
/// `sustained_samples = 3` and therefore pass.
#[test]
fn thresholds_dsq_depth_below_sustained_passes() {
    let thresholds = MonitorThresholds {
        sustained_samples: 3,
        max_local_dsq_depth: 10,
        fail_on_stall: false,
        ..Default::default()
    };
    let cpu = |local_dsq_depth: u32, rq_clock: u64| CpuSnapshot {
        nr_running: 2,
        local_dsq_depth,
        rq_clock,
        ..Default::default()
    };
    let mut samples: Vec<MonitorSample> = (0..2u64)
        .map(|step| MonitorSample {
            prog_stats: None,
            elapsed_ms: step * 100,
            cpus: vec![cpu(20, 1000 + step * 500), cpu(5, 1100 + step * 500)],
        })
        .collect();
    samples.push(balanced_sample(200, 2000));

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "2 DSQ violations < sustained=3: {:?}",
        verdict.details
    );
}
/// cpu0 keeps `rq_clock == 5000` across two samples while cpu1 advances; with
/// `fail_on_stall` on and `sustained_samples = 1` this must report an
/// "rq_clock stall".
#[test]
fn thresholds_stall_detected_fails() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 1,
        ..Default::default()
    };
    let running_cpu = |rq_clock: u64| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64, advancing_clock: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: vec![running_cpu(5000), running_cpu(advancing_clock)],
    };
    let samples = vec![sample(100, 6000), sample(200, 7000)];

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(!verdict.passed);
    assert!(verdict.details.iter().any(|d| d.contains("rq_clock stall")));
}
/// With `fail_on_stall` disabled, a frozen `rq_clock` on a busy CPU must not
/// fail the run.
///
/// The samples are the same ones that fail in `thresholds_stall_detected_fails`:
/// cpu0 stays at clock 5000 while cpu1 advances, so the readings are not all
/// identical, and `sustained_samples` is 1, so a single frozen pair would be
/// enough to trip the stall check if it were enabled. That leaves the
/// `fail_on_stall` flag as the only thing keeping the verdict green.
#[test]
fn thresholds_stall_disabled_passes() {
    let thresholds = MonitorThresholds {
        fail_on_stall: false,
        sustained_samples: 1,
        ..Default::default()
    };
    let running_cpu = |rq_clock: u64| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64, advancing_clock: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        // cpu0 is frozen at 5000; cpu1 keeps moving.
        cpus: vec![running_cpu(5000), running_cpu(advancing_clock)],
    };
    let samples = vec![sample(100, 6000), sample(200, 7000)];

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "stall disabled should pass: {:?}",
        verdict.details
    );
}
/// Three imbalanced samples, one balanced sample, then three more imbalanced
/// samples: neither run reaches `sustained_samples = 5`, so the verdict passes.
#[test]
fn thresholds_imbalance_interrupted_by_balanced_resets() {
    let thresholds = MonitorThresholds {
        sustained_samples: 5,
        max_imbalance_ratio: 4.0,
        fail_on_stall: false,
        ..Default::default()
    };
    let cpu = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    // 1-vs-10 sample whose clocks are `clock_base` and `clock_base + 100`,
    // each shifted by `step * 500`.
    let skewed = |step: u64, clock_base: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms: step * 100,
        cpus: vec![
            cpu(1, clock_base + step * 500),
            cpu(10, clock_base + 100 + step * 500),
        ],
    };

    let mut samples: Vec<MonitorSample> = (0..3u64).map(|step| skewed(step, 1000)).collect();
    samples.push(balanced_sample(300, 2500));
    samples.extend((4..7u64).map(|step| skewed(step, 3000)));

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "interrupted imbalance should pass: {:?}",
        verdict.details
    );
}
/// Three samples that are both imbalanced (1 vs 5 runnable, threshold 2.0)
/// and have cpu0 frozen at clock 1000 must report both violations.
#[test]
fn thresholds_multiple_violations() {
    let thresholds = MonitorThresholds {
        sustained_samples: 2,
        max_imbalance_ratio: 2.0,
        fail_on_stall: true,
        ..Default::default()
    };
    let cpu = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    // Sample n (1-based): elapsed n*100 ms, cpu0 stuck at 1000, cpu1 at 1000 + n*1000.
    let samples: Vec<MonitorSample> = (1..=3u64)
        .map(|n| MonitorSample {
            prog_stats: None,
            elapsed_ms: n * 100,
            cpus: vec![cpu(1, 1000), cpu(5, 1000 + n * 1000)],
        })
        .collect();

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(!verdict.passed);
    assert!(verdict.details.iter().any(|d| d.contains("imbalance")));
    assert!(verdict.details.iter().any(|d| d.contains("rq_clock stall")));
}
/// Samples that carry no CPU snapshots at all pass the default thresholds.
#[test]
fn thresholds_empty_cpus_samples_pass() {
    let thresholds = MonitorThresholds::default();
    let cpuless = |elapsed_ms: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: Vec::new(),
    };
    let samples = vec![cpuless(100), cpuless(200)];

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(verdict.passed);
}
/// Ten samples filled with the same garbage clock and an enormous DSQ depth
/// must still pass under the default thresholds.
#[test]
fn thresholds_uninitialized_memory_passes() {
    let thresholds = MonitorThresholds::default();
    let garbage_clock = 10314579376562252011u64;
    let garbage_cpu = || CpuSnapshot {
        nr_running: 0,
        rq_clock: garbage_clock,
        local_dsq_depth: 1550435906,
        ..Default::default()
    };
    let mut samples = Vec::new();
    for step in 0..10u64 {
        samples.push(MonitorSample {
            prog_stats: None,
            elapsed_ms: step * 100,
            cpus: vec![garbage_cpu(), garbage_cpu()],
        });
    }

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "uninitialized guest memory should be skipped: {:?}",
        verdict.details
    );
}
/// Every CPU in every sample reports the same `rq_clock`; such data is
/// expected to be treated as uninitialized rather than as a stall.
///
/// `sustained_samples` is set to 1 so that the single frozen pair would be
/// enough to fail if the samples were evaluated. With the default of 5, two
/// samples could never reach the sustained window and the test would pass
/// regardless of how all-same clocks are handled.
#[test]
fn thresholds_all_same_clocks_passes() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 1,
        ..Default::default()
    };
    let frozen_cpu = || CpuSnapshot {
        nr_running: 1,
        rq_clock: 5000,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: vec![frozen_cpu(), frozen_cpu()],
    };
    let samples = vec![sample(100), sample(200)];

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "all-same clocks should be treated as uninitialized: {:?}",
        verdict.details
    );
}
/// A sample containing a local DSQ depth above `DSQ_PLAUSIBILITY_CEILING`
/// (50000 here) is expected to be skipped instead of reported as a violation.
///
/// `sustained_samples` is set to 1 so that one over-threshold sample would be
/// enough to fail if it were evaluated. With the default of 5, a single sample
/// could never reach the sustained window and the test would pass for any depth.
#[test]
fn thresholds_dsq_over_plausibility_ceiling_passes() {
    let thresholds = MonitorThresholds {
        sustained_samples: 1,
        ..Default::default()
    };
    let cpu = |rq_clock: u64, local_dsq_depth: u32| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        local_dsq_depth,
        ..Default::default()
    };
    let samples = vec![MonitorSample {
        prog_stats: None,
        elapsed_ms: 100,
        cpus: vec![cpu(1000, 50000), cpu(2000, 5)],
    }];

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "implausible DSQ depth should skip evaluation: {:?}",
        verdict.details
    );
}
/// A lone sample with a single running CPU passes even with the stall check
/// enabled and `sustained_samples = 1`.
#[test]
fn thresholds_single_cpu_single_sample_valid() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 1,
        ..Default::default()
    };
    let only_cpu = CpuSnapshot {
        nr_running: 1,
        rq_clock: 5000,
        ..Default::default()
    };
    let samples = vec![MonitorSample {
        prog_stats: None,
        elapsed_ms: 100,
        cpus: vec![only_cpu],
    }];

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "single reading should be valid: {:?}",
        verdict.details
    );
}
fn sample_with_events(
elapsed_ms: u64,
clock_base: u64,
fallback: i64,
keep_last: i64,
) -> MonitorSample {
MonitorSample {
prog_stats: None,
elapsed_ms,
cpus: vec![
CpuSnapshot {
nr_running: 2,
rq_clock: clock_base,
event_counters: Some(ScxEventCounters {
select_cpu_fallback: fallback,
dispatch_keep_last: keep_last,
..Default::default()
}),
..Default::default()
},
CpuSnapshot {
nr_running: 2,
rq_clock: clock_base + 100,
event_counters: Some(ScxEventCounters {
select_cpu_fallback: fallback,
dispatch_keep_last: keep_last,
..Default::default()
}),
..Default::default()
},
],
}
}
/// Four samples whose fallback counter grows by 10 per CPU each step must
/// produce a "fallback rate" failure with `sustained_samples = 3`.
#[test]
fn thresholds_fallback_rate_sustained_fails() {
    let thresholds = MonitorThresholds {
        sustained_samples: 3,
        max_fallback_rate: 10.0,
        fail_on_stall: false,
        ..Default::default()
    };
    let mut samples = Vec::new();
    for step in 0..4u64 {
        samples.push(sample_with_events(
            step * 100,
            1000 + step * 500,
            step as i64 * 10,
            0,
        ));
    }

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(!verdict.passed);
    assert!(verdict.details.iter().any(|d| d.contains("fallback rate")));
}
/// The fallback counter grows for two intervals and then stays flat, which
/// is below `sustained_samples = 3`, so the verdict passes.
#[test]
fn thresholds_fallback_rate_below_sustained_passes() {
    let thresholds = MonitorThresholds {
        sustained_samples: 3,
        max_fallback_rate: 10.0,
        fail_on_stall: false,
        ..Default::default()
    };
    let mut samples = Vec::new();
    for step in 0..3u64 {
        samples.push(sample_with_events(
            step * 100,
            1000 + step * 500,
            step as i64 * 10,
            0,
        ));
    }
    // Same counter value as the previous sample: no growth in this interval.
    samples.push(sample_with_events(300, 2500, 20, 0));

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "2 violations < sustained=3: {:?}",
        verdict.details
    );
}
/// Four samples whose keep_last counter grows by 10 per CPU each step must
/// produce a "keep_last rate" failure with `sustained_samples = 3`.
#[test]
fn thresholds_keep_last_rate_sustained_fails() {
    let thresholds = MonitorThresholds {
        sustained_samples: 3,
        max_keep_last_rate: 10.0,
        fail_on_stall: false,
        ..Default::default()
    };
    let mut samples = Vec::new();
    for step in 0..4u64 {
        samples.push(sample_with_events(
            step * 100,
            1000 + step * 500,
            0,
            step as i64 * 10,
        ));
    }

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(!verdict.passed);
    assert!(verdict.details.iter().any(|d| d.contains("keep_last rate")));
}
/// The keep_last counter grows for two intervals and then stays flat, which
/// is below `sustained_samples = 3`, so the verdict passes.
#[test]
fn thresholds_keep_last_rate_below_sustained_passes() {
    let thresholds = MonitorThresholds {
        sustained_samples: 3,
        max_keep_last_rate: 10.0,
        fail_on_stall: false,
        ..Default::default()
    };
    let mut samples = Vec::new();
    for step in 0..3u64 {
        samples.push(sample_with_events(
            step * 100,
            1000 + step * 500,
            0,
            step as i64 * 10,
        ));
    }
    // Same counter value as the previous sample: no growth in this interval.
    samples.push(sample_with_events(300, 2500, 0, 20));

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "2 violations < sustained=3: {:?}",
        verdict.details
    );
}
/// Fallback growth for two intervals, one flat interval, then growth for two
/// more intervals: neither run reaches `sustained_samples = 3`.
#[test]
fn thresholds_event_rate_interrupted_resets() {
    let thresholds = MonitorThresholds {
        sustained_samples: 3,
        max_fallback_rate: 10.0,
        fail_on_stall: false,
        ..Default::default()
    };
    let mut samples: Vec<MonitorSample> = (0..3u64)
        .map(|step| sample_with_events(step * 100, 1000 + step * 500, step as i64 * 10, 0))
        .collect();
    // Counter unchanged from the previous sample, which breaks the streak.
    samples.push(sample_with_events(300, 2500, 20, 0));
    samples.extend((0..2u64).map(|step| {
        sample_with_events(
            400 + step * 100,
            3000 + step * 500,
            30 + (step + 1) as i64 * 10,
            0,
        )
    }));

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "interrupted rate violations should pass: {:?}",
        verdict.details
    );
}
/// Samples built without event counters must pass even when both rate
/// thresholds are 0.0 and `sustained_samples` is 1.
#[test]
fn thresholds_no_event_counters_skips_rate_check() {
    let thresholds = MonitorThresholds {
        sustained_samples: 1,
        max_fallback_rate: 0.0,
        max_keep_last_rate: 0.0,
        fail_on_stall: false,
        ..Default::default()
    };
    let mut samples = Vec::new();
    for step in 0..5u64 {
        samples.push(balanced_sample(step * 100, 1000 + step * 500));
    }

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "no event counters should skip rate check: {:?}",
        verdict.details
    );
}
/// Pins the default fallback (200.0) and keep_last (100.0) rate thresholds.
#[test]
fn thresholds_default_event_rate_values() {
    let defaults = MonitorThresholds::default();
    let fallback_error = (defaults.max_fallback_rate - 200.0).abs();
    let keep_last_error = (defaults.max_keep_last_rate - 100.0).abs();
    assert!(fallback_error < f64::EPSILON);
    assert!(keep_last_error < f64::EPSILON);
}
/// keep_last counters of 0, 5 and 10 per CPU (two CPUs) over 200 ms must
/// yield a summary `keep_last_rate` of 100.0.
#[test]
fn summary_keep_last_rate_computed() {
    let samples = vec![
        sample_with_events(0, 1000, 0, 0),
        sample_with_events(100, 1500, 0, 5),
        sample_with_events(200, 2000, 0, 10),
    ];
    let summary = MonitorSummary::from_samples(&samples);
    // `expect` names the violated precondition if the deltas are missing,
    // using the same message as `from_samples_fields_sane_values`.
    let deltas = summary
        .event_deltas
        .expect("event deltas must be present");
    assert!((deltas.keep_last_rate - 100.0).abs() < f64::EPSILON);
}
/// Samples built by `balanced_sample` produce a summary with no event deltas.
#[test]
fn event_deltas_none_without_counters() {
    let mut samples = Vec::new();
    samples.push(balanced_sample(100, 1000));
    samples.push(balanced_sample(200, 1500));
    let summary = MonitorSummary::from_samples(&samples);
    assert!(summary.event_deltas.is_none());
}
/// A single sample with event counters yields event deltas whose fallback
/// and keep_last rates are both 0.0.
#[test]
fn event_deltas_single_sample() {
    let samples = vec![sample_with_events(100, 1000, 50, 25)];
    let summary = MonitorSummary::from_samples(&samples);
    // `expect` names the violated precondition if the deltas are missing,
    // using the same message as `from_samples_fields_sane_values`.
    let deltas = summary
        .event_deltas
        .expect("event deltas must be present");
    assert_eq!(deltas.fallback_rate, 0.0);
    assert_eq!(deltas.keep_last_rate, 0.0);
}
/// Fallback counters that grow between samples (0, 5, 100 per CPU) must
/// produce a positive `max_fallback_burst`.
#[test]
fn event_deltas_max_fallback_burst() {
    let samples = vec![
        sample_with_events(0, 1000, 0, 0),
        sample_with_events(100, 1500, 5, 0),
        sample_with_events(200, 2000, 100, 0),
    ];
    let summary = MonitorSummary::from_samples(&samples);
    // `expect` names the violated precondition if the deltas are missing,
    // using the same message as `from_samples_fields_sane_values`.
    let deltas = summary
        .event_deltas
        .expect("event deltas must be present");
    assert!(deltas.max_fallback_burst > 0);
}
/// Each of the five event counters rises by exactly 100 between two
/// single-CPU samples; every corresponding total in the deltas must be 100.
#[test]
fn event_deltas_all_counters_computed() {
    // Parameter order: elapsed, fallback, keep_last, dsq_offline, skip_exiting,
    // skip_migration_disabled. Types are spelled out so a swapped argument
    // cannot silently change inference.
    let make = |elapsed_ms: u64, fb: i64, kl: i64, dsq_off: i64, exit: i64, migdis: i64| {
        MonitorSample {
            prog_stats: None,
            elapsed_ms,
            cpus: vec![CpuSnapshot {
                nr_running: 1,
                rq_clock: elapsed_ms * 10,
                event_counters: Some(ScxEventCounters {
                    select_cpu_fallback: fb,
                    dispatch_local_dsq_offline: dsq_off,
                    dispatch_keep_last: kl,
                    enq_skip_exiting: exit,
                    enq_skip_migration_disabled: migdis,
                }),
                ..Default::default()
            }],
        }
    };
    let samples = vec![
        make(100, 10, 20, 30, 40, 50),
        make(200, 110, 120, 130, 140, 150),
    ];
    let summary = MonitorSummary::from_samples(&samples);
    // `expect` names the violated precondition if the deltas are missing,
    // using the same message as `from_samples_fields_sane_values`.
    let deltas = summary
        .event_deltas
        .expect("event deltas must be present");
    assert_eq!(deltas.total_fallback, 100);
    assert_eq!(deltas.total_dispatch_keep_last, 100);
    assert_eq!(deltas.total_dispatch_offline, 100);
    assert_eq!(deltas.total_enq_skip_exiting, 100);
    assert_eq!(deltas.total_enq_skip_migration_disabled, 100);
}
/// An empty sample slice is reported as valid.
#[test]
fn data_looks_valid_empty() {
    let no_samples: &[MonitorSample] = &[];
    assert!(MonitorThresholds::data_looks_valid(no_samples));
}
/// Two balanced samples with different clock bases are reported as valid.
#[test]
fn data_looks_valid_normal() {
    let mut samples = Vec::new();
    samples.push(balanced_sample(100, 1000));
    samples.push(balanced_sample(200, 2000));
    assert!(MonitorThresholds::data_looks_valid(&samples));
}
/// Two samples in which every CPU reports `rq_clock == 5000` are reported
/// as not valid.
#[test]
fn data_looks_valid_all_same_clocks() {
    let frozen_cpu = || CpuSnapshot {
        rq_clock: 5000,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: vec![frozen_cpu(), frozen_cpu()],
    };
    let samples = vec![sample(100), sample(200)];
    assert!(!MonitorThresholds::data_looks_valid(&samples));
}
/// A sample whose only CPU reports a local DSQ depth of 50000 is reported
/// as not valid.
#[test]
fn data_looks_valid_dsq_over_ceiling() {
    let implausible_cpu = CpuSnapshot {
        local_dsq_depth: 50000,
        rq_clock: 1000,
        ..Default::default()
    };
    let samples = vec![MonitorSample {
        prog_stats: None,
        elapsed_ms: 100,
        cpus: vec![implausible_cpu],
    }];
    assert!(!MonitorThresholds::data_looks_valid(&samples));
}
/// A sample with no CPUs has an imbalance ratio of 1.0.
#[test]
fn imbalance_ratio_empty_cpus() {
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 0,
        cpus: Vec::new(),
    };
    let error = (sample.imbalance_ratio() - 1.0).abs();
    assert!(error < f64::EPSILON);
}
/// A sample with a single CPU (`nr_running == 5`) has an imbalance ratio of 1.0.
#[test]
fn imbalance_ratio_single_cpu() {
    let only_cpu = CpuSnapshot {
        nr_running: 5,
        ..Default::default()
    };
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 0,
        cpus: vec![only_cpu],
    };
    let error = (sample.imbalance_ratio() - 1.0).abs();
    assert!(error < f64::EPSILON);
}
/// Two CPUs with `nr_running == 3` each give an imbalance ratio of 1.0.
#[test]
fn imbalance_ratio_balanced() {
    let cpu = |nr_running: u32| CpuSnapshot {
        nr_running,
        ..Default::default()
    };
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 0,
        cpus: vec![cpu(3), cpu(3)],
    };
    let error = (sample.imbalance_ratio() - 1.0).abs();
    assert!(error < f64::EPSILON);
}
/// CPUs with `nr_running` of 2 and 8 give an imbalance ratio of 4.0.
#[test]
fn imbalance_ratio_imbalanced() {
    let cpu = |nr_running: u32| CpuSnapshot {
        nr_running,
        ..Default::default()
    };
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 0,
        cpus: vec![cpu(2), cpu(8)],
    };
    let error = (sample.imbalance_ratio() - 4.0).abs();
    assert!(error < f64::EPSILON);
}
/// CPUs with `nr_running` of 0 and 5 give an imbalance ratio of 5.0.
#[test]
fn imbalance_ratio_zero_min() {
    let cpu = |nr_running: u32| CpuSnapshot {
        nr_running,
        ..Default::default()
    };
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 0,
        cpus: vec![cpu(0), cpu(5)],
    };
    let error = (sample.imbalance_ratio() - 5.0).abs();
    assert!(error < f64::EPSILON);
}
/// Summing an event field over default CPU snapshots yields `None`.
#[test]
fn sum_event_field_none_when_no_counters() {
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 0,
        cpus: vec![CpuSnapshot::default(), CpuSnapshot::default()],
    };
    let total = sample.sum_event_field(|e| e.select_cpu_fallback);
    assert!(total.is_none());
}
/// Fallback counters of 10 and 20 on two CPUs sum to `Some(30)`.
#[test]
fn sum_event_field_sums_across_cpus() {
    let cpu_with_fallback = |select_cpu_fallback: i64| CpuSnapshot {
        event_counters: Some(ScxEventCounters {
            select_cpu_fallback,
            ..Default::default()
        }),
        ..Default::default()
    };
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 0,
        cpus: vec![cpu_with_fallback(10), cpu_with_fallback(20)],
    };
    assert_eq!(
        sample.sum_event_field(|e| e.select_cpu_fallback),
        Some(30)
    );
}
/// One CPU with `dispatch_keep_last == 7` plus one default CPU sums to `Some(7)`.
#[test]
fn sum_event_field_mixed_some_none() {
    let with_counters = CpuSnapshot {
        event_counters: Some(ScxEventCounters {
            dispatch_keep_last: 7,
            ..Default::default()
        }),
        ..Default::default()
    };
    let without_counters = CpuSnapshot::default();
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 0,
        cpus: vec![with_counters, without_counters],
    };
    assert_eq!(
        sample.sum_event_field(|e| e.dispatch_keep_last),
        Some(7)
    );
}
/// A sample whose only CPU has a local DSQ depth of 5 looks valid.
#[test]
fn sample_looks_valid_normal() {
    let shallow_cpu = CpuSnapshot {
        local_dsq_depth: 5,
        ..Default::default()
    };
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 100,
        cpus: vec![shallow_cpu],
    };
    assert!(sample_looks_valid(&sample));
}
/// A depth exactly equal to `DSQ_PLAUSIBILITY_CEILING` still looks valid.
#[test]
fn sample_looks_valid_at_ceiling() {
    let boundary_cpu = CpuSnapshot {
        local_dsq_depth: DSQ_PLAUSIBILITY_CEILING,
        ..Default::default()
    };
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 100,
        cpus: vec![boundary_cpu],
    };
    assert!(sample_looks_valid(&sample));
}
/// A depth one above `DSQ_PLAUSIBILITY_CEILING` does not look valid.
#[test]
fn sample_looks_valid_over_ceiling() {
    let implausible_cpu = CpuSnapshot {
        local_dsq_depth: DSQ_PLAUSIBILITY_CEILING + 1,
        ..Default::default()
    };
    let sample = MonitorSample {
        prog_stats: None,
        elapsed_ms: 100,
        cpus: vec![implausible_cpu],
    };
    assert!(!sample_looks_valid(&sample));
}
/// A sample with no CPU snapshots looks valid.
#[test]
fn sample_looks_valid_empty_cpus() {
    let cpuless = MonitorSample {
        prog_stats: None,
        elapsed_ms: 100,
        cpus: Vec::new(),
    };
    assert!(sample_looks_valid(&cpuless));
}
/// Builds five two-CPU samples with slowly growing load, advancing clocks and
/// monotonically increasing event counters, then range-checks every summary
/// field produced by `MonitorSummary::from_samples`.
#[test]
fn from_samples_fields_sane_values() {
    let mut samples = Vec::new();
    for i in 0..5u64 {
        let load = i as u32;
        let events = i as i64;
        // Every field is spelled out (no `..Default::default()`), as in the
        // original fixture.
        let first = CpuSnapshot {
            nr_running: load + 1,
            scx_nr_running: load,
            local_dsq_depth: load % 3,
            rq_clock: 1000 + i * 500,
            scx_flags: 0,
            event_counters: Some(ScxEventCounters {
                select_cpu_fallback: events * 2,
                dispatch_keep_last: events,
                ..Default::default()
            }),
            schedstat: None,
            vcpu_cpu_time_ns: None,
            sched_domains: None,
        };
        let second = CpuSnapshot {
            nr_running: load + 2,
            scx_nr_running: load + 1,
            local_dsq_depth: 0,
            rq_clock: 1100 + i * 600,
            scx_flags: 0,
            event_counters: Some(ScxEventCounters {
                select_cpu_fallback: events * 3,
                dispatch_keep_last: events * 2,
                ..Default::default()
            }),
            schedstat: None,
            vcpu_cpu_time_ns: None,
            sched_domains: None,
        };
        samples.push(MonitorSample {
            prog_stats: None,
            elapsed_ms: i * 100,
            cpus: vec![first, second],
        });
    }

    let summary = MonitorSummary::from_samples(&samples);
    assert_eq!(summary.total_samples, 5);

    let max_ratio = summary.max_imbalance_ratio;
    assert!(max_ratio >= 1.0, "ratio must be >= 1.0: {}", max_ratio);
    assert!(max_ratio <= 10.0, "ratio must be reasonable: {}", max_ratio);

    let max_depth = summary.max_local_dsq_depth;
    assert!(
        max_depth <= DSQ_PLAUSIBILITY_CEILING,
        "dsq depth must be below plausibility ceiling: {}",
        max_depth
    );
    assert!(
        max_depth <= 10,
        "dsq depth must be small in this controlled test: {}",
        max_depth
    );

    assert!(
        !summary.stall_detected,
        "no stall expected with advancing rq_clock"
    );

    let deltas = summary
        .event_deltas
        .as_ref()
        .expect("event deltas must be present");
    assert!(
        deltas.total_fallback >= 0,
        "fallback count must be non-negative"
    );
    assert!(
        deltas.total_dispatch_keep_last >= 0,
        "keep_last count must be non-negative"
    );
    assert!(
        deltas.fallback_rate >= 0.0,
        "fallback rate must be non-negative"
    );
    assert!(
        deltas.keep_last_rate >= 0.0,
        "keep_last rate must be non-negative"
    );

    assert!(
        summary.avg_imbalance_ratio >= 1.0,
        "avg imbalance must be >= 1.0: {}",
        summary.avg_imbalance_ratio,
    );
    assert!(
        summary.avg_nr_running > 0.0,
        "avg nr_running must be positive: {}",
        summary.avg_nr_running,
    );
    assert!(
        summary.avg_local_dsq_depth >= 0.0,
        "avg dsq_depth must be non-negative: {}",
        summary.avg_local_dsq_depth,
    );
}
/// Summarising an empty slice yields zeroed counters and averages, no stall,
/// and no event deltas.
#[test]
fn from_samples_empty_all_defaults() {
    let empty = MonitorSummary::from_samples(&[]);

    assert_eq!(empty.total_samples, 0);
    assert_eq!(empty.max_imbalance_ratio, 0.0);
    assert_eq!(empty.max_local_dsq_depth, 0);
    assert!(!empty.stall_detected);
    assert_eq!(empty.avg_imbalance_ratio, 0.0);
    assert_eq!(empty.avg_nr_running, 0.0);
    assert_eq!(empty.avg_local_dsq_depth, 0.0);
    assert!(
        empty.event_deltas.is_none(),
        "empty input must not produce event deltas"
    );
}
/// A threshold of 1.0 with `sustained_samples = 2` must flag a steady
/// 2-vs-3 `nr_running` split, and the reported detail line must contain the
/// expected wording.
#[test]
fn neg_tight_imbalance_threshold_catches_mild_imbalance() {
    let thresholds = MonitorThresholds {
        max_imbalance_ratio: 1.0,
        sustained_samples: 2,
        fail_on_stall: false,
        ..Default::default()
    };
    let cpu = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    let samples: Vec<MonitorSample> = (0..3u64)
        .map(|step| MonitorSample {
            prog_stats: None,
            elapsed_ms: step * 100,
            cpus: vec![cpu(2, 1000 + step * 500), cpu(3, 1100 + step * 500)],
        })
        .collect();

    let summary = MonitorSummary::from_samples(&samples);
    assert!(
        summary.max_imbalance_ratio >= 1.5,
        "summary must capture ratio"
    );
    assert!(!summary.stall_detected, "no stall in this scenario");
    assert_eq!(summary.total_samples, 3);

    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let v = thresholds.evaluate(&report);
    assert!(!v.passed, "imbalance=1.5 must fail threshold=1.0");

    // Dump the reported details if the expected line is missing; a bare
    // `unwrap()` would hide what was actually produced.
    let detail = v
        .details
        .iter()
        .find(|d| d.contains("imbalance"))
        .unwrap_or_else(|| panic!("no imbalance detail reported: {:?}", v.details));
    assert!(detail.contains("ratio"), "must include 'ratio': {detail}");
    assert!(
        detail.contains("exceeded threshold"),
        "must include threshold: {detail}"
    );
    assert!(
        detail.contains("1.0"),
        "must show threshold value: {detail}"
    );
    assert!(
        detail.contains("consecutive samples"),
        "must show sustained count: {detail}"
    );
    assert!(
        detail.contains("ending at sample"),
        "must show sample index: {detail}"
    );
    assert!(
        v.summary.contains("FAILED"),
        "summary must say FAILED: {}",
        v.summary
    );
}
/// A DSQ depth threshold of 1 with `sustained_samples = 2` must flag cpu0
/// sitting at depth 3 for three samples, and the detail line must name the
/// depth, the CPU and the threshold.
#[test]
fn neg_tight_dsq_threshold_catches_small_depth() {
    let thresholds = MonitorThresholds {
        max_local_dsq_depth: 1,
        sustained_samples: 2,
        fail_on_stall: false,
        ..Default::default()
    };
    let cpu = |local_dsq_depth: u32, rq_clock: u64| CpuSnapshot {
        nr_running: 1,
        local_dsq_depth,
        rq_clock,
        ..Default::default()
    };
    let samples: Vec<MonitorSample> = (0..3u64)
        .map(|step| MonitorSample {
            prog_stats: None,
            elapsed_ms: step * 100,
            cpus: vec![cpu(3, 1000 + step * 500), cpu(0, 1100 + step * 500)],
        })
        .collect();

    let summary = MonitorSummary::from_samples(&samples);
    assert_eq!(
        summary.max_local_dsq_depth, 3,
        "summary must capture max depth"
    );
    assert!(
        summary.max_local_dsq_depth <= DSQ_PLAUSIBILITY_CEILING,
        "depth must be plausible"
    );

    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let v = thresholds.evaluate(&report);
    assert!(!v.passed, "dsq_depth=3 must fail threshold=1");

    // Dump the reported details if the expected line is missing; a bare
    // `unwrap()` would hide what was actually produced.
    let detail = v
        .details
        .iter()
        .find(|d| d.contains("DSQ depth"))
        .unwrap_or_else(|| panic!("no DSQ depth detail reported: {:?}", v.details));
    assert!(detail.contains("3"), "must show depth value: {detail}");
    assert!(detail.contains("cpu0"), "must show CPU number: {detail}");
    assert!(
        detail.contains("threshold 1"),
        "must show threshold: {detail}"
    );
    assert!(
        detail.contains("consecutive samples"),
        "must show count: {detail}"
    );
}
/// cpu0 stays at `rq_clock == 5000` across two samples while cpu1 advances;
/// both the summary and the verdict must report the stall, and the detail
/// line must name the CPU and the frozen clock value.
#[test]
fn neg_stall_detection_catches_frozen_rq_clock() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 1,
        ..Default::default()
    };
    let running_cpu = |rq_clock: u64| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64, advancing_clock: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: vec![running_cpu(5000), running_cpu(advancing_clock)],
    };
    let samples = vec![sample(100, 6000), sample(200, 7000)];

    let summary = MonitorSummary::from_samples(&samples);
    assert!(
        summary.stall_detected,
        "summary.stall_detected must be true"
    );

    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let v = thresholds.evaluate(&report);
    assert!(!v.passed, "frozen rq_clock must be detected");

    // Dump the reported details if the expected line is missing; a bare
    // `unwrap()` would hide what was actually produced.
    let detail = v
        .details
        .iter()
        .find(|d| d.contains("rq_clock stall"))
        .unwrap_or_else(|| panic!("no rq_clock stall detail reported: {:?}", v.details));
    assert!(detail.contains("cpu0"), "must name frozen CPU: {detail}");
    assert!(
        detail.contains("consecutive samples"),
        "must show sustained count: {detail}"
    );
    assert!(
        detail.contains("clock=5000"),
        "must include frozen clock value: {detail}"
    );
}
/// Two samples that are both heavily imbalanced (1 vs 10 runnable,
/// threshold 2.0) and have cpu0 frozen at clock 1000 must yield one detail
/// line for each violation.
#[test]
fn neg_combined_imbalance_and_stall_both_reported() {
    let thresholds = MonitorThresholds {
        max_imbalance_ratio: 2.0,
        sustained_samples: 1,
        fail_on_stall: true,
        ..Default::default()
    };
    let cpu = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64, advancing_clock: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: vec![cpu(1, 1000), cpu(10, advancing_clock)],
    };
    let samples = vec![sample(100, 2000), sample(200, 3000)];

    let summary = MonitorSummary::from_samples(&samples);
    assert!(summary.stall_detected);
    assert!(summary.max_imbalance_ratio >= 10.0);

    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let v = thresholds.evaluate(&report);
    assert!(!v.passed);

    // Dump the reported details if an expected line is missing; a bare
    // `unwrap()` would hide what was actually produced.
    let imb = v
        .details
        .iter()
        .find(|d| d.contains("imbalance"))
        .unwrap_or_else(|| panic!("no imbalance detail reported: {:?}", v.details));
    assert!(
        imb.contains("exceeded threshold 2.0"),
        "imbalance format: {imb}"
    );
    let stall = v
        .details
        .iter()
        .find(|d| d.contains("rq_clock stall"))
        .unwrap_or_else(|| panic!("no rq_clock stall detail reported: {:?}", v.details));
    assert!(stall.contains("cpu0"), "stall format: {stall}");
    assert!(
        v.details.len() >= 2,
        "both violations must be reported, got {}",
        v.details.len()
    );
    assert!(v.summary.contains("FAILED"), "summary: {}", v.summary);
}
/// cpu0 has a frozen clock but `nr_running == 0` in both samples; neither the
/// summary nor the verdict may treat that as a stall.
#[test]
fn stall_idle_cpu_exempt() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 1,
        ..Default::default()
    };
    let cpu = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64, advancing_clock: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        // cpu0: idle with an unchanged clock; cpu1: busy and advancing.
        cpus: vec![cpu(0, 5000), cpu(1, advancing_clock)],
    };
    let samples = vec![sample(100, 6000), sample(200, 7000)];

    let summary = MonitorSummary::from_samples(&samples);
    assert!(
        !summary.stall_detected,
        "idle CPU should not trigger stall in summary"
    );

    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "idle CPU should not trigger stall: {:?}",
        verdict.details
    );
}
/// cpu0 goes from `nr_running == 0` to `nr_running == 1` while its clock stays
/// at 5000; this must be reported as a stall by both summary and verdict.
#[test]
fn stall_idle_to_busy_not_exempt() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 1,
        ..Default::default()
    };
    let cpu = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64, cpu0_running: u32, cpu1_clock: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: vec![cpu(cpu0_running, 5000), cpu(1, cpu1_clock)],
    };
    let samples = vec![sample(100, 0, 6000), sample(200, 1, 7000)];

    let summary = MonitorSummary::from_samples(&samples);
    assert!(
        summary.stall_detected,
        "busy CPU with frozen clock is a stall"
    );

    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        !verdict.passed,
        "busy CPU with frozen clock must fail: {:?}",
        verdict.details
    );
}
/// cpu0 is frozen for three samples (two stall pairs) and then advances; with
/// `sustained_samples = 3` the verdict must still pass.
#[test]
fn stall_sustained_window_filters_transient() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 3,
        ..Default::default()
    };
    let running_cpu = |rq_clock: u64| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64, cpu0_clock: u64, cpu1_clock: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: vec![running_cpu(cpu0_clock), running_cpu(cpu1_clock)],
    };

    let mut samples: Vec<MonitorSample> = (0..3u64)
        .map(|step| sample(step * 100, 5000, 6000 + step * 500))
        .collect();
    // cpu0's clock finally moves, ending the frozen run.
    samples.push(sample(300, 6000, 7500));

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "2 stall pairs < sustained=3: {:?}",
        verdict.details
    );
}
/// cpu0 is frozen for four samples (three stall pairs); with
/// `sustained_samples = 3` the verdict must fail with an "rq_clock stall".
#[test]
fn stall_sustained_window_catches_real_stall() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 3,
        ..Default::default()
    };
    let running_cpu = |rq_clock: u64| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        ..Default::default()
    };
    let mut samples = Vec::new();
    for step in 0..4u64 {
        samples.push(MonitorSample {
            prog_stats: None,
            elapsed_ms: step * 100,
            cpus: vec![running_cpu(5000), running_cpu(6000 + step * 500)],
        });
    }

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(!verdict.passed, "3 consecutive stall pairs must fail");
    assert!(verdict.details.iter().any(|d| d.contains("rq_clock stall")));
}
/// The summary must not flag a stall when the only frozen clock belongs to a
/// CPU with `nr_running == 0` in both samples.
#[test]
fn from_samples_idle_cpu_no_stall() {
    let cpu = |nr_running: u32, rq_clock: u64| CpuSnapshot {
        nr_running,
        rq_clock,
        ..Default::default()
    };
    let sample = |elapsed_ms: u64, advancing_clock: u64| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: vec![cpu(0, 5000), cpu(1, advancing_clock)],
    };
    let earlier = sample(100, 6000);
    let later = sample(200, 7000);

    let summary = MonitorSummary::from_samples(&[earlier, later]);
    assert!(!summary.stall_detected);
}
/// cpu0 is frozen between the first two samples only; one stall pair is below
/// `sustained_samples = 5`, so the verdict passes.
#[test]
fn stall_below_sustained_passes() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 5,
        ..Default::default()
    };
    let running_cpu = |rq_clock: u64| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        ..Default::default()
    };
    // (elapsed_ms, cpu0 clock, cpu1 clock) for each sample.
    let readings: [(u64, u64, u64); 3] = [(100, 5000, 6000), (200, 5000, 7000), (300, 6000, 8000)];
    let samples: Vec<MonitorSample> = readings
        .into_iter()
        .map(|(elapsed_ms, cpu0_clock, cpu1_clock)| MonitorSample {
            prog_stats: None,
            elapsed_ms,
            cpus: vec![running_cpu(cpu0_clock), running_cpu(cpu1_clock)],
        })
        .collect();

    let summary = MonitorSummary::from_samples(&samples);
    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "1 stall < sustained=5: {:?}",
        verdict.details
    );
}
/// Negative test for `max_fallback_rate`: three samples from
/// `sample_with_events` whose third argument grows by 10 per sample (the
/// fourth stays 0) must make evaluation fail with a "fallback rate" detail.
///
/// NOTE(review): the third argument is presumably the fallback counter —
/// confirm against `sample_with_events`.
///
/// The detail line must carry the rate unit, the "exceeded threshold" wording,
/// the configured threshold value and the sustained-interval wording.
#[test]
fn neg_fallback_rate_threshold_fires() {
    let thresholds = MonitorThresholds {
        sustained_samples: 2,
        max_fallback_rate: 5.0,
        fail_on_stall: false,
        ..Default::default()
    };

    let mut samples = Vec::with_capacity(3);
    for step in 0..3u64 {
        samples.push(sample_with_events(
            step * 100,
            1000 + step * 500,
            step as i64 * 10,
            0,
        ));
    }

    let summary = MonitorSummary::from_samples(&samples);
    assert!(
        summary.event_deltas.is_some(),
        "event deltas must be computed"
    );

    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(!verdict.passed, "fallback rate must be caught");

    // Pull out the one detail line that talks about the fallback rate.
    let detail = verdict
        .details
        .iter()
        .find(|line| line.contains("fallback rate"))
        .unwrap();
    assert!(detail.contains("/s"), "must include rate unit: {detail}");
    assert!(
        detail.contains("exceeded threshold"),
        "must state threshold: {detail}"
    );
    assert!(
        detail.contains("5.0/s"),
        "must show threshold value: {detail}"
    );
    assert!(
        detail.contains("consecutive intervals"),
        "must show sustained count: {detail}"
    );
}
/// Negative test for `max_keep_last_rate`: three samples from
/// `sample_with_events` whose fourth argument grows by 10 per sample (the
/// third stays 0) must make evaluation fail with a "keep_last rate" detail.
///
/// NOTE(review): the fourth argument is presumably the keep_last counter —
/// confirm against `sample_with_events`.
#[test]
fn neg_keep_last_rate_threshold_fires() {
    let thresholds = MonitorThresholds {
        sustained_samples: 2,
        max_keep_last_rate: 5.0,
        fail_on_stall: false,
        ..Default::default()
    };

    let mut samples = Vec::with_capacity(3);
    for step in 0..3u64 {
        samples.push(sample_with_events(
            step * 100,
            1000 + step * 500,
            0,
            step as i64 * 10,
        ));
    }

    let summary = MonitorSummary::from_samples(&samples);
    assert!(summary.event_deltas.is_some());

    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(!verdict.passed, "keep_last rate must be caught");

    // Pull out the one detail line that talks about the keep_last rate.
    let detail = verdict
        .details
        .iter()
        .find(|line| line.contains("keep_last rate"))
        .unwrap();
    assert!(detail.contains("/s"), "must include rate unit: {detail}");
    assert!(
        detail.contains("exceeded threshold"),
        "must state threshold: {detail}"
    );
    assert!(
        detail.contains("5.0/s"),
        "must show threshold value: {detail}"
    );
}
/// CPU 0's `rq_clock` stays at 5000 across both samples, but its
/// `vcpu_cpu_time_ns` advances by only 500_000 ns, which is below the
/// 10_000_000 ns threshold given to `from_samples_with_threshold` and stored
/// in the report. Neither the summary nor `evaluate` may treat that as a
/// stall.
#[test]
fn evaluate_suppresses_stall_when_vcpu_preempted() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 1,
        ..Default::default()
    };

    // One runnable CPU with the given rq_clock and vCPU CPU time.
    let cpu = |rq_clock, vcpu_ns| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        vcpu_cpu_time_ns: Some(vcpu_ns),
        ..Default::default()
    };

    let samples = vec![
        MonitorSample {
            prog_stats: None,
            elapsed_ms: 100,
            cpus: vec![cpu(5000, 1_000_000_000), cpu(6000, 1_000_000_000)],
        },
        MonitorSample {
            prog_stats: None,
            elapsed_ms: 200,
            // CPU 0: clock unchanged, vCPU time barely moved.
            cpus: vec![cpu(5000, 1_000_500_000), cpu(7000, 1_010_000_000)],
        },
    ];

    let summary = MonitorSummary::from_samples_with_threshold(&samples, 10_000_000);
    assert!(
        !summary.stall_detected,
        "preempted vCPU should not flag stall in summary"
    );

    let report = MonitorReport {
        samples,
        summary,
        preemption_threshold_ns: 10_000_000,
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        verdict.passed,
        "preempted vCPU should suppress stall: {:?}",
        verdict.details
    );
}
/// CPU 0's `rq_clock` stays at 5000 while its `vcpu_cpu_time_ns` advances by
/// 10_000_000 ns, equal to the threshold used for both the summary and the
/// report. Here both the summary and `evaluate` must report the stall.
#[test]
fn evaluate_catches_stall_when_vcpu_running() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 1,
        ..Default::default()
    };

    // One runnable CPU with the given rq_clock and vCPU CPU time.
    let cpu = |rq_clock, vcpu_ns| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        vcpu_cpu_time_ns: Some(vcpu_ns),
        ..Default::default()
    };

    let samples = vec![
        MonitorSample {
            prog_stats: None,
            elapsed_ms: 100,
            cpus: vec![cpu(5000, 1_000_000_000), cpu(6000, 1_000_000_000)],
        },
        MonitorSample {
            prog_stats: None,
            elapsed_ms: 200,
            // CPU 0: clock unchanged even though the vCPU accumulated CPU time.
            cpus: vec![cpu(5000, 1_010_000_000), cpu(7000, 1_010_000_000)],
        },
    ];

    let summary = MonitorSummary::from_samples_with_threshold(&samples, 10_000_000);
    assert!(
        summary.stall_detected,
        "running vCPU with stuck clock is a stall"
    );

    let report = MonitorReport {
        samples,
        summary,
        preemption_threshold_ns: 10_000_000,
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        !verdict.passed,
        "running vCPU stall must fail: {:?}",
        verdict.details
    );
    assert!(
        verdict
            .details
            .iter()
            .any(|line| line.contains("rq_clock stall"))
    );
}
/// No snapshot sets `vcpu_cpu_time_ns` (it is left at its default, which the
/// assertion messages refer to as `None`). CPU 0's `rq_clock` stays at 5000
/// with `nr_running == 1`, so the summary must flag a stall and evaluation
/// must fail.
#[test]
fn evaluate_stall_none_vcpu_time_falls_back_to_current_behavior() {
    let thresholds = MonitorThresholds {
        fail_on_stall: true,
        sustained_samples: 1,
        ..Default::default()
    };

    // One runnable CPU with the given rq_clock and no vCPU time override.
    let cpu = |rq_clock| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        ..Default::default()
    };

    let samples = vec![
        MonitorSample {
            prog_stats: None,
            elapsed_ms: 100,
            cpus: vec![cpu(5000), cpu(6000)],
        },
        MonitorSample {
            prog_stats: None,
            elapsed_ms: 200,
            cpus: vec![cpu(5000), cpu(7000)],
        },
    ];

    let summary = MonitorSummary::from_samples(&samples);
    assert!(
        summary.stall_detected,
        "None vcpu time should not suppress stall"
    );

    let report = MonitorReport {
        samples,
        summary,
        ..Default::default()
    };
    let verdict = thresholds.evaluate(&report);
    assert!(
        !verdict.passed,
        "None vcpu time should detect stall: {:?}",
        verdict.details
    );
}
/// Summary-only variant of the preemption case: CPU 0's `rq_clock` stays at
/// 5000 while its `vcpu_cpu_time_ns` advances by just 100_000 ns, below the
/// 10_000_000 ns threshold, so `stall_detected` must remain false.
#[test]
fn from_samples_suppresses_stall_when_vcpu_preempted() {
    // One runnable CPU with the given rq_clock and vCPU CPU time.
    let cpu = |rq_clock, vcpu_ns| CpuSnapshot {
        nr_running: 1,
        rq_clock,
        vcpu_cpu_time_ns: Some(vcpu_ns),
        ..Default::default()
    };

    let first = MonitorSample {
        prog_stats: None,
        elapsed_ms: 100,
        cpus: vec![cpu(5000, 1_000_000_000), cpu(6000, 1_000_000_000)],
    };
    let second = MonitorSample {
        prog_stats: None,
        elapsed_ms: 200,
        cpus: vec![cpu(5000, 1_000_100_000), cpu(7000, 1_010_000_000)],
    };

    let summary = MonitorSummary::from_samples_with_threshold(&[first, second], 10_000_000);
    assert!(
        !summary.stall_detected,
        "preempted vCPU should not flag stall"
    );
}
/// Builds a two-CPU [`MonitorSample`] for schedstat tests.
///
/// Both CPUs get `nr_running = 2` and identical schedstat counters
/// (`run_delay`, `pcount`, `sched_count`, `ttwu_count`; every other
/// `RqSchedstat` field takes its default). The first CPU's `rq_clock` is
/// `clock_base`, the second's is `clock_base + 100`.
fn sample_with_schedstat(
    elapsed_ms: u64,
    clock_base: u64,
    run_delay: u64,
    pcount: u64,
    sched_count: u32,
    ttwu_count: u32,
) -> MonitorSample {
    // The two CPUs differ only by their offset from `clock_base`.
    let cpus: Vec<_> = [0, 100]
        .into_iter()
        .map(|clock_offset| CpuSnapshot {
            nr_running: 2,
            rq_clock: clock_base + clock_offset,
            schedstat: Some(RqSchedstat {
                run_delay,
                pcount,
                sched_count,
                ttwu_count,
                ..Default::default()
            }),
            ..Default::default()
        })
        .collect();

    MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus,
    }
}
/// Two schedstat samples 1000 ms apart. The expected totals match the
/// per-CPU counter differences summed over the two CPUs that
/// `sample_with_schedstat` creates (e.g. run_delay: (5000 - 1000) * 2 = 8000).
/// Over this 1000 ms span the expected rates equal the totals.
#[test]
fn schedstat_deltas_computed_from_samples() {
    let earlier = sample_with_schedstat(0, 1000, 1000, 10, 50, 30);
    let later = sample_with_schedstat(1000, 2000, 5000, 20, 100, 60);

    let summary = MonitorSummary::from_samples(&[earlier, later]);
    let deltas = summary.schedstat_deltas.unwrap();

    assert_eq!(deltas.total_run_delay, 8000);
    assert_eq!(deltas.total_pcount, 20);
    assert_eq!(deltas.total_sched_count, 100);
    assert_eq!(deltas.total_ttwu_count, 60);

    let run_delay_error = (deltas.run_delay_rate - 8000.0).abs();
    assert!(run_delay_error < f64::EPSILON);
    let sched_count_error = (deltas.sched_count_rate - 100.0).abs();
    assert!(sched_count_error < f64::EPSILON);
}
/// Samples built with `balanced_sample` must produce a summary whose
/// `schedstat_deltas` is `None`.
#[test]
fn schedstat_deltas_none_without_schedstat() {
    let first = balanced_sample(100, 1000);
    let second = balanced_sample(200, 1500);
    let summary = MonitorSummary::from_samples(&[first, second]);
    assert!(summary.schedstat_deltas.is_none());
}
/// With only one schedstat sample the deltas are still present, but the
/// checked rates and the run-delay total are all zero.
#[test]
fn schedstat_deltas_single_sample() {
    let only_sample = sample_with_schedstat(100, 1000, 5000, 10, 50, 30);
    let summary = MonitorSummary::from_samples(&[only_sample]);
    let deltas = summary.schedstat_deltas.unwrap();

    assert_eq!(deltas.run_delay_rate, 0.0);
    assert_eq!(deltas.sched_count_rate, 0.0);
    assert_eq!(deltas.total_run_delay, 0);
}
/// Two schedstat samples 500 ms apart. Totals are the per-CPU differences
/// summed over both CPUs (run_delay: 2000 * 2, sched_count: 40 * 2); the
/// expected rates are twice the totals, consistent with a per-second rate
/// over a half-second span.
#[test]
fn schedstat_deltas_rates() {
    let earlier = sample_with_schedstat(0, 1000, 1000, 5, 10, 20);
    let later = sample_with_schedstat(500, 2000, 3000, 15, 50, 40);

    let summary = MonitorSummary::from_samples(&[earlier, later]);
    let deltas = summary.schedstat_deltas.unwrap();

    assert_eq!(deltas.total_run_delay, 4000);
    let run_delay_error = (deltas.run_delay_rate - 8000.0).abs();
    assert!(run_delay_error < f64::EPSILON);

    assert_eq!(deltas.total_sched_count, 80);
    let sched_count_error = (deltas.sched_count_rate - 160.0).abs();
    assert!(sched_count_error < f64::EPSILON);
}
/// Single-CPU samples with every `RqSchedstat` field populated; each total in
/// the resulting deltas must equal the difference between the second and the
/// first sample for the corresponding counter.
#[test]
fn schedstat_deltas_all_fields() {
    // One-CPU sample at `elapsed_ms` carrying the given schedstat counters.
    let sample_at = |elapsed_ms, counters| MonitorSample {
        prog_stats: None,
        elapsed_ms,
        cpus: vec![CpuSnapshot {
            nr_running: 1,
            rq_clock: elapsed_ms * 10,
            schedstat: Some(counters),
            ..Default::default()
        }],
    };

    let first = sample_at(
        100,
        RqSchedstat {
            run_delay: 100,
            pcount: 10,
            yld_count: 1,
            sched_count: 20,
            sched_goidle: 5,
            ttwu_count: 30,
            ttwu_local: 15,
        },
    );
    let second = sample_at(
        200,
        RqSchedstat {
            run_delay: 500,
            pcount: 25,
            yld_count: 4,
            sched_count: 50,
            sched_goidle: 12,
            ttwu_count: 70,
            ttwu_local: 35,
        },
    );

    let summary = MonitorSummary::from_samples(&[first, second]);
    let deltas = summary.schedstat_deltas.unwrap();

    assert_eq!(deltas.total_run_delay, 400);
    assert_eq!(deltas.total_pcount, 15);
    assert_eq!(deltas.total_yld_count, 3);
    assert_eq!(deltas.total_sched_count, 30);
    assert_eq!(deltas.total_sched_goidle, 7);
    assert_eq!(deltas.total_ttwu_count, 40);
    assert_eq!(deltas.total_ttwu_local, 20);
}
/// A default-constructed tracker has recorded nothing: it is not sustained
/// for a window of 3 and its `worst_run` is 0.
#[test]
fn sustained_tracker_no_violations() {
    let tracker = SustainedViolationTracker::default();
    let is_sustained = tracker.sustained(3);
    assert!(!is_sustained);
    assert_eq!(tracker.worst_run, 0);
}
/// One recorded violation (value 5.0 at index 0) is remembered as the worst
/// run, but is not enough to be sustained over a window of 3.
#[test]
fn sustained_tracker_single_violation_not_sustained() {
    let mut tracker = SustainedViolationTracker::default();
    tracker.record(true, 5.0, 0);

    let is_sustained = tracker.sustained(3);
    assert!(!is_sustained);
    assert_eq!(tracker.worst_run, 1);
    assert_eq!(tracker.worst_at, 0);

    let value_error = (tracker.worst_value - 5.0).abs();
    assert!(value_error < f64::EPSILON);
}
/// Three violations in a row satisfy a window of 3; the worst run is 3 and
/// the recorded position and value are those of the last violation.
#[test]
fn sustained_tracker_meets_threshold() {
    let mut tracker = SustainedViolationTracker::default();
    // (value, index) for each consecutive violation.
    for (value, at) in [(2.0, 0), (3.0, 1), (4.0, 2)] {
        tracker.record(true, value, at);
    }

    assert!(tracker.sustained(3));
    assert_eq!(tracker.worst_run, 3);
    assert_eq!(tracker.worst_at, 2);

    let value_error = (tracker.worst_value - 4.0).abs();
    assert!(value_error < f64::EPSILON);
}
/// Two violations, one non-violation, then one more violation: the tracker
/// is not sustained for a window of 3, `worst_run` stays at 2 and
/// `consecutive` is 1 after the final record.
#[test]
fn sustained_tracker_reset_on_non_violation() {
    let mut tracker = SustainedViolationTracker::default();
    // (violated, value, index)
    let events = [
        (true, 1.0, 0),
        (true, 2.0, 1),
        (false, 0.0, 2),
        (true, 3.0, 3),
    ];
    for (violated, value, at) in events {
        tracker.record(violated, value, at);
    }

    let is_sustained = tracker.sustained(3);
    assert!(!is_sustained);
    assert_eq!(tracker.worst_run, 2);
    assert_eq!(tracker.consecutive, 1);
}
/// A run of five violations followed by a non-violation and a shorter run of
/// two: `worst_run` must still be 5, so the tracker is sustained for a
/// window of 5 but not for a window of 6.
#[test]
fn sustained_tracker_worst_run_preserved_after_reset() {
    let mut tracker = SustainedViolationTracker::default();

    // Initial run of five violations.
    for idx in 0..5 {
        tracker.record(true, idx as f64, idx);
    }
    // A break, then a shorter two-violation run.
    for (violated, value, at) in [(false, 0.0, 5), (true, 99.0, 6), (true, 100.0, 7)] {
        tracker.record(violated, value, at);
    }

    assert_eq!(tracker.worst_run, 5);
    assert!(tracker.sustained(5));
    let sustained_for_six = tracker.sustained(6);
    assert!(!sustained_for_six);
}
}