use std::collections::BTreeMap;
use super::{AggRule, Aggregated, ScaleLadder, Section};
/// Static description of one per-thread metric in the ctprof registry.
///
/// Each entry pairs a metric name with the rule used to aggregate it across
/// the threads of a group, plus metadata describing where the value comes
/// from and where it is shown.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub struct CtprofMetricDef {
/// Metric name (e.g. `"thread_count"`, `"run_time_ns"`).
pub name: &'static str,
/// Aggregation rule; each variant carries the accessor that extracts the
/// per-thread value.
pub rule: AggRule,
/// Optional scheduling-class tag. The registry in this file uses
/// `"cfs-only"`, `"non-ext"` and `"fair-policy"`; `None` means no tag.
pub sched_class: Option<&'static str>,
/// Kernel config option names attached to the metric (e.g.
/// `"CONFIG_SCHEDSTATS"`); empty when none are listed.
pub config_gates: &'static [&'static str],
/// NOTE(review): semantics are not visible in this part of the file —
/// presumably marks a metric that is no longer populated; confirm against
/// the consumers of this flag.
pub is_dead: bool,
/// Human-readable explanation of the metric.
pub description: &'static str,
/// Section the metric is assigned to.
pub section: Section,
}
/// Registry of per-thread metrics.
///
/// Every entry names a metric, the [`AggRule`] (with its field accessor) used
/// to fold per-thread values into a group value, an optional scheduling-class
/// tag, the kernel config options listed as gates, a description, and the
/// [`Section`] it belongs to. Entries are grouped below by data source; the
/// grouping comments are navigational only.
pub static CTPROF_METRICS: &[CtprofMetricDef] = &[
// --- Group population ---
CtprofMetricDef {
name: "thread_count",
rule: AggRule::SumCount(|_| crate::metric_types::MonotonicCount(1)),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Number of threads in this group. Each thread contributes 1; the sum is the group population. Useful for --sort-by thread_count:desc to find groups where thread count changed the most.",
section: Section::Primary,
},
// --- Scheduling attributes and task state ---
CtprofMetricDef {
name: "policy",
rule: AggRule::Mode(|t| t.policy.clone()),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Scheduling policy (SCHED_OTHER, SCHED_FIFO, SCHED_RR, SCHED_BATCH, SCHED_IDLE, SCHED_DEADLINE, SCHED_EXT).",
section: Section::Primary,
},
CtprofMetricDef {
name: "nice",
rule: AggRule::RangeI32(|t| t.nice),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Nice value (-20..19); CFS priority knob.",
section: Section::Primary,
},
CtprofMetricDef {
name: "priority",
rule: AggRule::RangeI32(|t| t.priority),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Kernel task priority from /proc/<tid>/stat field 18 (CFS=[0..39], RT=[-2..-100], DL=-101).",
section: Section::Primary,
},
CtprofMetricDef {
name: "rt_priority",
rule: AggRule::RangeU32(|t| t.rt_priority),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Real-time scheduler priority (0..99); 0 for non-RT tasks.",
section: Section::Primary,
},
CtprofMetricDef {
name: "cpu_affinity",
rule: AggRule::Affinity(|t| t.cpu_affinity.clone()),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Set of CPUs the task is allowed to run on (sched_getaffinity result).",
section: Section::Primary,
},
CtprofMetricDef {
name: "processor",
rule: AggRule::RangeI32(|t| t.processor),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Last CPU the task ran on.",
section: Section::Primary,
},
CtprofMetricDef {
name: "state",
rule: AggRule::ModeChar(|t| t.state),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Task state letter (R running, S sleeping, D uninterruptible, Z zombie, T stopped).",
section: Section::Primary,
},
CtprofMetricDef {
name: "ext_enabled",
rule: AggRule::ModeBool(|t| t.ext_enabled),
sched_class: None,
config_gates: &["CONFIG_SCHED_CLASS_EXT"],
is_dead: false,
description: "Whether the task is currently dispatched on the sched_ext class.",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_threads",
rule: AggRule::MaxGaugeCount(|t| t.nr_threads),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Process-wide thread count (signal_struct->nr_threads); leader-only.",
section: Section::Primary,
},
// --- schedstat counters (gated on CONFIG_SCHED_INFO) ---
CtprofMetricDef {
name: "run_time_ns",
rule: AggRule::SumNs(|t| t.run_time_ns),
sched_class: None,
config_gates: &["CONFIG_SCHED_INFO"],
is_dead: false,
description: "Cumulative on-CPU time, ns; /proc/<tid>/schedstat field 1.",
section: Section::Primary,
},
CtprofMetricDef {
name: "wait_time_ns",
rule: AggRule::SumNs(|t| t.wait_time_ns),
sched_class: None,
config_gates: &["CONFIG_SCHED_INFO"],
is_dead: false,
description: "Cumulative time waiting on the runqueue, ns; schedstat field 2.",
section: Section::Primary,
},
CtprofMetricDef {
name: "timeslices",
rule: AggRule::SumCount(|t| t.timeslices),
sched_class: None,
config_gates: &["CONFIG_SCHED_INFO"],
is_dead: false,
description: "Number of times the task was run on a CPU; schedstat field 3.",
section: Section::Primary,
},
// --- Context switches ---
CtprofMetricDef {
name: "voluntary_csw",
rule: AggRule::SumCount(|t| t.voluntary_csw),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Voluntary context switches (task gave up the CPU itself).",
section: Section::Primary,
},
CtprofMetricDef {
name: "nonvoluntary_csw",
rule: AggRule::SumCount(|t| t.nonvoluntary_csw),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Involuntary context switches (task was preempted).",
section: Section::Primary,
},
// --- Wakeup counters (gated on CONFIG_SCHEDSTATS) ---
CtprofMetricDef {
name: "nr_wakeups",
rule: AggRule::SumCount(|t| t.nr_wakeups),
sched_class: None,
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Total wakeups via try_to_wake_up().",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_wakeups_local",
rule: AggRule::SumCount(|t| t.nr_wakeups_local),
sched_class: None,
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Wakeups landed on the same CPU as the waker.",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_wakeups_remote",
rule: AggRule::SumCount(|t| t.nr_wakeups_remote),
sched_class: None,
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Wakeups landed on a different CPU than the waker.",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_wakeups_sync",
rule: AggRule::SumCount(|t| t.nr_wakeups_sync),
sched_class: None,
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "WF_SYNC wakeups (synchronous wakeup hint to scheduler).",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_wakeups_migrate",
rule: AggRule::SumCount(|t| t.nr_wakeups_migrate),
sched_class: None,
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Wakeups where the task migrated to a different CPU than its prior one (WF_MIGRATED); distinct from nr_wakeups_remote (waker CPU != target CPU).",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_wakeups_affine",
rule: AggRule::SumCount(|t| t.nr_wakeups_affine),
sched_class: Some("cfs-only"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Wakeups that succeeded under the wake_affine() heuristic.",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_wakeups_affine_attempts",
rule: AggRule::SumCount(|t| t.nr_wakeups_affine_attempts),
sched_class: Some("cfs-only"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "wake_affine() attempts; success rate = nr_wakeups_affine / attempts.",
section: Section::Primary,
},
// --- Migration counters ---
CtprofMetricDef {
name: "nr_migrations",
rule: AggRule::SumCount(|t| t.nr_migrations),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Cumulative cross-CPU migrations of the task.",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_forced_migrations",
rule: AggRule::SumCount(|t| t.nr_forced_migrations),
sched_class: Some("cfs-only"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Migrations forced by the CFS load balancer.",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_failed_migrations_affine",
rule: AggRule::SumCount(|t| t.nr_failed_migrations_affine),
sched_class: Some("cfs-only"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Load-balancer migrations rejected for cpu-affinity reasons.",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_failed_migrations_running",
rule: AggRule::SumCount(|t| t.nr_failed_migrations_running),
sched_class: Some("cfs-only"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Load-balancer migrations rejected because the task was running.",
section: Section::Primary,
},
CtprofMetricDef {
name: "nr_failed_migrations_hot",
rule: AggRule::SumCount(|t| t.nr_failed_migrations_hot),
sched_class: Some("cfs-only"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Load-balancer migrations rejected because the task was cache-hot.",
section: Section::Primary,
},
// --- Wait / sleep / block / iowait statistics (gated on CONFIG_SCHEDSTATS) ---
CtprofMetricDef {
name: "wait_sum",
rule: AggRule::SumNs(|t| t.wait_sum),
sched_class: Some("non-ext"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Cumulative time the task waited on the runqueue, ns.",
section: Section::Primary,
},
CtprofMetricDef {
name: "wait_count",
rule: AggRule::SumCount(|t| t.wait_count),
sched_class: Some("non-ext"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Number of distinct runqueue-wait intervals the task accumulated.",
section: Section::Primary,
},
CtprofMetricDef {
name: "wait_max",
rule: AggRule::MaxPeak(|t| t.wait_max),
sched_class: Some("non-ext"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Longest single runqueue-wait interval observed, ns.",
section: Section::Primary,
},
CtprofMetricDef {
name: "voluntary_sleep_ns",
rule: AggRule::SumNs(|t| t.voluntary_sleep_ns),
sched_class: Some("non-ext"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Pure voluntary sleep time (TASK_INTERRUPTIBLE only), ns; capture-side normalized as sum_sleep_runtime - sum_block_runtime so the kernel's sleep/block double-count is stripped before delta math.",
section: Section::Primary,
},
CtprofMetricDef {
name: "sleep_max",
rule: AggRule::MaxPeak(|t| t.sleep_max),
sched_class: Some("non-ext"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Longest single sleep interval observed, ns.",
section: Section::Primary,
},
CtprofMetricDef {
name: "block_sum",
rule: AggRule::SumNs(|t| t.block_sum),
sched_class: Some("non-ext"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Cumulative time the task spent blocked (TASK_UNINTERRUPTIBLE), ns.",
section: Section::Primary,
},
CtprofMetricDef {
name: "block_max",
rule: AggRule::MaxPeak(|t| t.block_max),
sched_class: Some("non-ext"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Longest single uninterruptible-block interval observed, ns.",
section: Section::Primary,
},
CtprofMetricDef {
name: "iowait_sum",
rule: AggRule::SumNs(|t| t.iowait_sum),
sched_class: Some("non-ext"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Cumulative time the task spent in iowait, ns.",
section: Section::Primary,
},
CtprofMetricDef {
name: "iowait_count",
rule: AggRule::SumCount(|t| t.iowait_count),
sched_class: Some("non-ext"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Number of distinct iowait intervals the task accumulated.",
section: Section::Primary,
},
// --- Execution / slice peaks and core scheduling ---
CtprofMetricDef {
name: "exec_max",
rule: AggRule::MaxPeak(|t| t.exec_max),
sched_class: None,
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Longest single uninterrupted on-CPU run observed, ns.",
section: Section::Primary,
},
CtprofMetricDef {
name: "slice_max",
rule: AggRule::MaxPeak(|t| t.slice_max),
sched_class: Some("cfs-only"),
config_gates: &["CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Longest CFS slice the task was granted, ns.",
section: Section::Primary,
},
CtprofMetricDef {
name: "core_forceidle_sum",
rule: AggRule::SumNs(|t| t.core_forceidle_sum),
sched_class: None,
config_gates: &["CONFIG_SCHED_CORE", "CONFIG_SCHEDSTATS"],
is_dead: false,
description: "Cumulative time this task forced its SMT sibling idle, ns (core scheduling).",
section: Section::Primary,
},
CtprofMetricDef {
name: "fair_slice_ns",
rule: AggRule::MaxGaugeNs(|t| t.fair_slice_ns),
sched_class: Some("fair-policy"),
config_gates: &[],
is_dead: false,
description: "Current scheduler slice, ns; snapshot from /proc/<tid>/sched (stale under sched_ext).",
section: Section::Primary,
},
// --- jemalloc per-thread allocation counters ---
CtprofMetricDef {
name: "allocated_bytes",
rule: AggRule::SumBytes(|t| t.allocated_bytes),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "jemalloc per-thread allocated bytes (TSD thread_allocated counter).",
section: Section::Primary,
},
CtprofMetricDef {
name: "deallocated_bytes",
rule: AggRule::SumBytes(|t| t.deallocated_bytes),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "jemalloc per-thread deallocated bytes (TSD thread_deallocated counter).",
section: Section::Primary,
},
// --- Page faults and CPU time ticks ---
CtprofMetricDef {
name: "minflt",
rule: AggRule::SumCount(|t| t.minflt),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Minor page faults (resolved without I/O).",
section: Section::Primary,
},
CtprofMetricDef {
name: "majflt",
rule: AggRule::SumCount(|t| t.majflt),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Major page faults (required disk I/O to resolve).",
section: Section::Primary,
},
CtprofMetricDef {
name: "utime_clock_ticks",
rule: AggRule::SumTicks(|t| t.utime_clock_ticks),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "User-mode CPU time, USER_HZ ticks; /proc/<tid>/stat field 14.",
section: Section::Primary,
},
CtprofMetricDef {
name: "stime_clock_ticks",
rule: AggRule::SumTicks(|t| t.stime_clock_ticks),
sched_class: None,
config_gates: &[],
is_dead: false,
description: "Kernel-mode CPU time, USER_HZ ticks; /proc/<tid>/stat field 15.",
section: Section::Primary,
},
// --- I/O accounting (gated on CONFIG_TASK_IO_ACCOUNTING) ---
CtprofMetricDef {
name: "rchar",
rule: AggRule::SumBytes(|t| t.rchar),
sched_class: None,
config_gates: &["CONFIG_TASK_IO_ACCOUNTING"],
is_dead: false,
description: "Bytes read at the read syscall layer (incl. cached / pagecache hits).",
section: Section::Primary,
},
CtprofMetricDef {
name: "wchar",
rule: AggRule::SumBytes(|t| t.wchar),
sched_class: None,
config_gates: &["CONFIG_TASK_IO_ACCOUNTING"],
is_dead: false,
description: "Bytes written at the write syscall layer (incl. pagecache / writeback).",
section: Section::Primary,
},
CtprofMetricDef {
name: "syscr",
rule: AggRule::SumCount(|t| t.syscr),
sched_class: None,
config_gates: &["CONFIG_TASK_IO_ACCOUNTING"],
is_dead: false,
description: "Number of read syscalls.",
section: Section::Primary,
},
CtprofMetricDef {
name: "syscw",
rule: AggRule::SumCount(|t| t.syscw),
sched_class: None,
config_gates: &["CONFIG_TASK_IO_ACCOUNTING"],
is_dead: false,
description: "Number of write syscalls.",
section: Section::Primary,
},
CtprofMetricDef {
name: "read_bytes",
rule: AggRule::SumBytes(|t| t.read_bytes),
sched_class: None,
config_gates: &["CONFIG_TASK_IO_ACCOUNTING"],
is_dead: false,
description: "Bytes that hit the storage device on read (excludes pagecache hits).",
section: Section::Primary,
},
CtprofMetricDef {
name: "write_bytes",
rule: AggRule::SumBytes(|t| t.write_bytes),
sched_class: None,
config_gates: &["CONFIG_TASK_IO_ACCOUNTING"],
is_dead: false,
description: "Bytes that hit the storage device on write (post-writeback).",
section: Section::Primary,
},
CtprofMetricDef {
name: "cancelled_write_bytes",
rule: AggRule::SumBytes(|t| t.cancelled_write_bytes),
sched_class: None,
config_gates: &["CONFIG_TASK_IO_ACCOUNTING"],
is_dead: false,
description: "Bytes the kernel deaccounted from a prior dirty-write because the page was reclaimed without writeback (truncate / inode invalidation); recorded on the truncating task, not the writer. Per-thread `write_bytes - cancelled_write_bytes` is NOT a valid derivation — see field doc.",
section: Section::Primary,
},
// --- Taskstats delay accounting: CPU ---
CtprofMetricDef {
name: "cpu_delay_count",
rule: AggRule::SumCount(|t| t.cpu_delay_count),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Number of off-CPU windows the task waited for the runqueue to schedule it (taskstats cpu_count). RACY: count + total are not updated atomically.",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "cpu_delay_total_ns",
rule: AggRule::SumNs(|t| t.cpu_delay_total_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Cumulative ns the task waited on the runqueue (taskstats cpu_delay_total). Distinct from `wait_sum` (schedstat) which captures the same wait-for-CPU bucket via a different code path. RACY (see cpu_delay_count).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "cpu_delay_max_ns",
rule: AggRule::MaxPeak(|t| t.cpu_delay_max_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Longest single CPU-wait window observed, ns (taskstats cpu_delay_max).",
section: Section::TaskstatsDelay,
},
// NOTE(review): this entry and every other `*_delay_min_ns` entry below
// aggregate with AggRule::MaxPeak even though they describe per-task minima
// with a 0 sentinel — confirm that taking the maximum across the group is
// the intended rule rather than a copy-paste from the `*_max_ns` entries.
CtprofMetricDef {
name: "cpu_delay_min_ns",
rule: AggRule::MaxPeak(|t| t.cpu_delay_min_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Shortest non-zero CPU-wait window observed, ns (taskstats cpu_delay_min). Sentinel 0 means \"no events observed\" — compare against cpu_delay_count.",
section: Section::TaskstatsDelay,
},
// --- Taskstats delay accounting: block I/O ---
CtprofMetricDef {
name: "blkio_delay_count",
rule: AggRule::SumCount(|t| t.blkio_delay_count),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Number of synchronous block-I/O wait windows (taskstats blkio_count).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "blkio_delay_total_ns",
rule: AggRule::SumNs(|t| t.blkio_delay_total_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Cumulative ns waiting on synchronous block I/O (taskstats blkio_delay_total). Distinct from `iowait_sum` (schedstat).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "blkio_delay_max_ns",
rule: AggRule::MaxPeak(|t| t.blkio_delay_max_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Longest single block-I/O wait observed, ns (taskstats blkio_delay_max).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "blkio_delay_min_ns",
rule: AggRule::MaxPeak(|t| t.blkio_delay_min_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Shortest non-zero block-I/O wait observed, ns (taskstats blkio_delay_min). Sentinel 0 means \"no events observed\".",
section: Section::TaskstatsDelay,
},
// --- Taskstats delay accounting: swap-in ---
CtprofMetricDef {
name: "swapin_delay_count",
rule: AggRule::SumCount(|t| t.swapin_delay_count),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Number of swap-in wait windows (taskstats swapin_count). OVERLAPS with thrashing_delay_count — do not sum.",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "swapin_delay_total_ns",
rule: AggRule::SumNs(|t| t.swapin_delay_total_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Cumulative ns waiting for swap-in to complete (taskstats swapin_delay_total).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "swapin_delay_max_ns",
rule: AggRule::MaxPeak(|t| t.swapin_delay_max_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Longest single swap-in wait observed, ns (taskstats swapin_delay_max).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "swapin_delay_min_ns",
rule: AggRule::MaxPeak(|t| t.swapin_delay_min_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Shortest non-zero swap-in wait observed, ns (taskstats swapin_delay_min). Sentinel 0 means \"no events observed\".",
section: Section::TaskstatsDelay,
},
// --- Taskstats delay accounting: direct reclaim (freepages) ---
CtprofMetricDef {
name: "freepages_delay_count",
rule: AggRule::SumCount(|t| t.freepages_delay_count),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Number of direct-reclaim wait windows (taskstats freepages_count).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "freepages_delay_total_ns",
rule: AggRule::SumNs(|t| t.freepages_delay_total_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Cumulative ns waiting in direct memory reclaim (taskstats freepages_delay_total).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "freepages_delay_max_ns",
rule: AggRule::MaxPeak(|t| t.freepages_delay_max_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Longest single direct-reclaim wait observed, ns (taskstats freepages_delay_max).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "freepages_delay_min_ns",
rule: AggRule::MaxPeak(|t| t.freepages_delay_min_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Shortest non-zero direct-reclaim wait observed, ns (taskstats freepages_delay_min). Sentinel 0 means \"no events observed\".",
section: Section::TaskstatsDelay,
},
// --- Taskstats delay accounting: thrashing ---
CtprofMetricDef {
name: "thrashing_delay_count",
rule: AggRule::SumCount(|t| t.thrashing_delay_count),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Number of thrashing wait windows (taskstats thrashing_count). OVERLAPS with swapin_delay_count — do not sum.",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "thrashing_delay_total_ns",
rule: AggRule::SumNs(|t| t.thrashing_delay_total_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Cumulative ns waiting under thrashing pressure (taskstats thrashing_delay_total).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "thrashing_delay_max_ns",
rule: AggRule::MaxPeak(|t| t.thrashing_delay_max_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Longest single thrashing wait observed, ns (taskstats thrashing_delay_max).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "thrashing_delay_min_ns",
rule: AggRule::MaxPeak(|t| t.thrashing_delay_min_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Shortest non-zero thrashing wait observed, ns (taskstats thrashing_delay_min). Sentinel 0 means \"no events observed\".",
section: Section::TaskstatsDelay,
},
// --- Taskstats delay accounting: memory compaction ---
CtprofMetricDef {
name: "compact_delay_count",
rule: AggRule::SumCount(|t| t.compact_delay_count),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Number of memory-compaction wait windows (taskstats compact_count).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "compact_delay_total_ns",
rule: AggRule::SumNs(|t| t.compact_delay_total_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Cumulative ns waiting on memory compaction (taskstats compact_delay_total).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "compact_delay_max_ns",
rule: AggRule::MaxPeak(|t| t.compact_delay_max_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Longest single compaction wait observed, ns (taskstats compact_delay_max).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "compact_delay_min_ns",
rule: AggRule::MaxPeak(|t| t.compact_delay_min_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Shortest non-zero compaction wait observed, ns (taskstats compact_delay_min). Sentinel 0 means \"no events observed\".",
section: Section::TaskstatsDelay,
},
// --- Taskstats delay accounting: write-protect copy (CoW) ---
CtprofMetricDef {
name: "wpcopy_delay_count",
rule: AggRule::SumCount(|t| t.wpcopy_delay_count),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Number of write-protect-copy (CoW) fault wait windows (taskstats wpcopy_count).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "wpcopy_delay_total_ns",
rule: AggRule::SumNs(|t| t.wpcopy_delay_total_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Cumulative ns waiting on write-protect-copy faults (taskstats wpcopy_delay_total).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "wpcopy_delay_max_ns",
rule: AggRule::MaxPeak(|t| t.wpcopy_delay_max_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Longest single write-protect-copy fault wait observed, ns (taskstats wpcopy_delay_max).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "wpcopy_delay_min_ns",
rule: AggRule::MaxPeak(|t| t.wpcopy_delay_min_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Shortest non-zero write-protect-copy fault wait observed, ns (taskstats wpcopy_delay_min). Sentinel 0 means \"no events observed\".",
section: Section::TaskstatsDelay,
},
// --- Taskstats delay accounting: IRQ ---
CtprofMetricDef {
name: "irq_delay_count",
rule: AggRule::SumCount(|t| t.irq_delay_count),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Number of IRQ-handler windows charged to the task (taskstats irq_count).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "irq_delay_total_ns",
rule: AggRule::SumNs(|t| t.irq_delay_total_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Cumulative ns of IRQ handling charged to the task (taskstats irq_delay_total).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "irq_delay_max_ns",
rule: AggRule::MaxPeak(|t| t.irq_delay_max_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Longest single IRQ-handler window observed, ns (taskstats irq_delay_max).",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "irq_delay_min_ns",
rule: AggRule::MaxPeak(|t| t.irq_delay_min_ns),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_DELAY_ACCT"],
is_dead: false,
description: "Shortest non-zero IRQ-handler window observed, ns (taskstats irq_delay_min). Sentinel 0 means \"no events observed\".",
section: Section::TaskstatsDelay,
},
// --- Taskstats memory high-watermarks (gated on CONFIG_TASK_XACCT) ---
CtprofMetricDef {
name: "hiwater_rss_bytes",
rule: AggRule::MaxPeakBytes(|t| t.hiwater_rss_bytes),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_XACCT"],
is_dead: false,
description: "Lifetime high-watermark of resident-set size, bytes (taskstats hiwater_rss). Distinct from smaps_rollup_kb[\"Rss\"] which is the CURRENT RSS.",
section: Section::TaskstatsDelay,
},
CtprofMetricDef {
name: "hiwater_vm_bytes",
rule: AggRule::MaxPeakBytes(|t| t.hiwater_vm_bytes),
sched_class: None,
config_gates: &["CONFIG_TASKSTATS", "CONFIG_TASK_XACCT"],
is_dead: false,
description: "Lifetime high-watermark of virtual-memory size, bytes (taskstats hiwater_vm).",
section: Section::TaskstatsDelay,
},
];
/// Result produced by a derived metric's `compute` function.
///
/// The only shape today is a single floating-point scalar; the enum is marked
/// `#[non_exhaustive]` so further shapes can be added later.
#[derive(Debug, Clone, Copy, PartialEq)]
#[non_exhaustive]
pub enum DerivedValue {
    /// A single `f64` value.
    Scalar(f64),
}

impl DerivedValue {
    /// Returns the wrapped value as a plain `f64`.
    pub fn as_f64(&self) -> f64 {
        // `Scalar` is the sole variant, so the destructure is irrefutable.
        let Self::Scalar(value) = *self;
        value
    }
}
/// Static definition of a derived metric: a value computed from a map of
/// already-aggregated metrics via [`DerivedMetricDef::compute`].
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub struct DerivedMetricDef {
/// Name of the derived metric (e.g. `"avg_wait_ns"`).
pub name: &'static str,
/// Scale ladder attached to the value. NOTE(review): presumably selects the
/// unit family used when rendering — confirm against the consumer.
pub ladder: ScaleLadder,
/// Human-readable explanation, including the formula.
pub description: &'static str,
/// Names of the aggregated metrics the computation reads.
pub inputs: &'static [&'static str],
/// Marks the metric as a ratio. NOTE(review): how consumers treat this
/// flag is not visible here.
pub is_ratio: bool,
/// Computes the value from aggregated metrics keyed by metric name;
/// `None` means no value could be produced.
pub compute: fn(&BTreeMap<String, Aggregated>) -> Option<DerivedValue>,
/// Section the derived metric is assigned to.
pub section: Section,
}
/// Looks up `name` among the aggregated metrics and returns its numeric value.
///
/// Yields `None` when the metric is absent from the map or when
/// `Aggregated::numeric` reports no numeric value for it.
fn input_scalar(metrics: &BTreeMap<String, Aggregated>, name: &str) -> Option<f64> {
    let aggregated = metrics.get(name)?;
    aggregated.numeric()
}
/// Computes `numerator / denominator` from two aggregated metrics.
///
/// Returns `None` when either input is missing or non-numeric, or when the
/// denominator is exactly zero.
fn ratio_compute(
    metrics: &BTreeMap<String, Aggregated>,
    numerator: &str,
    denominator: &str,
) -> Option<DerivedValue> {
    let top = input_scalar(metrics, numerator)?;
    let bottom = input_scalar(metrics, denominator)?;
    // A zero denominator has no meaningful quotient, so no value is reported.
    (bottom != 0.0).then(|| DerivedValue::Scalar(top / bottom))
}
/// Computes `numerator / (numerator + addend)` from two aggregated metrics.
///
/// Returns `None` when either input is missing or non-numeric, or when the
/// combined total is exactly zero.
fn ratio_of_sum_compute(
    metrics: &BTreeMap<String, Aggregated>,
    numerator: &str,
    addend: &str,
) -> Option<DerivedValue> {
    let part = input_scalar(metrics, numerator)?;
    let total = part + input_scalar(metrics, addend)?;
    if total == 0.0 {
        // Nothing was observed on either side; the share is undefined.
        None
    } else {
        Some(DerivedValue::Scalar(part / total))
    }
}
pub static CTPROF_DERIVED_METRICS: &[DerivedMetricDef] = &[
DerivedMetricDef {
name: "affine_success_ratio",
ladder: ScaleLadder::None,
description: "wake_affine() success ratio: nr_wakeups_affine / nr_wakeups_affine_attempts.",
inputs: &["nr_wakeups_affine", "nr_wakeups_affine_attempts"],
is_ratio: true,
compute: |m| ratio_compute(m, "nr_wakeups_affine", "nr_wakeups_affine_attempts"),
section: Section::Derived,
},
DerivedMetricDef {
name: "avg_wait_ns",
ladder: ScaleLadder::Ns,
description: "Average runqueue-wait duration per scheduling event: wait_sum / wait_count (ns/event).",
inputs: &["wait_sum", "wait_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "wait_sum", "wait_count"),
section: Section::Derived,
},
DerivedMetricDef {
name: "cpu_efficiency",
ladder: ScaleLadder::None,
description: "Fraction of total scheduler-tracked time spent on-CPU: run_time_ns / (run_time_ns + wait_time_ns).",
inputs: &["run_time_ns", "wait_time_ns"],
is_ratio: true,
compute: |m| ratio_of_sum_compute(m, "run_time_ns", "wait_time_ns"),
section: Section::Derived,
},
DerivedMetricDef {
name: "avg_slice_ns",
ladder: ScaleLadder::Ns,
description: "Average on-CPU slice length per timeslice: run_time_ns / timeslices (ns/timeslice).",
inputs: &["run_time_ns", "timeslices"],
is_ratio: false,
compute: |m| ratio_compute(m, "run_time_ns", "timeslices"),
section: Section::Derived,
},
DerivedMetricDef {
name: "involuntary_csw_ratio",
ladder: ScaleLadder::None,
description: "Fraction of context switches that were preemptions: nonvoluntary_csw / (voluntary_csw + nonvoluntary_csw).",
inputs: &["nonvoluntary_csw", "voluntary_csw"],
is_ratio: true,
compute: |m| ratio_of_sum_compute(m, "nonvoluntary_csw", "voluntary_csw"),
section: Section::Derived,
},
DerivedMetricDef {
name: "disk_io_fraction",
ladder: ScaleLadder::None,
description: "Fraction of read syscall bytes that hit storage: read_bytes / rchar. Typically <= 1.0 but can exceed when readahead pulls more block-device bytes than the syscall requested.",
inputs: &["read_bytes", "rchar"],
is_ratio: true,
compute: |m| ratio_compute(m, "read_bytes", "rchar"),
section: Section::Derived,
},
DerivedMetricDef {
name: "live_heap_estimate",
ladder: ScaleLadder::Bytes,
description: "jemalloc live-heap estimate: allocated_bytes - deallocated_bytes. Signed: negative when deallocations dominate (freelist drains memory allocated before capture, or sampled mid-update on a thread that just released a large arena). Renders with explicit `-` and the IEC binary suffix (e.g. `-1.907MiB`).",
inputs: &["allocated_bytes", "deallocated_bytes"],
is_ratio: false,
compute: |m| {
let alloc = input_scalar(m, "allocated_bytes")?;
let dealloc = input_scalar(m, "deallocated_bytes")?;
Some(DerivedValue::Scalar(alloc - dealloc))
},
section: Section::Derived,
},
DerivedMetricDef {
name: "avg_iowait_ns",
ladder: ScaleLadder::Ns,
description: "Average iowait interval per iowait event: iowait_sum / iowait_count (ns/event).",
inputs: &["iowait_sum", "iowait_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "iowait_sum", "iowait_count"),
section: Section::Derived,
},
DerivedMetricDef {
name: "avg_cpu_delay_ns",
ladder: ScaleLadder::Ns,
description: "Average CPU-wait per scheduling event: cpu_delay_total_ns / cpu_delay_count (ns/event). RACY: the kernel updates count + total via the lockless sched_info path, so a concurrent reader may observe one ahead of the other; the quotient is approximate at the sub-event scale and stable at the integrated scale.",
inputs: &["cpu_delay_total_ns", "cpu_delay_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "cpu_delay_total_ns", "cpu_delay_count"),
section: Section::TaskstatsDelay,
},
DerivedMetricDef {
name: "avg_blkio_delay_ns",
ladder: ScaleLadder::Ns,
description: "Average synchronous block-I/O wait per event: blkio_delay_total_ns / blkio_delay_count (ns/event). Distinct from avg_iowait_ns (schedstat) — this travels through the delayacct path and is the canonical delay-accounting block-I/O reading.",
inputs: &["blkio_delay_total_ns", "blkio_delay_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "blkio_delay_total_ns", "blkio_delay_count"),
section: Section::TaskstatsDelay,
},
DerivedMetricDef {
name: "avg_swapin_delay_ns",
ladder: ScaleLadder::Ns,
description: "Average swap-in wait per event: swapin_delay_total_ns / swapin_delay_count (ns/event). OVERLAPS with thrashing — every thrashing event is also a swapin event from the syscall layer; do not sum the two averages or the underlying totals directly.",
inputs: &["swapin_delay_total_ns", "swapin_delay_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "swapin_delay_total_ns", "swapin_delay_count"),
section: Section::TaskstatsDelay,
},
DerivedMetricDef {
name: "avg_freepages_delay_ns",
ladder: ScaleLadder::Ns,
description: "Average direct-reclaim wait per event: freepages_delay_total_ns / freepages_delay_count (ns/event).",
inputs: &["freepages_delay_total_ns", "freepages_delay_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "freepages_delay_total_ns", "freepages_delay_count"),
section: Section::TaskstatsDelay,
},
DerivedMetricDef {
name: "avg_thrashing_delay_ns",
ladder: ScaleLadder::Ns,
description: "Average thrashing wait per event: thrashing_delay_total_ns / thrashing_delay_count (ns/event). OVERLAPS with swapin (see avg_swapin_delay_ns).",
inputs: &["thrashing_delay_total_ns", "thrashing_delay_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "thrashing_delay_total_ns", "thrashing_delay_count"),
section: Section::TaskstatsDelay,
},
DerivedMetricDef {
name: "avg_compact_delay_ns",
ladder: ScaleLadder::Ns,
description: "Average memory-compaction wait per event: compact_delay_total_ns / compact_delay_count (ns/event).",
inputs: &["compact_delay_total_ns", "compact_delay_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "compact_delay_total_ns", "compact_delay_count"),
section: Section::TaskstatsDelay,
},
DerivedMetricDef {
name: "avg_wpcopy_delay_ns",
ladder: ScaleLadder::Ns,
description: "Average write-protect-copy fault wait per event: wpcopy_delay_total_ns / wpcopy_delay_count (ns/event).",
inputs: &["wpcopy_delay_total_ns", "wpcopy_delay_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "wpcopy_delay_total_ns", "wpcopy_delay_count"),
section: Section::TaskstatsDelay,
},
DerivedMetricDef {
name: "avg_irq_delay_ns",
ladder: ScaleLadder::Ns,
description: "Average IRQ-handler window per event: irq_delay_total_ns / irq_delay_count (ns/event).",
inputs: &["irq_delay_total_ns", "irq_delay_count"],
is_ratio: false,
compute: |m| ratio_compute(m, "irq_delay_total_ns", "irq_delay_count"),
section: Section::TaskstatsDelay,
},
DerivedMetricDef {
name: "total_offcpu_delay_ns",
ladder: ScaleLadder::Ns,
description: "Sum of all off-CPU delay-accounting buckets, ns: cpu + blkio + freepages + compact + wpcopy + irq + max(swapin, thrashing). The swapin/thrashing pair is OR'd with .max() rather than summed because the two share syscall-layer events (every thrashing event is also a swapin). Returns `-` when any input is missing (CONFIG_TASK_DELAY_ACCT off, runtime toggle off, or kernel older than the bucket's introduction version).",
inputs: &[
"cpu_delay_total_ns",
"blkio_delay_total_ns",
"swapin_delay_total_ns",
"freepages_delay_total_ns",
"thrashing_delay_total_ns",
"compact_delay_total_ns",
"wpcopy_delay_total_ns",
"irq_delay_total_ns",
],
is_ratio: false,
compute: |m| {
let cpu = input_scalar(m, "cpu_delay_total_ns")?;
let blkio = input_scalar(m, "blkio_delay_total_ns")?;
let swapin = input_scalar(m, "swapin_delay_total_ns")?;
let freepages = input_scalar(m, "freepages_delay_total_ns")?;
let thrashing = input_scalar(m, "thrashing_delay_total_ns")?;
let compact = input_scalar(m, "compact_delay_total_ns")?;
let wpcopy = input_scalar(m, "wpcopy_delay_total_ns")?;
let irq = input_scalar(m, "irq_delay_total_ns")?;
let mem_overlap = swapin.max(thrashing);
Some(DerivedValue::Scalar(
cpu + blkio + freepages + compact + wpcopy + irq + mem_overlap,
))
},
section: Section::TaskstatsDelay,
},
];
pub fn metric_display_name(metric: &CtprofMetricDef) -> &'static str {
metric.name
}
/// Builds the bracketed tag string shown next to a metric.
///
/// Tags are emitted in a fixed order and separated by a single space:
///
/// 1. `[<sched_class>]` when `sched_class` is `Some`,
/// 2. `[dead]` when `is_dead` is set,
/// 3. one `[<gate>]` per entry of `config_gates`, in declaration order,
///    with a leading `CONFIG_` prefix removed when present.
///
/// Returns an empty string when none of the above apply.
pub fn metric_tags(metric: &CtprofMetricDef) -> String {
    let class_tag = metric.sched_class.map(|class| format!("[{}]", class));
    let dead_tag = metric.is_dead.then(|| String::from("[dead]"));
    let gate_tags = metric.config_gates.iter().map(|&gate| {
        // Gates without the `CONFIG_` prefix are shown verbatim.
        let short = gate.strip_prefix("CONFIG_").unwrap_or(gate);
        format!("[{}]", short)
    });

    // Every tag is non-empty (at minimum "[]"), so joining with one space
    // yields exactly one separator between consecutive tags and none at
    // either end.
    class_tag
        .into_iter()
        .chain(dead_tag)
        .chain(gate_tags)
        .collect::<Vec<String>>()
        .join(" ")
}