ktstr 0.4.14

Test harness for Linux process schedulers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
//! User-facing comparison configuration.
//!
//! Holds the operator-supplied knobs that drive [`super::compare`]
//! and the aggregation rule taxonomy that the metric registry
//! ([`super::CTPROF_METRICS`]) is parameterized over.
//!
//! Three layers:
//!
//! 1. [`GroupBy`] + [`GroupByOrDefault`] + [`CompareOptions`] +
//!    [`SortKey`] — the inputs the operator types into the CLI
//!    or constructs programmatically; [`super::compare`] receives
//!    a `&CompareOptions` and dispatches grouping / sort behavior
//!    accordingly. [`GroupByOrDefault`] is the newtype wrapper
//!    that gives [`CompareOptions::default`] a meaningful default
//!    grouping (`GroupBy::Pcomm`) without requiring every field
//!    to be spelled out.
//!
//! 2. [`AggRule`] — closed enumeration of per-metric reductions
//!    over a thread bucket. Each variant carries a typed accessor
//!    `fn(&ThreadState) -> SomeNewtype` from [`crate::metric_types`]
//!    so the compiler enforces wrapper / reducer pairing at
//!    registry-build time. The dispatch lives in
//!    [`super::aggregate`].
//!
//! 3. [`AggRule::ladder`] — the per-variant
//!    [`super::ScaleLadder`] mapping consumed by the cell formatters
//!    in [`super::scale`]. Co-located with [`AggRule`] (rather than
//!    in [`super::scale`]) so a future contributor adding a new
//!    `AggRule` variant updates the ladder dispatch in the same
//!    file as the variant — the compiler's exhaustiveness check on
//!    the closed match catches the omission immediately. Splitting
//!    the impl into [`super::scale`] would create a back-edge
//!    (scale → AggRule) that obscures this discipline.

use crate::ctprof::ThreadState;

use super::ScaleLadder;

/// Grouping key for the ctprof compare.
///
/// The default is [`GroupBy::Pcomm`] — aggregate every thread
/// belonging to the same process name together with token-based
/// pattern normalization, so ephemeral worker pools whose pcomm
/// differs only by digit-suffix collapse across snapshots. The
/// other variants exist for operators who want to slice along a
/// different axis: `Cgroup` groups by cgroup path (useful for
/// container-per-workload deployments); `Comm` groups by thread
/// name across every process with the same token-based pattern
/// normalization (so `tokio-worker-{0..N}` collapse into one
/// `tokio-worker-{N}` bucket and `kworker/0:1H-events_highpri`,
/// `kworker/1:0H-events_highpri`, ... collapse into one
/// `kworker/{N}:{N}H-events_highpri` bucket); `CommExact` groups
/// by literal thread name (useful when distinct token values
/// carry meaning that the normalizer would erase, e.g. tracking
/// each per-CPU `kworker/u8:N` independently).
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum GroupBy {
    /// Group by process name (`pcomm`). Default grouping: pcomm
    /// is the leader thread's `task->comm`, read from
    /// `/proc/<tgid>/comm` at capture time (see
    /// [`crate::ctprof::ThreadState::pcomm`]). Per-thread `comm`
    /// values, by contrast, can drift over a process's lifetime
    /// (worker threads reset their comm under load, `taskset`
    /// toggles names, etc.); the leader's comm is the
    /// per-process identity captured at snapshot time and stays
    /// constant across that snapshot. Pcomm grouping is therefore
    /// the most reliable axis for "give me the per-process
    /// resource picture", which is why it's the default.
    ///
    /// Naive pcomm grouping has one common failure mode: workers
    /// with digit suffixes (`worker-0`, `worker-1`, ...) each
    /// land in their own bucket and the per-pool aggregate gets
    /// scattered across N rows. Token-based pattern
    /// normalization handles this: pcomms that produce the same
    /// skeleton under [`pattern_key`]'s normalizer cluster into
    /// one bucket whose internal join key is the skeleton. The
    /// normalizer splits each pcomm on a separator class
    /// (`[.\-_/:@+\[\]\s]+`) and classifies each token as
    /// pure-digit (`{N}`), hex-like (`{H}`),
    /// alpha-prefix-plus-digits (`prefix{N}`),
    /// digits-plus-alpha-suffix (`{N}suffix`), or literal —
    /// identical rules to the [`Comm`](Self::Comm) axis.
    /// Singleton buckets revert to the literal pcomm so a lone
    /// process stays ungrouped instead of advertising a
    /// `worker-{N}` pattern that no other process shares.
    /// Display labels are generated by `grex` for buckets with
    /// ≥ 2 distinct member pcomms; the rendered label is a
    /// regex showing the shared prefix + alternation, while the
    /// join key remains deterministic across snapshots. Disable
    /// normalization with [`CompareOptions::no_thread_normalize`]
    /// to group by literal `pcomm`.
    Pcomm,
    /// Group by cgroup path. Cgroup-level enrichment is surfaced
    /// in the output alongside the aggregated thread metrics.
    /// Explicit [`CompareOptions::cgroup_flatten`] glob patterns
    /// rewrite dynamic path segments before grouping; the
    /// token-based auto-normalization that runs afterwards can
    /// be turned off with [`CompareOptions::no_cg_normalize`].
    Cgroup,
    /// Group by thread name pattern across every process. Threads
    /// whose names produce the same skeleton under
    /// [`pattern_key`]'s token-based normalizer cluster into one
    /// bucket whose internal join key is the skeleton. The
    /// normalizer splits each comm on a separator class
    /// (`[.\-_/:@+\[\]\s]+`) and classifies each token as pure-digit
    /// (`{N}`), hex-like (`{H}`), alpha-prefix-plus-digits
    /// (`prefix{N}`), digits-plus-alpha-suffix (`{N}suffix`), or
    /// literal. Singleton buckets revert to the literal thread
    /// name so a lone worker stays ungrouped.
    /// Display labels are generated by `grex` for buckets with ≥2
    /// distinct member names; the rendered label is a regex
    /// showing the shared prefix + alternation, while the join key
    /// remains deterministic across snapshots. Disable
    /// normalization with
    /// [`CompareOptions::no_thread_normalize`].
    Comm,
    /// Group by literal thread name (`comm`) — exact match, no
    /// pattern aggregation. Use this when distinct token values
    /// carry meaning the normalizer would erase, e.g. tracking each
    /// per-CPU `kworker/u8:N` independently rather than collapsing
    /// the fleet into one `kworker/u{N}:{N}` bucket.
    ///
    /// Distinct from `--group-by comm --no-thread-normalize`:
    /// this variant ONLY disables thread-axis normalization,
    /// leaving the smaps_rollup pcomm keying still normalized
    /// (per [`collect_smaps_rollup`]). The
    /// `--no-thread-normalize` flag, by contrast, disables
    /// normalization across every name-family axis (Comm, Pcomm,
    /// AND smaps_rollup). Pick `CommExact` when you want literal
    /// thread names but still want smaps to join across
    /// snapshots; pick `Comm + no_thread_normalize` when you
    /// also want literal smaps PID identity.
    CommExact,
    /// Run all three pattern-aware axes (Cgroup → Pcomm → Comm)
    /// and render each as a labeled block. Gives a comprehensive
    /// at-a-glance summary without re-running with different
    /// `--group-by` flags. Each axis gets its own `## Primary
    /// metrics` section, independently truncated by `--limit`.
    /// The exact-match [`CommExact`](Self::CommExact) axis is not
    /// part of the sweep — it is not pattern-aware.
    All,
}

/// Options controlling [`super::compare`].
///
/// Marked `#[non_exhaustive]` so new knobs can be added without
/// breaking downstream code; construct a value with
/// `CompareOptions::default()` and overwrite the fields you
/// need.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct CompareOptions {
    /// Grouping axis for the comparison. Wrapped in
    /// [`GroupByOrDefault`] so that `Default` yields
    /// [`GroupBy::Pcomm`] grouping without this struct needing a
    /// hand-written `Default` impl.
    pub group_by: GroupByOrDefault,
    /// Glob patterns that collapse dynamic cgroup path segments
    /// to a canonical form before grouping. Applied in listed
    /// order; each pattern that matches a thread's cgroup path
    /// rewrites the matched segments with the literal portions
    /// of the pattern. See [`flatten_cgroup_path`] for the
    /// rewrite rule and examples.
    ///
    /// Independent of [`Self::no_cg_normalize`] — explicit
    /// glob patterns apply first; auto-normalization (token-based)
    /// runs after, gated by `no_cg_normalize`.
    pub cgroup_flatten: Vec<String>,
    /// When true, disable token-based pattern normalization
    /// across every name-family axis: [`GroupBy::Comm`],
    /// [`GroupBy::Pcomm`], AND the smaps_rollup keying in
    /// [`collect_smaps_rollup`] (which keys by
    /// `pattern_key(&t.pcomm)` under default normalization, but
    /// reverts to literal `pcomm[tgid]` when this flag is set so
    /// each PID stays attributable).
    ///
    /// Under this flag: threads / processes group by their
    /// literal name; smaps rows preserve their per-PID identity.
    /// The pure-digit/hex/alpha+digits placeholders never fire on
    /// any of those axes. Mirror of [`Self::no_cg_normalize`] for
    /// the thread / process axes. Has no effect under
    /// [`GroupBy::CommExact`] (already literal) or
    /// [`GroupBy::Cgroup`].
    pub no_thread_normalize: bool,
    /// When true, disable token-based pattern normalization for
    /// cgroup-path grouping ([`GroupBy::Cgroup`]). Cgroup paths
    /// group by their literal post-flatten path (no Layer 1, 2,
    /// or 3 substitutions). Explicit `cgroup_flatten` glob
    /// patterns still apply. Has no effect under other groupings.
    pub no_cg_normalize: bool,
    /// Multi-key sort spec for the diff rows. When non-empty,
    /// overrides the default `delta_pct desc` sort. Each
    /// [`SortKey`] names one metric from
    /// [`CTPROF_METRICS`] or [`CTPROF_DERIVED_METRICS`]
    /// and a direction; groups rank by the tuple
    /// (`metric_1_delta`, `metric_2_delta`, ...) under
    /// lexicographic order with per-key direction. Within a
    /// group, rows appear in registry order. The sort
    /// composes with [`Self::group_by`]: groups are formed under
    /// the chosen axis (pcomm / cgroup / comm / comm-exact) and
    /// then ranked by their aggregated metric values, so the
    /// same `sort_by` spec works under every grouping. See
    /// [`parse_sort_by`] for the CLI string parser.
    pub sort_by: Vec<SortKey>,
}

/// One key in a multi-key `--sort-by` spec. Names a metric from
/// [`CTPROF_METRICS`] or [`CTPROF_DERIVED_METRICS`] and
/// the sort direction for that key. Direction defaults to
/// descending (largest delta first) so the common operator
/// request — "show me the biggest regressions first" — is the
/// unmarked form.
///
/// Both fields are `pub`, so programmatic callers may construct
/// keys directly rather than round-tripping through the
/// [`parse_sort_by`] string syntax — but note the `metric`
/// invariant below (it should be a registry `name`, not an
/// arbitrary string).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SortKey {
    /// Metric name. Holds one of the [`CTPROF_METRICS`] or
    /// [`CTPROF_DERIVED_METRICS`] entries' `name` fields
    /// verbatim — [`parse_sort_by`] looks up the input string in
    /// either registry and stores the matched `&'static str`, so
    /// this never carries an allocation. Equality against a
    /// registry `name` is by content (`str::eq`); both sides
    /// reference the same `&'static str` from the registry, so
    /// the byte-by-byte comparison succeeds in O(name.len())
    /// without any heap access. The two registries are disjoint
    /// (the `registry_and_derived_names_disjoint` test pins
    /// this) so a `metric` value resolves unambiguously to one
    /// or the other.
    pub metric: &'static str,
    /// True for descending (largest first), false for ascending
    /// (smallest first).
    pub descending: bool,
}

/// Newtype wrapper around [`GroupBy`] that defaults to
/// [`GroupBy::Pcomm`]. Separate type so `CompareOptions::default()`
/// does not need to spell out every field.
///
/// The inner [`GroupBy`] is `pub`: read it with `.0`, and build
/// a wrapper from a bare `GroupBy` via its `From` impl.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GroupByOrDefault(pub GroupBy);

impl Default for GroupByOrDefault {
    /// The default grouping axis: [`GroupBy::Pcomm`].
    fn default() -> Self {
        GroupBy::Pcomm.into()
    }
}

impl From<GroupBy> for GroupByOrDefault {
    fn from(g: GroupBy) -> Self {
        Self(g)
    }
}

/// Aggregation rule for a single metric.
///
/// Encoded as an enum rather than a trait object so the registry
/// table ([`CTPROF_METRICS`]) can live in static memory. Each
/// variant's accessor returns the typed
/// [`crate::metric_types`] newtype that matches the reduction
/// — the reader and rule are paired by construction so a new
/// metric cannot register a peak field against a sum reducer
/// (`SumNs(|t| t.wait_max)` fails to compile because `wait_max`
/// is `PeakNs`, not `MonotonicNs`).
///
/// Type-system bypass caveat (FA-1): the typed shape catches
/// "wrong wrapper" mistakes like the `SumNs(|t| t.wait_max)`
/// example above, but a closure body that actively MISWRAPS the
/// underlying field — e.g. `SumNs(|t| MonotonicNs(t.wait_max.0))`
/// — laundering a peak through the sum wrapper still
/// type-checks. Don't do that. The wrapper category is
/// load-bearing; the type system catches the variant mismatch
/// but cannot inspect the inside of an arbitrary closure.
///
/// Each variant maps 1:1 to a marker trait in
/// [`crate::metric_types`]: `Sum*` variants take a [`Summable`]
/// type, `Max*` variants take a [`Maxable`] type that is NOT
/// also `Summable` (counters use `Sum*` even though they
/// implement both — registering a counter as `Max*` would mask
/// the sum semantics with the per-contributor maximum), `Range*`
/// variants take a [`Rangeable`] type, `Mode*` variants take a
/// [`Modeable`] type or a primitive that the dispatch coerces to
/// `String`, and [`AggRule::Affinity`] takes the dedicated
/// [`crate::metric_types::CpuSet`] for the affinity-summary
/// reduction.
///
/// [`Summable`]: crate::metric_types::Summable
/// [`Maxable`]: crate::metric_types::Maxable
/// [`Rangeable`]: crate::metric_types::Rangeable
/// [`Modeable`]: crate::metric_types::Modeable
#[derive(Debug, Clone, Copy)]
pub enum AggRule {
    /// Sum across the group of a [`MonotonicCount`] field. Used
    /// for unitless cumulative counters (`nr_wakeups`,
    /// `voluntary_csw`, `minflt`, syscall counts, …). The
    /// dispatch routes through
    /// [`crate::metric_types::Summable::sum_across`] which uses
    /// `saturating_add` per the no-wraparound contract.
    ///
    /// [`MonotonicCount`]: crate::metric_types::MonotonicCount
    SumCount(fn(&ThreadState) -> crate::metric_types::MonotonicCount),
    /// Sum across the group of a [`MonotonicNs`] field. Used for
    /// cumulative-time counters in nanoseconds (`run_time_ns`,
    /// `wait_time_ns`, `wait_sum`, `voluntary_sleep_ns`,
    /// `block_sum`, `iowait_sum`, `core_forceidle_sum`).
    ///
    /// [`MonotonicNs`]: crate::metric_types::MonotonicNs
    SumNs(fn(&ThreadState) -> crate::metric_types::MonotonicNs),
    /// Sum across the group of a [`ClockTicks`] field. Used for
    /// USER_HZ-scaled cumulative time counters
    /// (`utime_clock_ticks`, `stime_clock_ticks`).
    ///
    /// [`ClockTicks`]: crate::metric_types::ClockTicks
    SumTicks(fn(&ThreadState) -> crate::metric_types::ClockTicks),
    /// Sum across the group of a [`Bytes`] field. Used for
    /// IEC-binary-scaled byte counters (`allocated_bytes`,
    /// `deallocated_bytes`, `rchar`, `wchar`, `read_bytes`,
    /// `write_bytes`, `cancelled_write_bytes`).
    ///
    /// [`Bytes`]: crate::metric_types::Bytes
    SumBytes(fn(&ThreadState) -> crate::metric_types::Bytes),
    /// Maximum across the group of a [`PeakNs`] field — the
    /// kernel `*_max` schedstats (`wait_max`, `sleep_max`,
    /// `block_max`, `exec_max`, `slice_max`). Each thread
    /// already carries its own lifetime max-seen value from the
    /// kernel's scheduler call sites (e.g. `update_se` in
    /// `kernel/sched/fair.c` for `exec_max`; see
    /// `struct sched_statistics` in `include/linux/sched.h`).
    /// Group-level reduction takes the largest across members so
    /// a row surfaces the worst single window any thread in the
    /// group has ever experienced. Summing per-thread maxes
    /// would conflate "one thread with a 1s spike" with "1000
    /// threads with 1ms spikes each" — `PeakNs` therefore does
    /// NOT implement `Summable`, and trying to register one as
    /// `SumNs` is a compile error.
    ///
    /// [`PeakNs`]: crate::metric_types::PeakNs
    MaxPeak(fn(&ThreadState) -> crate::metric_types::PeakNs),
    /// Maximum across the group of a [`PeakBytes`] field — the
    /// byte-typed twin of [`MaxPeak`]. Used for taskstats-sourced
    /// lifetime memory watermarks (`hiwater_rss`, `hiwater_vm`).
    /// `xacct_add_tsk` (`kernel/tsacct.c::xacct_add_tsk`, lines
    /// 99-104) reads the watermark out of the SHARED `mm_struct`
    /// via `get_mm_hiwater_rss(mm)` / `get_mm_hiwater_vm(mm)`, so
    /// sibling threads of the same tgid all report the same
    /// value; cross-thread Max within a single process is a no-op.
    /// Cross-PROCESS Max (e.g. under `--group-by pcomm` when the
    /// bucket spans multiple parent processes) is the meaningful
    /// reduction: it picks the largest watermark any tgid in the
    /// bucket reported. Routes through the IEC binary auto-scale
    /// ladder ([`crate::metric_types::ScaleLadder::Bytes`]) so a
    /// 7.5 GiB watermark renders as `7.500GiB` instead of
    /// dominating the table with raw byte counts. Summing
    /// watermarks would over-count shared address-space mappings
    /// across sibling threads N-fold — `PeakBytes` does NOT
    /// implement `Summable`.
    ///
    /// [`PeakBytes`]: crate::metric_types::PeakBytes
    MaxPeakBytes(fn(&ThreadState) -> crate::metric_types::PeakBytes),
    /// Maximum across the group of a [`GaugeNs`] field —
    /// instantaneous-time gauges where summing is meaningless.
    /// `fair_slice_ns` is the per-thread CURRENT scheduler slice
    /// (stale under SCHED_EXT — see field doc) read at capture
    /// time, not a high-water value. Summing instantaneous
    /// gauges produces a number with no physical meaning — N
    /// nearly-identical instantaneous values sum to `N * gauge`
    /// regardless of group composition, drowning the signal.
    /// Max instead surfaces "the longest current slice any
    /// thread in the bucket is running with", which IS the
    /// signal a user comparing two snapshots cares about.
    ///
    /// [`GaugeNs`]: crate::metric_types::GaugeNs
    MaxGaugeNs(fn(&ThreadState) -> crate::metric_types::GaugeNs),
    /// Maximum across the group of a [`GaugeCount`] field —
    /// leader-deduped structural counts. `nr_threads` is
    /// populated only on the tgid leader (`tid == tgid`) and
    /// zero on every non-leader thread of the same process; see
    /// `capture_thread_at_with_tally`. Sum across a comm- or
    /// cgroup-bucketed group would render 0 for any bucket
    /// whose leader fell elsewhere because non-leader members
    /// each contribute 0. Max instead reads through to the
    /// leader's value, surfacing "the largest process
    /// represented in this bucket" regardless of which axis the
    /// bucket is built around. The row count already covers
    /// "how many threads are here", so the structural field's
    /// value adds new information rather than restating the row
    /// count.
    ///
    /// [`GaugeCount`]: crate::metric_types::GaugeCount
    MaxGaugeCount(fn(&ThreadState) -> crate::metric_types::GaugeCount),
    /// Observed [min, max] range across the group of an
    /// [`OrdinalI32`] field. Used for signed-domain ordinals
    /// (`nice`, `priority`, `processor`). Delta math uses the
    /// midpoint of each range as the scalar; output prints both
    /// the range and the delta. The dispatch routes through
    /// [`crate::metric_types::Rangeable::range_across`] and
    /// widens to `i64` for [`Aggregated::OrdinalRange`].
    ///
    /// [`OrdinalI32`]: crate::metric_types::OrdinalI32
    RangeI32(fn(&ThreadState) -> crate::metric_types::OrdinalI32),
    /// Observed [min, max] range across the group of an
    /// [`OrdinalU32`] field. Used for unsigned-domain ordinals
    /// (`rt_priority`, kernel-typed `unsigned int`). Same shape
    /// as [`AggRule::RangeI32`] but the inner width matches the
    /// kernel-side `unsigned int` declaration; the dispatch
    /// widens the resulting `u32` to `i64` for
    /// [`Aggregated::OrdinalRange`].
    ///
    /// [`OrdinalU32`]: crate::metric_types::OrdinalU32
    RangeU32(fn(&ThreadState) -> crate::metric_types::OrdinalU32),
    /// Categorical string, aggregated as the mode (most-frequent
    /// value). Used for `policy` (string-valued
    /// [`crate::metric_types::CategoricalString`]). Delta is
    /// textual: "same" if both modes agree, "differs" otherwise
    /// — there is no arithmetic on a policy name. The dispatch
    /// routes through
    /// [`crate::metric_types::Modeable::mode_across`].
    Mode(fn(&ThreadState) -> crate::metric_types::CategoricalString),
    /// Categorical char, aggregated as the mode. Used for
    /// `state` (single-letter task state from
    /// `/proc/<tid>/status`). The dispatch coerces the `char`
    /// to a `String` via `to_string()` before reducing — `char`
    /// itself is NOT [`Modeable`] (only
    /// [`crate::metric_types::CategoricalString`] is), so this
    /// variant exists to keep the registry's accessor type
    /// matching the `ThreadState` field type without forcing the
    /// field into a wrapper. If a second char-valued metric
    /// appears, promote both fields to a dedicated
    /// `CategoricalChar` wrapper rather than continuing the
    /// ad-hoc coercion (mirrors the `CategoricalBool`
    /// promotion guidance on [`AggRule::ModeBool`]).
    ///
    /// [`Modeable`]: crate::metric_types::Modeable
    ModeChar(fn(&ThreadState) -> char),
    /// Categorical bool, aggregated as the mode. Used for
    /// `ext_enabled` (sched_ext class membership). Same shape as
    /// [`AggRule::ModeChar`]: the dispatch coerces via
    /// `to_string()` so `"true"`/`"false"` participate in the
    /// mode reduction. If a second bool-valued metric appears,
    /// promote both fields to a dedicated `CategoricalBool`
    /// wrapper rather than continuing the ad-hoc coercion.
    ///
    /// Tiebreak skew (FA-2): the lex-smallest-wins tiebreak
    /// inside `Modeable::mode_across` makes `"false"` (`'f'`,
    /// 0x66) win an equal-count tie against `"true"` (`'t'`,
    /// 0x74). This matches the legacy pre-phase-3 behavior —
    /// the old `to_string()` coercion fed the same string pair
    /// through the same lex-tiebreak — but is worth flagging
    /// explicitly: a 50/50 sched_ext-on/off bucket renders
    /// `false` as the mode rather than picking the more
    /// "informative" `true`. Operators reading a `false` mode
    /// in a heterogeneous bucket should check the `count/total`
    /// fraction.
    ModeBool(fn(&ThreadState) -> bool),
    /// CPU affinity set, aggregated as the num_cpus range across
    /// the group plus a uniform-cpuset rendering when every
    /// thread shared the same allowed set. Used for
    /// `cpu_affinity`. The accessor returns
    /// [`crate::metric_types::CpuSet`]; the dispatch unwraps to
    /// `Vec<u32>` for the [`AffinitySummary`] reduction.
    ///
    /// Unlike the `Sum*` / `Max*` / `Range*` / `Mode*` rules,
    /// `Affinity` does NOT route through a
    /// [`crate::metric_types`] trait method — its reduction
    /// produces an [`AffinitySummary`] (num_cpus range +
    /// uniform-cpuset flag), not a homogeneous `CpuSet`, so the
    /// inline aggregator in [`aggregate`] walks the per-thread
    /// `Vec<u32>` directly. A future `Affinable` trait could
    /// fold the body into [`crate::metric_types`] but the
    /// summary type is single-use today.
    ///
    /// (The FA-1 type-system-bypass caveat — a closure that
    /// miswraps its field still type-checks — applies to every
    /// variant and is documented on the enum itself.)
    Affinity(fn(&ThreadState) -> crate::metric_types::CpuSet),
}

impl AggRule {
    /// The auto-scale ladder for this rule's value cell.
    ///
    /// Closed match — adding a new [`AggRule`] variant requires
    /// adding the ladder mapping here, which is the type-system
    /// enforcement phase 4 introduces. The mapping is one-to-one
    /// with the typed accessor newtype: [`AggRule::SumNs`] →
    /// [`ScaleLadder::Ns`], [`AggRule::SumBytes`] →
    /// [`ScaleLadder::Bytes`], etc.
    pub fn ladder(&self) -> ScaleLadder {
        match self {
            // Cumulative counters — Sum reductions, ladder
            // determined by the unit family of the typed
            // accessor. SumCount and MaxGaugeCount both produce a
            // unitless count; SumNs / MaxPeak / MaxGaugeNs all
            // produce a ns value; SumTicks produces ticks;
            // SumBytes / MaxPeakBytes produce bytes.
            AggRule::SumCount(_) => ScaleLadder::Unitless,
            AggRule::SumNs(_) => ScaleLadder::Ns,
            AggRule::SumTicks(_) => ScaleLadder::Ticks,
            AggRule::SumBytes(_) => ScaleLadder::Bytes,
            AggRule::MaxPeak(_) => ScaleLadder::Ns,
            AggRule::MaxPeakBytes(_) => ScaleLadder::Bytes,
            AggRule::MaxGaugeNs(_) => ScaleLadder::Ns,
            AggRule::MaxGaugeCount(_) => ScaleLadder::Unitless,
            // Range / Mode / Affinity carry no ladder — the
            // Aggregated Display impl handles render directly.
            AggRule::RangeI32(_)
            | AggRule::RangeU32(_)
            | AggRule::Mode(_)
            | AggRule::ModeChar(_)
            | AggRule::ModeBool(_)
            | AggRule::Affinity(_) => ScaleLadder::None,
        }
    }
}