taktora_executor/stats/mod.rs
//! Executor-side telemetry value types (`REQ_0103`).
//!
//! Aggregation math lives in `taktora-stats` ([`ExecutorCycleStats`][ext]);
//! this module provides the std-side value types carried across the
//! [`Observer`] boundary: the per-cycle push observation [`CycleObservation`]
//! and the pull snapshot [`StatsSnapshot`] / [`TaskStatsEntry`].
//!
//! [ext]: taktora_stats::ExecutorCycleStats
//! [`Observer`]: crate::Observer
10
11use crate::TaskId;
12
13/// A single per-task observation pushed by the executor each cycle.
14///
15/// One value is emitted per dispatched task per cycle and handed to the
16/// observer (or buffered for aggregation). Because [`TaskId`] is `Arc<str>`
17/// under the hood, [`CycleObservation`] is `Clone` but not `Copy`.
18///
19/// **Absent vs. zero (`REQ_0103`).** Every measured quantity is an
20/// [`Option`]: `None` means "not measured this cycle", which is *not* the
21/// same as a measured `0`. A faulted scan (see [`faulted`](Self::faulted)
22/// and `REQ_0107`) advances `cycle_index` but enters no task body, so
23/// `took_ns`/`jitter_ns`/`lateness_ns` are all `None`; `actual_period_ns`
24/// is also `None` on the very first cycle. This mirrors the connector's
25/// observation contract (`REQ_0267`), where a faulted wire round reports
26/// `wire_round_ns: None` — so a consumer joining the two push streams on
27/// `cycle_index` sees a consistent "absent on fault" signal from both
28/// layers instead of an ambiguous `0`.
29#[derive(Clone, Debug)]
30pub struct CycleObservation {
31 /// Monotonic cycle counter, advances on every dispatch attempt including
32 /// faulted scans (`REQ_0107`).
33 pub cycle_index: u64,
34
35 /// Identifier of the task this observation belongs to.
36 pub task_id: TaskId,
37
38 /// Stable zero-based registration index of the task, assigned at
39 /// `Executor::add` time and constant for the executor's lifetime
40 /// (`REQ_0103`). The flat `u32` join/identity key for telemetry export
41 /// (`REQ_0111`'s `task_id` column) — frees consumers from hashing the
42 /// `Arc<str>` [`task_id`](Self::task_id) on the hot path.
43 pub task_index: u32,
44
45 /// `true` when this scan was fault-routed / skipped: the task body was
46 /// not entered, so every measured field below is `None` (`REQ_0107`).
47 /// The cross-layer twin of the connector's `CycleOutcome::Fault`
48 /// (`REQ_0267`).
49 pub faulted: bool,
50
51 /// Declared (nominal) scan period in nanoseconds. Always known.
52 pub period_ns: u64,
53
54 /// Telemetry-clock nanosecond instant of **task-logic start** — the
55 /// canonical reference point (`pre_execute`), the same instant the
56 /// period/jitter/lateness folds are sampled against (`REQ_0103`,
57 /// `REQ_0101`). The single time source for an exported sample's time
58 /// axis; never a second clock read. Always populated.
59 pub pre_ns: u64,
60
61 /// Measured period since the previous dispatch of this task in
62 /// nanoseconds. `None` on the first cycle (no previous timestamp).
63 pub actual_period_ns: Option<u64>,
64
65 /// Absolute jitter: `|actual_period_ns − period_ns|`. `None` when not
66 /// measurable (first cycle) or on a faulted scan.
67 pub jitter_ns: Option<u64>,
68
69 /// Signed deadline lateness relative to the nominal dispatch grid in
70 /// nanoseconds; positive means late (`REQ_0106`). `None` on a faulted
71 /// scan or an event-driven task.
72 pub lateness_ns: Option<i64>,
73
74 /// Nominal grid slots the dispatcher passed over **unserved** between the
75 /// slot served by this task's previous dispatch and the slot served by
76 /// this one (the skip-realign of `REQ_0268`), per `REQ_0840`. Always
77 /// present: `0` in steady state, always `0` in `Legacy` dispatch mode
78 /// (which never skips slots) and on a task's first recorded cycle. The
79 /// lateness grid of `REQ_0106` advances by exactly `1 + skipped_slots`.
80 pub skipped_slots: u32,
81
82 /// Wall-clock execution duration of the task in nanoseconds. `None` on
83 /// a faulted scan (the body was not entered) or when no sample was
84 /// recorded this cycle (e.g. a fault handler ran in the item's place).
85 pub took_ns: Option<u64>,
86}
87
88/// Aggregated statistics for a single task, produced by a pull snapshot.
89///
90/// **Precision contract.** `min_ns`/`max_ns` are **exact** (`REQ_0105`) and
91/// are the values to use for any threshold or regression decision. The
92/// percentile fields (`p50_ns`, `p95_ns`, `p99_ns`) are octave-bucket
93/// *estimates* from the `taktora-stats` histogram, carrying up to
94/// [`PERCENTILE_MAX_REL_ERR_PCT`](taktora_stats::PERCENTILE_MAX_REL_ERR_PCT)
95/// relative error — they locate the order of magnitude, not the exact
96/// figure. (`REQ_0100`'s ≤ 1% target awaits a sub-octave histogram.)
97#[derive(Clone, Debug)]
98pub struct TaskStatsEntry {
99 /// Identifier of the task these statistics belong to.
100 pub task_id: TaskId,
101
102 /// Estimated 50th-percentile execution duration in nanoseconds
103 /// (octave-bucket estimate; see the struct-level precision contract).
104 pub p50_ns: u64,
105
106 /// Estimated 95th-percentile execution duration in nanoseconds
107 /// (octave-bucket estimate; see the struct-level precision contract).
108 pub p95_ns: u64,
109
110 /// Estimated 99th-percentile execution duration in nanoseconds
111 /// (octave-bucket estimate; see the struct-level precision contract).
112 pub p99_ns: u64,
113
114 /// Exact minimum execution duration observed (`REQ_0105`).
115 pub min_ns: u64,
116
117 /// Exact maximum execution duration observed (`REQ_0105`).
118 pub max_ns: u64,
119
120 /// Peak jitter (maximum `|actual_period − period|`) observed (`REQ_0101`).
121 pub max_jitter_ns: u64,
122
123 /// Peak (unsigned) deadline lateness observed (`REQ_0106`).
124 pub max_lateness_ns: u64,
125
126 /// Number of times this task exceeded its execution deadline (`REQ_0102`),
127 /// read from the per-task overrun counter.
128 pub overrun_count: u64,
129}
130
131/// A point-in-time pull snapshot of executor telemetry (`REQ_0103`).
132///
133/// Contains one [`TaskStatsEntry`] per registered task in registration order.
134/// The `Vec` allocation is on the caller's side; internal ring-buffer
135/// accounting is out of scope for `REQ_0104`.
136#[derive(Clone, Debug)]
137pub struct StatsSnapshot {
138 /// Per-task aggregated statistics, one entry per registered task in
139 /// registration order.
140 pub per_task: Vec<TaskStatsEntry>,
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TaskId;

    /// Builds a fully populated observation, clones it, and checks that the
    /// clone carries the same field values as the original.
    #[test]
    fn cycle_observation_is_clone_and_holds_all_fields() {
        let original = CycleObservation {
            cycle_index: 3,
            task_id: TaskId::from("t0"),
            task_index: 0,
            faulted: false,
            period_ns: 10_000_000,
            pre_ns: 0,
            actual_period_ns: Some(10_050_000),
            jitter_ns: Some(50_000),
            lateness_ns: Some(-120),
            skipped_slots: 0,
            took_ns: Some(1_000_000),
        };

        // Read both the original and the clone so that `Clone` is genuinely
        // exercised rather than optimised into a move.
        let duplicate = original.clone();

        assert_eq!(original.cycle_index, 3);
        assert_eq!(duplicate.cycle_index, original.cycle_index);
        assert_eq!(duplicate.task_id.as_str(), "t0");
        assert_eq!(duplicate.lateness_ns, Some(-120));
    }

    /// A snapshot built from a single entry exposes exactly that entry.
    #[test]
    fn stats_snapshot_holds_per_task_entries() {
        let entry = TaskStatsEntry {
            task_id: TaskId::from("t0"),
            p50_ns: 1,
            p95_ns: 2,
            p99_ns: 3,
            min_ns: 1,
            max_ns: 4,
            max_jitter_ns: 5,
            max_lateness_ns: 6,
            overrun_count: 7,
        };
        let snapshot = StatsSnapshot {
            per_task: vec![entry],
        };

        assert_eq!(snapshot.per_task.len(), 1);
        assert_eq!(snapshot.per_task[0].overrun_count, 7);
    }
}