taktora_executor/stats/mod.rs
1//! Executor-side telemetry value types (`REQ_0103`).
2//!
3//! Aggregation math lives in `taktora-stats` ([`ExecutorCycleStats`][ext]);
4//! this module provides the std-side wiring carried across the [`Observer`]
5//! boundary: the per-cycle push observation [`CycleObservation`] and the
6//! pull snapshot [`StatsSnapshot`] / [`TaskStatsEntry`].
7//!
8//! [ext]: taktora_stats::ExecutorCycleStats
9//! [`Observer`]: crate::Observer
10
11use crate::TaskId;
12
13/// A single per-task observation pushed by the executor each cycle.
14///
15/// One value is emitted per dispatched task per cycle and handed to the
16/// observer (or buffered for aggregation). Because [`TaskId`] is `Arc<str>`
17/// under the hood, [`CycleObservation`] is `Clone` but not `Copy`.
18///
19/// **Absent vs. zero (`REQ_0103`).** Every measured quantity is an
20/// [`Option`]: `None` means "not measured this cycle", which is *not* the
21/// same as a measured `0`. A faulted scan (see [`faulted`](Self::faulted)
22/// and `REQ_0107`) advances `cycle_index` but enters no task body, so
23/// `took_ns`/`jitter_ns`/`lateness_ns` are all `None`; `actual_period_ns`
24/// is also `None` on the very first cycle. This mirrors the connector's
25/// observation contract (`REQ_0267`), where a faulted wire round reports
26/// `wire_round_ns: None` — so a consumer joining the two push streams on
27/// `cycle_index` sees a consistent "absent on fault" signal from both
28/// layers instead of an ambiguous `0`.
29#[derive(Clone, Debug)]
30pub struct CycleObservation {
31 /// Monotonic cycle counter, advances on every dispatch attempt including
32 /// faulted scans (`REQ_0107`).
33 pub cycle_index: u64,
34
35 /// Identifier of the task this observation belongs to.
36 pub task_id: TaskId,
37
38 /// Stable zero-based registration index of the task, assigned at
39 /// `Executor::add` time and constant for the executor's lifetime
40 /// (`REQ_0103`). The flat `u32` join/identity key for telemetry export
41 /// (`REQ_0111`'s `task_id` column) — frees consumers from hashing the
42 /// `Arc<str>` [`task_id`](Self::task_id) on the hot path.
43 pub task_index: u32,
44
45 /// `true` when this scan was fault-routed / skipped: the task body was
46 /// not entered, so every measured field below is `None` (`REQ_0107`).
47 /// The cross-layer twin of the connector's `CycleOutcome::Fault`
48 /// (`REQ_0267`).
49 pub faulted: bool,
50
51 /// Declared (nominal) scan period in nanoseconds. Always known.
52 pub period_ns: u64,
53
54 /// Telemetry-clock nanosecond instant of **task-logic start** — the
55 /// canonical reference point (`pre_execute`), the same instant the
56 /// period/jitter/lateness folds are sampled against (`REQ_0103`,
57 /// `REQ_0101`). The single time source for an exported sample's time
58 /// axis; never a second clock read. Always populated.
59 pub pre_ns: u64,
60
61 /// Measured period since the previous dispatch of this task in
62 /// nanoseconds. `None` on the first cycle (no previous timestamp).
63 pub actual_period_ns: Option<u64>,
64
65 /// Absolute jitter: `|actual_period_ns − period_ns|`. `None` when not
66 /// measurable (first cycle) or on a faulted scan.
67 pub jitter_ns: Option<u64>,
68
69 /// Signed deadline lateness relative to the nominal dispatch grid in
70 /// nanoseconds; positive means late (`REQ_0106`). `None` on a faulted
71 /// scan or an event-driven task.
72 pub lateness_ns: Option<i64>,
73
74 /// Wall-clock execution duration of the task in nanoseconds. `None` on
75 /// a faulted scan (the body was not entered) or when no sample was
76 /// recorded this cycle (e.g. a fault handler ran in the item's place).
77 pub took_ns: Option<u64>,
78}
79
80/// Aggregated statistics for a single task, produced by a pull snapshot.
81///
82/// **Precision contract.** `min_ns`/`max_ns` are **exact** (`REQ_0105`) and
83/// are the values to use for any threshold or regression decision. The
84/// percentile fields (`p50_ns`, `p95_ns`, `p99_ns`) are octave-bucket
85/// *estimates* from the `taktora-stats` histogram, carrying up to
86/// [`PERCENTILE_MAX_REL_ERR_PCT`](taktora_stats::PERCENTILE_MAX_REL_ERR_PCT)
87/// relative error — they locate the order of magnitude, not the exact
88/// figure. (`REQ_0100`'s ≤ 1% target awaits a sub-octave histogram.)
89#[derive(Clone, Debug)]
90pub struct TaskStatsEntry {
91 /// Identifier of the task these statistics belong to.
92 pub task_id: TaskId,
93
94 /// Estimated 50th-percentile execution duration in nanoseconds
95 /// (octave-bucket estimate; see the struct-level precision contract).
96 pub p50_ns: u64,
97
98 /// Estimated 95th-percentile execution duration in nanoseconds
99 /// (octave-bucket estimate; see the struct-level precision contract).
100 pub p95_ns: u64,
101
102 /// Estimated 99th-percentile execution duration in nanoseconds
103 /// (octave-bucket estimate; see the struct-level precision contract).
104 pub p99_ns: u64,
105
106 /// Exact minimum execution duration observed (`REQ_0105`).
107 pub min_ns: u64,
108
109 /// Exact maximum execution duration observed (`REQ_0105`).
110 pub max_ns: u64,
111
112 /// Peak jitter (maximum `|actual_period − period|`) observed (`REQ_0101`).
113 pub max_jitter_ns: u64,
114
115 /// Peak (unsigned) deadline lateness observed (`REQ_0106`).
116 pub max_lateness_ns: u64,
117
118 /// Number of times this task exceeded its execution deadline (`REQ_0102`),
119 /// read from the per-task overrun counter.
120 pub overrun_count: u64,
121}
122
123/// A point-in-time pull snapshot of executor telemetry (`REQ_0103`).
124///
125/// Contains one [`TaskStatsEntry`] per registered task in registration order.
126/// The `Vec` allocation is on the caller's side; internal ring-buffer
127/// accounting is out of scope for `REQ_0104`.
128#[derive(Clone, Debug)]
129pub struct StatsSnapshot {
130 /// Per-task aggregated statistics, one entry per registered task in
131 /// registration order.
132 pub per_task: Vec<TaskStatsEntry>,
133}
134
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TaskId;

    /// A cloned [`CycleObservation`] must carry every field across unchanged
    /// while leaving the original usable.
    #[test]
    fn cycle_observation_is_clone_and_holds_all_fields() {
        let obs = CycleObservation {
            cycle_index: 3,
            task_id: TaskId::from("t0"),
            task_index: 0,
            faulted: false,
            period_ns: 10_000_000,
            // Non-zero so the assertion below cannot pass on a default value.
            pre_ns: 30_050_000,
            actual_period_ns: Some(10_050_000),
            jitter_ns: Some(50_000),
            lateness_ns: Some(-120),
            took_ns: Some(1_000_000),
        };
        let copy = obs.clone();

        // The original is still readable after cloning.
        assert_eq!(obs.cycle_index, 3);
        assert_eq!(obs.task_id.as_str(), "t0");

        // Every field is checked, as the test name promises.
        assert_eq!(copy.cycle_index, obs.cycle_index);
        assert_eq!(copy.task_id.as_str(), "t0");
        assert_eq!(copy.task_index, 0);
        assert!(!copy.faulted);
        assert_eq!(copy.period_ns, 10_000_000);
        assert_eq!(copy.pre_ns, 30_050_000);
        assert_eq!(copy.actual_period_ns, Some(10_050_000));
        assert_eq!(copy.jitter_ns, Some(50_000));
        assert_eq!(copy.lateness_ns, Some(-120));
        assert_eq!(copy.took_ns, Some(1_000_000));
    }

    /// A [`StatsSnapshot`] exposes its per-task entries with each field
    /// readable at the value it was built with.
    #[test]
    fn stats_snapshot_holds_per_task_entries() {
        let snap = StatsSnapshot {
            per_task: vec![TaskStatsEntry {
                task_id: TaskId::from("t0"),
                p50_ns: 1,
                p95_ns: 2,
                p99_ns: 3,
                min_ns: 1,
                max_ns: 4,
                max_jitter_ns: 5,
                max_lateness_ns: 6,
                overrun_count: 7,
            }],
        };
        assert_eq!(snap.per_task.len(), 1);

        let entry = &snap.per_task[0];
        assert_eq!(entry.task_id.as_str(), "t0");
        assert_eq!(entry.p50_ns, 1);
        assert_eq!(entry.p95_ns, 2);
        assert_eq!(entry.p99_ns, 3);
        assert_eq!(entry.min_ns, 1);
        assert_eq!(entry.max_ns, 4);
        assert_eq!(entry.max_jitter_ns, 5);
        assert_eq!(entry.max_lateness_ns, 6);
        assert_eq!(entry.overrun_count, 7);
    }
}