// Source: taktora_executor/stats/mod.rs

//! Executor-side telemetry value types (`REQ_0103`).
//!
//! Aggregation math lives in `taktora-stats` ([`ExecutorCycleStats`][ext]);
//! this module provides the std-side wiring carried across the [`Observer`]
//! boundary: the per-cycle push observation [`CycleObservation`] and the
//! pull snapshot [`StatsSnapshot`] / [`TaskStatsEntry`].
//!
//! [ext]: taktora_stats::ExecutorCycleStats
//! [`Observer`]: crate::Observer

use crate::TaskId;

13/// A single per-task observation pushed by the executor each cycle.
14///
15/// One value is emitted per dispatched task per cycle and handed to the
16/// observer (or buffered for aggregation). Because [`TaskId`] is `Arc<str>`
17/// under the hood, [`CycleObservation`] is `Clone` but not `Copy`.
18///
19/// **Absent vs. zero (`REQ_0103`).** Every measured quantity is an
20/// [`Option`]: `None` means "not measured this cycle", which is *not* the
21/// same as a measured `0`. A faulted scan (see [`faulted`](Self::faulted)
22/// and `REQ_0107`) advances `cycle_index` but enters no task body, so
23/// `took_ns`/`jitter_ns`/`lateness_ns` are all `None`; `actual_period_ns`
24/// is also `None` on the very first cycle. This mirrors the connector's
25/// observation contract (`REQ_0267`), where a faulted wire round reports
26/// `wire_round_ns: None` — so a consumer joining the two push streams on
27/// `cycle_index` sees a consistent "absent on fault" signal from both
28/// layers instead of an ambiguous `0`.
29#[derive(Clone, Debug)]
30pub struct CycleObservation {
31    /// Monotonic cycle counter, advances on every dispatch attempt including
32    /// faulted scans (`REQ_0107`).
33    pub cycle_index: u64,
34
35    /// Identifier of the task this observation belongs to.
36    pub task_id: TaskId,
37
38    /// Stable zero-based registration index of the task, assigned at
39    /// `Executor::add` time and constant for the executor's lifetime
40    /// (`REQ_0103`). The flat `u32` join/identity key for telemetry export
41    /// (`REQ_0111`'s `task_id` column) — frees consumers from hashing the
42    /// `Arc<str>` [`task_id`](Self::task_id) on the hot path.
43    pub task_index: u32,
44
45    /// `true` when this scan was fault-routed / skipped: the task body was
46    /// not entered, so every measured field below is `None` (`REQ_0107`).
47    /// The cross-layer twin of the connector's `CycleOutcome::Fault`
48    /// (`REQ_0267`).
49    pub faulted: bool,
50
51    /// Declared (nominal) scan period in nanoseconds. Always known.
52    pub period_ns: u64,
53
54    /// Telemetry-clock nanosecond instant of **task-logic start** — the
55    /// canonical reference point (`pre_execute`), the same instant the
56    /// period/jitter/lateness folds are sampled against (`REQ_0103`,
57    /// `REQ_0101`). The single time source for an exported sample's time
58    /// axis; never a second clock read. Always populated.
59    pub pre_ns: u64,
60
61    /// Measured period since the previous dispatch of this task in
62    /// nanoseconds. `None` on the first cycle (no previous timestamp).
63    pub actual_period_ns: Option<u64>,
64
65    /// Absolute jitter: `|actual_period_ns − period_ns|`. `None` when not
66    /// measurable (first cycle) or on a faulted scan.
67    pub jitter_ns: Option<u64>,
68
69    /// Signed deadline lateness relative to the nominal dispatch grid in
70    /// nanoseconds; positive means late (`REQ_0106`). `None` on a faulted
71    /// scan or an event-driven task.
72    pub lateness_ns: Option<i64>,
73
74    /// Nominal grid slots the dispatcher passed over **unserved** between the
75    /// slot served by this task's previous dispatch and the slot served by
76    /// this one (the skip-realign of `REQ_0268`), per `REQ_0840`. Always
77    /// present: `0` in steady state, always `0` in `Legacy` dispatch mode
78    /// (which never skips slots) and on a task's first recorded cycle. The
79    /// lateness grid of `REQ_0106` advances by exactly `1 + skipped_slots`.
80    pub skipped_slots: u32,
81
82    /// Wall-clock execution duration of the task in nanoseconds. `None` on
83    /// a faulted scan (the body was not entered) or when no sample was
84    /// recorded this cycle (e.g. a fault handler ran in the item's place).
85    pub took_ns: Option<u64>,
86}
87
88/// Aggregated statistics for a single task, produced by a pull snapshot.
89///
90/// **Precision contract.** `min_ns`/`max_ns` are **exact** (`REQ_0105`) and
91/// are the values to use for any threshold or regression decision. The
92/// percentile fields (`p50_ns`, `p95_ns`, `p99_ns`) are octave-bucket
93/// *estimates* from the `taktora-stats` histogram, carrying up to
94/// [`PERCENTILE_MAX_REL_ERR_PCT`](taktora_stats::PERCENTILE_MAX_REL_ERR_PCT)
95/// relative error — they locate the order of magnitude, not the exact
96/// figure. (`REQ_0100`'s ≤ 1% target awaits a sub-octave histogram.)
97#[derive(Clone, Debug)]
98pub struct TaskStatsEntry {
99    /// Identifier of the task these statistics belong to.
100    pub task_id: TaskId,
101
102    /// Estimated 50th-percentile execution duration in nanoseconds
103    /// (octave-bucket estimate; see the struct-level precision contract).
104    pub p50_ns: u64,
105
106    /// Estimated 95th-percentile execution duration in nanoseconds
107    /// (octave-bucket estimate; see the struct-level precision contract).
108    pub p95_ns: u64,
109
110    /// Estimated 99th-percentile execution duration in nanoseconds
111    /// (octave-bucket estimate; see the struct-level precision contract).
112    pub p99_ns: u64,
113
114    /// Exact minimum execution duration observed (`REQ_0105`).
115    pub min_ns: u64,
116
117    /// Exact maximum execution duration observed (`REQ_0105`).
118    pub max_ns: u64,
119
120    /// Peak jitter (maximum `|actual_period − period|`) observed (`REQ_0101`).
121    pub max_jitter_ns: u64,
122
123    /// Peak (unsigned) deadline lateness observed (`REQ_0106`).
124    pub max_lateness_ns: u64,
125
126    /// Number of times this task exceeded its execution deadline (`REQ_0102`),
127    /// read from the per-task overrun counter.
128    pub overrun_count: u64,
129}
130
131/// A point-in-time pull snapshot of executor telemetry (`REQ_0103`).
132///
133/// Contains one [`TaskStatsEntry`] per registered task in registration order.
134/// The `Vec` allocation is on the caller's side; internal ring-buffer
135/// accounting is out of scope for `REQ_0104`.
136#[derive(Clone, Debug)]
137pub struct StatsSnapshot {
138    /// Per-task aggregated statistics, one entry per registered task in
139    /// registration order.
140    pub per_task: Vec<TaskStatsEntry>,
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TaskId;

    /// A fully populated observation can be built field-by-field and cloned,
    /// and the clone carries the same values as the original.
    #[test]
    fn cycle_observation_is_clone_and_holds_all_fields() {
        let obs = CycleObservation {
            cycle_index: 3,
            task_id: TaskId::from("t0"),
            task_index: 0,
            faulted: false,
            period_ns: 10_000_000,
            pre_ns: 0,
            actual_period_ns: Some(10_050_000),
            jitter_ns: Some(50_000),
            lateness_ns: Some(-120),
            skipped_slots: 0,
            took_ns: Some(1_000_000),
        };
        // Verify Clone is implemented and produces an independent copy;
        // both original and copy are read so the clone is genuinely exercised.
        let copy = obs.clone();
        assert_eq!(obs.cycle_index, 3);
        assert_eq!(copy.cycle_index, obs.cycle_index);
        assert_eq!(copy.task_id.as_str(), "t0");
        assert_eq!(copy.lateness_ns, Some(-120));
    }

    /// A snapshot holds the per-task entries it was constructed with.
    #[test]
    fn stats_snapshot_holds_per_task_entries() {
        let snap = StatsSnapshot {
            per_task: vec![TaskStatsEntry {
                task_id: TaskId::from("t0"),
                p50_ns: 1,
                p95_ns: 2,
                p99_ns: 3,
                min_ns: 1,
                max_ns: 4,
                max_jitter_ns: 5,
                max_lateness_ns: 6,
                overrun_count: 7,
            }],
        };
        assert_eq!(snap.per_task.len(), 1);
        assert_eq!(snap.per_task[0].overrun_count, 7);
    }
}