// taktora_executor/stats/mod.rs

//! Executor-side telemetry value types (`REQ_0103`).
//!
//! Aggregation math lives in `taktora-stats` ([`ExecutorCycleStats`][ext]);
//! this module provides the std-side wiring carried across the [`Observer`]
//! boundary: the per-cycle push observation [`CycleObservation`] and the
//! pull snapshot [`StatsSnapshot`] / [`TaskStatsEntry`].
//!
//! [ext]: taktora_stats::ExecutorCycleStats
//! [`Observer`]: crate::Observer
10
11use crate::TaskId;
12
13/// A single per-task observation pushed by the executor each cycle.
14///
15/// One value is emitted per dispatched task per cycle and handed to the
16/// observer (or buffered for aggregation). Because [`TaskId`] is `Arc<str>`
17/// under the hood, [`CycleObservation`] is `Clone` but not `Copy`.
18///
19/// **Absent vs. zero (`REQ_0103`).** Every measured quantity is an
20/// [`Option`]: `None` means "not measured this cycle", which is *not* the
21/// same as a measured `0`. A faulted scan (see [`faulted`](Self::faulted)
22/// and `REQ_0107`) advances `cycle_index` but enters no task body, so
23/// `took_ns`/`jitter_ns`/`lateness_ns` are all `None`; `actual_period_ns`
24/// is also `None` on the very first cycle. This mirrors the connector's
25/// observation contract (`REQ_0267`), where a faulted wire round reports
26/// `wire_round_ns: None` — so a consumer joining the two push streams on
27/// `cycle_index` sees a consistent "absent on fault" signal from both
28/// layers instead of an ambiguous `0`.
29#[derive(Clone, Debug)]
30pub struct CycleObservation {
31    /// Monotonic cycle counter, advances on every dispatch attempt including
32    /// faulted scans (`REQ_0107`).
33    pub cycle_index: u64,
34
35    /// Identifier of the task this observation belongs to.
36    pub task_id: TaskId,
37
38    /// Stable zero-based registration index of the task, assigned at
39    /// `Executor::add` time and constant for the executor's lifetime
40    /// (`REQ_0103`). The flat `u32` join/identity key for telemetry export
41    /// (`REQ_0111`'s `task_id` column) — frees consumers from hashing the
42    /// `Arc<str>` [`task_id`](Self::task_id) on the hot path.
43    pub task_index: u32,
44
45    /// `true` when this scan was fault-routed / skipped: the task body was
46    /// not entered, so every measured field below is `None` (`REQ_0107`).
47    /// The cross-layer twin of the connector's `CycleOutcome::Fault`
48    /// (`REQ_0267`).
49    pub faulted: bool,
50
51    /// Declared (nominal) scan period in nanoseconds. Always known.
52    pub period_ns: u64,
53
54    /// Telemetry-clock nanosecond instant of **task-logic start** — the
55    /// canonical reference point (`pre_execute`), the same instant the
56    /// period/jitter/lateness folds are sampled against (`REQ_0103`,
57    /// `REQ_0101`). The single time source for an exported sample's time
58    /// axis; never a second clock read. Always populated.
59    pub pre_ns: u64,
60
61    /// Measured period since the previous dispatch of this task in
62    /// nanoseconds. `None` on the first cycle (no previous timestamp).
63    pub actual_period_ns: Option<u64>,
64
65    /// Absolute jitter: `|actual_period_ns − period_ns|`. `None` when not
66    /// measurable (first cycle) or on a faulted scan.
67    pub jitter_ns: Option<u64>,
68
69    /// Signed deadline lateness relative to the nominal dispatch grid in
70    /// nanoseconds; positive means late (`REQ_0106`). `None` on a faulted
71    /// scan or an event-driven task.
72    pub lateness_ns: Option<i64>,
73
74    /// Wall-clock execution duration of the task in nanoseconds. `None` on
75    /// a faulted scan (the body was not entered) or when no sample was
76    /// recorded this cycle (e.g. a fault handler ran in the item's place).
77    pub took_ns: Option<u64>,
78}
79
80/// Aggregated statistics for a single task, produced by a pull snapshot.
81///
82/// **Precision contract.** `min_ns`/`max_ns` are **exact** (`REQ_0105`) and
83/// are the values to use for any threshold or regression decision. The
84/// percentile fields (`p50_ns`, `p95_ns`, `p99_ns`) are octave-bucket
85/// *estimates* from the `taktora-stats` histogram, carrying up to
86/// [`PERCENTILE_MAX_REL_ERR_PCT`](taktora_stats::PERCENTILE_MAX_REL_ERR_PCT)
87/// relative error — they locate the order of magnitude, not the exact
88/// figure. (`REQ_0100`'s ≤ 1% target awaits a sub-octave histogram.)
89#[derive(Clone, Debug)]
90pub struct TaskStatsEntry {
91    /// Identifier of the task these statistics belong to.
92    pub task_id: TaskId,
93
94    /// Estimated 50th-percentile execution duration in nanoseconds
95    /// (octave-bucket estimate; see the struct-level precision contract).
96    pub p50_ns: u64,
97
98    /// Estimated 95th-percentile execution duration in nanoseconds
99    /// (octave-bucket estimate; see the struct-level precision contract).
100    pub p95_ns: u64,
101
102    /// Estimated 99th-percentile execution duration in nanoseconds
103    /// (octave-bucket estimate; see the struct-level precision contract).
104    pub p99_ns: u64,
105
106    /// Exact minimum execution duration observed (`REQ_0105`).
107    pub min_ns: u64,
108
109    /// Exact maximum execution duration observed (`REQ_0105`).
110    pub max_ns: u64,
111
112    /// Peak jitter (maximum `|actual_period − period|`) observed (`REQ_0101`).
113    pub max_jitter_ns: u64,
114
115    /// Peak (unsigned) deadline lateness observed (`REQ_0106`).
116    pub max_lateness_ns: u64,
117
118    /// Number of times this task exceeded its execution deadline (`REQ_0102`),
119    /// read from the per-task overrun counter.
120    pub overrun_count: u64,
121}
122
123/// A point-in-time pull snapshot of executor telemetry (`REQ_0103`).
124///
125/// Contains one [`TaskStatsEntry`] per registered task in registration order.
126/// The `Vec` allocation is on the caller's side; internal ring-buffer
127/// accounting is out of scope for `REQ_0104`.
128#[derive(Clone, Debug)]
129pub struct StatsSnapshot {
130    /// Per-task aggregated statistics, one entry per registered task in
131    /// registration order.
132    pub per_task: Vec<TaskStatsEntry>,
133}
134
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TaskId;

    /// A fully measured observation survives `clone()` with every field
    /// intact, and the original stays usable afterwards.
    #[test]
    fn cycle_observation_is_clone_and_holds_all_fields() {
        let obs = CycleObservation {
            cycle_index: 3,
            task_id: TaskId::from("t0"),
            task_index: 0,
            faulted: false,
            period_ns: 10_000_000,
            pre_ns: 0,
            actual_period_ns: Some(10_050_000),
            jitter_ns: Some(50_000),
            lateness_ns: Some(-120),
            took_ns: Some(1_000_000),
        };
        // Both the original and the copy are read so the clone is genuinely
        // exercised rather than optimised into a move.
        let copy = obs.clone();
        assert_eq!(obs.cycle_index, 3);
        assert_eq!(copy.cycle_index, obs.cycle_index);
        assert_eq!(copy.task_id.as_str(), "t0");
        assert_eq!(copy.task_index, 0);
        assert!(!copy.faulted);
        assert_eq!(copy.period_ns, 10_000_000);
        assert_eq!(copy.pre_ns, 0);
        assert_eq!(copy.actual_period_ns, Some(10_050_000));
        assert_eq!(copy.jitter_ns, Some(50_000));
        assert_eq!(copy.lateness_ns, Some(-120));
        assert_eq!(copy.took_ns, Some(1_000_000));
    }

    /// A faulted scan carries `None` for its measured fields, which must
    /// stay distinguishable from a measured zero (`REQ_0103`).
    #[test]
    fn faulted_observation_reports_absent_not_zero() {
        let obs = CycleObservation {
            cycle_index: 4,
            task_id: TaskId::from("t0"),
            task_index: 0,
            faulted: true,
            period_ns: 10_000_000,
            pre_ns: 0,
            actual_period_ns: None,
            jitter_ns: None,
            lateness_ns: None,
            took_ns: None,
        };
        assert!(obs.faulted);
        assert_ne!(obs.took_ns, Some(0));
        assert_ne!(obs.jitter_ns, Some(0));
        assert_ne!(obs.lateness_ns, Some(0));
    }

    /// A snapshot keeps its per-task entries and can be cloned.
    #[test]
    fn stats_snapshot_holds_per_task_entries() {
        let snap = StatsSnapshot {
            per_task: vec![TaskStatsEntry {
                task_id: TaskId::from("t0"),
                p50_ns: 1,
                p95_ns: 2,
                p99_ns: 3,
                min_ns: 1,
                max_ns: 4,
                max_jitter_ns: 5,
                max_lateness_ns: 6,
                overrun_count: 7,
            }],
        };
        let copy = snap.clone();
        assert_eq!(snap.per_task.len(), 1);
        assert_eq!(snap.per_task[0].overrun_count, 7);
        assert_eq!(copy.per_task.len(), 1);
        assert_eq!(copy.per_task[0].task_id.as_str(), "t0");
        assert_eq!(copy.per_task[0].max_ns, 4);
    }
}