rust_supervisor/summary/builder.rs
1//! Run summary construction for diagnostics.
2//!
3//! The builder derives an operator-facing summary from the event journal and
4//! final current state. It does not inspect runtime internals.
5
6use crate::error::types::TaskFailure;
7use crate::event::payload::{PolicyDecision, SupervisorEvent, What};
8use crate::journal::ring::EventJournal;
9use crate::state::supervisor::SupervisorState;
10use serde::{Deserialize, Serialize};
11
12/// Diagnostic summary for one supervisor run.
13#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
14pub struct RunSummary {
15 /// Run start time in nanoseconds since the Unix epoch.
16 pub started_at_unix_nanos: u128,
17 /// Run finish time in nanoseconds since the Unix epoch.
18 pub finished_at_unix_nanos: u128,
19 /// Shutdown cause when the run ended through shutdown.
20 pub shutdown_cause: Option<String>,
21 /// Total restart count inferred from recent events.
22 pub restart_count: u64,
23 /// Total failure count inferred from recent events.
24 pub failure_count: u64,
25 /// Recent typed failures.
26 pub recent_failures: Vec<TaskFailure>,
27 /// Recent lifecycle events retained for replay.
28 pub recent_events: Vec<SupervisorEvent>,
29 /// Final current state.
30 pub final_state: SupervisorState,
31 /// Final policy decision when one was recorded.
32 pub final_decision: Option<PolicyDecision>,
33}
34
/// Configurable factory for [`RunSummary`] values.
#[derive(Debug, Clone)]
pub struct RunSummaryBuilder {
    /// Upper bound on how many journal events are copied into a summary.
    pub recent_event_limit: usize,
}
41
42impl RunSummaryBuilder {
43 /// Creates a run summary builder.
44 ///
45 /// # Arguments
46 ///
47 /// - `recent_event_limit`: Maximum number of recent journal events copied.
48 ///
49 /// # Returns
50 ///
51 /// Returns a [`RunSummaryBuilder`].
52 ///
53 /// # Examples
54 ///
55 /// ```
56 /// let builder = rust_supervisor::summary::builder::RunSummaryBuilder::new(8);
57 /// assert_eq!(builder.recent_event_limit, 8);
58 /// ```
59 pub fn new(recent_event_limit: usize) -> Self {
60 Self { recent_event_limit }
61 }
62
63 /// Builds a run summary from journal and final state.
64 ///
65 /// # Arguments
66 ///
67 /// - `journal`: Event journal that contains recent lifecycle facts.
68 /// - `final_state`: Final current state for the run.
69 /// - `shutdown_cause`: Optional shutdown cause.
70 ///
71 /// # Returns
72 ///
73 /// Returns a [`RunSummary`] derived from the inputs.
74 pub fn build(
75 &self,
76 journal: &EventJournal,
77 final_state: SupervisorState,
78 shutdown_cause: Option<String>,
79 ) -> RunSummary {
80 let recent_events = journal.recent(self.recent_event_limit);
81 let started_at_unix_nanos = started_at(&recent_events);
82 let finished_at_unix_nanos = finished_at(&recent_events);
83 let recent_failures = collect_failures(&recent_events);
84 RunSummary {
85 started_at_unix_nanos,
86 finished_at_unix_nanos,
87 shutdown_cause,
88 restart_count: count_restarts(&recent_events),
89 failure_count: recent_failures.len() as u64,
90 final_decision: last_decision(&recent_events),
91 recent_failures,
92 recent_events,
93 final_state,
94 }
95 }
96}
97
98impl Default for RunSummaryBuilder {
99 /// Creates the default run summary builder.
100 fn default() -> Self {
101 Self::new(32)
102 }
103}
104
105/// Reads the first event timestamp.
106///
107/// # Arguments
108///
109/// - `events`: Events retained for the summary.
110///
111/// # Returns
112///
113/// Returns zero when no events exist.
114fn started_at(events: &[SupervisorEvent]) -> u128 {
115 events
116 .first()
117 .map(|event| event.when.time.unix_nanos)
118 .unwrap_or(0)
119}
120
121/// Reads the last event timestamp.
122///
123/// # Arguments
124///
125/// - `events`: Events retained for the summary.
126///
127/// # Returns
128///
129/// Returns zero when no events exist.
130fn finished_at(events: &[SupervisorEvent]) -> u128 {
131 events
132 .last()
133 .map(|event| event.when.time.unix_nanos)
134 .unwrap_or(0)
135}
136
137/// Collects typed failures from recent events.
138///
139/// # Arguments
140///
141/// - `events`: Events retained for the summary.
142///
143/// # Returns
144///
145/// Returns failures in event order.
146fn collect_failures(events: &[SupervisorEvent]) -> Vec<TaskFailure> {
147 events
148 .iter()
149 .filter_map(|event| match &event.what {
150 What::ChildFailed { failure } => Some(failure.clone()),
151 _ => None,
152 })
153 .collect()
154}
155
156/// Counts restart events.
157///
158/// # Arguments
159///
160/// - `events`: Events retained for the summary.
161///
162/// # Returns
163///
164/// Returns the number of child restart events.
165fn count_restarts(events: &[SupervisorEvent]) -> u64 {
166 events
167 .iter()
168 .filter(|event| matches!(event.what, What::ChildRestarted { .. }))
169 .count() as u64
170}
171
172/// Finds the last policy decision.
173///
174/// # Arguments
175///
176/// - `events`: Events retained for the summary.
177///
178/// # Returns
179///
180/// Returns the last policy decision when one exists.
181fn last_decision(events: &[SupervisorEvent]) -> Option<PolicyDecision> {
182 events.iter().rev().find_map(|event| event.policy.clone())
183}