Skip to main content

rust_supervisor/summary/
builder.rs

1//! Run summary construction for diagnostics.
2//!
3//! The builder derives an operator-facing summary from the event journal and
4//! final current state. It does not inspect runtime internals.
5
6use crate::error::types::TaskFailure;
7use crate::event::payload::{PolicyDecision, SupervisorEvent, What};
8use crate::journal::ring::EventJournal;
9use crate::state::supervisor::SupervisorState;
10use serde::{Deserialize, Serialize};
11
12/// Diagnostic summary for one supervisor run.
13#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
14pub struct RunSummary {
15    /// Run start time in nanoseconds since the Unix epoch.
16    pub started_at_unix_nanos: u128,
17    /// Run finish time in nanoseconds since the Unix epoch.
18    pub finished_at_unix_nanos: u128,
19    /// Shutdown cause when the run ended through shutdown.
20    pub shutdown_cause: Option<String>,
21    /// Total restart count inferred from recent events.
22    pub restart_count: u64,
23    /// Total failure count inferred from recent events.
24    pub failure_count: u64,
25    /// Recent typed failures.
26    pub recent_failures: Vec<TaskFailure>,
27    /// Recent lifecycle events retained for replay.
28    pub recent_events: Vec<SupervisorEvent>,
29    /// Final current state.
30    pub final_state: SupervisorState,
31    /// Final policy decision when one was recorded.
32    pub final_decision: Option<PolicyDecision>,
33}
34
/// Configurable factory that assembles a [`RunSummary`].
#[derive(Debug, Clone)]
pub struct RunSummaryBuilder {
    /// Upper bound on how many journal events are copied into the summary.
    pub recent_event_limit: usize,
}
41
42impl RunSummaryBuilder {
43    /// Creates a run summary builder.
44    ///
45    /// # Arguments
46    ///
47    /// - `recent_event_limit`: Maximum number of recent journal events copied.
48    ///
49    /// # Returns
50    ///
51    /// Returns a [`RunSummaryBuilder`].
52    ///
53    /// # Examples
54    ///
55    /// ```
56    /// let builder = rust_supervisor::summary::builder::RunSummaryBuilder::new(8);
57    /// assert_eq!(builder.recent_event_limit, 8);
58    /// ```
59    pub fn new(recent_event_limit: usize) -> Self {
60        Self { recent_event_limit }
61    }
62
63    /// Builds a run summary from journal and final state.
64    ///
65    /// # Arguments
66    ///
67    /// - `journal`: Event journal that contains recent lifecycle facts.
68    /// - `final_state`: Final current state for the run.
69    /// - `shutdown_cause`: Optional shutdown cause.
70    ///
71    /// # Returns
72    ///
73    /// Returns a [`RunSummary`] derived from the inputs.
74    pub fn build(
75        &self,
76        journal: &EventJournal,
77        final_state: SupervisorState,
78        shutdown_cause: Option<String>,
79    ) -> RunSummary {
80        let recent_events = journal.recent(self.recent_event_limit);
81        let started_at_unix_nanos = started_at(&recent_events);
82        let finished_at_unix_nanos = finished_at(&recent_events);
83        let recent_failures = collect_failures(&recent_events);
84        RunSummary {
85            started_at_unix_nanos,
86            finished_at_unix_nanos,
87            shutdown_cause,
88            restart_count: count_restarts(&recent_events),
89            failure_count: recent_failures.len() as u64,
90            final_decision: last_decision(&recent_events),
91            recent_failures,
92            recent_events,
93            final_state,
94        }
95    }
96}
97
98impl Default for RunSummaryBuilder {
99    /// Creates the default run summary builder.
100    fn default() -> Self {
101        Self::new(32)
102    }
103}
104
105/// Reads the first event timestamp.
106///
107/// # Arguments
108///
109/// - `events`: Events retained for the summary.
110///
111/// # Returns
112///
113/// Returns zero when no events exist.
114fn started_at(events: &[SupervisorEvent]) -> u128 {
115    events
116        .first()
117        .map(|event| event.when.time.unix_nanos)
118        .unwrap_or(0)
119}
120
121/// Reads the last event timestamp.
122///
123/// # Arguments
124///
125/// - `events`: Events retained for the summary.
126///
127/// # Returns
128///
129/// Returns zero when no events exist.
130fn finished_at(events: &[SupervisorEvent]) -> u128 {
131    events
132        .last()
133        .map(|event| event.when.time.unix_nanos)
134        .unwrap_or(0)
135}
136
137/// Collects typed failures from recent events.
138///
139/// # Arguments
140///
141/// - `events`: Events retained for the summary.
142///
143/// # Returns
144///
145/// Returns failures in event order.
146fn collect_failures(events: &[SupervisorEvent]) -> Vec<TaskFailure> {
147    events
148        .iter()
149        .filter_map(|event| match &event.what {
150            What::ChildFailed { failure } => Some(failure.clone()),
151            _ => None,
152        })
153        .collect()
154}
155
156/// Counts restart events.
157///
158/// # Arguments
159///
160/// - `events`: Events retained for the summary.
161///
162/// # Returns
163///
164/// Returns the number of child restart events.
165fn count_restarts(events: &[SupervisorEvent]) -> u64 {
166    events
167        .iter()
168        .filter(|event| matches!(event.what, What::ChildRestarted { .. }))
169        .count() as u64
170}
171
172/// Finds the last policy decision.
173///
174/// # Arguments
175///
176/// - `events`: Events retained for the summary.
177///
178/// # Returns
179///
180/// Returns the last policy decision when one exists.
181fn last_decision(events: &[SupervisorEvent]) -> Option<PolicyDecision> {
182    events.iter().rev().find_map(|event| event.policy.clone())
183}