1use metrics::{counter, gauge};
8use std::process::ExitStatus;
9use std::time::Instant;
10
11#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
30pub enum RestartReason {
31 UserRequested,
33 CleanShutdown,
35 ErrorExit(i32),
37 Segfault,
39 Killed,
41 Aborted,
43 Terminated,
45 SignalOther(i32),
47 Unknown,
49}
50
51impl RestartReason {
52 pub fn from_exit_status(status: ExitStatus) -> Self {
68 #[cfg(unix)]
69 {
70 use std::os::unix::process::ExitStatusExt;
71 if let Some(signal) = status.signal() {
72 return match signal {
73 6 => Self::Aborted,
74 9 => Self::Killed,
75 11 => Self::Segfault,
76 15 => Self::Terminated,
77 s => Self::SignalOther(s),
78 };
79 }
80 }
81
82 match status.code() {
83 Some(0) => Self::CleanShutdown,
84 Some(n) => Self::ErrorExit(n),
85 None => Self::Unknown,
86 }
87 }
88
89 pub fn as_label(&self) -> &'static str {
93 match self {
94 Self::UserRequested => "user_requested",
95 Self::CleanShutdown => "clean_shutdown",
96 Self::ErrorExit(_) => "error_exit",
97 Self::Segfault => "segfault",
98 Self::Killed => "killed",
99 Self::Aborted => "aborted",
100 Self::Terminated => "terminated",
101 Self::SignalOther(_) => "signal_other",
102 Self::Unknown => "unknown",
103 }
104 }
105
106 pub fn is_signal(&self) -> bool {
108 matches!(
109 self,
110 Self::Segfault | Self::Killed | Self::Aborted | Self::Terminated | Self::SignalOther(_)
111 )
112 }
113
114 pub fn is_clean(&self) -> bool {
116 matches!(self, Self::CleanShutdown | Self::UserRequested)
117 }
118}
119
120impl std::fmt::Display for RestartReason {
121 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122 match self {
123 Self::UserRequested => write!(f, "user requested"),
124 Self::CleanShutdown => write!(f, "clean shutdown"),
125 Self::ErrorExit(code) => write!(f, "error exit (code {})", code),
126 Self::Segfault => write!(f, "segmentation fault (SIGSEGV)"),
127 Self::Killed => write!(f, "killed (SIGKILL)"),
128 Self::Aborted => write!(f, "aborted (SIGABRT)"),
129 Self::Terminated => write!(f, "terminated (SIGTERM)"),
130 Self::SignalOther(sig) => write!(f, "signal {}", sig),
131 Self::Unknown => write!(f, "unknown"),
132 }
133 }
134}
135
136#[derive(Debug)]
166pub struct SubprocessMetrics {
167 started_at: Option<Instant>,
168 restart_count: u64,
169 last_restart_reason: Option<RestartReason>,
170}
171
172#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
174pub struct SubprocessMetricsSnapshot {
175 pub uptime_seconds: f64,
177 pub restart_count: u64,
179 pub last_restart_reason: Option<RestartReason>,
181 pub is_running: bool,
183}
184
185impl SubprocessMetrics {
186 pub fn new() -> Self {
188 Self { started_at: None, restart_count: 0, last_restart_reason: None }
189 }
190
191 pub fn process_started(&mut self) {
195 self.started_at = Some(Instant::now());
196 gauge!("repl.subprocess.uptime_seconds").set(0.0);
197 }
198
199 pub fn record_restart(&mut self, reason: RestartReason) {
207 self.restart_count += 1;
208 self.last_restart_reason = Some(reason);
209 counter!("repl.subprocess.restarts_total", "reason" => reason.as_label()).increment(1);
210 }
211
212 pub fn uptime_seconds(&self) -> f64 {
216 self.started_at.map(|start| start.elapsed().as_secs_f64()).unwrap_or(0.0)
217 }
218
219 pub fn update_uptime_gauge(&self) {
223 gauge!("repl.subprocess.uptime_seconds").set(self.uptime_seconds());
224 }
225
226 pub fn restart_count(&self) -> u64 {
228 self.restart_count
229 }
230
231 pub fn last_restart_reason(&self) -> Option<RestartReason> {
233 self.last_restart_reason
234 }
235
236 pub fn is_running(&self) -> bool {
238 self.started_at.is_some()
239 }
240
241 pub fn snapshot(&self) -> SubprocessMetricsSnapshot {
243 SubprocessMetricsSnapshot {
244 uptime_seconds: self.uptime_seconds(),
245 restart_count: self.restart_count,
246 last_restart_reason: self.last_restart_reason,
247 is_running: self.is_running(),
248 }
249 }
250}
251
252impl Default for SubprocessMetrics {
253 fn default() -> Self {
254 Self::new()
255 }
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    // Each variant maps to its fixed snake_case label.
    #[test]
    fn test_restart_reason_labels() {
        let cases = [
            (RestartReason::UserRequested, "user_requested"),
            (RestartReason::CleanShutdown, "clean_shutdown"),
            (RestartReason::ErrorExit(1), "error_exit"),
            (RestartReason::Segfault, "segfault"),
            (RestartReason::Killed, "killed"),
            (RestartReason::Aborted, "aborted"),
            (RestartReason::Terminated, "terminated"),
            (RestartReason::SignalOther(99), "signal_other"),
            (RestartReason::Unknown, "unknown"),
        ];

        for &(reason, label) in &cases {
            assert_eq!(reason.as_label(), label, "label for {:?}", reason);
        }
    }

    // Only the signal-derived variants report `is_signal`.
    #[test]
    fn test_restart_reason_is_signal() {
        let signals = [
            RestartReason::Segfault,
            RestartReason::Killed,
            RestartReason::Aborted,
            RestartReason::Terminated,
            RestartReason::SignalOther(99),
        ];
        let non_signals = [
            RestartReason::UserRequested,
            RestartReason::CleanShutdown,
            RestartReason::ErrorExit(1),
            RestartReason::Unknown,
        ];

        for reason in &signals {
            assert!(reason.is_signal(), "{:?} should be a signal", reason);
        }
        for reason in &non_signals {
            assert!(!reason.is_signal(), "{:?} should not be a signal", reason);
        }
    }

    // Only user-requested restarts and clean shutdowns count as clean.
    #[test]
    fn test_restart_reason_is_clean() {
        let clean = [RestartReason::UserRequested, RestartReason::CleanShutdown];
        let not_clean = [
            RestartReason::ErrorExit(1),
            RestartReason::Segfault,
            RestartReason::Unknown,
        ];

        for reason in &clean {
            assert!(reason.is_clean(), "{:?} should be clean", reason);
        }
        for reason in &not_clean {
            assert!(!reason.is_clean(), "{:?} should not be clean", reason);
        }
    }

    // Spot-checks of the human-readable rendering.
    #[test]
    fn test_restart_reason_display() {
        let cases = [
            (RestartReason::UserRequested, "user requested"),
            (RestartReason::CleanShutdown, "clean shutdown"),
            (RestartReason::ErrorExit(42), "error exit (code 42)"),
            (RestartReason::Segfault, "segmentation fault (SIGSEGV)"),
            (RestartReason::Killed, "killed (SIGKILL)"),
        ];

        for &(reason, rendered) in &cases {
            assert_eq!(reason.to_string(), rendered);
        }
    }

    // A fresh tracker reports zero uptime and zero restarts.
    #[test]
    fn test_subprocess_metrics_creation() {
        let fresh = SubprocessMetrics::new();

        assert_eq!(fresh.uptime_seconds(), 0.0);
        assert_eq!(fresh.restart_count(), 0);
    }

    // Start -> restart -> start -> restart increments the count each time.
    #[test]
    fn test_subprocess_metrics_lifecycle() {
        let mut tracker = SubprocessMetrics::new();

        tracker.process_started();
        assert!(tracker.uptime_seconds() >= 0.0);

        tracker.record_restart(RestartReason::ErrorExit(1));
        assert_eq!(tracker.restart_count(), 1);

        tracker.process_started();
        tracker.record_restart(RestartReason::Segfault);
        assert_eq!(tracker.restart_count(), 2);
    }
}