1use std::fmt;
38use std::time::{Duration, Instant};
39
40use serde::{Deserialize, Serialize};
41
/// Phase of a service's lifecycle, as tracked by `ServiceLifecycle`.
///
/// Serialized by serde with `snake_case` variant names (`starting`,
/// `backing_off`, ...), the same labels this type's `Display` impl writes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ServicePhase {
    /// Phase of a freshly created lifecycle; also re-entered from
    /// `BackingOff` when the service is restarted.
    Starting,
    /// Entered from `Starting`; time spent here is what the lifecycle counts
    /// as running time.
    Running,
    /// Entered after a reported failure, from `Starting` or `Running`.
    BackingOff,
    /// Entered from `Starting`, `Running` or `BackingOff`; the only accepted
    /// next phase is `Terminated`. Not considered alive by `is_alive`.
    Stopping,
    /// Final phase: no transition out of it is accepted.
    Terminated,
}
64
65impl fmt::Display for ServicePhase {
66 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67 match self {
68 Self::Starting => write!(f, "starting"),
69 Self::Running => write!(f, "running"),
70 Self::BackingOff => write!(f, "backing_off"),
71 Self::Stopping => write!(f, "stopping"),
72 Self::Terminated => write!(f, "terminated"),
73 }
74 }
75}
76
77impl ServicePhase {
78 pub fn is_terminal(&self) -> bool {
80 matches!(self, Self::Terminated)
81 }
82
83 pub fn is_alive(&self) -> bool {
85 matches!(self, Self::Starting | Self::Running | Self::BackingOff)
86 }
87}
88
/// Reason recorded when a lifecycle is moved to `ServicePhase::Terminated`.
///
/// The `Display` impl renders each variant as a short lowercase phrase.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TerminationReason {
    /// Rendered as `completed`.
    Completed,
    /// Rendered as `cancelled`.
    Cancelled,
    /// Rendered as `circuit breaker open ({failures}/{max_retries} failures)`.
    CircuitBreakerOpen {
        /// Failure count supplied by the caller.
        failures: u32,
        /// Retry limit supplied by the caller (presumably the threshold the
        /// failure count was compared against — confirm with the caller).
        max_retries: u32,
    },
    /// Carries a free-form message; rendered as `unrecoverable: {message}`.
    Unrecoverable(String),
}
111
112impl fmt::Display for TerminationReason {
113 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
114 match self {
115 Self::Completed => write!(f, "completed"),
116 Self::Cancelled => write!(f, "cancelled"),
117 Self::CircuitBreakerOpen {
118 failures,
119 max_retries,
120 } => write!(
121 f,
122 "circuit breaker open ({failures}/{max_retries} failures)"
123 ),
124 Self::Unrecoverable(msg) => write!(f, "unrecoverable: {msg}"),
125 }
126 }
127}
128
129#[derive(Debug, Clone, thiserror::Error)]
135#[error("invalid lifecycle transition: {from} → {to}")]
136pub struct TransitionError {
137 pub from: ServicePhase,
139 pub to: ServicePhase,
141}
142
/// Tracks one service's phase together with start/failure counters and
/// timing information.
///
/// All fields are private; state changes go through the `transition_to_*`
/// methods, which validate each move against the allowed transitions.
#[derive(Debug, Clone)]
pub struct ServiceLifecycle {
    /// Current phase.
    phase: ServicePhase,
    /// Name supplied at construction; used in log events and `Display`.
    service_name: String,
    /// Instant at which this lifecycle was created.
    created_at: Instant,
    /// Instant of the most recent phase change.
    phase_entered_at: Instant,
    /// Number of starts: 1 on creation, incremented on every restart.
    start_count: u32,
    /// Number of transitions into `BackingOff`.
    total_failures: u32,
    /// Error message passed to the most recent `BackingOff` transition.
    last_error: Option<String>,
    /// Reason stored when the lifecycle was terminated.
    termination_reason: Option<TerminationReason>,
    /// Sum of all finished `Running` intervals.
    cumulative_running: Duration,
    /// Start of the `Running` interval in progress, if any.
    running_since: Option<Instant>,
}
165
166impl ServiceLifecycle {
167 pub fn new(service_name: impl Into<String>) -> Self {
169 let now = Instant::now();
170 Self {
171 phase: ServicePhase::Starting,
172 service_name: service_name.into(),
173 created_at: now,
174 phase_entered_at: now,
175 start_count: 1,
176 total_failures: 0,
177 last_error: None,
178 termination_reason: None,
179 cumulative_running: Duration::ZERO,
180 running_since: None,
181 }
182 }
183
184 pub fn phase(&self) -> ServicePhase {
188 self.phase
189 }
190
191 pub fn service_name(&self) -> &str {
193 &self.service_name
194 }
195
196 pub fn age(&self) -> Duration {
198 self.created_at.elapsed()
199 }
200
201 pub fn time_in_current_phase(&self) -> Duration {
203 self.phase_entered_at.elapsed()
204 }
205
206 pub fn start_count(&self) -> u32 {
208 self.start_count
209 }
210
211 pub fn total_failures(&self) -> u32 {
213 self.total_failures
214 }
215
216 pub fn last_error(&self) -> Option<&str> {
218 self.last_error.as_deref()
219 }
220
221 pub fn termination_reason(&self) -> Option<&TerminationReason> {
223 self.termination_reason.as_ref()
224 }
225
226 pub fn cumulative_running_time(&self) -> Duration {
230 let extra = self
231 .running_since
232 .map(|since| since.elapsed())
233 .unwrap_or(Duration::ZERO);
234 self.cumulative_running + extra
235 }
236
237 pub fn transition_to_running(&mut self) -> Result<(), TransitionError> {
241 self.validate_transition(ServicePhase::Running)?;
242 self.set_phase(ServicePhase::Running);
243 self.running_since = Some(Instant::now());
244 tracing::info!(
245 service = %self.service_name,
246 start_count = self.start_count,
247 "service entered Running phase"
248 );
249 Ok(())
250 }
251
252 pub fn transition_to_backing_off(
254 &mut self,
255 error: &str,
256 backoff_duration: Duration,
257 ) -> Result<(), TransitionError> {
258 self.validate_transition(ServicePhase::BackingOff)?;
259 self.accumulate_running_time();
260 self.total_failures += 1;
261 self.last_error = Some(error.to_string());
262 self.set_phase(ServicePhase::BackingOff);
263 tracing::warn!(
264 service = %self.service_name,
265 error = %error,
266 attempt = self.total_failures,
267 backoff_ms = backoff_duration.as_millis() as u64,
268 "service failed, entering BackingOff phase"
269 );
270 Ok(())
271 }
272
273 pub fn transition_to_restarting(&mut self) -> Result<(), TransitionError> {
275 self.validate_transition(ServicePhase::Starting)?;
276 self.start_count += 1;
277 self.set_phase(ServicePhase::Starting);
278 tracing::info!(
279 service = %self.service_name,
280 start_count = self.start_count,
281 "service restarting (entering Starting phase)"
282 );
283 Ok(())
284 }
285
286 pub fn transition_to_stopping(&mut self) -> Result<(), TransitionError> {
288 self.validate_transition(ServicePhase::Stopping)?;
289 self.accumulate_running_time();
290 self.set_phase(ServicePhase::Stopping);
291 tracing::info!(
292 service = %self.service_name,
293 "service entering Stopping phase"
294 );
295 Ok(())
296 }
297
298 pub fn transition_to_terminated(
300 &mut self,
301 reason: TerminationReason,
302 ) -> Result<(), TransitionError> {
303 self.validate_transition(ServicePhase::Terminated)?;
304 self.accumulate_running_time();
305 self.termination_reason = Some(reason.clone());
306 self.set_phase(ServicePhase::Terminated);
307 tracing::info!(
308 service = %self.service_name,
309 reason = %reason,
310 total_starts = self.start_count,
311 total_failures = self.total_failures,
312 cumulative_running_secs = self.cumulative_running.as_secs_f64(),
313 "service terminated"
314 );
315 Ok(())
316 }
317
318 fn validate_transition(&self, target: ServicePhase) -> Result<(), TransitionError> {
321 let valid = match (self.phase, target) {
322 (ServicePhase::Starting, ServicePhase::Running) => true,
323 (ServicePhase::Starting, ServicePhase::Terminated) => true,
324 (ServicePhase::Starting, ServicePhase::Stopping) => true,
325 (ServicePhase::Starting, ServicePhase::BackingOff) => true,
326
327 (ServicePhase::Running, ServicePhase::BackingOff) => true,
328 (ServicePhase::Running, ServicePhase::Stopping) => true,
329 (ServicePhase::Running, ServicePhase::Terminated) => true,
330
331 (ServicePhase::BackingOff, ServicePhase::Starting) => true,
332 (ServicePhase::BackingOff, ServicePhase::Stopping) => true,
333 (ServicePhase::BackingOff, ServicePhase::Terminated) => true,
334
335 (ServicePhase::Stopping, ServicePhase::Terminated) => true,
336
337 (ServicePhase::Terminated, _) => false,
338
339 _ => false,
340 };
341
342 if valid {
343 Ok(())
344 } else {
345 Err(TransitionError {
346 from: self.phase,
347 to: target,
348 })
349 }
350 }
351
352 fn set_phase(&mut self, phase: ServicePhase) {
353 self.phase = phase;
354 self.phase_entered_at = Instant::now();
355 }
356
357 fn accumulate_running_time(&mut self) {
358 if let Some(since) = self.running_since.take() {
359 self.cumulative_running += since.elapsed();
360 }
361 }
362}
363
364impl fmt::Display for ServiceLifecycle {
365 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
366 write!(
367 f,
368 "{}[{}] starts={} failures={} running={:.1}s",
369 self.service_name,
370 self.phase,
371 self.start_count,
372 self.total_failures,
373 self.cumulative_running_time().as_secs_f64(),
374 )
375 }
376}
377
/// Serializable, plain-data view of a `ServiceLifecycle`.
///
/// Built through `From<&ServiceLifecycle>`, which stores durations as
/// fractional seconds and the termination reason as its `Display` text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceLifecycleSnapshot {
    /// Name of the service.
    pub service_name: String,
    /// Phase at the time the snapshot was taken.
    pub phase: ServicePhase,
    /// Number of starts, initial start included.
    pub start_count: u32,
    /// Number of recorded failures.
    pub total_failures: u32,
    /// Message of the most recent failure, if any.
    pub last_error: Option<String>,
    /// Total time spent running, in seconds.
    pub cumulative_running_secs: f64,
    /// Time since the lifecycle was created, in seconds.
    pub age_secs: f64,
    /// Time since the most recent phase change, in seconds.
    pub time_in_phase_secs: f64,
    /// `Display` text of the termination reason, if terminated.
    pub termination_reason: Option<String>,
}
405
406impl From<&ServiceLifecycle> for ServiceLifecycleSnapshot {
407 fn from(lc: &ServiceLifecycle) -> Self {
408 Self {
409 service_name: lc.service_name.clone(),
410 phase: lc.phase,
411 start_count: lc.start_count,
412 total_failures: lc.total_failures,
413 last_error: lc.last_error.clone(),
414 cumulative_running_secs: lc.cumulative_running_time().as_secs_f64(),
415 age_secs: lc.age().as_secs_f64(),
416 time_in_phase_secs: lc.time_in_current_phase().as_secs_f64(),
417 termination_reason: lc.termination_reason.as_ref().map(|r| r.to_string()),
418 }
419 }
420}
421
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a lifecycle named `name` that has already moved
    /// `Starting → Running`.
    fn running(name: &str) -> ServiceLifecycle {
        let mut lifecycle = ServiceLifecycle::new(name);
        lifecycle.transition_to_running().unwrap();
        lifecycle
    }

    // A new lifecycle starts in `Starting` with one start and a clean record.
    #[test]
    fn test_new_lifecycle_starts_in_starting() {
        let fresh = ServiceLifecycle::new("test-svc");
        assert_eq!(fresh.phase(), ServicePhase::Starting);
        assert_eq!(fresh.start_count(), 1);
        assert_eq!(fresh.total_failures(), 0);
        assert!(fresh.last_error().is_none());
        assert!(fresh.termination_reason().is_none());
    }

    #[test]
    fn test_service_name() {
        assert_eq!(
            ServiceLifecycle::new("data-service").service_name(),
            "data-service"
        );
    }

    // Starting → Running → Stopping → Terminated.
    #[test]
    fn test_happy_path_starting_to_running_to_stopping_to_terminated() {
        let mut svc = running("happy");
        assert_eq!(svc.phase(), ServicePhase::Running);

        svc.transition_to_stopping().unwrap();
        assert_eq!(svc.phase(), ServicePhase::Stopping);

        svc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();
        assert_eq!(svc.phase(), ServicePhase::Terminated);
        assert_eq!(svc.termination_reason(), Some(&TerminationReason::Cancelled));
    }

    // Running → BackingOff → Starting → Running, checking the counters.
    #[test]
    fn test_failure_and_restart_cycle() {
        let mut svc = running("flaky");
        assert_eq!(svc.start_count(), 1);

        svc.transition_to_backing_off("connection refused", Duration::from_millis(200))
            .unwrap();
        assert_eq!(svc.phase(), ServicePhase::BackingOff);
        assert_eq!(svc.total_failures(), 1);
        assert_eq!(svc.last_error(), Some("connection refused"));

        svc.transition_to_restarting().unwrap();
        assert_eq!(svc.phase(), ServicePhase::Starting);
        assert_eq!(svc.start_count(), 2);

        svc.transition_to_running().unwrap();
        assert_eq!(svc.phase(), ServicePhase::Running);
    }

    #[test]
    fn test_circuit_breaker_termination() {
        let mut svc = running("breaker");
        svc.transition_to_backing_off("error 1", Duration::from_millis(100))
            .unwrap();

        let reason = TerminationReason::CircuitBreakerOpen {
            failures: 10,
            max_retries: 10,
        };
        svc.transition_to_terminated(reason).unwrap();

        assert_eq!(svc.phase(), ServicePhase::Terminated);
        assert!(matches!(
            svc.termination_reason(),
            Some(TerminationReason::CircuitBreakerOpen { .. })
        ));
    }

    #[test]
    fn test_completed_termination_from_running() {
        let mut svc = running("one-shot");
        svc.transition_to_terminated(TerminationReason::Completed)
            .unwrap();
        assert_eq!(svc.phase(), ServicePhase::Terminated);
        assert_eq!(svc.termination_reason(), Some(&TerminationReason::Completed));
    }

    // Nothing is accepted once the lifecycle is terminated.
    #[test]
    fn test_invalid_transition_terminated_to_anything() {
        let mut svc = running("dead");
        svc.transition_to_terminated(TerminationReason::Completed)
            .unwrap();

        assert!(svc.transition_to_running().is_err());
        assert!(svc.transition_to_stopping().is_err());
        assert!(svc
            .transition_to_terminated(TerminationReason::Cancelled)
            .is_err());
        assert!(svc.transition_to_restarting().is_err());
    }

    #[test]
    fn test_invalid_transition_running_to_starting() {
        let mut svc = running("bad");

        let TransitionError { from, to } = svc.transition_to_restarting().unwrap_err();
        assert_eq!(from, ServicePhase::Running);
        assert_eq!(to, ServicePhase::Starting);
    }

    #[test]
    fn test_stopping_from_backing_off() {
        let mut svc = running("interrupted");
        svc.transition_to_backing_off("timeout", Duration::from_secs(5))
            .unwrap();

        svc.transition_to_stopping().unwrap();
        assert_eq!(svc.phase(), ServicePhase::Stopping);

        svc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();
        assert_eq!(svc.phase(), ServicePhase::Terminated);
    }

    #[test]
    fn test_starting_directly_to_terminated() {
        let mut svc = ServiceLifecycle::new("init-fail");
        let reason = TerminationReason::Unrecoverable("missing config".to_string());
        svc.transition_to_terminated(reason).unwrap();
        assert_eq!(svc.phase(), ServicePhase::Terminated);
    }

    #[test]
    fn test_starting_to_backing_off() {
        let mut svc = ServiceLifecycle::new("init-retry");
        svc.transition_to_backing_off("db connect timeout", Duration::from_millis(500))
            .unwrap();
        assert_eq!(svc.phase(), ServicePhase::BackingOff);
        assert_eq!(svc.total_failures(), 1);
    }

    #[test]
    fn test_phase_display() {
        let expected = [
            (ServicePhase::Starting, "starting"),
            (ServicePhase::Running, "running"),
            (ServicePhase::BackingOff, "backing_off"),
            (ServicePhase::Stopping, "stopping"),
            (ServicePhase::Terminated, "terminated"),
        ];
        for &(phase, label) in &expected {
            assert_eq!(phase.to_string(), label);
        }
    }

    #[test]
    fn test_phase_is_terminal() {
        let expected = [
            (ServicePhase::Starting, false),
            (ServicePhase::Running, false),
            (ServicePhase::BackingOff, false),
            (ServicePhase::Stopping, false),
            (ServicePhase::Terminated, true),
        ];
        for &(phase, terminal) in &expected {
            assert_eq!(phase.is_terminal(), terminal, "phase: {}", phase);
        }
    }

    #[test]
    fn test_phase_is_alive() {
        let expected = [
            (ServicePhase::Starting, true),
            (ServicePhase::Running, true),
            (ServicePhase::BackingOff, true),
            (ServicePhase::Stopping, false),
            (ServicePhase::Terminated, false),
        ];
        for &(phase, alive) in &expected {
            assert_eq!(phase.is_alive(), alive, "phase: {}", phase);
        }
    }

    #[test]
    fn test_lifecycle_display() {
        let rendered = ServiceLifecycle::new("display-test").to_string();
        let fragments = ["display-test", "starting", "starts=1", "failures=0"];
        for fragment in fragments.iter() {
            assert!(rendered.contains(*fragment));
        }
    }

    #[test]
    fn test_snapshot_from_lifecycle() {
        let mut svc = running("snapshot-svc");
        svc.transition_to_backing_off("oops", Duration::from_millis(100))
            .unwrap();

        let snapshot = ServiceLifecycleSnapshot::from(&svc);
        assert_eq!(snapshot.service_name, "snapshot-svc");
        assert_eq!(snapshot.phase, ServicePhase::BackingOff);
        assert_eq!(snapshot.start_count, 1);
        assert_eq!(snapshot.total_failures, 1);
        assert_eq!(snapshot.last_error.as_deref(), Some("oops"));
        assert!(snapshot.termination_reason.is_none());
        assert!(snapshot.age_secs >= 0.0);
    }

    #[test]
    fn test_termination_reason_display() {
        assert_eq!(TerminationReason::Completed.to_string(), "completed");
        assert_eq!(TerminationReason::Cancelled.to_string(), "cancelled");

        let breaker = TerminationReason::CircuitBreakerOpen {
            failures: 5,
            max_retries: 5,
        };
        assert_eq!(breaker.to_string(), "circuit breaker open (5/5 failures)");

        let fatal = TerminationReason::Unrecoverable("bad config".into());
        assert_eq!(fatal.to_string(), "unrecoverable: bad config");
    }

    #[test]
    fn test_transition_error_display() {
        let rendered = TransitionError {
            from: ServicePhase::Terminated,
            to: ServicePhase::Running,
        }
        .to_string();
        assert_eq!(rendered, "invalid lifecycle transition: terminated → running");
    }

    // Five run/fail rounds with a restart between rounds (none after the last).
    #[test]
    fn test_multiple_failure_cycles_accumulate() {
        let mut svc = ServiceLifecycle::new("multi-fail");

        for attempt in 1u64..=5 {
            svc.transition_to_running().unwrap();
            let message = format!("error {attempt}");
            svc.transition_to_backing_off(&message, Duration::from_millis(100 * attempt))
                .unwrap();
            if attempt < 5 {
                svc.transition_to_restarting().unwrap();
            }
        }

        assert_eq!(svc.total_failures(), 5);
        assert_eq!(svc.start_count(), 5);
        assert_eq!(svc.last_error(), Some("error 5"));
    }

    #[test]
    fn test_stopping_from_starting() {
        let mut svc = ServiceLifecycle::new("early-stop");
        svc.transition_to_stopping().unwrap();
        assert_eq!(svc.phase(), ServicePhase::Stopping);
        svc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();
    }
}