1use std::fmt;
40use std::time::{Duration, Instant};
41
42#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
51#[serde(rename_all = "snake_case")]
52pub enum ServicePhase {
53 Starting,
56
57 Running,
59
60 BackingOff,
63
64 Stopping,
66
67 Terminated,
70}
71
72impl fmt::Display for ServicePhase {
73 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
74 match self {
75 Self::Starting => write!(f, "starting"),
76 Self::Running => write!(f, "running"),
77 Self::BackingOff => write!(f, "backing_off"),
78 Self::Stopping => write!(f, "stopping"),
79 Self::Terminated => write!(f, "terminated"),
80 }
81 }
82}
83
84impl ServicePhase {
85 pub fn is_terminal(&self) -> bool {
88 matches!(self, Self::Terminated)
89 }
90
91 pub fn is_alive(&self) -> bool {
94 matches!(self, Self::Starting | Self::Running | Self::BackingOff)
95 }
96}
97
/// Why a service lifecycle ended; stored by the lifecycle when it is terminated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TerminationReason {
    /// Displayed as `completed`.
    Completed,

    /// Displayed as `cancelled`.
    Cancelled,

    /// Displayed as `circuit breaker open (<failures>/<max_retries> failures)`.
    CircuitBreakerOpen {
        /// Failure count reported alongside the limit.
        failures: u32,
        /// Retry limit reported alongside the failure count.
        max_retries: u32,
    },

    /// Displayed as `unrecoverable: <message>`; carries a free-form message.
    Unrecoverable(String),
}
123
124impl fmt::Display for TerminationReason {
125 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126 match self {
127 Self::Completed => write!(f, "completed"),
128 Self::Cancelled => write!(f, "cancelled"),
129 Self::CircuitBreakerOpen {
130 failures,
131 max_retries,
132 } => write!(
133 f,
134 "circuit breaker open ({failures}/{max_retries} failures)"
135 ),
136 Self::Unrecoverable(msg) => write!(f, "unrecoverable: {msg}"),
137 }
138 }
139}
140
141#[derive(Debug, Clone, thiserror::Error)]
147#[error("invalid lifecycle transition: {from} → {to}")]
148pub struct TransitionError {
149 pub from: ServicePhase,
150 pub to: ServicePhase,
151}
152
153#[derive(Debug, Clone)]
163pub struct ServiceLifecycle {
164 phase: ServicePhase,
166
167 service_name: String,
169
170 created_at: Instant,
172
173 phase_entered_at: Instant,
175
176 start_count: u32,
178
179 total_failures: u32,
181
182 last_error: Option<String>,
184
185 termination_reason: Option<TerminationReason>,
187
188 cumulative_running: Duration,
190
191 running_since: Option<Instant>,
194}
195
196impl ServiceLifecycle {
197 pub fn new(service_name: impl Into<String>) -> Self {
199 let now = Instant::now();
200 Self {
201 phase: ServicePhase::Starting,
202 service_name: service_name.into(),
203 created_at: now,
204 phase_entered_at: now,
205 start_count: 1,
206 total_failures: 0,
207 last_error: None,
208 termination_reason: None,
209 cumulative_running: Duration::ZERO,
210 running_since: None,
211 }
212 }
213
214 pub fn phase(&self) -> ServicePhase {
218 self.phase
219 }
220
221 pub fn service_name(&self) -> &str {
223 &self.service_name
224 }
225
226 pub fn age(&self) -> Duration {
228 self.created_at.elapsed()
229 }
230
231 pub fn time_in_current_phase(&self) -> Duration {
233 self.phase_entered_at.elapsed()
234 }
235
236 pub fn start_count(&self) -> u32 {
238 self.start_count
239 }
240
241 pub fn total_failures(&self) -> u32 {
243 self.total_failures
244 }
245
246 pub fn last_error(&self) -> Option<&str> {
248 self.last_error.as_deref()
249 }
250
251 pub fn termination_reason(&self) -> Option<&TerminationReason> {
253 self.termination_reason.as_ref()
254 }
255
256 pub fn cumulative_running_time(&self) -> Duration {
260 let extra = self
261 .running_since
262 .map(|since| since.elapsed())
263 .unwrap_or(Duration::ZERO);
264 self.cumulative_running + extra
265 }
266
267 pub fn transition_to_running(&mut self) -> Result<(), TransitionError> {
273 self.validate_transition(ServicePhase::Running)?;
274 self.set_phase(ServicePhase::Running);
275 self.running_since = Some(Instant::now());
276 tracing::info!(
277 service = %self.service_name,
278 start_count = self.start_count,
279 "service entered Running phase"
280 );
281 Ok(())
282 }
283
284 pub fn transition_to_backing_off(
288 &mut self,
289 error: &str,
290 backoff_duration: Duration,
291 ) -> Result<(), TransitionError> {
292 self.validate_transition(ServicePhase::BackingOff)?;
293 self.accumulate_running_time();
294 self.total_failures += 1;
295 self.last_error = Some(error.to_string());
296 self.set_phase(ServicePhase::BackingOff);
297 tracing::warn!(
298 service = %self.service_name,
299 error = %error,
300 attempt = self.total_failures,
301 backoff_ms = backoff_duration.as_millis() as u64,
302 "service failed, entering BackingOff phase"
303 );
304 Ok(())
305 }
306
307 pub fn transition_to_restarting(&mut self) -> Result<(), TransitionError> {
312 self.validate_transition(ServicePhase::Starting)?;
314 self.start_count += 1;
315 self.set_phase(ServicePhase::Starting);
316 tracing::info!(
317 service = %self.service_name,
318 start_count = self.start_count,
319 "service restarting (entering Starting phase)"
320 );
321 Ok(())
322 }
323
324 pub fn transition_to_stopping(&mut self) -> Result<(), TransitionError> {
328 self.validate_transition(ServicePhase::Stopping)?;
329 self.accumulate_running_time();
330 self.set_phase(ServicePhase::Stopping);
331 tracing::info!(
332 service = %self.service_name,
333 "service entering Stopping phase"
334 );
335 Ok(())
336 }
337
338 pub fn transition_to_terminated(
343 &mut self,
344 reason: TerminationReason,
345 ) -> Result<(), TransitionError> {
346 self.validate_transition(ServicePhase::Terminated)?;
347 self.accumulate_running_time();
348 self.termination_reason = Some(reason.clone());
349 self.set_phase(ServicePhase::Terminated);
350 tracing::info!(
351 service = %self.service_name,
352 reason = %reason,
353 total_starts = self.start_count,
354 total_failures = self.total_failures,
355 cumulative_running_secs = self.cumulative_running.as_secs_f64(),
356 "service terminated"
357 );
358 Ok(())
359 }
360
361 fn validate_transition(&self, target: ServicePhase) -> Result<(), TransitionError> {
366 let valid = match (self.phase, target) {
367 (ServicePhase::Starting, ServicePhase::Running) => true,
369 (ServicePhase::Starting, ServicePhase::Terminated) => true,
370 (ServicePhase::Starting, ServicePhase::Stopping) => true,
371 (ServicePhase::Starting, ServicePhase::BackingOff) => true,
373
374 (ServicePhase::Running, ServicePhase::BackingOff) => true,
376 (ServicePhase::Running, ServicePhase::Stopping) => true,
377 (ServicePhase::Running, ServicePhase::Terminated) => true,
378
379 (ServicePhase::BackingOff, ServicePhase::Starting) => true,
381 (ServicePhase::BackingOff, ServicePhase::Stopping) => true,
382 (ServicePhase::BackingOff, ServicePhase::Terminated) => true,
383
384 (ServicePhase::Stopping, ServicePhase::Terminated) => true,
386
387 (ServicePhase::Terminated, _) => false,
389
390 _ => false,
392 };
393
394 if valid {
395 Ok(())
396 } else {
397 Err(TransitionError {
398 from: self.phase,
399 to: target,
400 })
401 }
402 }
403
404 fn set_phase(&mut self, phase: ServicePhase) {
406 self.phase = phase;
407 self.phase_entered_at = Instant::now();
408 }
409
410 fn accumulate_running_time(&mut self) {
413 if let Some(since) = self.running_since.take() {
414 self.cumulative_running += since.elapsed();
415 }
416 }
417}
418
419impl fmt::Display for ServiceLifecycle {
420 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
421 write!(
422 f,
423 "{}[{}] starts={} failures={} running={:.1}s",
424 self.service_name,
425 self.phase,
426 self.start_count,
427 self.total_failures,
428 self.cumulative_running_time().as_secs_f64(),
429 )
430 }
431}
432
433#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
440pub struct ServiceLifecycleSnapshot {
441 pub service_name: String,
442 pub phase: ServicePhase,
443 pub start_count: u32,
444 pub total_failures: u32,
445 pub last_error: Option<String>,
446 pub cumulative_running_secs: f64,
447 pub age_secs: f64,
448 pub time_in_phase_secs: f64,
449 pub termination_reason: Option<String>,
450}
451
452impl From<&ServiceLifecycle> for ServiceLifecycleSnapshot {
453 fn from(lc: &ServiceLifecycle) -> Self {
454 Self {
455 service_name: lc.service_name.clone(),
456 phase: lc.phase,
457 start_count: lc.start_count,
458 total_failures: lc.total_failures,
459 last_error: lc.last_error.clone(),
460 cumulative_running_secs: lc.cumulative_running_time().as_secs_f64(),
461 age_secs: lc.age().as_secs_f64(),
462 time_in_phase_secs: lc.time_in_current_phase().as_secs_f64(),
463 termination_reason: lc.termination_reason.as_ref().map(|r| r.to_string()),
464 }
465 }
466}
467
/// Unit tests for the lifecycle state machine, its display formats and snapshots.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_new_lifecycle_starts_in_starting() {
        let lc = ServiceLifecycle::new("test-svc");
        assert_eq!(lc.phase(), ServicePhase::Starting);
        assert_eq!(lc.start_count(), 1);
        assert_eq!(lc.total_failures(), 0);
        assert!(lc.last_error().is_none());
        assert!(lc.termination_reason().is_none());
    }

    #[test]
    fn test_service_name() {
        let lc = ServiceLifecycle::new("data-service");
        assert_eq!(lc.service_name(), "data-service");
    }

    #[test]
    fn test_happy_path_starting_to_running_to_stopping_to_terminated() {
        let mut lc = ServiceLifecycle::new("happy");

        lc.transition_to_running().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Running);

        lc.transition_to_stopping().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Stopping);

        lc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();
        assert_eq!(lc.phase(), ServicePhase::Terminated);
        assert_eq!(lc.termination_reason(), Some(&TerminationReason::Cancelled));
    }

    #[test]
    fn test_failure_and_restart_cycle() {
        let mut lc = ServiceLifecycle::new("flaky");

        lc.transition_to_running().unwrap();
        assert_eq!(lc.start_count(), 1);

        lc.transition_to_backing_off("connection refused", Duration::from_millis(200))
            .unwrap();
        assert_eq!(lc.phase(), ServicePhase::BackingOff);
        assert_eq!(lc.total_failures(), 1);
        assert_eq!(lc.last_error(), Some("connection refused"));

        lc.transition_to_restarting().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Starting);
        assert_eq!(lc.start_count(), 2);

        lc.transition_to_running().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Running);
    }

    #[test]
    fn test_circuit_breaker_termination() {
        let mut lc = ServiceLifecycle::new("breaker");

        lc.transition_to_running().unwrap();
        lc.transition_to_backing_off("error 1", Duration::from_millis(100))
            .unwrap();

        lc.transition_to_terminated(TerminationReason::CircuitBreakerOpen {
            failures: 10,
            max_retries: 10,
        })
        .unwrap();

        assert_eq!(lc.phase(), ServicePhase::Terminated);
        assert!(matches!(
            lc.termination_reason(),
            Some(TerminationReason::CircuitBreakerOpen { .. })
        ));
    }

    #[test]
    fn test_completed_termination_from_running() {
        let mut lc = ServiceLifecycle::new("one-shot");

        lc.transition_to_running().unwrap();
        lc.transition_to_terminated(TerminationReason::Completed)
            .unwrap();

        assert_eq!(lc.phase(), ServicePhase::Terminated);
        assert_eq!(lc.termination_reason(), Some(&TerminationReason::Completed));
    }

    #[test]
    fn test_invalid_transition_terminated_to_anything() {
        let mut lc = ServiceLifecycle::new("dead");

        lc.transition_to_running().unwrap();
        lc.transition_to_terminated(TerminationReason::Completed)
            .unwrap();

        // Terminated is final: every outgoing transition must be rejected.
        assert!(lc.transition_to_running().is_err());
        assert!(lc.transition_to_stopping().is_err());
        assert!(
            lc.transition_to_terminated(TerminationReason::Cancelled)
                .is_err()
        );
        assert!(lc.transition_to_restarting().is_err());
    }

    #[test]
    fn test_invalid_transition_running_to_starting() {
        let mut lc = ServiceLifecycle::new("bad");

        lc.transition_to_running().unwrap();

        let err = lc.transition_to_restarting().unwrap_err();
        assert_eq!(err.from, ServicePhase::Running);
        assert_eq!(err.to, ServicePhase::Starting);
    }

    #[test]
    fn test_stopping_from_backing_off() {
        let mut lc = ServiceLifecycle::new("interrupted");

        lc.transition_to_running().unwrap();
        lc.transition_to_backing_off("timeout", Duration::from_secs(5))
            .unwrap();

        lc.transition_to_stopping().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Stopping);

        lc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();
        assert_eq!(lc.phase(), ServicePhase::Terminated);
    }

    #[test]
    fn test_starting_directly_to_terminated() {
        let mut lc = ServiceLifecycle::new("init-fail");

        lc.transition_to_terminated(TerminationReason::Unrecoverable(
            "missing config".to_string(),
        ))
        .unwrap();
        assert_eq!(lc.phase(), ServicePhase::Terminated);
    }

    #[test]
    fn test_starting_to_backing_off() {
        let mut lc = ServiceLifecycle::new("init-retry");

        lc.transition_to_backing_off("db connect timeout", Duration::from_millis(500))
            .unwrap();
        assert_eq!(lc.phase(), ServicePhase::BackingOff);
        assert_eq!(lc.total_failures(), 1);
    }

    #[test]
    fn test_phase_display() {
        assert_eq!(ServicePhase::Starting.to_string(), "starting");
        assert_eq!(ServicePhase::Running.to_string(), "running");
        assert_eq!(ServicePhase::BackingOff.to_string(), "backing_off");
        assert_eq!(ServicePhase::Stopping.to_string(), "stopping");
        assert_eq!(ServicePhase::Terminated.to_string(), "terminated");
    }

    #[test]
    fn test_phase_is_terminal() {
        assert!(!ServicePhase::Starting.is_terminal());
        assert!(!ServicePhase::Running.is_terminal());
        assert!(!ServicePhase::BackingOff.is_terminal());
        assert!(!ServicePhase::Stopping.is_terminal());
        assert!(ServicePhase::Terminated.is_terminal());
    }

    #[test]
    fn test_phase_is_alive() {
        assert!(ServicePhase::Starting.is_alive());
        assert!(ServicePhase::Running.is_alive());
        assert!(ServicePhase::BackingOff.is_alive());
        assert!(!ServicePhase::Stopping.is_alive());
        assert!(!ServicePhase::Terminated.is_alive());
    }

    #[test]
    fn test_lifecycle_display() {
        let lc = ServiceLifecycle::new("display-test");
        let display = format!("{lc}");
        assert!(display.contains("display-test"));
        assert!(display.contains("starting"));
        assert!(display.contains("starts=1"));
        assert!(display.contains("failures=0"));
    }

    #[test]
    fn test_snapshot_from_lifecycle() {
        let mut lc = ServiceLifecycle::new("snapshot-svc");
        lc.transition_to_running().unwrap();
        lc.transition_to_backing_off("oops", Duration::from_millis(100))
            .unwrap();

        let snap = ServiceLifecycleSnapshot::from(&lc);
        assert_eq!(snap.service_name, "snapshot-svc");
        assert_eq!(snap.phase, ServicePhase::BackingOff);
        assert_eq!(snap.start_count, 1);
        assert_eq!(snap.total_failures, 1);
        assert_eq!(snap.last_error.as_deref(), Some("oops"));
        assert!(snap.termination_reason.is_none());
        assert!(snap.age_secs >= 0.0);
    }

    #[test]
    fn test_termination_reason_display() {
        assert_eq!(TerminationReason::Completed.to_string(), "completed");
        assert_eq!(TerminationReason::Cancelled.to_string(), "cancelled");
        assert_eq!(
            TerminationReason::CircuitBreakerOpen {
                failures: 5,
                max_retries: 5
            }
            .to_string(),
            "circuit breaker open (5/5 failures)"
        );
        assert_eq!(
            TerminationReason::Unrecoverable("bad config".into()).to_string(),
            "unrecoverable: bad config"
        );
    }

    #[test]
    fn test_transition_error_display() {
        let err = TransitionError {
            from: ServicePhase::Terminated,
            to: ServicePhase::Running,
        };
        assert_eq!(
            err.to_string(),
            "invalid lifecycle transition: terminated → running"
        );
    }

    #[test]
    fn test_multiple_failure_cycles_accumulate() {
        let mut lc = ServiceLifecycle::new("multi-fail");

        // Five run/fail rounds; all but the last are followed by a restart.
        for i in 1..=5u64 {
            lc.transition_to_running().unwrap();
            lc.transition_to_backing_off(&format!("error {i}"), Duration::from_millis(100 * i))
                .unwrap();
            if i < 5 {
                lc.transition_to_restarting().unwrap();
            }
        }

        assert_eq!(lc.total_failures(), 5);
        assert_eq!(lc.start_count(), 5);
        assert_eq!(lc.last_error(), Some("error 5"));
    }

    #[test]
    fn test_stopping_from_starting() {
        let mut lc = ServiceLifecycle::new("early-stop");

        lc.transition_to_stopping().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Stopping);

        lc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();
    }
}