Skip to main content

jflow_core/supervisor/
lifecycle.rs

1//! Service lifecycle state machine for the Janus Supervisor.
2//!
3//! Each service managed by the [`JanusSupervisor`](super::JanusSupervisor)
4//! progresses through a well-defined set of states:
5//!
6//! ```text
7//!   ┌──────────┐
8//!   │ Starting │──────────────────────────┐
9//!   └────┬─────┘                          │
10//!        │ run() entered                  │ init error
11//!        ▼                                ▼
12//!   ┌──────────┐                    ┌────────────┐
13//!   │ Running  │───── error ──────▶│ BackingOff │
14//!   └────┬─────┘                    └──────┬─────┘
15//!        │                                 │
16//!        │ cancel / Ok(())                 │ retry
17//!        │                                 │
18//!        │    ┌────────────────────────────┘
19//!        ▼    ▼
20//!   ┌──────────┐         ┌────────────┐
21//!   │ Stopping │────────▶│ Terminated │
22//!   └──────────┘         └────────────┘
23//! ```
24//!
25//! The state machine enforces deterministic behaviour:
26//!
27//! - **Starting**: The service is initializing resources (connections, channels).
28//! - **Running**: The service's `run()` loop is active.
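//! - **BackingOff**: The service failed and is waiting for the exponential
//!   backoff timer before the supervisor retries. When the timer expires the
//!   service re-enters `Starting` (that edge is not drawn in the diagram
//!   above); the `BackingOff` → `Stopping` edge is taken when a shutdown
//!   arrives during the wait.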
31//! - **Stopping**: A cancellation signal was received; the service is
32//!   finalizing (flushing WAL, closing connections).
33//! - **Terminated**: The service has exited cleanly (or the circuit breaker
34//!   tripped and the supervisor gave up). Terminal state.
35//!
36//! The `BackingOff` state prevents the supervisor from tight-looping on a
37//! persistent failure, which would burn CPU and flood logs.
38
39use std::fmt;
40use std::time::{Duration, Instant};
41
42// ---------------------------------------------------------------------------
43// ServicePhase — the raw enum
44// ---------------------------------------------------------------------------
45
46/// Lifecycle phase of a supervised service.
47///
48/// This is a plain enum without associated data; the richer context (timing,
49/// error info, attempt counts) lives in [`ServiceLifecycle`].
50#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
51#[serde(rename_all = "snake_case")]
52pub enum ServicePhase {
53    /// The service is initializing (connecting to databases, setting up
54    /// channels, etc.).
55    Starting,
56
57    /// The service's main `run()` loop is executing.
58    Running,
59
60    /// The service failed and is waiting for the backoff timer to expire
61    /// before the supervisor attempts a restart.
62    BackingOff,
63
64    /// A shutdown signal was received; the service is performing cleanup.
65    Stopping,
66
67    /// Terminal state — the service has exited. No further transitions are
68    /// valid.
69    Terminated,
70}
71
72impl fmt::Display for ServicePhase {
73    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
74        match self {
75            Self::Starting => write!(f, "starting"),
76            Self::Running => write!(f, "running"),
77            Self::BackingOff => write!(f, "backing_off"),
78            Self::Stopping => write!(f, "stopping"),
79            Self::Terminated => write!(f, "terminated"),
80        }
81    }
82}
83
84impl ServicePhase {
85    /// Returns `true` if the service is in a terminal state and will not
86    /// transition again.
87    pub fn is_terminal(&self) -> bool {
88        matches!(self, Self::Terminated)
89    }
90
91    /// Returns `true` if the service is considered "alive" (starting,
92    /// running, or backing off for a retry).
93    pub fn is_alive(&self) -> bool {
94        matches!(self, Self::Starting | Self::Running | Self::BackingOff)
95    }
96}
97
98// ---------------------------------------------------------------------------
99// TerminationReason
100// ---------------------------------------------------------------------------
101
/// The cause recorded when a service enters the `Terminated` phase.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TerminationReason {
    /// Clean completion: the service's `run()` returned `Ok(())`.
    Completed,

    /// Graceful shutdown: the supervisor's cancellation token fired.
    Cancelled,

    /// Too many consecutive failures tripped the circuit breaker.
    CircuitBreakerOpen {
        /// Failures observed within the circuit-breaker window.
        failures: u32,
        /// Configured failure limit at which the breaker trips.
        max_retries: u32,
    },

    /// An unrecoverable error occurred and the service's restart policy is
    /// [`RestartPolicy::Never`](super::RestartPolicy::Never). The payload
    /// is the error message.
    Unrecoverable(String),
}
123
124impl fmt::Display for TerminationReason {
125    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126        match self {
127            Self::Completed => write!(f, "completed"),
128            Self::Cancelled => write!(f, "cancelled"),
129            Self::CircuitBreakerOpen {
130                failures,
131                max_retries,
132            } => write!(
133                f,
134                "circuit breaker open ({failures}/{max_retries} failures)"
135            ),
136            Self::Unrecoverable(msg) => write!(f, "unrecoverable: {msg}"),
137        }
138    }
139}
140
141// ---------------------------------------------------------------------------
142// TransitionError
143// ---------------------------------------------------------------------------
144
145/// Error returned when an invalid state transition is attempted.
146#[derive(Debug, Clone, thiserror::Error)]
147#[error("invalid lifecycle transition: {from} → {to}")]
148pub struct TransitionError {
149    pub from: ServicePhase,
150    pub to: ServicePhase,
151}
152
153// ---------------------------------------------------------------------------
154// ServiceLifecycle — the state machine
155// ---------------------------------------------------------------------------
156
157/// Full lifecycle tracker for a single supervised service.
158///
159/// Wraps the [`ServicePhase`] enum with timing data, counters, and
160/// transition validation logic. The supervisor holds one of these per
161/// managed service.
162#[derive(Debug, Clone)]
163pub struct ServiceLifecycle {
164    /// Current phase.
165    phase: ServicePhase,
166
167    /// Name of the service (for logging/metrics).
168    service_name: String,
169
170    /// When the service first entered `Starting`.
171    created_at: Instant,
172
173    /// When the current phase was entered.
174    phase_entered_at: Instant,
175
176    /// Total number of times the service has been (re)started.
177    start_count: u32,
178
179    /// Total number of failures observed over the service's lifetime.
180    total_failures: u32,
181
182    /// The last error message if the service failed.
183    last_error: Option<String>,
184
185    /// Why the service terminated (only set in `Terminated` phase).
186    termination_reason: Option<TerminationReason>,
187
188    /// Cumulative time spent in the `Running` phase.
189    cumulative_running: Duration,
190
191    /// Snapshot of `phase_entered_at` when we last entered `Running`,
192    /// used to accumulate `cumulative_running` on exit.
193    running_since: Option<Instant>,
194}
195
196impl ServiceLifecycle {
197    /// Create a new lifecycle tracker in the `Starting` phase.
198    pub fn new(service_name: impl Into<String>) -> Self {
199        let now = Instant::now();
200        Self {
201            phase: ServicePhase::Starting,
202            service_name: service_name.into(),
203            created_at: now,
204            phase_entered_at: now,
205            start_count: 1,
206            total_failures: 0,
207            last_error: None,
208            termination_reason: None,
209            cumulative_running: Duration::ZERO,
210            running_since: None,
211        }
212    }
213
214    // ── Accessors ─────────────────────────────────────────────────────
215
216    /// Current lifecycle phase.
217    pub fn phase(&self) -> ServicePhase {
218        self.phase
219    }
220
221    /// Service name.
222    pub fn service_name(&self) -> &str {
223        &self.service_name
224    }
225
226    /// How long the service has existed (since first `Starting`).
227    pub fn age(&self) -> Duration {
228        self.created_at.elapsed()
229    }
230
231    /// How long the service has been in its current phase.
232    pub fn time_in_current_phase(&self) -> Duration {
233        self.phase_entered_at.elapsed()
234    }
235
236    /// Total number of times the service has been started.
237    pub fn start_count(&self) -> u32 {
238        self.start_count
239    }
240
241    /// Total failures over the service's lifetime.
242    pub fn total_failures(&self) -> u32 {
243        self.total_failures
244    }
245
246    /// The last error message, if any.
247    pub fn last_error(&self) -> Option<&str> {
248        self.last_error.as_deref()
249    }
250
251    /// Why the service terminated (only `Some` when phase is `Terminated`).
252    pub fn termination_reason(&self) -> Option<&TerminationReason> {
253        self.termination_reason.as_ref()
254    }
255
256    /// Cumulative wall-clock time spent in the `Running` phase.
257    ///
258    /// If the service is currently running, includes time up to *now*.
259    pub fn cumulative_running_time(&self) -> Duration {
260        let extra = self
261            .running_since
262            .map(|since| since.elapsed())
263            .unwrap_or(Duration::ZERO);
264        self.cumulative_running + extra
265    }
266
267    // ── Transitions ───────────────────────────────────────────────────
268
269    /// Transition from `Starting` → `Running`.
270    ///
271    /// Called when the service's `run()` method is entered.
272    pub fn transition_to_running(&mut self) -> Result<(), TransitionError> {
273        self.validate_transition(ServicePhase::Running)?;
274        self.set_phase(ServicePhase::Running);
275        self.running_since = Some(Instant::now());
276        tracing::info!(
277            service = %self.service_name,
278            start_count = self.start_count,
279            "service entered Running phase"
280        );
281        Ok(())
282    }
283
284    /// Transition from `Running` → `BackingOff`.
285    ///
286    /// Called when the service's `run()` returns an `Err`.
287    pub fn transition_to_backing_off(
288        &mut self,
289        error: &str,
290        backoff_duration: Duration,
291    ) -> Result<(), TransitionError> {
292        self.validate_transition(ServicePhase::BackingOff)?;
293        self.accumulate_running_time();
294        self.total_failures += 1;
295        self.last_error = Some(error.to_string());
296        self.set_phase(ServicePhase::BackingOff);
297        tracing::warn!(
298            service = %self.service_name,
299            error = %error,
300            attempt = self.total_failures,
301            backoff_ms = backoff_duration.as_millis() as u64,
302            "service failed, entering BackingOff phase"
303        );
304        Ok(())
305    }
306
307    /// Transition from `BackingOff` → `Starting` (retry).
308    ///
309    /// Called when the backoff timer has expired and the supervisor is
310    /// about to restart the service.
311    pub fn transition_to_restarting(&mut self) -> Result<(), TransitionError> {
312        // BackingOff → Starting is a restart
313        self.validate_transition(ServicePhase::Starting)?;
314        self.start_count += 1;
315        self.set_phase(ServicePhase::Starting);
316        tracing::info!(
317            service = %self.service_name,
318            start_count = self.start_count,
319            "service restarting (entering Starting phase)"
320        );
321        Ok(())
322    }
323
324    /// Transition to `Stopping` from any alive phase.
325    ///
326    /// Called when the cancellation token is triggered.
327    pub fn transition_to_stopping(&mut self) -> Result<(), TransitionError> {
328        self.validate_transition(ServicePhase::Stopping)?;
329        self.accumulate_running_time();
330        self.set_phase(ServicePhase::Stopping);
331        tracing::info!(
332            service = %self.service_name,
333            "service entering Stopping phase"
334        );
335        Ok(())
336    }
337
338    /// Transition to `Terminated` from `Stopping`, `Running`, `Starting`,
339    /// or `BackingOff`.
340    ///
341    /// This is the terminal state; no further transitions are allowed.
342    pub fn transition_to_terminated(
343        &mut self,
344        reason: TerminationReason,
345    ) -> Result<(), TransitionError> {
346        self.validate_transition(ServicePhase::Terminated)?;
347        self.accumulate_running_time();
348        self.termination_reason = Some(reason.clone());
349        self.set_phase(ServicePhase::Terminated);
350        tracing::info!(
351            service = %self.service_name,
352            reason = %reason,
353            total_starts = self.start_count,
354            total_failures = self.total_failures,
355            cumulative_running_secs = self.cumulative_running.as_secs_f64(),
356            "service terminated"
357        );
358        Ok(())
359    }
360
361    // ── Internal helpers ──────────────────────────────────────────────
362
363    /// Validate that transitioning from the current phase to `target` is
364    /// legal.
365    fn validate_transition(&self, target: ServicePhase) -> Result<(), TransitionError> {
366        let valid = match (self.phase, target) {
367            // Starting can go to Running or Terminated (init failure) or Stopping
368            (ServicePhase::Starting, ServicePhase::Running) => true,
369            (ServicePhase::Starting, ServicePhase::Terminated) => true,
370            (ServicePhase::Starting, ServicePhase::Stopping) => true,
371            // Also allow Starting → BackingOff for init errors that are retryable
372            (ServicePhase::Starting, ServicePhase::BackingOff) => true,
373
374            // Running can go to BackingOff (failure), Stopping, or Terminated
375            (ServicePhase::Running, ServicePhase::BackingOff) => true,
376            (ServicePhase::Running, ServicePhase::Stopping) => true,
377            (ServicePhase::Running, ServicePhase::Terminated) => true,
378
379            // BackingOff can go to Starting (retry), Stopping, or Terminated
380            (ServicePhase::BackingOff, ServicePhase::Starting) => true,
381            (ServicePhase::BackingOff, ServicePhase::Stopping) => true,
382            (ServicePhase::BackingOff, ServicePhase::Terminated) => true,
383
384            // Stopping can only go to Terminated
385            (ServicePhase::Stopping, ServicePhase::Terminated) => true,
386
387            // Terminated is terminal — nothing is valid
388            (ServicePhase::Terminated, _) => false,
389
390            // Everything else is invalid
391            _ => false,
392        };
393
394        if valid {
395            Ok(())
396        } else {
397            Err(TransitionError {
398                from: self.phase,
399                to: target,
400            })
401        }
402    }
403
404    /// Set the phase and update `phase_entered_at`.
405    fn set_phase(&mut self, phase: ServicePhase) {
406        self.phase = phase;
407        self.phase_entered_at = Instant::now();
408    }
409
410    /// If we were in Running, accumulate the elapsed running time and
411    /// clear the `running_since` marker.
412    fn accumulate_running_time(&mut self) {
413        if let Some(since) = self.running_since.take() {
414            self.cumulative_running += since.elapsed();
415        }
416    }
417}
418
419impl fmt::Display for ServiceLifecycle {
420    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
421        write!(
422            f,
423            "{}[{}] starts={} failures={} running={:.1}s",
424            self.service_name,
425            self.phase,
426            self.start_count,
427            self.total_failures,
428            self.cumulative_running_time().as_secs_f64(),
429        )
430    }
431}
432
433// ---------------------------------------------------------------------------
434// Serializable snapshot for API / metrics
435// ---------------------------------------------------------------------------
436
437/// A point-in-time snapshot of a service's lifecycle, suitable for
438/// serialization into JSON (e.g., for the `/api/health` endpoint).
439#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
440pub struct ServiceLifecycleSnapshot {
441    pub service_name: String,
442    pub phase: ServicePhase,
443    pub start_count: u32,
444    pub total_failures: u32,
445    pub last_error: Option<String>,
446    pub cumulative_running_secs: f64,
447    pub age_secs: f64,
448    pub time_in_phase_secs: f64,
449    pub termination_reason: Option<String>,
450}
451
452impl From<&ServiceLifecycle> for ServiceLifecycleSnapshot {
453    fn from(lc: &ServiceLifecycle) -> Self {
454        Self {
455            service_name: lc.service_name.clone(),
456            phase: lc.phase,
457            start_count: lc.start_count,
458            total_failures: lc.total_failures,
459            last_error: lc.last_error.clone(),
460            cumulative_running_secs: lc.cumulative_running_time().as_secs_f64(),
461            age_secs: lc.age().as_secs_f64(),
462            time_in_phase_secs: lc.time_in_current_phase().as_secs_f64(),
463            termination_reason: lc.termination_reason.as_ref().map(|r| r.to_string()),
464        }
465    }
466}
467
468// ===========================================================================
469// Tests
470// ===========================================================================
471
/// Unit tests for the lifecycle state machine: valid and invalid
/// transitions, counters, display formats, and snapshot conversion.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_new_lifecycle_starts_in_starting() {
        let lc = ServiceLifecycle::new("test-svc");
        assert_eq!(lc.phase(), ServicePhase::Starting);
        assert_eq!(lc.start_count(), 1);
        assert_eq!(lc.total_failures(), 0);
        assert!(lc.last_error().is_none());
        assert!(lc.termination_reason().is_none());
    }

    #[test]
    fn test_service_name() {
        let lc = ServiceLifecycle::new("data-service");
        assert_eq!(lc.service_name(), "data-service");
    }

    #[test]
    fn test_happy_path_starting_to_running_to_stopping_to_terminated() {
        let mut lc = ServiceLifecycle::new("happy");

        lc.transition_to_running().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Running);

        lc.transition_to_stopping().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Stopping);

        lc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();
        assert_eq!(lc.phase(), ServicePhase::Terminated);
        assert_eq!(lc.termination_reason(), Some(&TerminationReason::Cancelled));
    }

    #[test]
    fn test_failure_and_restart_cycle() {
        let mut lc = ServiceLifecycle::new("flaky");

        // Start → Run → Fail (BackingOff) → Restart (Starting) → Run
        lc.transition_to_running().unwrap();
        assert_eq!(lc.start_count(), 1);

        lc.transition_to_backing_off("connection refused", Duration::from_millis(200))
            .unwrap();
        assert_eq!(lc.phase(), ServicePhase::BackingOff);
        assert_eq!(lc.total_failures(), 1);
        assert_eq!(lc.last_error(), Some("connection refused"));

        lc.transition_to_restarting().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Starting);
        assert_eq!(lc.start_count(), 2);

        lc.transition_to_running().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Running);
    }

    #[test]
    fn test_circuit_breaker_termination() {
        let mut lc = ServiceLifecycle::new("breaker");

        lc.transition_to_running().unwrap();
        lc.transition_to_backing_off("error 1", Duration::from_millis(100))
            .unwrap();

        lc.transition_to_terminated(TerminationReason::CircuitBreakerOpen {
            failures: 10,
            max_retries: 10,
        })
        .unwrap();

        assert_eq!(lc.phase(), ServicePhase::Terminated);
        assert!(matches!(
            lc.termination_reason(),
            Some(TerminationReason::CircuitBreakerOpen { .. })
        ));
    }

    #[test]
    fn test_completed_termination_from_running() {
        let mut lc = ServiceLifecycle::new("one-shot");

        lc.transition_to_running().unwrap();
        lc.transition_to_terminated(TerminationReason::Completed)
            .unwrap();

        assert_eq!(lc.phase(), ServicePhase::Terminated);
        assert_eq!(lc.termination_reason(), Some(&TerminationReason::Completed));
    }

    #[test]
    fn test_invalid_transition_terminated_to_anything() {
        let mut lc = ServiceLifecycle::new("dead");

        lc.transition_to_running().unwrap();
        lc.transition_to_terminated(TerminationReason::Completed)
            .unwrap();

        // Any further transition should fail
        assert!(lc.transition_to_running().is_err());
        assert!(lc.transition_to_stopping().is_err());
        assert!(
            lc.transition_to_terminated(TerminationReason::Cancelled)
                .is_err()
        );
        assert!(lc.transition_to_restarting().is_err());
    }

    #[test]
    fn test_invalid_transition_running_to_starting() {
        let mut lc = ServiceLifecycle::new("bad");

        lc.transition_to_running().unwrap();

        // Running → Starting is not valid (must go through BackingOff first)
        let err = lc.transition_to_restarting().unwrap_err();
        assert_eq!(err.from, ServicePhase::Running);
        assert_eq!(err.to, ServicePhase::Starting);
    }

    #[test]
    fn test_invalid_transitions_out_of_stopping() {
        let mut lc = ServiceLifecycle::new("stopping");

        lc.transition_to_running().unwrap();
        lc.transition_to_stopping().unwrap();

        // Stopping may only proceed to Terminated
        assert!(lc.transition_to_running().is_err());
        assert!(lc.transition_to_restarting().is_err());
        assert!(lc.transition_to_stopping().is_err());
        assert!(
            lc.transition_to_backing_off("late failure", Duration::from_millis(10))
                .is_err()
        );
        assert_eq!(lc.phase(), ServicePhase::Stopping);
        assert_eq!(lc.total_failures(), 0);
    }

    #[test]
    fn test_stopping_from_backing_off() {
        let mut lc = ServiceLifecycle::new("interrupted");

        lc.transition_to_running().unwrap();
        lc.transition_to_backing_off("timeout", Duration::from_secs(5))
            .unwrap();

        // Shutdown arrives while backing off
        lc.transition_to_stopping().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Stopping);

        lc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();
        assert_eq!(lc.phase(), ServicePhase::Terminated);
    }

    #[test]
    fn test_starting_directly_to_terminated() {
        let mut lc = ServiceLifecycle::new("init-fail");

        // If init fails catastrophically, we can go straight to Terminated
        lc.transition_to_terminated(TerminationReason::Unrecoverable(
            "missing config".to_string(),
        ))
        .unwrap();
        assert_eq!(lc.phase(), ServicePhase::Terminated);
    }

    #[test]
    fn test_starting_to_backing_off() {
        let mut lc = ServiceLifecycle::new("init-retry");

        // Init fails but is retryable
        lc.transition_to_backing_off("db connect timeout", Duration::from_millis(500))
            .unwrap();
        assert_eq!(lc.phase(), ServicePhase::BackingOff);
        assert_eq!(lc.total_failures(), 1);
    }

    #[test]
    fn test_phase_display() {
        assert_eq!(ServicePhase::Starting.to_string(), "starting");
        assert_eq!(ServicePhase::Running.to_string(), "running");
        assert_eq!(ServicePhase::BackingOff.to_string(), "backing_off");
        assert_eq!(ServicePhase::Stopping.to_string(), "stopping");
        assert_eq!(ServicePhase::Terminated.to_string(), "terminated");
    }

    #[test]
    fn test_phase_is_terminal() {
        assert!(!ServicePhase::Starting.is_terminal());
        assert!(!ServicePhase::Running.is_terminal());
        assert!(!ServicePhase::BackingOff.is_terminal());
        assert!(!ServicePhase::Stopping.is_terminal());
        assert!(ServicePhase::Terminated.is_terminal());
    }

    #[test]
    fn test_phase_is_alive() {
        assert!(ServicePhase::Starting.is_alive());
        assert!(ServicePhase::Running.is_alive());
        assert!(ServicePhase::BackingOff.is_alive());
        assert!(!ServicePhase::Stopping.is_alive());
        assert!(!ServicePhase::Terminated.is_alive());
    }

    #[test]
    fn test_lifecycle_display() {
        let lc = ServiceLifecycle::new("display-test");
        let display = format!("{lc}");
        assert!(display.contains("display-test"));
        assert!(display.contains("starting"));
        assert!(display.contains("starts=1"));
        assert!(display.contains("failures=0"));
    }

    #[test]
    fn test_snapshot_from_lifecycle() {
        let mut lc = ServiceLifecycle::new("snapshot-svc");
        lc.transition_to_running().unwrap();
        lc.transition_to_backing_off("oops", Duration::from_millis(100))
            .unwrap();

        let snap = ServiceLifecycleSnapshot::from(&lc);
        assert_eq!(snap.service_name, "snapshot-svc");
        assert_eq!(snap.phase, ServicePhase::BackingOff);
        assert_eq!(snap.start_count, 1);
        assert_eq!(snap.total_failures, 1);
        assert_eq!(snap.last_error.as_deref(), Some("oops"));
        assert!(snap.termination_reason.is_none());
        assert!(snap.age_secs >= 0.0);
    }

    #[test]
    fn test_snapshot_includes_termination_reason() {
        let mut lc = ServiceLifecycle::new("snapshot-term");
        lc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();

        let snap = ServiceLifecycleSnapshot::from(&lc);
        assert_eq!(snap.phase, ServicePhase::Terminated);
        assert_eq!(snap.termination_reason.as_deref(), Some("cancelled"));
    }

    #[test]
    fn test_termination_reason_display() {
        assert_eq!(TerminationReason::Completed.to_string(), "completed");
        assert_eq!(TerminationReason::Cancelled.to_string(), "cancelled");
        assert_eq!(
            TerminationReason::CircuitBreakerOpen {
                failures: 5,
                max_retries: 5
            }
            .to_string(),
            "circuit breaker open (5/5 failures)"
        );
        assert_eq!(
            TerminationReason::Unrecoverable("bad config".into()).to_string(),
            "unrecoverable: bad config"
        );
    }

    #[test]
    fn test_transition_error_display() {
        let err = TransitionError {
            from: ServicePhase::Terminated,
            to: ServicePhase::Running,
        };
        assert_eq!(
            err.to_string(),
            "invalid lifecycle transition: terminated → running"
        );
    }

    #[test]
    fn test_multiple_failure_cycles_accumulate() {
        let mut lc = ServiceLifecycle::new("multi-fail");

        // Five run/fail cycles; the last failure is left in BackingOff
        // without a restart, so starts and failures both end at 5.
        for i in 1..=5u64 {
            lc.transition_to_running().unwrap();
            lc.transition_to_backing_off(&format!("error {i}"), Duration::from_millis(100 * i))
                .unwrap();
            if i < 5 {
                lc.transition_to_restarting().unwrap();
            }
        }

        assert_eq!(lc.total_failures(), 5);
        assert_eq!(lc.start_count(), 5);
        assert_eq!(lc.last_error(), Some("error 5"));
    }

    #[test]
    fn test_stopping_from_starting() {
        let mut lc = ServiceLifecycle::new("early-stop");

        // Shutdown arrives before the service even starts running
        lc.transition_to_stopping().unwrap();
        assert_eq!(lc.phase(), ServicePhase::Stopping);

        lc.transition_to_terminated(TerminationReason::Cancelled)
            .unwrap();
    }
}