Skip to main content

rust_supervisor/policy/
decision.rs

1//! Restart policy decisions for typed task exits.
2//!
3//! The module converts typed exits into explicit restart decisions. It does not
4//! inspect string messages and it does not own runtime state.
5
6use crate::error::types::TaskFailureKind;
7use crate::policy::backoff::BackoffPolicy;
8use serde::{Deserialize, Serialize};
9use std::time::Duration;
10
11/// Rule that decides whether a task exit is restartable.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
13pub enum RestartPolicy {
14    /// Restart after both successful and failed exits.
15    Permanent,
16    /// Restart after failed exits only.
17    Transient,
18    /// Never restart automatically.
19    Temporary,
20}
21
22/// Failure category consumed by the policy engine.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
24pub enum PolicyFailureKind {
25    /// A failure that may succeed on a later attempt.
26    Recoverable,
27    /// A configuration error that should stop the tree.
28    FatalConfig,
29    /// A code defect that should be escalated.
30    FatalBug,
31    /// A dependency failure that may be recoverable.
32    ExternalDependency,
33    /// The task exceeded its runtime budget.
34    Timeout,
35    /// The task panicked.
36    Panic,
37    /// The task was cancelled intentionally.
38    Cancelled,
39    /// The task missed its heartbeat budget.
40    Unhealthy,
41}
42
43impl From<TaskFailureKind> for PolicyFailureKind {
44    /// Maps a task failure kind into a policy failure kind.
45    fn from(value: TaskFailureKind) -> Self {
46        match value {
47            TaskFailureKind::Error => Self::Recoverable,
48            TaskFailureKind::Panic => Self::Panic,
49            TaskFailureKind::Timeout => Self::Timeout,
50            TaskFailureKind::Unhealthy => Self::Unhealthy,
51            TaskFailureKind::Cancelled => Self::Cancelled,
52        }
53    }
54}
55
56/// Typed exit information supplied to restart policy.
57#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
58pub enum TaskExit {
59    /// The task completed successfully.
60    Succeeded,
61    /// The task failed with a typed category.
62    Failed {
63        /// Failure category used for policy decisions.
64        kind: PolicyFailureKind,
65    },
66}
67
68/// Explicit decision returned by the policy engine.
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
70pub enum RestartDecision {
71    /// Do not restart the child.
72    DoNotRestart,
73    /// Restart after the supplied delay.
74    RestartAfter {
75        /// Delay before the next restart attempt.
76        delay: Duration,
77    },
78    /// Stop automatic restart and place the child in quarantine.
79    Quarantine,
80    /// Escalate the failure to the parent supervisor.
81    EscalateToParent,
82    /// Shut down the whole supervisor tree.
83    ShutdownTree,
84}
85
86/// Stateless restart policy engine.
87#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
88pub struct PolicyEngine;
89
90impl PolicyEngine {
91    /// Creates a policy engine.
92    ///
93    /// # Arguments
94    ///
95    /// This function has no arguments.
96    ///
97    /// # Returns
98    ///
99    /// Returns a [`PolicyEngine`].
100    ///
101    /// # Examples
102    ///
103    /// ```
104    /// let engine = rust_supervisor::policy::decision::PolicyEngine::new();
105    /// let _ = engine;
106    /// ```
107    pub fn new() -> Self {
108        Self
109    }
110
111    /// Decides the restart action for a typed exit.
112    ///
113    /// # Arguments
114    ///
115    /// - `policy`: Restart policy configured for the child.
116    /// - `exit`: Typed task exit.
117    /// - `attempt`: One-based restart attempt used for backoff.
118    /// - `backoff`: Backoff policy used when a restart is allowed.
119    ///
120    /// # Returns
121    ///
122    /// Returns a [`RestartDecision`] that the runtime can execute.
123    pub fn decide(
124        &self,
125        policy: RestartPolicy,
126        exit: TaskExit,
127        attempt: u64,
128        backoff: &BackoffPolicy,
129    ) -> RestartDecision {
130        match exit {
131            TaskExit::Succeeded => self.decide_success(policy, attempt, backoff),
132            TaskExit::Failed { kind } => self.decide_failure(policy, kind, attempt, backoff),
133        }
134    }
135
136    /// Decides behavior after successful completion.
137    ///
138    /// # Arguments
139    ///
140    /// - `policy`: Restart policy configured for the child.
141    /// - `attempt`: One-based restart attempt used for backoff.
142    /// - `backoff`: Backoff policy used when a restart is allowed.
143    ///
144    /// # Returns
145    ///
146    /// Returns a restart decision for a successful exit.
147    fn decide_success(
148        &self,
149        policy: RestartPolicy,
150        attempt: u64,
151        backoff: &BackoffPolicy,
152    ) -> RestartDecision {
153        match policy {
154            RestartPolicy::Permanent => RestartDecision::RestartAfter {
155                delay: backoff.delay_for_attempt(attempt),
156            },
157            RestartPolicy::Transient | RestartPolicy::Temporary => RestartDecision::DoNotRestart,
158        }
159    }
160
161    /// Decides behavior after a typed failure.
162    ///
163    /// # Arguments
164    ///
165    /// - `policy`: Restart policy configured for the child.
166    /// - `kind`: Failure kind supplied by the runner.
167    /// - `attempt`: One-based restart attempt used for backoff.
168    /// - `backoff`: Backoff policy used when a restart is allowed.
169    ///
170    /// # Returns
171    ///
172    /// Returns a restart decision for a failed exit.
173    fn decide_failure(
174        &self,
175        policy: RestartPolicy,
176        kind: PolicyFailureKind,
177        attempt: u64,
178        backoff: &BackoffPolicy,
179    ) -> RestartDecision {
180        match kind {
181            PolicyFailureKind::FatalConfig => RestartDecision::ShutdownTree,
182            PolicyFailureKind::FatalBug => RestartDecision::EscalateToParent,
183            PolicyFailureKind::Cancelled => RestartDecision::DoNotRestart,
184            _ => self.restartable_failure(policy, attempt, backoff),
185        }
186    }
187
188    /// Applies restart policy to a restartable failure.
189    ///
190    /// # Arguments
191    ///
192    /// - `policy`: Restart policy configured for the child.
193    /// - `attempt`: One-based restart attempt used for backoff.
194    /// - `backoff`: Backoff policy used when a restart is allowed.
195    ///
196    /// # Returns
197    ///
198    /// Returns a restart decision for a restartable failure.
199    fn restartable_failure(
200        &self,
201        policy: RestartPolicy,
202        attempt: u64,
203        backoff: &BackoffPolicy,
204    ) -> RestartDecision {
205        match policy {
206            RestartPolicy::Permanent | RestartPolicy::Transient => RestartDecision::RestartAfter {
207                delay: backoff.delay_for_attempt(attempt),
208            },
209            RestartPolicy::Temporary => RestartDecision::DoNotRestart,
210        }
211    }
212}