rust_supervisor/policy/decision.rs
1//! Restart policy decisions for typed task exits.
2//!
3//! The module converts typed exits into explicit restart decisions. It does not
4//! inspect string messages and it does not own runtime state.
5
6use crate::error::types::TaskFailureKind;
7use crate::policy::backoff::BackoffPolicy;
8use serde::{Deserialize, Serialize};
9use std::time::Duration;
10
11/// Rule that decides whether a task exit is restartable.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
13pub enum RestartPolicy {
14 /// Restart after both successful and failed exits.
15 Permanent,
16 /// Restart after failed exits only.
17 Transient,
18 /// Never restart automatically.
19 Temporary,
20}
21
22/// Failure category consumed by the policy engine.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
24pub enum PolicyFailureKind {
25 /// A failure that may succeed on a later attempt.
26 Recoverable,
27 /// A configuration error that should stop the tree.
28 FatalConfig,
29 /// A code defect that should be escalated.
30 FatalBug,
31 /// A dependency failure that may be recoverable.
32 ExternalDependency,
33 /// The task exceeded its runtime budget.
34 Timeout,
35 /// The task panicked.
36 Panic,
37 /// The task was cancelled intentionally.
38 Cancelled,
39 /// The task missed its heartbeat budget.
40 Unhealthy,
41}
42
43impl From<TaskFailureKind> for PolicyFailureKind {
44 /// Maps a task failure kind into a policy failure kind.
45 fn from(value: TaskFailureKind) -> Self {
46 match value {
47 TaskFailureKind::Error => Self::Recoverable,
48 TaskFailureKind::Panic => Self::Panic,
49 TaskFailureKind::Timeout => Self::Timeout,
50 TaskFailureKind::Unhealthy => Self::Unhealthy,
51 TaskFailureKind::Cancelled => Self::Cancelled,
52 }
53 }
54}
55
56/// Typed exit information supplied to restart policy.
57#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
58pub enum TaskExit {
59 /// The task completed successfully.
60 Succeeded,
61 /// The task failed with a typed category.
62 Failed {
63 /// Failure category used for policy decisions.
64 kind: PolicyFailureKind,
65 },
66}
67
68/// Explicit decision returned by the policy engine.
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
70pub enum RestartDecision {
71 /// Do not restart the child.
72 DoNotRestart,
73 /// Restart after the supplied delay.
74 RestartAfter {
75 /// Delay before the next restart attempt.
76 delay: Duration,
77 },
78 /// Stop automatic restart and place the child in quarantine.
79 Quarantine,
80 /// Escalate the failure to the parent supervisor.
81 EscalateToParent,
82 /// Shut down the whole supervisor tree.
83 ShutdownTree,
84}
85
86/// Stateless restart policy engine.
87#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
88pub struct PolicyEngine;
89
90impl PolicyEngine {
91 /// Creates a policy engine.
92 ///
93 /// # Arguments
94 ///
95 /// This function has no arguments.
96 ///
97 /// # Returns
98 ///
99 /// Returns a [`PolicyEngine`].
100 ///
101 /// # Examples
102 ///
103 /// ```
104 /// let engine = rust_supervisor::policy::decision::PolicyEngine::new();
105 /// let _ = engine;
106 /// ```
107 pub fn new() -> Self {
108 Self
109 }
110
111 /// Decides the restart action for a typed exit.
112 ///
113 /// # Arguments
114 ///
115 /// - `policy`: Restart policy configured for the child.
116 /// - `exit`: Typed task exit.
117 /// - `attempt`: One-based restart attempt used for backoff.
118 /// - `backoff`: Backoff policy used when a restart is allowed.
119 ///
120 /// # Returns
121 ///
122 /// Returns a [`RestartDecision`] that the runtime can execute.
123 pub fn decide(
124 &self,
125 policy: RestartPolicy,
126 exit: TaskExit,
127 attempt: u64,
128 backoff: &BackoffPolicy,
129 ) -> RestartDecision {
130 match exit {
131 TaskExit::Succeeded => self.decide_success(policy, attempt, backoff),
132 TaskExit::Failed { kind } => self.decide_failure(policy, kind, attempt, backoff),
133 }
134 }
135
136 /// Decides behavior after successful completion.
137 ///
138 /// # Arguments
139 ///
140 /// - `policy`: Restart policy configured for the child.
141 /// - `attempt`: One-based restart attempt used for backoff.
142 /// - `backoff`: Backoff policy used when a restart is allowed.
143 ///
144 /// # Returns
145 ///
146 /// Returns a restart decision for a successful exit.
147 fn decide_success(
148 &self,
149 policy: RestartPolicy,
150 attempt: u64,
151 backoff: &BackoffPolicy,
152 ) -> RestartDecision {
153 match policy {
154 RestartPolicy::Permanent => RestartDecision::RestartAfter {
155 delay: backoff.delay_for_attempt(attempt),
156 },
157 RestartPolicy::Transient | RestartPolicy::Temporary => RestartDecision::DoNotRestart,
158 }
159 }
160
161 /// Decides behavior after a typed failure.
162 ///
163 /// # Arguments
164 ///
165 /// - `policy`: Restart policy configured for the child.
166 /// - `kind`: Failure kind supplied by the runner.
167 /// - `attempt`: One-based restart attempt used for backoff.
168 /// - `backoff`: Backoff policy used when a restart is allowed.
169 ///
170 /// # Returns
171 ///
172 /// Returns a restart decision for a failed exit.
173 fn decide_failure(
174 &self,
175 policy: RestartPolicy,
176 kind: PolicyFailureKind,
177 attempt: u64,
178 backoff: &BackoffPolicy,
179 ) -> RestartDecision {
180 match kind {
181 PolicyFailureKind::FatalConfig => RestartDecision::ShutdownTree,
182 PolicyFailureKind::FatalBug => RestartDecision::EscalateToParent,
183 PolicyFailureKind::Cancelled => RestartDecision::DoNotRestart,
184 _ => self.restartable_failure(policy, attempt, backoff),
185 }
186 }
187
188 /// Applies restart policy to a restartable failure.
189 ///
190 /// # Arguments
191 ///
192 /// - `policy`: Restart policy configured for the child.
193 /// - `attempt`: One-based restart attempt used for backoff.
194 /// - `backoff`: Backoff policy used when a restart is allowed.
195 ///
196 /// # Returns
197 ///
198 /// Returns a restart decision for a restartable failure.
199 fn restartable_failure(
200 &self,
201 policy: RestartPolicy,
202 attempt: u64,
203 backoff: &BackoffPolicy,
204 ) -> RestartDecision {
205 match policy {
206 RestartPolicy::Permanent | RestartPolicy::Transient => RestartDecision::RestartAfter {
207 delay: backoff.delay_for_attempt(attempt),
208 },
209 RestartPolicy::Temporary => RestartDecision::DoNotRestart,
210 }
211 }
212}