rust_supervisor/policy/decision.rs
1//! Restart policy decisions for typed task exits.
2//!
3//! The module converts typed exits into explicit restart decisions. It does not
4//! inspect string messages and it does not own runtime state.
5
6use crate::error::types::TaskFailureKind;
7use crate::policy::backoff::BackoffPolicy;
8// Re-export ProtectionAction from event payload for policy decision usage.
9// This is the protection restrictiveness ladder with six tiers:
10// restart_allowed → restart_queued → restart_denied → supervision_paused → escalated → supervised_stop
11pub use crate::event::payload::ProtectionAction;
12use serde::{Deserialize, Serialize};
13use std::time::Duration;
14
15/// Rule that decides whether a task exit is restartable.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17pub enum RestartPolicy {
18 /// Restart after both successful and failed exits.
19 Permanent,
20 /// Restart after failed exits only.
21 Transient,
22 /// Never restart automatically.
23 Temporary,
24}
25
26/// Failure category consumed by the policy engine.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
28pub enum PolicyFailureKind {
29 /// A failure that may succeed on a later child_start_count.
30 Recoverable,
31 /// A configuration error that should stop the tree.
32 FatalConfig,
33 /// A code defect that should be escalated.
34 FatalBug,
35 /// A dependency failure that may be recoverable.
36 ExternalDependency,
37 /// The task exceeded its runtime budget.
38 Timeout,
39 /// The task panicked.
40 Panic,
41 /// The task was cancelled intentionally.
42 Cancelled,
43 /// The task missed its heartbeat budget.
44 Unhealthy,
45}
46
47impl From<TaskFailureKind> for PolicyFailureKind {
48 /// Maps a task failure kind into a policy failure kind.
49 fn from(value: TaskFailureKind) -> Self {
50 match value {
51 TaskFailureKind::Error => Self::Recoverable,
52 TaskFailureKind::Panic => Self::Panic,
53 TaskFailureKind::Timeout => Self::Timeout,
54 TaskFailureKind::Unhealthy => Self::Unhealthy,
55 TaskFailureKind::Cancelled => Self::Cancelled,
56 }
57 }
58}
59
60/// Typed exit information supplied to restart policy.
61#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
62pub enum TaskExit {
63 /// The task completed successfully.
64 Succeeded,
65 /// The task failed with a typed category.
66 Failed {
67 /// Failure category used for policy decisions.
68 kind: PolicyFailureKind,
69 },
70}
71
72/// Explicit decision returned by the policy engine.
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74pub enum RestartDecision {
75 /// Do not restart the child.
76 DoNotRestart,
77 /// Restart after the supplied delay.
78 RestartAfter {
79 /// Delay before the next restart child_start_count.
80 delay: Duration,
81 },
82 /// Stop automatic restart and place the child in quarantine.
83 Quarantine,
84 /// Escalate the failure to the parent supervisor.
85 EscalateToParent,
86 /// Shut down the whole supervisor tree.
87 ShutdownTree,
88}
89
90/// Stateless restart policy engine.
91#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
92pub struct PolicyEngine;
93
94impl PolicyEngine {
95 /// Creates a policy engine.
96 ///
97 /// # Arguments
98 ///
99 /// This function has no arguments.
100 ///
101 /// # Returns
102 ///
103 /// Returns a [`PolicyEngine`].
104 ///
105 /// # Examples
106 ///
107 /// ```
108 /// let engine = rust_supervisor::policy::decision::PolicyEngine::new();
109 /// let _ = engine;
110 /// ```
111 pub fn new() -> Self {
112 Self
113 }
114
115 /// Decides the restart action for a typed exit.
116 ///
117 /// # Arguments
118 ///
119 /// - `policy`: Restart policy configured for the child.
120 /// - `exit`: Typed task exit.
121 /// - `child_start_count`: One-based restart child_start_count used for backoff.
122 /// - `backoff`: Backoff policy used when a restart is allowed.
123 ///
124 /// # Returns
125 ///
126 /// Returns a [`RestartDecision`] that the runtime can execute.
127 pub fn decide(
128 &self,
129 policy: RestartPolicy,
130 exit: TaskExit,
131 child_start_count: u64,
132 backoff: &BackoffPolicy,
133 ) -> RestartDecision {
134 match exit {
135 TaskExit::Succeeded => self.decide_success(policy, child_start_count, backoff),
136 TaskExit::Failed { kind } => {
137 self.decide_failure(policy, kind, child_start_count, backoff)
138 }
139 }
140 }
141
142 /// Decides behavior after successful completion.
143 ///
144 /// # Arguments
145 ///
146 /// - `policy`: Restart policy configured for the child.
147 /// - `child_start_count`: One-based restart child_start_count used for backoff.
148 /// - `backoff`: Backoff policy used when a restart is allowed.
149 ///
150 /// # Returns
151 ///
152 /// Returns a restart decision for a successful exit.
153 fn decide_success(
154 &self,
155 policy: RestartPolicy,
156 child_start_count: u64,
157 backoff: &BackoffPolicy,
158 ) -> RestartDecision {
159 match policy {
160 RestartPolicy::Permanent => RestartDecision::RestartAfter {
161 delay: backoff.delay_for_child_start_count(child_start_count),
162 },
163 RestartPolicy::Transient | RestartPolicy::Temporary => RestartDecision::DoNotRestart,
164 }
165 }
166
167 /// Decides behavior after a typed failure.
168 ///
169 /// # Arguments
170 ///
171 /// - `policy`: Restart policy configured for the child.
172 /// - `kind`: Failure kind supplied by the runner.
173 /// - `child_start_count`: One-based restart child_start_count used for backoff.
174 /// - `backoff`: Backoff policy used when a restart is allowed.
175 ///
176 /// # Returns
177 ///
178 /// Returns a restart decision for a failed exit.
179 fn decide_failure(
180 &self,
181 policy: RestartPolicy,
182 kind: PolicyFailureKind,
183 child_start_count: u64,
184 backoff: &BackoffPolicy,
185 ) -> RestartDecision {
186 match kind {
187 PolicyFailureKind::FatalConfig => RestartDecision::ShutdownTree,
188 PolicyFailureKind::FatalBug => RestartDecision::EscalateToParent,
189 PolicyFailureKind::Cancelled => RestartDecision::DoNotRestart,
190 _ => self.restartable_failure(policy, child_start_count, backoff),
191 }
192 }
193
194 /// Applies restart policy to a restartable failure.
195 ///
196 /// # Arguments
197 ///
198 /// - `policy`: Restart policy configured for the child.
199 /// - `child_start_count`: One-based restart child_start_count used for backoff.
200 /// - `backoff`: Backoff policy used when a restart is allowed.
201 ///
202 /// # Returns
203 ///
204 /// Returns a restart decision for a restartable failure.
205 fn restartable_failure(
206 &self,
207 policy: RestartPolicy,
208 child_start_count: u64,
209 backoff: &BackoffPolicy,
210 ) -> RestartDecision {
211 match policy {
212 RestartPolicy::Permanent | RestartPolicy::Transient => RestartDecision::RestartAfter {
213 delay: backoff.delay_for_child_start_count(child_start_count),
214 },
215 RestartPolicy::Temporary => RestartDecision::DoNotRestart,
216 }
217 }
218}