Skip to main content

rust_supervisor/policy/
decision.rs

//! Restart policy decisions for typed task exits.
//!
//! This module converts typed task exits into explicit restart decisions. It
//! never inspects string messages and it owns no runtime state.
5
6use crate::error::types::TaskFailureKind;
7use crate::policy::backoff::BackoffPolicy;
// Re-export `ProtectionAction` from the event payload so policy code can use it.
// It models the protection restrictiveness ladder, which has six tiers:
// restart_allowed → restart_queued → restart_denied → supervision_paused → escalated → supervised_stop
11pub use crate::event::payload::ProtectionAction;
12use serde::{Deserialize, Serialize};
13use std::time::Duration;
14
15/// Rule that decides whether a task exit is restartable.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17pub enum RestartPolicy {
18    /// Restart after both successful and failed exits.
19    Permanent,
20    /// Restart after failed exits only.
21    Transient,
22    /// Never restart automatically.
23    Temporary,
24}
25
26/// Failure category consumed by the policy engine.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
28pub enum PolicyFailureKind {
29    /// A failure that may succeed on a later child_start_count.
30    Recoverable,
31    /// A configuration error that should stop the tree.
32    FatalConfig,
33    /// A code defect that should be escalated.
34    FatalBug,
35    /// A dependency failure that may be recoverable.
36    ExternalDependency,
37    /// The task exceeded its runtime budget.
38    Timeout,
39    /// The task panicked.
40    Panic,
41    /// The task was cancelled intentionally.
42    Cancelled,
43    /// The task missed its heartbeat budget.
44    Unhealthy,
45}
46
47impl From<TaskFailureKind> for PolicyFailureKind {
48    /// Maps a task failure kind into a policy failure kind.
49    fn from(value: TaskFailureKind) -> Self {
50        match value {
51            TaskFailureKind::Error => Self::Recoverable,
52            TaskFailureKind::Panic => Self::Panic,
53            TaskFailureKind::Timeout => Self::Timeout,
54            TaskFailureKind::Unhealthy => Self::Unhealthy,
55            TaskFailureKind::Cancelled => Self::Cancelled,
56        }
57    }
58}
59
60/// Typed exit information supplied to restart policy.
61#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
62pub enum TaskExit {
63    /// The task completed successfully.
64    Succeeded,
65    /// The task failed with a typed category.
66    Failed {
67        /// Failure category used for policy decisions.
68        kind: PolicyFailureKind,
69    },
70}
71
72/// Explicit decision returned by the policy engine.
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74pub enum RestartDecision {
75    /// Do not restart the child.
76    DoNotRestart,
77    /// Restart after the supplied delay.
78    RestartAfter {
79        /// Delay before the next restart child_start_count.
80        delay: Duration,
81    },
82    /// Stop automatic restart and place the child in quarantine.
83    Quarantine,
84    /// Escalate the failure to the parent supervisor.
85    EscalateToParent,
86    /// Shut down the whole supervisor tree.
87    ShutdownTree,
88}
89
90/// Stateless restart policy engine.
91#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
92pub struct PolicyEngine;
93
94impl PolicyEngine {
95    /// Creates a policy engine.
96    ///
97    /// # Arguments
98    ///
99    /// This function has no arguments.
100    ///
101    /// # Returns
102    ///
103    /// Returns a [`PolicyEngine`].
104    ///
105    /// # Examples
106    ///
107    /// ```
108    /// let engine = rust_supervisor::policy::decision::PolicyEngine::new();
109    /// let _ = engine;
110    /// ```
111    pub fn new() -> Self {
112        Self
113    }
114
115    /// Decides the restart action for a typed exit.
116    ///
117    /// # Arguments
118    ///
119    /// - `policy`: Restart policy configured for the child.
120    /// - `exit`: Typed task exit.
121    /// - `child_start_count`: One-based restart child_start_count used for backoff.
122    /// - `backoff`: Backoff policy used when a restart is allowed.
123    ///
124    /// # Returns
125    ///
126    /// Returns a [`RestartDecision`] that the runtime can execute.
127    pub fn decide(
128        &self,
129        policy: RestartPolicy,
130        exit: TaskExit,
131        child_start_count: u64,
132        backoff: &BackoffPolicy,
133    ) -> RestartDecision {
134        match exit {
135            TaskExit::Succeeded => self.decide_success(policy, child_start_count, backoff),
136            TaskExit::Failed { kind } => {
137                self.decide_failure(policy, kind, child_start_count, backoff)
138            }
139        }
140    }
141
142    /// Decides behavior after successful completion.
143    ///
144    /// # Arguments
145    ///
146    /// - `policy`: Restart policy configured for the child.
147    /// - `child_start_count`: One-based restart child_start_count used for backoff.
148    /// - `backoff`: Backoff policy used when a restart is allowed.
149    ///
150    /// # Returns
151    ///
152    /// Returns a restart decision for a successful exit.
153    fn decide_success(
154        &self,
155        policy: RestartPolicy,
156        child_start_count: u64,
157        backoff: &BackoffPolicy,
158    ) -> RestartDecision {
159        match policy {
160            RestartPolicy::Permanent => RestartDecision::RestartAfter {
161                delay: backoff.delay_for_child_start_count(child_start_count),
162            },
163            RestartPolicy::Transient | RestartPolicy::Temporary => RestartDecision::DoNotRestart,
164        }
165    }
166
167    /// Decides behavior after a typed failure.
168    ///
169    /// # Arguments
170    ///
171    /// - `policy`: Restart policy configured for the child.
172    /// - `kind`: Failure kind supplied by the runner.
173    /// - `child_start_count`: One-based restart child_start_count used for backoff.
174    /// - `backoff`: Backoff policy used when a restart is allowed.
175    ///
176    /// # Returns
177    ///
178    /// Returns a restart decision for a failed exit.
179    fn decide_failure(
180        &self,
181        policy: RestartPolicy,
182        kind: PolicyFailureKind,
183        child_start_count: u64,
184        backoff: &BackoffPolicy,
185    ) -> RestartDecision {
186        match kind {
187            PolicyFailureKind::FatalConfig => RestartDecision::ShutdownTree,
188            PolicyFailureKind::FatalBug => RestartDecision::EscalateToParent,
189            PolicyFailureKind::Cancelled => RestartDecision::DoNotRestart,
190            _ => self.restartable_failure(policy, child_start_count, backoff),
191        }
192    }
193
194    /// Applies restart policy to a restartable failure.
195    ///
196    /// # Arguments
197    ///
198    /// - `policy`: Restart policy configured for the child.
199    /// - `child_start_count`: One-based restart child_start_count used for backoff.
200    /// - `backoff`: Backoff policy used when a restart is allowed.
201    ///
202    /// # Returns
203    ///
204    /// Returns a restart decision for a restartable failure.
205    fn restartable_failure(
206        &self,
207        policy: RestartPolicy,
208        child_start_count: u64,
209        backoff: &BackoffPolicy,
210    ) -> RestartDecision {
211        match policy {
212            RestartPolicy::Permanent | RestartPolicy::Transient => RestartDecision::RestartAfter {
213                delay: backoff.delay_for_child_start_count(child_start_count),
214            },
215            RestartPolicy::Temporary => RestartDecision::DoNotRestart,
216        }
217    }
218}