// Source path: rust_supervisor/spec/supervisor.rs

//! Supervisor declaration model.
//!
//! This module owns the root and nested supervisor specification shape used by
//! tree construction and runtime startup.

use crate::error::types::SupervisorError;
use crate::id::types::{ChildId, SupervisorPath};
use crate::spec::child::{BackoffPolicy, ChildSpec, HealthPolicy, RestartPolicy, ShutdownPolicy};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::time::Duration;

13/// Strategy used when a child exits and a restart scope is needed.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
15pub enum SupervisionStrategy {
16    /// Restart only the failed child.
17    OneForOne,
18    /// Restart every child under the same supervisor.
19    OneForAll,
20    /// Restart the failed child and all children declared after it.
21    RestForOne,
22}
23
/// Policy used when a restart scope cannot remain local.
///
/// The value is plain data: it is `Copy` and comparable, and carries no
/// payload beyond the selected variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscalationPolicy {
    /// Escalate the failure to the parent supervisor.
    EscalateToParent,
    /// Shut down the current supervisor tree.
    ShutdownTree,
    /// Quarantine the selected restart scope.
    QuarantineScope,
}

/// Restart budget attached to a supervisor, group, or child override.
///
/// A budget pairs a restart count with the time window over which that count
/// is measured. Construction performs no validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RestartBudget {
    /// Maximum restarts allowed within the window.
    pub max_restarts: u32,
    /// Time window used to count restarts.
    pub window: Duration,
}

impl RestartBudget {
    /// Creates a restart budget.
    ///
    /// # Arguments
    ///
    /// - `max_restarts`: Maximum restart count allowed in the window.
    /// - `window`: Duration used for the restart count window.
    ///
    /// # Returns
    ///
    /// Returns a [`RestartBudget`] holding exactly the given values; zero
    /// values are accepted here and are not rejected by this constructor.
    pub fn new(max_restarts: u32, window: Duration) -> Self {
        Self {
            max_restarts,
            window,
        }
    }
}

63/// Strategy and governance overrides for a named child group.
64#[derive(Debug, Clone, PartialEq, Eq)]
65pub struct GroupStrategy {
66    /// Low-cardinality group tag shared by children.
67    pub group: String,
68    /// Restart strategy applied inside the group.
69    pub strategy: SupervisionStrategy,
70    /// Optional restart budget for this group.
71    pub restart_budget: Option<RestartBudget>,
72    /// Optional escalation policy for this group.
73    pub escalation_policy: Option<EscalationPolicy>,
74}
75
76impl GroupStrategy {
77    /// Creates a group strategy.
78    ///
79    /// # Arguments
80    ///
81    /// - `group`: Child tag that identifies the restart group.
82    /// - `strategy`: Restart strategy applied to the group.
83    ///
84    /// # Returns
85    ///
86    /// Returns a [`GroupStrategy`] with no budget or escalation override.
87    pub fn new(group: impl Into<String>, strategy: SupervisionStrategy) -> Self {
88        Self {
89            group: group.into(),
90            strategy,
91            restart_budget: None,
92            escalation_policy: None,
93        }
94    }
95}
96
97/// Per-child strategy and governance override.
98#[derive(Debug, Clone, PartialEq, Eq)]
99pub struct ChildStrategyOverride {
100    /// Child identifier that owns the override.
101    pub child_id: ChildId,
102    /// Restart strategy used when this child fails.
103    pub strategy: SupervisionStrategy,
104    /// Optional restart budget for this child.
105    pub restart_budget: Option<RestartBudget>,
106    /// Optional escalation policy for this child.
107    pub escalation_policy: Option<EscalationPolicy>,
108}
109
110impl ChildStrategyOverride {
111    /// Creates a child strategy override.
112    ///
113    /// # Arguments
114    ///
115    /// - `child_id`: Child identifier that owns the override.
116    /// - `strategy`: Restart strategy used for the child.
117    ///
118    /// # Returns
119    ///
120    /// Returns a [`ChildStrategyOverride`] value.
121    pub fn new(child_id: ChildId, strategy: SupervisionStrategy) -> Self {
122        Self {
123            child_id,
124            strategy,
125            restart_budget: None,
126            escalation_policy: None,
127        }
128    }
129}
130
/// Dynamic supervisor policy for runtime child additions.
///
/// The policy combines an on/off switch with an optional cap on the total
/// child count (declared plus dynamic).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DynamicSupervisorPolicy {
    /// Whether runtime child additions are allowed.
    pub enabled: bool,
    /// Optional maximum number of declared and dynamic children.
    pub child_limit: Option<usize>,
}

impl DynamicSupervisorPolicy {
    /// Creates an unbounded dynamic supervisor policy.
    ///
    /// # Arguments
    ///
    /// This function has no arguments.
    ///
    /// # Returns
    ///
    /// Returns a policy that allows dynamic child additions without a limit.
    pub fn unbounded() -> Self {
        Self {
            enabled: true,
            child_limit: None,
        }
    }

    /// Creates a limited dynamic supervisor policy.
    ///
    /// # Arguments
    ///
    /// - `child_limit`: Maximum declared and dynamic child count.
    ///
    /// # Returns
    ///
    /// Returns a policy that allows dynamic additions up to the limit. The
    /// limit is stored as given; a zero limit is not rejected here.
    pub fn limited(child_limit: usize) -> Self {
        Self {
            enabled: true,
            child_limit: Some(child_limit),
        }
    }

    /// Reports whether another dynamic child can be added.
    ///
    /// # Arguments
    ///
    /// - `current_child_count`: Current declared plus dynamic child count.
    ///
    /// # Returns
    ///
    /// Returns `true` when the next addition is allowed: the policy must be
    /// enabled and, when a limit is set, the current count must be strictly
    /// below it. A disabled policy always returns `false`.
    pub fn allows_addition(&self, current_child_count: usize) -> bool {
        if !self.enabled {
            return false;
        }
        // An explicit match keeps this independent of newer `Option` helpers.
        match self.child_limit {
            None => true,
            Some(limit) => current_child_count < limit,
        }
    }
}

190/// Restart plan selected after strategy, group, and child overrides are merged.
191#[derive(Debug, Clone, PartialEq, Eq)]
192pub struct StrategyExecutionPlan {
193    /// Child whose exit triggered the plan.
194    pub failed_child: ChildId,
195    /// Strategy selected for this execution.
196    pub strategy: SupervisionStrategy,
197    /// Child identifiers selected for restart.
198    pub scope: Vec<ChildId>,
199    /// Optional group that constrained the scope.
200    pub group: Option<String>,
201    /// Optional restart budget selected for the plan.
202    pub restart_budget: Option<RestartBudget>,
203    /// Optional escalation policy selected for the plan.
204    pub escalation_policy: Option<EscalationPolicy>,
205    /// Whether dynamic supervisor additions are allowed.
206    pub dynamic_supervisor_enabled: bool,
207}
208
209/// Declarative specification for one supervisor node.
210#[derive(Debug, Clone)]
211pub struct SupervisorSpec {
212    /// Stable path for this supervisor.
213    pub path: SupervisorPath,
214    /// Restart scope strategy for child exits.
215    pub strategy: SupervisionStrategy,
216    /// Children in declaration order.
217    pub children: Vec<ChildSpec>,
218    /// Configuration version that produced this declaration.
219    pub config_version: String,
220    /// Restart policy inherited by children that do not override it.
221    pub default_restart_policy: RestartPolicy,
222    /// Backoff policy inherited by children that do not override it.
223    pub default_backoff_policy: BackoffPolicy,
224    /// Health policy inherited by children that do not override it.
225    pub default_health_policy: HealthPolicy,
226    /// Shutdown policy inherited by children that do not override it.
227    pub default_shutdown_policy: ShutdownPolicy,
228    /// Maximum supervisor failures before parent escalation.
229    pub supervisor_failure_limit: u32,
230    /// Optional supervisor-level restart budget.
231    pub restart_budget: Option<RestartBudget>,
232    /// Optional supervisor-level escalation policy.
233    pub escalation_policy: Option<EscalationPolicy>,
234    /// Group-level strategy overrides.
235    pub group_strategies: Vec<GroupStrategy>,
236    /// Child-level strategy overrides.
237    pub child_strategy_overrides: Vec<ChildStrategyOverride>,
238    /// Runtime policy for dynamic child additions.
239    pub dynamic_supervisor_policy: DynamicSupervisorPolicy,
240    /// Control command channel capacity.
241    pub control_channel_capacity: usize,
242    /// Event broadcast channel capacity.
243    pub event_channel_capacity: usize,
244}
245
246impl SupervisorSpec {
247    /// Creates a root supervisor specification.
248    ///
249    /// # Arguments
250    ///
251    /// - `children`: Children declared under the root supervisor.
252    ///
253    /// # Returns
254    ///
255    /// Returns a root [`SupervisorSpec`] with declaration-order children.
256    ///
257    /// # Examples
258    ///
259    /// ```
260    /// let spec = rust_supervisor::spec::supervisor::SupervisorSpec::root(Vec::new());
261    /// assert_eq!(spec.path.to_string(), "/");
262    /// ```
263    pub fn root(children: Vec<ChildSpec>) -> Self {
264        let channel_capacity = channel_capacity_for_children(children.len());
265        Self {
266            path: SupervisorPath::root(),
267            strategy: SupervisionStrategy::OneForOne,
268            children,
269            config_version: String::from("unversioned"),
270            default_restart_policy: RestartPolicy::Transient,
271            default_backoff_policy: BackoffPolicy::new(
272                Duration::from_millis(10),
273                Duration::from_secs(1),
274                0.0,
275            ),
276            default_health_policy: HealthPolicy::new(
277                Duration::from_secs(1),
278                Duration::from_secs(3),
279            ),
280            default_shutdown_policy: ShutdownPolicy::new(
281                Duration::from_secs(5),
282                Duration::from_secs(1),
283            ),
284            supervisor_failure_limit: 1,
285            restart_budget: None,
286            escalation_policy: None,
287            group_strategies: Vec::new(),
288            child_strategy_overrides: Vec::new(),
289            dynamic_supervisor_policy: DynamicSupervisorPolicy::unbounded(),
290            control_channel_capacity: channel_capacity,
291            event_channel_capacity: channel_capacity.saturating_mul(2),
292        }
293    }
294
295    /// Validates this supervisor and its direct children.
296    ///
297    /// # Arguments
298    ///
299    /// This function has no arguments.
300    ///
301    /// # Returns
302    ///
303    /// Returns `Ok(())` when the supervisor declaration is usable.
304    pub fn validate(&self) -> Result<(), SupervisorError> {
305        if self.config_version.trim().is_empty() {
306            return Err(SupervisorError::fatal_config(
307                "config version must not be empty",
308            ));
309        }
310        if self.supervisor_failure_limit == 0 {
311            return Err(SupervisorError::fatal_config(
312                "supervisor failure limit must be greater than zero",
313            ));
314        }
315        if self.control_channel_capacity == 0 {
316            return Err(SupervisorError::fatal_config(
317                "control channel capacity must be greater than zero",
318            ));
319        }
320        if self.event_channel_capacity == 0 {
321            return Err(SupervisorError::fatal_config(
322                "event channel capacity must be greater than zero",
323            ));
324        }
325        for child in &self.children {
326            child.validate()?;
327        }
328        validate_restart_budget(self.restart_budget)?;
329        validate_group_strategies(&self.group_strategies, &self.children)?;
330        validate_child_strategy_overrides(self)?;
331        validate_dynamic_policy(self.dynamic_supervisor_policy)?;
332        Ok(())
333    }
334}
335
336/// Validates an optional restart budget.
337///
338/// # Arguments
339///
340/// - `budget`: Optional restart budget to validate.
341///
342/// # Returns
343///
344/// Returns `Ok(())` when the budget is absent or valid.
345fn validate_restart_budget(budget: Option<RestartBudget>) -> Result<(), SupervisorError> {
346    let Some(budget) = budget else {
347        return Ok(());
348    };
349    if budget.max_restarts == 0 {
350        return Err(SupervisorError::fatal_config(
351            "restart budget max_restarts must be greater than zero",
352        ));
353    }
354    if budget.window.is_zero() {
355        return Err(SupervisorError::fatal_config(
356            "restart budget window must be greater than zero",
357        ));
358    }
359    Ok(())
360}
361
362/// Validates group strategy declarations.
363///
364/// # Arguments
365///
366/// - `strategies`: Group strategies declared on the supervisor.
367///
368/// # Returns
369///
370/// Returns `Ok(())` when group names are unique and valid.
371fn validate_group_strategies(
372    strategies: &[GroupStrategy],
373    children: &[ChildSpec],
374) -> Result<(), SupervisorError> {
375    let mut groups = HashSet::new();
376    for strategy in strategies {
377        if strategy.group.trim().is_empty() {
378            return Err(SupervisorError::fatal_config(
379                "group strategy group must not be empty",
380            ));
381        }
382        if !groups.insert(strategy.group.clone()) {
383            return Err(SupervisorError::fatal_config(format!(
384                "duplicate group strategy: {}",
385                strategy.group
386            )));
387        }
388        validate_restart_budget(strategy.restart_budget)?;
389    }
390    validate_group_membership(strategies, children)?;
391    Ok(())
392}
393
394/// Validates child membership against configured restart groups.
395///
396/// # Arguments
397///
398/// - `strategies`: Group strategies declared on the supervisor.
399/// - `children`: Children declared under the supervisor.
400///
401/// # Returns
402///
403/// Returns `Ok(())` when every configured group is used without ambiguity.
404fn validate_group_membership(
405    strategies: &[GroupStrategy],
406    children: &[ChildSpec],
407) -> Result<(), SupervisorError> {
408    let groups = strategies
409        .iter()
410        .map(|strategy| strategy.group.clone())
411        .collect::<HashSet<_>>();
412    for strategy in strategies {
413        if !children
414            .iter()
415            .any(|child| child.tags.contains(&strategy.group))
416        {
417            return Err(SupervisorError::fatal_config(format!(
418                "group strategy references unused group: {}",
419                strategy.group
420            )));
421        }
422    }
423    for child in children {
424        let configured_group_count = child
425            .tags
426            .iter()
427            .filter(|tag| groups.contains(*tag))
428            .count();
429        if configured_group_count > 1 {
430            return Err(SupervisorError::fatal_config(format!(
431                "child strategy groups are ambiguous for child: {}",
432                child.id
433            )));
434        }
435    }
436    Ok(())
437}
438
439/// Validates child strategy overrides.
440///
441/// # Arguments
442///
443/// - `spec`: Supervisor specification that owns children and overrides.
444///
445/// # Returns
446///
447/// Returns `Ok(())` when every override targets a known child once.
448fn validate_child_strategy_overrides(spec: &SupervisorSpec) -> Result<(), SupervisorError> {
449    let child_ids = spec
450        .children
451        .iter()
452        .map(|child| child.id.clone())
453        .collect::<HashSet<_>>();
454    let mut overrides = HashSet::new();
455    for strategy in &spec.child_strategy_overrides {
456        if !child_ids.contains(&strategy.child_id) {
457            return Err(SupervisorError::fatal_config(format!(
458                "child strategy override references unknown child: {}",
459                strategy.child_id
460            )));
461        }
462        if !overrides.insert(strategy.child_id.clone()) {
463            return Err(SupervisorError::fatal_config(format!(
464                "duplicate child strategy override: {}",
465                strategy.child_id
466            )));
467        }
468        validate_restart_budget(strategy.restart_budget)?;
469    }
470    Ok(())
471}
472
473/// Validates dynamic supervisor policy.
474///
475/// # Arguments
476///
477/// - `policy`: Dynamic supervisor policy to validate.
478///
479/// # Returns
480///
481/// Returns `Ok(())` when the policy limit is coherent.
482fn validate_dynamic_policy(policy: DynamicSupervisorPolicy) -> Result<(), SupervisorError> {
483    if policy.child_limit == Some(0) {
484        return Err(SupervisorError::fatal_config(
485            "dynamic supervisor child_limit must be greater than zero",
486        ));
487    }
488    Ok(())
489}
490
/// Derives a channel capacity from declared children.
///
/// # Arguments
///
/// - `child_count`: Number of children declared under the supervisor.
///
/// # Returns
///
/// Returns a non-zero channel capacity: the child count plus one, saturating
/// at `usize::MAX` instead of overflowing.
fn channel_capacity_for_children(child_count: usize) -> usize {
    child_count.saturating_add(1)
}