rust_supervisor/spec/supervisor.rs
1//! Supervisor declaration model.
2//!
3//! This module owns the root and nested supervisor specification shape used by
4//! tree construction and runtime startup.
5
6use crate::error::types::SupervisorError;
7use crate::id::types::{ChildId, SupervisorPath};
8use crate::spec::child::{BackoffPolicy, ChildSpec, HealthPolicy, RestartPolicy, ShutdownPolicy};
9use serde::{Deserialize, Serialize};
10use std::collections::HashSet;
11use std::time::Duration;
12
13/// Strategy used when a child exits and a restart scope is needed.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
15pub enum SupervisionStrategy {
16 /// Restart only the failed child.
17 OneForOne,
18 /// Restart every child under the same supervisor.
19 OneForAll,
20 /// Restart the failed child and all children declared after it.
21 RestForOne,
22}
23
/// Action taken when a restart scope can no longer be handled locally.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscalationPolicy {
    /// Hand the failure over to the parent supervisor.
    EscalateToParent,
    /// Shut down the current supervisor tree.
    ShutdownTree,
    /// Quarantine the restart scope that was selected.
    QuarantineScope,
}
34
/// Restart allowance that can be attached to a supervisor, a group, or a
/// single child override.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RestartBudget {
    /// Upper bound on restarts counted inside one window.
    pub max_restarts: u32,
    /// Length of the window over which restarts are counted.
    pub window: Duration,
}

impl RestartBudget {
    /// Builds a restart budget from its two components.
    ///
    /// # Arguments
    ///
    /// - `max_restarts`: Number of restarts permitted within the window.
    /// - `window`: Length of the restart counting window.
    ///
    /// # Returns
    ///
    /// Returns a [`RestartBudget`] holding the given values unchanged; no
    /// validation is performed here.
    pub fn new(max_restarts: u32, window: Duration) -> Self {
        Self {
            max_restarts,
            window,
        }
    }
}
62
63/// Strategy and governance overrides for a named child group.
64#[derive(Debug, Clone, PartialEq, Eq)]
65pub struct GroupStrategy {
66 /// Low-cardinality group tag shared by children.
67 pub group: String,
68 /// Restart strategy applied inside the group.
69 pub strategy: SupervisionStrategy,
70 /// Optional restart budget for this group.
71 pub restart_budget: Option<RestartBudget>,
72 /// Optional escalation policy for this group.
73 pub escalation_policy: Option<EscalationPolicy>,
74}
75
76impl GroupStrategy {
77 /// Creates a group strategy.
78 ///
79 /// # Arguments
80 ///
81 /// - `group`: Child tag that identifies the restart group.
82 /// - `strategy`: Restart strategy applied to the group.
83 ///
84 /// # Returns
85 ///
86 /// Returns a [`GroupStrategy`] with no budget or escalation override.
87 pub fn new(group: impl Into<String>, strategy: SupervisionStrategy) -> Self {
88 Self {
89 group: group.into(),
90 strategy,
91 restart_budget: None,
92 escalation_policy: None,
93 }
94 }
95}
96
97/// Per-child strategy and governance override.
98#[derive(Debug, Clone, PartialEq, Eq)]
99pub struct ChildStrategyOverride {
100 /// Child identifier that owns the override.
101 pub child_id: ChildId,
102 /// Restart strategy used when this child fails.
103 pub strategy: SupervisionStrategy,
104 /// Optional restart budget for this child.
105 pub restart_budget: Option<RestartBudget>,
106 /// Optional escalation policy for this child.
107 pub escalation_policy: Option<EscalationPolicy>,
108}
109
110impl ChildStrategyOverride {
111 /// Creates a child strategy override.
112 ///
113 /// # Arguments
114 ///
115 /// - `child_id`: Child identifier that owns the override.
116 /// - `strategy`: Restart strategy used for the child.
117 ///
118 /// # Returns
119 ///
120 /// Returns a [`ChildStrategyOverride`] value.
121 pub fn new(child_id: ChildId, strategy: SupervisionStrategy) -> Self {
122 Self {
123 child_id,
124 strategy,
125 restart_budget: None,
126 escalation_policy: None,
127 }
128 }
129}
130
/// Policy governing child additions made at runtime.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DynamicSupervisorPolicy {
    /// Whether children may be added at runtime.
    pub enabled: bool,
    /// Optional cap on the combined number of declared and dynamic children.
    pub child_limit: Option<usize>,
}

impl DynamicSupervisorPolicy {
    /// Builds a policy that permits runtime additions with no cap.
    ///
    /// # Arguments
    ///
    /// This function has no arguments.
    ///
    /// # Returns
    ///
    /// Returns an enabled policy whose `child_limit` is `None`.
    pub fn unbounded() -> Self {
        Self {
            enabled: true,
            child_limit: None,
        }
    }

    /// Builds a policy that permits runtime additions up to a cap.
    ///
    /// # Arguments
    ///
    /// - `child_limit`: Cap on the combined declared and dynamic child count.
    ///
    /// # Returns
    ///
    /// Returns an enabled policy whose `child_limit` is `Some(child_limit)`.
    pub fn limited(child_limit: usize) -> Self {
        // Same as the unbounded policy, except for the cap.
        Self {
            child_limit: Some(child_limit),
            ..Self::unbounded()
        }
    }

    /// Tells whether one more dynamic child may be added.
    ///
    /// # Arguments
    ///
    /// - `current_child_count`: Number of declared plus dynamic children right now.
    ///
    /// # Returns
    ///
    /// Returns `true` when the policy is enabled and either has no limit or
    /// the current count is strictly below the limit.
    pub fn allows_addition(&self, current_child_count: usize) -> bool {
        if !self.enabled {
            return false;
        }
        match self.child_limit {
            Some(limit) => current_child_count < limit,
            None => true,
        }
    }
}
189
190/// Restart plan selected after strategy, group, and child overrides are merged.
191#[derive(Debug, Clone, PartialEq, Eq)]
192pub struct StrategyExecutionPlan {
193 /// Child whose exit triggered the plan.
194 pub failed_child: ChildId,
195 /// Strategy selected for this execution.
196 pub strategy: SupervisionStrategy,
197 /// Child identifiers selected for restart.
198 pub scope: Vec<ChildId>,
199 /// Optional group that constrained the scope.
200 pub group: Option<String>,
201 /// Optional restart budget selected for the plan.
202 pub restart_budget: Option<RestartBudget>,
203 /// Optional escalation policy selected for the plan.
204 pub escalation_policy: Option<EscalationPolicy>,
205 /// Whether dynamic supervisor additions are allowed.
206 pub dynamic_supervisor_enabled: bool,
207}
208
209/// Declarative specification for one supervisor node.
210#[derive(Debug, Clone)]
211pub struct SupervisorSpec {
212 /// Stable path for this supervisor.
213 pub path: SupervisorPath,
214 /// Restart scope strategy for child exits.
215 pub strategy: SupervisionStrategy,
216 /// Children in declaration order.
217 pub children: Vec<ChildSpec>,
218 /// Configuration version that produced this declaration.
219 pub config_version: String,
220 /// Restart policy inherited by children that do not override it.
221 pub default_restart_policy: RestartPolicy,
222 /// Backoff policy inherited by children that do not override it.
223 pub default_backoff_policy: BackoffPolicy,
224 /// Health policy inherited by children that do not override it.
225 pub default_health_policy: HealthPolicy,
226 /// Shutdown policy inherited by children that do not override it.
227 pub default_shutdown_policy: ShutdownPolicy,
228 /// Maximum supervisor failures before parent escalation.
229 pub supervisor_failure_limit: u32,
230 /// Optional supervisor-level restart budget.
231 pub restart_budget: Option<RestartBudget>,
232 /// Optional supervisor-level escalation policy.
233 pub escalation_policy: Option<EscalationPolicy>,
234 /// Group-level strategy overrides.
235 pub group_strategies: Vec<GroupStrategy>,
236 /// Child-level strategy overrides.
237 pub child_strategy_overrides: Vec<ChildStrategyOverride>,
238 /// Runtime policy for dynamic child additions.
239 pub dynamic_supervisor_policy: DynamicSupervisorPolicy,
240 /// Control command channel capacity.
241 pub control_channel_capacity: usize,
242 /// Event broadcast channel capacity.
243 pub event_channel_capacity: usize,
244}
245
246impl SupervisorSpec {
247 /// Creates a root supervisor specification.
248 ///
249 /// # Arguments
250 ///
251 /// - `children`: Children declared under the root supervisor.
252 ///
253 /// # Returns
254 ///
255 /// Returns a root [`SupervisorSpec`] with declaration-order children.
256 ///
257 /// # Examples
258 ///
259 /// ```
260 /// let spec = rust_supervisor::spec::supervisor::SupervisorSpec::root(Vec::new());
261 /// assert_eq!(spec.path.to_string(), "/");
262 /// ```
263 pub fn root(children: Vec<ChildSpec>) -> Self {
264 let channel_capacity = channel_capacity_for_children(children.len());
265 Self {
266 path: SupervisorPath::root(),
267 strategy: SupervisionStrategy::OneForOne,
268 children,
269 config_version: String::from("unversioned"),
270 default_restart_policy: RestartPolicy::Transient,
271 default_backoff_policy: BackoffPolicy::new(
272 Duration::from_millis(10),
273 Duration::from_secs(1),
274 0.0,
275 ),
276 default_health_policy: HealthPolicy::new(
277 Duration::from_secs(1),
278 Duration::from_secs(3),
279 ),
280 default_shutdown_policy: ShutdownPolicy::new(
281 Duration::from_secs(5),
282 Duration::from_secs(1),
283 ),
284 supervisor_failure_limit: 1,
285 restart_budget: None,
286 escalation_policy: None,
287 group_strategies: Vec::new(),
288 child_strategy_overrides: Vec::new(),
289 dynamic_supervisor_policy: DynamicSupervisorPolicy::unbounded(),
290 control_channel_capacity: channel_capacity,
291 event_channel_capacity: channel_capacity.saturating_mul(2),
292 }
293 }
294
295 /// Validates this supervisor and its direct children.
296 ///
297 /// # Arguments
298 ///
299 /// This function has no arguments.
300 ///
301 /// # Returns
302 ///
303 /// Returns `Ok(())` when the supervisor declaration is usable.
304 pub fn validate(&self) -> Result<(), SupervisorError> {
305 if self.config_version.trim().is_empty() {
306 return Err(SupervisorError::fatal_config(
307 "config version must not be empty",
308 ));
309 }
310 if self.supervisor_failure_limit == 0 {
311 return Err(SupervisorError::fatal_config(
312 "supervisor failure limit must be greater than zero",
313 ));
314 }
315 if self.control_channel_capacity == 0 {
316 return Err(SupervisorError::fatal_config(
317 "control channel capacity must be greater than zero",
318 ));
319 }
320 if self.event_channel_capacity == 0 {
321 return Err(SupervisorError::fatal_config(
322 "event channel capacity must be greater than zero",
323 ));
324 }
325 for child in &self.children {
326 child.validate()?;
327 }
328 validate_restart_budget(self.restart_budget)?;
329 validate_group_strategies(&self.group_strategies, &self.children)?;
330 validate_child_strategy_overrides(self)?;
331 validate_dynamic_policy(self.dynamic_supervisor_policy)?;
332 Ok(())
333 }
334}
335
336/// Validates an optional restart budget.
337///
338/// # Arguments
339///
340/// - `budget`: Optional restart budget to validate.
341///
342/// # Returns
343///
344/// Returns `Ok(())` when the budget is absent or valid.
345fn validate_restart_budget(budget: Option<RestartBudget>) -> Result<(), SupervisorError> {
346 let Some(budget) = budget else {
347 return Ok(());
348 };
349 if budget.max_restarts == 0 {
350 return Err(SupervisorError::fatal_config(
351 "restart budget max_restarts must be greater than zero",
352 ));
353 }
354 if budget.window.is_zero() {
355 return Err(SupervisorError::fatal_config(
356 "restart budget window must be greater than zero",
357 ));
358 }
359 Ok(())
360}
361
362/// Validates group strategy declarations.
363///
364/// # Arguments
365///
366/// - `strategies`: Group strategies declared on the supervisor.
367///
368/// # Returns
369///
370/// Returns `Ok(())` when group names are unique and valid.
371fn validate_group_strategies(
372 strategies: &[GroupStrategy],
373 children: &[ChildSpec],
374) -> Result<(), SupervisorError> {
375 let mut groups = HashSet::new();
376 for strategy in strategies {
377 if strategy.group.trim().is_empty() {
378 return Err(SupervisorError::fatal_config(
379 "group strategy group must not be empty",
380 ));
381 }
382 if !groups.insert(strategy.group.clone()) {
383 return Err(SupervisorError::fatal_config(format!(
384 "duplicate group strategy: {}",
385 strategy.group
386 )));
387 }
388 validate_restart_budget(strategy.restart_budget)?;
389 }
390 validate_group_membership(strategies, children)?;
391 Ok(())
392}
393
394/// Validates child membership against configured restart groups.
395///
396/// # Arguments
397///
398/// - `strategies`: Group strategies declared on the supervisor.
399/// - `children`: Children declared under the supervisor.
400///
401/// # Returns
402///
403/// Returns `Ok(())` when every configured group is used without ambiguity.
404fn validate_group_membership(
405 strategies: &[GroupStrategy],
406 children: &[ChildSpec],
407) -> Result<(), SupervisorError> {
408 let groups = strategies
409 .iter()
410 .map(|strategy| strategy.group.clone())
411 .collect::<HashSet<_>>();
412 for strategy in strategies {
413 if !children
414 .iter()
415 .any(|child| child.tags.contains(&strategy.group))
416 {
417 return Err(SupervisorError::fatal_config(format!(
418 "group strategy references unused group: {}",
419 strategy.group
420 )));
421 }
422 }
423 for child in children {
424 let configured_group_count = child
425 .tags
426 .iter()
427 .filter(|tag| groups.contains(*tag))
428 .count();
429 if configured_group_count > 1 {
430 return Err(SupervisorError::fatal_config(format!(
431 "child strategy groups are ambiguous for child: {}",
432 child.id
433 )));
434 }
435 }
436 Ok(())
437}
438
439/// Validates child strategy overrides.
440///
441/// # Arguments
442///
443/// - `spec`: Supervisor specification that owns children and overrides.
444///
445/// # Returns
446///
447/// Returns `Ok(())` when every override targets a known child once.
448fn validate_child_strategy_overrides(spec: &SupervisorSpec) -> Result<(), SupervisorError> {
449 let child_ids = spec
450 .children
451 .iter()
452 .map(|child| child.id.clone())
453 .collect::<HashSet<_>>();
454 let mut overrides = HashSet::new();
455 for strategy in &spec.child_strategy_overrides {
456 if !child_ids.contains(&strategy.child_id) {
457 return Err(SupervisorError::fatal_config(format!(
458 "child strategy override references unknown child: {}",
459 strategy.child_id
460 )));
461 }
462 if !overrides.insert(strategy.child_id.clone()) {
463 return Err(SupervisorError::fatal_config(format!(
464 "duplicate child strategy override: {}",
465 strategy.child_id
466 )));
467 }
468 validate_restart_budget(strategy.restart_budget)?;
469 }
470 Ok(())
471}
472
473/// Validates dynamic supervisor policy.
474///
475/// # Arguments
476///
477/// - `policy`: Dynamic supervisor policy to validate.
478///
479/// # Returns
480///
481/// Returns `Ok(())` when the policy limit is coherent.
482fn validate_dynamic_policy(policy: DynamicSupervisorPolicy) -> Result<(), SupervisorError> {
483 if policy.child_limit == Some(0) {
484 return Err(SupervisorError::fatal_config(
485 "dynamic supervisor child_limit must be greater than zero",
486 ));
487 }
488 Ok(())
489}
490
/// Computes a channel capacity from the number of declared children.
///
/// # Arguments
///
/// - `child_count`: Count of children declared under the supervisor.
///
/// # Returns
///
/// Returns `child_count + 1`, clamped to `usize::MAX`, so the result is
/// never zero.
fn channel_capacity_for_children(child_count: usize) -> usize {
    // `checked_add` only fails at `usize::MAX`; clamp there instead of wrapping.
    child_count.checked_add(1).unwrap_or(usize::MAX)
}