rust_supervisor/spec/supervisor.rs
1//! Supervisor declaration model.
2//!
3//! This module owns the root and nested supervisor specification shape used by
4//! tree construction and runtime startup.
5
6use crate::error::types::SupervisorError;
7use crate::id::types::{ChildId, SupervisorPath};
8use crate::spec::child::{BackoffPolicy, ChildSpec, HealthPolicy, RestartPolicy, ShutdownPolicy};
9use schemars::JsonSchema;
10use serde::{Deserialize, Serialize};
11use std::collections::HashSet;
12use std::time::Duration;
13
14/// Strategy used when a child exits and a restart scope is needed.
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
16pub enum SupervisionStrategy {
17 /// Restart only the failed child.
18 OneForOne,
19 /// Restart every child under the same supervisor.
20 OneForAll,
21 /// Restart the failed child and all children declared after it.
22 RestForOne,
23}
24
/// Action taken when a failure cannot be contained by the local restart scope.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EscalationPolicy {
    /// Hand the failure over to the parent supervisor.
    EscalateToParent,
    /// Bring down the current supervisor tree.
    ShutdownTree,
    /// Put the selected restart scope into quarantine.
    QuarantineScope,
}
35
/// Restart allowance that can be attached to a supervisor, a group, or a
/// single child override.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct RestartBudget {
    /// Upper bound on restarts counted inside one window.
    pub max_restarts: u32,
    /// Length of the window over which restarts are counted.
    pub window: Duration,
}

impl RestartBudget {
    /// Builds a restart budget from its two components.
    ///
    /// This is a `const fn`, so a budget can also be declared in `const` and
    /// `static` items. No validation happens here; zero values are accepted
    /// by this constructor.
    ///
    /// # Arguments
    ///
    /// - `max_restarts`: Maximum restart count allowed in the window.
    /// - `window`: Duration used for the restart count window.
    ///
    /// # Returns
    ///
    /// Returns a [`RestartBudget`] holding exactly the given values.
    pub const fn new(max_restarts: u32, window: Duration) -> Self {
        Self {
            max_restarts,
            window,
        }
    }
}
63
64/// Strategy and governance overrides for a named child group.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub struct GroupStrategy {
67 /// Low-cardinality group tag shared by children.
68 pub group: String,
69 /// Restart strategy applied inside the group.
70 pub strategy: SupervisionStrategy,
71 /// Optional restart budget for this group.
72 pub restart_budget: Option<RestartBudget>,
73 /// Optional escalation policy for this group.
74 pub escalation_policy: Option<EscalationPolicy>,
75}
76
77impl GroupStrategy {
78 /// Creates a group strategy.
79 ///
80 /// # Arguments
81 ///
82 /// - `group`: Child tag that identifies the restart group.
83 /// - `strategy`: Restart strategy applied to the group.
84 ///
85 /// # Returns
86 ///
87 /// Returns a [`GroupStrategy`] with no budget or escalation override.
88 pub fn new(group: impl Into<String>, strategy: SupervisionStrategy) -> Self {
89 Self {
90 group: group.into(),
91 strategy,
92 restart_budget: None,
93 escalation_policy: None,
94 }
95 }
96}
97
98/// Per-child strategy and governance override.
99#[derive(Debug, Clone, PartialEq, Eq)]
100pub struct ChildStrategyOverride {
101 /// Child identifier that owns the override.
102 pub child_id: ChildId,
103 /// Restart strategy used when this child fails.
104 pub strategy: SupervisionStrategy,
105 /// Optional restart budget for this child.
106 pub restart_budget: Option<RestartBudget>,
107 /// Optional escalation policy for this child.
108 pub escalation_policy: Option<EscalationPolicy>,
109}
110
111impl ChildStrategyOverride {
112 /// Creates a child strategy override.
113 ///
114 /// # Arguments
115 ///
116 /// - `child_id`: Child identifier that owns the override.
117 /// - `strategy`: Restart strategy used for the child.
118 ///
119 /// # Returns
120 ///
121 /// Returns a [`ChildStrategyOverride`] value.
122 pub fn new(child_id: ChildId, strategy: SupervisionStrategy) -> Self {
123 Self {
124 child_id,
125 strategy,
126 restart_budget: None,
127 escalation_policy: None,
128 }
129 }
130}
131
/// Policy that governs adding children to a supervisor at runtime.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct DynamicSupervisorPolicy {
    /// Whether runtime child additions are allowed.
    pub enabled: bool,
    /// Optional maximum number of declared and dynamic children.
    pub child_limit: Option<usize>,
}

impl DynamicSupervisorPolicy {
    /// Creates an enabled policy without a child limit.
    ///
    /// This is a `const fn`, so the policy can be used in `const` and
    /// `static` items.
    ///
    /// # Arguments
    ///
    /// This function has no arguments.
    ///
    /// # Returns
    ///
    /// Returns a policy with `enabled == true` and `child_limit == None`.
    pub const fn unbounded() -> Self {
        Self {
            enabled: true,
            child_limit: None,
        }
    }

    /// Creates an enabled policy capped at `child_limit` children.
    ///
    /// This is a `const fn`, so the policy can be used in `const` and
    /// `static` items. The limit is stored as given; it is not validated here.
    ///
    /// # Arguments
    ///
    /// - `child_limit`: Maximum declared and dynamic child count.
    ///
    /// # Returns
    ///
    /// Returns a policy with `enabled == true` and
    /// `child_limit == Some(child_limit)`.
    pub const fn limited(child_limit: usize) -> Self {
        Self {
            enabled: true,
            child_limit: Some(child_limit),
        }
    }

    /// Reports whether another dynamic child can be added.
    ///
    /// # Arguments
    ///
    /// - `current_child_count`: Current declared plus dynamic child count.
    ///
    /// # Returns
    ///
    /// Returns `true` when the policy is enabled and either no limit is set
    /// or `current_child_count` is strictly below the limit.
    pub const fn allows_addition(&self, current_child_count: usize) -> bool {
        // A plain `match` (instead of a closure-based combinator) keeps this
        // callable from constant contexts.
        self.enabled
            && match self.child_limit {
                None => true,
                Some(limit) => current_child_count < limit,
            }
    }
}
190
191/// Restart plan selected after strategy, group, and child overrides are merged.
192#[derive(Debug, Clone, PartialEq, Eq)]
193pub struct StrategyExecutionPlan {
194 /// Child whose exit triggered the plan.
195 pub failed_child: ChildId,
196 /// Strategy selected for this execution.
197 pub strategy: SupervisionStrategy,
198 /// Child identifiers selected for restart.
199 pub scope: Vec<ChildId>,
200 /// Optional group that constrained the scope.
201 pub group: Option<String>,
202 /// Optional restart budget selected for the plan.
203 pub restart_budget: Option<RestartBudget>,
204 /// Optional escalation policy selected for the plan.
205 pub escalation_policy: Option<EscalationPolicy>,
206 /// Whether dynamic supervisor additions are allowed.
207 pub dynamic_supervisor_enabled: bool,
208}
209
210/// Declarative specification for one supervisor node.
211#[derive(Debug, Clone)]
212pub struct SupervisorSpec {
213 /// Stable path for this supervisor.
214 pub path: SupervisorPath,
215 /// Restart scope strategy for child exits.
216 pub strategy: SupervisionStrategy,
217 /// Children in declaration order.
218 pub children: Vec<ChildSpec>,
219 /// Configuration version that produced this declaration.
220 pub config_version: String,
221 /// Restart policy inherited by children that do not override it.
222 pub default_restart_policy: RestartPolicy,
223 /// Backoff policy inherited by children that do not override it.
224 pub default_backoff_policy: BackoffPolicy,
225 /// Health policy inherited by children that do not override it.
226 pub default_health_policy: HealthPolicy,
227 /// Shutdown policy inherited by children that do not override it.
228 pub default_shutdown_policy: ShutdownPolicy,
229 /// Maximum supervisor failures before parent escalation.
230 pub supervisor_failure_limit: u32,
231 /// Optional supervisor-level restart budget.
232 pub restart_budget: Option<RestartBudget>,
233 /// Optional supervisor-level escalation policy.
234 pub escalation_policy: Option<EscalationPolicy>,
235 /// Group-level strategy overrides.
236 pub group_strategies: Vec<GroupStrategy>,
237 /// Child-level strategy overrides.
238 pub child_strategy_overrides: Vec<ChildStrategyOverride>,
239 /// Runtime policy for dynamic child additions.
240 pub dynamic_supervisor_policy: DynamicSupervisorPolicy,
241 /// Control command channel capacity.
242 pub control_channel_capacity: usize,
243 /// Event broadcast channel capacity.
244 pub event_channel_capacity: usize,
245}
246
247impl SupervisorSpec {
248 /// Creates a root supervisor specification.
249 ///
250 /// # Arguments
251 ///
252 /// - `children`: Children declared under the root supervisor.
253 ///
254 /// # Returns
255 ///
256 /// Returns a root [`SupervisorSpec`] with declaration-order children.
257 ///
258 /// # Examples
259 ///
260 /// ```
261 /// let spec = rust_supervisor::spec::supervisor::SupervisorSpec::root(Vec::new());
262 /// assert_eq!(spec.path.to_string(), "/");
263 /// ```
264 pub fn root(children: Vec<ChildSpec>) -> Self {
265 let channel_capacity = channel_capacity_for_children(children.len());
266 Self {
267 path: SupervisorPath::root(),
268 strategy: SupervisionStrategy::OneForOne,
269 children,
270 config_version: String::from("unversioned"),
271 default_restart_policy: RestartPolicy::Transient,
272 default_backoff_policy: BackoffPolicy::new(
273 Duration::from_millis(10),
274 Duration::from_secs(1),
275 0.0,
276 ),
277 default_health_policy: HealthPolicy::new(
278 Duration::from_secs(1),
279 Duration::from_secs(3),
280 ),
281 default_shutdown_policy: ShutdownPolicy::new(
282 Duration::from_secs(5),
283 Duration::from_secs(1),
284 ),
285 supervisor_failure_limit: 1,
286 restart_budget: None,
287 escalation_policy: None,
288 group_strategies: Vec::new(),
289 child_strategy_overrides: Vec::new(),
290 dynamic_supervisor_policy: DynamicSupervisorPolicy::unbounded(),
291 control_channel_capacity: channel_capacity,
292 event_channel_capacity: channel_capacity.saturating_mul(2),
293 }
294 }
295
296 /// Validates this supervisor and its direct children.
297 ///
298 /// # Arguments
299 ///
300 /// This function has no arguments.
301 ///
302 /// # Returns
303 ///
304 /// Returns `Ok(())` when the supervisor declaration is usable.
305 pub fn validate(&self) -> Result<(), SupervisorError> {
306 if self.config_version.trim().is_empty() {
307 return Err(SupervisorError::fatal_config(
308 "config version must not be empty",
309 ));
310 }
311 if self.supervisor_failure_limit == 0 {
312 return Err(SupervisorError::fatal_config(
313 "supervisor failure limit must be greater than zero",
314 ));
315 }
316 if self.control_channel_capacity == 0 {
317 return Err(SupervisorError::fatal_config(
318 "control channel capacity must be greater than zero",
319 ));
320 }
321 if self.event_channel_capacity == 0 {
322 return Err(SupervisorError::fatal_config(
323 "event channel capacity must be greater than zero",
324 ));
325 }
326 for child in &self.children {
327 child.validate()?;
328 }
329 validate_restart_budget(self.restart_budget)?;
330 validate_group_strategies(&self.group_strategies, &self.children)?;
331 validate_child_strategy_overrides(self)?;
332 validate_dynamic_policy(self.dynamic_supervisor_policy)?;
333 Ok(())
334 }
335}
336
337/// Validates an optional restart budget.
338///
339/// # Arguments
340///
341/// - `budget`: Optional restart budget to validate.
342///
343/// # Returns
344///
345/// Returns `Ok(())` when the budget is absent or valid.
346fn validate_restart_budget(budget: Option<RestartBudget>) -> Result<(), SupervisorError> {
347 let Some(budget) = budget else {
348 return Ok(());
349 };
350 if budget.max_restarts == 0 {
351 return Err(SupervisorError::fatal_config(
352 "restart budget max_restarts must be greater than zero",
353 ));
354 }
355 if budget.window.is_zero() {
356 return Err(SupervisorError::fatal_config(
357 "restart budget window must be greater than zero",
358 ));
359 }
360 Ok(())
361}
362
363/// Validates group strategy declarations.
364///
365/// # Arguments
366///
367/// - `strategies`: Group strategies declared on the supervisor.
368///
369/// # Returns
370///
371/// Returns `Ok(())` when group names are unique and valid.
372fn validate_group_strategies(
373 strategies: &[GroupStrategy],
374 children: &[ChildSpec],
375) -> Result<(), SupervisorError> {
376 let mut groups = HashSet::new();
377 for strategy in strategies {
378 if strategy.group.trim().is_empty() {
379 return Err(SupervisorError::fatal_config(
380 "group strategy group must not be empty",
381 ));
382 }
383 if !groups.insert(strategy.group.clone()) {
384 return Err(SupervisorError::fatal_config(format!(
385 "duplicate group strategy: {}",
386 strategy.group
387 )));
388 }
389 validate_restart_budget(strategy.restart_budget)?;
390 }
391 validate_group_membership(strategies, children)?;
392 Ok(())
393}
394
395/// Validates child membership against configured restart groups.
396///
397/// # Arguments
398///
399/// - `strategies`: Group strategies declared on the supervisor.
400/// - `children`: Children declared under the supervisor.
401///
402/// # Returns
403///
404/// Returns `Ok(())` when every configured group is used without ambiguity.
405fn validate_group_membership(
406 strategies: &[GroupStrategy],
407 children: &[ChildSpec],
408) -> Result<(), SupervisorError> {
409 let groups = strategies
410 .iter()
411 .map(|strategy| strategy.group.clone())
412 .collect::<HashSet<_>>();
413 for strategy in strategies {
414 if !children
415 .iter()
416 .any(|child| child.tags.contains(&strategy.group))
417 {
418 return Err(SupervisorError::fatal_config(format!(
419 "group strategy references unused group: {}",
420 strategy.group
421 )));
422 }
423 }
424 for child in children {
425 let configured_group_count = child
426 .tags
427 .iter()
428 .filter(|tag| groups.contains(*tag))
429 .count();
430 if configured_group_count > 1 {
431 return Err(SupervisorError::fatal_config(format!(
432 "child strategy groups are ambiguous for child: {}",
433 child.id
434 )));
435 }
436 }
437 Ok(())
438}
439
440/// Validates child strategy overrides.
441///
442/// # Arguments
443///
444/// - `spec`: Supervisor specification that owns children and overrides.
445///
446/// # Returns
447///
448/// Returns `Ok(())` when every override targets a known child once.
449fn validate_child_strategy_overrides(spec: &SupervisorSpec) -> Result<(), SupervisorError> {
450 let child_ids = spec
451 .children
452 .iter()
453 .map(|child| child.id.clone())
454 .collect::<HashSet<_>>();
455 let mut overrides = HashSet::new();
456 for strategy in &spec.child_strategy_overrides {
457 if !child_ids.contains(&strategy.child_id) {
458 return Err(SupervisorError::fatal_config(format!(
459 "child strategy override references unknown child: {}",
460 strategy.child_id
461 )));
462 }
463 if !overrides.insert(strategy.child_id.clone()) {
464 return Err(SupervisorError::fatal_config(format!(
465 "duplicate child strategy override: {}",
466 strategy.child_id
467 )));
468 }
469 validate_restart_budget(strategy.restart_budget)?;
470 }
471 Ok(())
472}
473
474/// Validates dynamic supervisor policy.
475///
476/// # Arguments
477///
478/// - `policy`: Dynamic supervisor policy to validate.
479///
480/// # Returns
481///
482/// Returns `Ok(())` when the policy limit is coherent.
483fn validate_dynamic_policy(policy: DynamicSupervisorPolicy) -> Result<(), SupervisorError> {
484 if policy.child_limit == Some(0) {
485 return Err(SupervisorError::fatal_config(
486 "dynamic supervisor child_limit must be greater than zero",
487 ));
488 }
489 Ok(())
490}
491
/// Computes a channel capacity from the number of declared children.
///
/// # Arguments
///
/// - `child_count`: Number of children declared under the supervisor.
///
/// # Returns
///
/// Returns `child_count + 1`, clamped to `usize::MAX`, so the result is
/// never zero.
fn channel_capacity_for_children(child_count: usize) -> usize {
    // `checked_add` only fails at `usize::MAX`, where the clamp applies.
    child_count.checked_add(1).unwrap_or(usize::MAX)
}