Skip to main content

rust_supervisor/spec/
supervisor.rs

1//! Supervisor declaration model.
2//!
3//! This module owns the root and nested supervisor specification shape used by
4//! tree construction and runtime startup.
5
6use crate::error::types::SupervisorError;
7use crate::id::types::{ChildId, SupervisorPath};
8use crate::policy::budget::RestartBudgetConfig;
9use crate::policy::failure_window::FailureWindowConfig;
10use crate::policy::group::GroupDependencyEdge;
11use crate::policy::meltdown::MeltdownPolicy;
12use crate::policy::task_role_defaults::{SeverityClass, TaskRole, semantic_conflicts_for_child};
13use crate::spec::child::{BackoffPolicy, ChildSpec, HealthPolicy, RestartPolicy, ShutdownPolicy};
14use confique::Config;
15use schemars::JsonSchema;
16use serde::{Deserialize, Serialize};
17use std::collections::{HashMap, HashSet};
18use std::time::Duration;
19
20/// Strategy used when a child exits and a restart scope is needed.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
22pub enum SupervisionStrategy {
23    /// Restart only the failed child.
24    OneForOne,
25    /// Restart every child under the same supervisor.
26    OneForAll,
27    /// Restart the failed child and all children declared after it.
28    RestForOne,
29}
30
31/// Policy used when a restart scope cannot remain local.
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
33#[serde(rename_all = "snake_case")]
34pub enum EscalationPolicy {
35    /// Escalate the failure to the parent supervisor.
36    EscalateToParent,
37    /// Shut down the current supervisor tree.
38    ShutdownTree,
39    /// Quarantine the selected restart scope.
40    QuarantineScope,
41}
42
43/// Restart limit attached to supervisor, group, or child override settings.
44#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
45pub struct RestartLimit {
46    /// Maximum allowed restart count inside the accounting window.
47    pub max_restarts: u32,
48    /// Accounting window used for restart counts.
49    pub window: Duration,
50}
51
52impl RestartLimit {
53    /// Creates a restart limit.
54    ///
55    /// # Arguments
56    ///
57    /// - `max_restarts`: Maximum allowed restart count inside the accounting window.
58    /// - `window`: Accounting window used for restart counts.
59    ///
60    /// # Returns
61    ///
62    /// Returns a [`RestartLimit`] value.
63    pub fn new(max_restarts: u32, window: Duration) -> Self {
64        Self {
65            max_restarts,
66            window,
67        }
68    }
69}
70
71/// Strategy and governance overrides for a named child group.
72#[derive(Debug, Clone, PartialEq, Eq)]
73pub struct GroupStrategy {
74    /// Low-cardinality group tag shared by children.
75    pub group: String,
76    /// Restart strategy applied inside the group.
77    pub strategy: SupervisionStrategy,
78    /// Optional restart limit for this group.
79    pub restart_limit: Option<RestartLimit>,
80    /// Optional escalation policy for this group.
81    pub escalation_policy: Option<EscalationPolicy>,
82}
83
84impl GroupStrategy {
85    /// Creates a group strategy.
86    ///
87    /// # Arguments
88    ///
89    /// - `group`: Child tag that identifies the restart group.
90    /// - `strategy`: Restart strategy applied to the group.
91    ///
92    /// # Returns
93    ///
94    /// Returns a [`GroupStrategy`] without restart limit or escalation override.
95    pub fn new(group: impl Into<String>, strategy: SupervisionStrategy) -> Self {
96        Self {
97            group: group.into(),
98            strategy,
99            restart_limit: None,
100            escalation_policy: None,
101        }
102    }
103}
104
105/// Group-level configuration for restart budget, dependency edges, and
106/// severity defaults used by US1/US2/US3 policy evaluation.
107#[derive(Debug, Clone, PartialEq)]
108pub struct GroupConfig {
109    /// Low-cardinality group name shared by member children.
110    pub name: String,
111    /// Child identifiers that belong to this group.
112    pub children: Vec<ChildId>,
113    /// Restart budget configuration applied to this group.
114    ///
115    /// When `None`, the supervisor-level default budget is inherited.
116    /// If the supervisor also has no default, [`RestartBudgetConfig::safe_default`]
117    /// is used as a fallback.
118    pub budget: Option<RestartBudgetConfig>,
119}
120
121impl GroupConfig {
122    /// Creates a group configuration.
123    ///
124    /// # Arguments
125    ///
126    /// - `name`: Group name.
127    /// - `children`: Child identifiers belonging to this group.
128    /// - `budget`: Restart budget configuration for the group (None = inherit).
129    ///
130    /// # Returns
131    ///
132    /// Returns a [`GroupConfig`].
133    pub fn new(
134        name: impl Into<String>,
135        children: Vec<ChildId>,
136        budget: Option<RestartBudgetConfig>,
137    ) -> Self {
138        Self {
139            name: name.into(),
140            children,
141            budget,
142        }
143    }
144}
145
146/// Per-child strategy and governance override.
147#[derive(Debug, Clone, PartialEq, Eq)]
148pub struct ChildStrategyOverride {
149    /// Child identifier that owns the override.
150    pub child_id: ChildId,
151    /// Restart strategy used when this child fails.
152    pub strategy: SupervisionStrategy,
153    /// Optional restart limit for this child.
154    pub restart_limit: Option<RestartLimit>,
155    /// Optional escalation policy for this child.
156    pub escalation_policy: Option<EscalationPolicy>,
157}
158
159impl ChildStrategyOverride {
160    /// Creates a child strategy override.
161    ///
162    /// # Arguments
163    ///
164    /// - `child_id`: Child identifier that owns the override.
165    /// - `strategy`: Restart strategy used for the child.
166    ///
167    /// # Returns
168    ///
169    /// Returns a [`ChildStrategyOverride`] value.
170    pub fn new(child_id: ChildId, strategy: SupervisionStrategy) -> Self {
171        Self {
172            child_id,
173            strategy,
174            restart_limit: None,
175            escalation_policy: None,
176        }
177    }
178}
179
/// Policy governing whether children may be added to a supervisor at runtime.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DynamicSupervisorPolicy {
    /// `true` when runtime child additions are permitted at all.
    pub enabled: bool,
    /// Upper bound on declared plus dynamic children; `None` means no bound.
    pub child_limit: Option<usize>,
}

impl DynamicSupervisorPolicy {
    /// Builds a policy that permits dynamic additions with no child cap.
    ///
    /// # Arguments
    ///
    /// This function has no arguments.
    ///
    /// # Returns
    ///
    /// Returns an enabled policy whose `child_limit` is `None`.
    pub fn unbounded() -> Self {
        DynamicSupervisorPolicy {
            enabled: true,
            child_limit: None,
        }
    }

    /// Builds a policy that permits dynamic additions up to a child cap.
    ///
    /// # Arguments
    ///
    /// - `child_limit`: Maximum number of declared plus dynamic children.
    ///
    /// # Returns
    ///
    /// Returns an enabled policy whose `child_limit` is `Some(child_limit)`.
    pub fn limited(child_limit: usize) -> Self {
        DynamicSupervisorPolicy {
            enabled: true,
            child_limit: Some(child_limit),
        }
    }

    /// Tells whether one more dynamic child may be added.
    ///
    /// # Arguments
    ///
    /// - `current_child_count`: Current number of declared plus dynamic children.
    ///
    /// # Returns
    ///
    /// Returns `true` when the policy is enabled and the current count is
    /// strictly below the limit (or no limit is set).
    pub fn allows_addition(&self, current_child_count: usize) -> bool {
        if !self.enabled {
            return false;
        }
        match self.child_limit {
            Some(limit) => current_child_count < limit,
            None => true,
        }
    }
}
238
239/// Restart plan selected after strategy, group, and child overrides are merged.
240#[derive(Debug, Clone, PartialEq, Eq)]
241pub struct StrategyExecutionPlan {
242    /// Child whose exit triggered the plan.
243    pub failed_child: ChildId,
244    /// Strategy selected for this execution.
245    pub strategy: SupervisionStrategy,
246    /// Child identifiers selected for restart.
247    pub scope: Vec<ChildId>,
248    /// Optional group that constrained the scope.
249    pub group: Option<String>,
250    /// Optional restart limit selected by this execution plan.
251    pub restart_limit: Option<RestartLimit>,
252    /// Optional escalation policy selected for the plan.
253    pub escalation_policy: Option<EscalationPolicy>,
254    /// Whether dynamic supervisor additions are allowed.
255    pub dynamic_supervisor_enabled: bool,
256}
257
258/// Backpressure strategy for slow event subscribers.
259#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
260#[serde(rename_all = "snake_case")]
261pub enum BackpressureStrategy {
262    /// Alert and block the producer when buffers fill up; never drop events.
263    AlertAndBlock,
264    /// Sample and discard events when buffers fill up; record the ratio in the audit trail.
265    SampleAndAudit,
266}
267
268impl Default for BackpressureStrategy {
269    /// Returns the default non-dropping backpressure strategy.
270    fn default() -> Self {
271        Self::AlertAndBlock
272    }
273}
274
275/// Configuration for event subscriber backpressure.
276#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
277pub struct BackpressureConfig {
278    /// Backpressure strategy selection.
279    #[config(default = "alert_and_block")]
280    #[serde(default)]
281    pub strategy: BackpressureStrategy,
282    /// Buffer occupancy soft threshold percentage (triggers warning alert).
283    #[config(default = 80)]
284    #[serde(default = "default_warn_threshold")]
285    pub warn_threshold_pct: u8,
286    /// Buffer occupancy hard threshold percentage (triggers degradation).
287    #[config(default = 95)]
288    #[serde(default = "default_critical_threshold")]
289    pub critical_threshold_pct: u8,
290    /// Sliding window duration in seconds for backpressure evaluation.
291    #[config(default = 30)]
292    #[serde(default = "default_window_secs")]
293    pub window_secs: u64,
294    /// Capacity of the dedicated audit channel.
295    #[config(default = 1024)]
296    #[serde(default = "default_audit_capacity")]
297    pub audit_channel_capacity: usize,
298}
299
/// Serde default for `warn_threshold_pct`: 80 percent.
fn default_warn_threshold() -> u8 {
    const DEFAULT_WARN_THRESHOLD_PCT: u8 = 80;
    DEFAULT_WARN_THRESHOLD_PCT
}
304
/// Serde default for `critical_threshold_pct`: 95 percent.
fn default_critical_threshold() -> u8 {
    const DEFAULT_CRITICAL_THRESHOLD_PCT: u8 = 95;
    DEFAULT_CRITICAL_THRESHOLD_PCT
}
309
/// Serde default for `window_secs`: a 30-second evaluation window.
fn default_window_secs() -> u64 {
    const DEFAULT_WINDOW_SECS: u64 = 30;
    DEFAULT_WINDOW_SECS
}
314
/// Serde default for `audit_channel_capacity`: 1024 entries.
fn default_audit_capacity() -> usize {
    const DEFAULT_AUDIT_CHANNEL_CAPACITY: usize = 1024;
    DEFAULT_AUDIT_CHANNEL_CAPACITY
}
319
320impl Default for BackpressureConfig {
321    /// Returns the default backpressure configuration with `AlertAndBlock` strategy.
322    fn default() -> Self {
323        Self {
324            strategy: BackpressureStrategy::AlertAndBlock,
325            warn_threshold_pct: default_warn_threshold(),
326            critical_threshold_pct: default_critical_threshold(),
327            window_secs: default_window_secs(),
328            audit_channel_capacity: default_audit_capacity(),
329        }
330    }
331}
332
333/// Declarative specification for one supervisor node.
334#[derive(Debug, Clone)]
335pub struct SupervisorSpec {
336    /// Stable path for this supervisor.
337    pub path: SupervisorPath,
338    /// Restart scope strategy for child exits.
339    pub strategy: SupervisionStrategy,
340    /// Children in declaration order.
341    pub children: Vec<ChildSpec>,
342    /// Configuration version that produced this declaration.
343    pub config_version: String,
344    /// Restart policy inherited by children that do not override it.
345    pub default_restart_policy: RestartPolicy,
346    /// Backoff policy inherited by children that do not override it.
347    pub default_backoff_policy: BackoffPolicy,
348    /// Health policy inherited by children that do not override it.
349    pub default_health_policy: HealthPolicy,
350    /// Shutdown policy inherited by children that do not override it.
351    pub default_shutdown_policy: ShutdownPolicy,
352    /// Maximum supervisor failures before parent escalation.
353    pub supervisor_failure_limit: u32,
354    /// Optional supervisor-level restart limit.
355    pub restart_limit: Option<RestartLimit>,
356    /// Optional supervisor-level escalation policy.
357    pub escalation_policy: Option<EscalationPolicy>,
358    /// Group-level strategy overrides.
359    pub group_strategies: Vec<GroupStrategy>,
360    /// Group-level configurations for restart budget, membership, and isolation.
361    pub group_configs: Vec<GroupConfig>,
362    /// Cross-group dependency edges for fault propagation.
363    pub group_dependencies: Vec<GroupDependencyEdge>,
364    /// Default severity class per task role for escalation bifurcation (US3).
365    pub severity_defaults: HashMap<TaskRole, SeverityClass>,
366    /// Child-level strategy overrides.
367    pub child_strategy_overrides: Vec<ChildStrategyOverride>,
368    /// Runtime policy for dynamic child additions.
369    pub dynamic_supervisor_policy: DynamicSupervisorPolicy,
370    /// Control command channel capacity.
371    pub control_channel_capacity: usize,
372    /// Event broadcast channel capacity.
373    pub event_channel_capacity: usize,
374    /// Backpressure policy used by observability event subscribers.
375    pub backpressure_config: BackpressureConfig,
376    /// Failure fuse policy used by the supervision pipeline.
377    pub meltdown_policy: MeltdownPolicy,
378    /// Failure accumulation window used by the supervision pipeline.
379    pub failure_window_config: FailureWindowConfig,
380    /// Restart budget used by the supervision pipeline.
381    pub restart_budget_config: RestartBudgetConfig,
382    /// Event journal capacity used by the supervision pipeline.
383    pub pipeline_journal_capacity: usize,
384    /// Subscriber queue capacity used by the supervision pipeline.
385    pub pipeline_subscriber_capacity: usize,
386    /// Maximum concurrent restarts allowed for this supervisor instance.
387    pub concurrent_restart_limit: u32,
388}
389
390impl SupervisorSpec {
391    /// Creates a root supervisor specification.
392    ///
393    /// # Arguments
394    ///
395    /// - `children`: Children declared under the root supervisor.
396    ///
397    /// # Returns
398    ///
399    /// Returns a root [`SupervisorSpec`] with declaration-order children.
400    ///
401    /// # Examples
402    ///
403    /// ```
404    /// let spec = rust_supervisor::spec::supervisor::SupervisorSpec::root(Vec::new());
405    /// assert_eq!(spec.path.to_string(), "/");
406    /// ```
407    pub fn root(children: Vec<ChildSpec>) -> Self {
408        let channel_capacity = channel_capacity_for_children(children.len());
409        Self {
410            path: SupervisorPath::root(),
411            strategy: SupervisionStrategy::OneForOne,
412            children,
413            config_version: String::from("unversioned"),
414            default_restart_policy: RestartPolicy::Transient,
415            default_backoff_policy: BackoffPolicy::new(
416                Duration::from_millis(10),
417                Duration::from_secs(1),
418                0.0,
419            ),
420            default_health_policy: HealthPolicy::new(
421                Duration::from_secs(1),
422                Duration::from_secs(3),
423            ),
424            default_shutdown_policy: ShutdownPolicy::new(
425                Duration::from_secs(5),
426                Duration::from_secs(1),
427            ),
428            supervisor_failure_limit: 1,
429            restart_limit: None,
430            escalation_policy: None,
431            group_strategies: Vec::new(),
432            group_configs: Vec::new(),
433            group_dependencies: Vec::new(),
434            severity_defaults: HashMap::new(),
435            child_strategy_overrides: Vec::new(),
436            dynamic_supervisor_policy: DynamicSupervisorPolicy::unbounded(),
437            control_channel_capacity: channel_capacity,
438            event_channel_capacity: channel_capacity.saturating_mul(2),
439            backpressure_config: BackpressureConfig::default(),
440            meltdown_policy: MeltdownPolicy::new(
441                3,
442                Duration::from_secs(10),
443                5,
444                Duration::from_secs(30),
445                10,
446                Duration::from_secs(60),
447                Duration::from_secs(120),
448            ),
449            failure_window_config: FailureWindowConfig::time_sliding(60, 5),
450            restart_budget_config: RestartBudgetConfig::safe_default(),
451            pipeline_journal_capacity: 100,
452            pipeline_subscriber_capacity: 10,
453            concurrent_restart_limit: 5,
454        }
455    }
456
457    /// Validates this supervisor and its direct children.
458    ///
459    /// # Arguments
460    ///
461    /// This function has no arguments.
462    ///
463    /// # Returns
464    ///
465    /// Returns `Ok(())` when the supervisor declaration is usable.
466    pub fn validate(&self) -> Result<(), SupervisorError> {
467        if self.config_version.trim().is_empty() {
468            return Err(SupervisorError::fatal_config(
469                "config version must not be empty",
470            ));
471        }
472        if self.supervisor_failure_limit == 0 {
473            return Err(SupervisorError::fatal_config(
474                "supervisor failure limit must be greater than zero",
475            ));
476        }
477        if self.control_channel_capacity == 0 {
478            return Err(SupervisorError::fatal_config(
479                "control channel capacity must be greater than zero",
480            ));
481        }
482        if self.event_channel_capacity == 0 {
483            return Err(SupervisorError::fatal_config(
484                "event channel capacity must be greater than zero",
485            ));
486        }
487        validate_backpressure_config(&self.backpressure_config)?;
488        for child in &self.children {
489            child.validate()?;
490        }
491        validate_restart_limit(self.restart_limit)?;
492        validate_group_strategies(&self.group_strategies, &self.children)?;
493        validate_child_strategy_overrides(self)?;
494        validate_task_roles(&self.children)?;
495        validate_dynamic_policy(self.dynamic_supervisor_policy)?;
496        validate_child_group_names(&self.children, &self.group_configs)?;
497        validate_pipeline_policy(self)?;
498        Ok(())
499    }
500}
501
502/// Validates that every child referencing a group name actually points to an
503/// existing [`GroupConfig`]. Unknown group names are rejected at load time
504/// to prevent silent isolation failures due to typos.
505fn validate_child_group_names(
506    children: &[ChildSpec],
507    group_configs: &[GroupConfig],
508) -> Result<(), SupervisorError> {
509    let group_names: std::collections::HashSet<&str> =
510        group_configs.iter().map(|g| g.name.as_str()).collect();
511
512    for child in children {
513        if let Some(ref group_name) = child.group
514            && !group_names.contains(group_name.as_str())
515        {
516            return Err(SupervisorError::fatal_config(format!(
517                "child '{}' references unknown group '{}'; available groups: {:?}",
518                child.id,
519                group_name,
520                group_names.iter().copied().collect::<Vec<_>>(),
521            )));
522        }
523    }
524    Ok(())
525}
526
527/// Validates an optional restart limit.
528///
529/// # Arguments
530///
531/// - `limit`: Optional restart limit to validate.
532///
533/// # Returns
534///
535/// Returns `Ok(())` when the limit is absent or valid.
536fn validate_restart_limit(limit: Option<RestartLimit>) -> Result<(), SupervisorError> {
537    let Some(limit) = limit else {
538        return Ok(());
539    };
540    if limit.max_restarts == 0 {
541        return Err(SupervisorError::fatal_config(
542            "restart limit max_restarts must be greater than zero",
543        ));
544    }
545    if limit.window.is_zero() {
546        return Err(SupervisorError::fatal_config(
547            "restart limit window must be greater than zero",
548        ));
549    }
550    Ok(())
551}
552
553/// Validates group strategy declarations.
554///
555/// # Arguments
556///
557/// - `strategies`: Group strategies declared on the supervisor.
558///
559/// # Returns
560///
561/// Returns `Ok(())` when group names are unique and valid.
562fn validate_group_strategies(
563    strategies: &[GroupStrategy],
564    children: &[ChildSpec],
565) -> Result<(), SupervisorError> {
566    let mut groups = HashSet::new();
567    for strategy in strategies {
568        if strategy.group.trim().is_empty() {
569            return Err(SupervisorError::fatal_config(
570                "group strategy group must not be empty",
571            ));
572        }
573        if !groups.insert(strategy.group.clone()) {
574            return Err(SupervisorError::fatal_config(format!(
575                "duplicate group strategy: {}",
576                strategy.group
577            )));
578        }
579        validate_restart_limit(strategy.restart_limit)?;
580    }
581    validate_group_membership(strategies, children)?;
582    Ok(())
583}
584
585/// Validates child membership against configured restart groups.
586///
587/// # Arguments
588///
589/// - `strategies`: Group strategies declared on the supervisor.
590/// - `children`: Children declared under the supervisor.
591///
592/// # Returns
593///
594/// Returns `Ok(())` when every configured group is used without ambiguity.
595fn validate_group_membership(
596    strategies: &[GroupStrategy],
597    children: &[ChildSpec],
598) -> Result<(), SupervisorError> {
599    let groups = strategies
600        .iter()
601        .map(|strategy| strategy.group.clone())
602        .collect::<HashSet<_>>();
603    for strategy in strategies {
604        if !children
605            .iter()
606            .any(|child| child.tags.contains(&strategy.group))
607        {
608            return Err(SupervisorError::fatal_config(format!(
609                "group strategy references unused group: {}",
610                strategy.group
611            )));
612        }
613    }
614    for child in children {
615        let configured_group_count = child
616            .tags
617            .iter()
618            .filter(|tag| groups.contains(*tag))
619            .count();
620        if configured_group_count > 1 {
621            return Err(SupervisorError::fatal_config(format!(
622                "child strategy groups are ambiguous for child: {}",
623                child.id
624            )));
625        }
626    }
627    Ok(())
628}
629
630/// Validates child strategy overrides.
631///
632/// # Arguments
633///
634/// - `spec`: Supervisor specification that owns children and overrides.
635///
636/// # Returns
637///
638/// Returns `Ok(())` when every override targets a known child once.
639fn validate_child_strategy_overrides(spec: &SupervisorSpec) -> Result<(), SupervisorError> {
640    let child_ids = spec
641        .children
642        .iter()
643        .map(|child| child.id.clone())
644        .collect::<HashSet<_>>();
645    let mut overrides = HashSet::new();
646    for strategy in &spec.child_strategy_overrides {
647        if !child_ids.contains(&strategy.child_id) {
648            return Err(SupervisorError::fatal_config(format!(
649                "child strategy override references unknown child: {}",
650                strategy.child_id
651            )));
652        }
653        if !overrides.insert(strategy.child_id.clone()) {
654            return Err(SupervisorError::fatal_config(format!(
655                "duplicate child strategy override: {}",
656                strategy.child_id
657            )));
658        }
659        validate_restart_limit(strategy.restart_limit)?;
660    }
661    Ok(())
662}
663
664/// Validates task role relationships that require sibling context.
665///
666/// # Arguments
667///
668/// - `children`: Children declared under one supervisor.
669///
670/// # Returns
671///
672/// Returns `Ok(())` when sidecar bindings and semantic diagnostics are valid.
673fn validate_task_roles(children: &[ChildSpec]) -> Result<(), SupervisorError> {
674    let child_ids = children
675        .iter()
676        .map(|child| child.id.clone())
677        .collect::<HashSet<_>>();
678
679    for child in children {
680        emit_role_conflict_warnings(child);
681        if child.task_role != Some(TaskRole::Sidecar) {
682            continue;
683        }
684
685        let sidecar_config = child.sidecar_config.as_ref().ok_or_else(|| {
686            SupervisorError::fatal_config(format!(
687                "sidecar child {} requires sidecar_config",
688                child.id
689            ))
690        })?;
691
692        if !child_ids.contains(&sidecar_config.primary_child_id) {
693            return Err(SupervisorError::fatal_config(format!(
694                "sidecar child {} references unknown primary_child_id {}",
695                child.id, sidecar_config.primary_child_id
696            )));
697        }
698
699        let primary_child = children
700            .iter()
701            .find(|candidate| candidate.id == sidecar_config.primary_child_id)
702            .ok_or_else(|| {
703                SupervisorError::fatal_config(format!(
704                    "sidecar child {} references unknown primary_child_id {}",
705                    child.id, sidecar_config.primary_child_id
706                ))
707            })?;
708
709        if primary_child.task_role == Some(TaskRole::Sidecar) {
710            return Err(SupervisorError::fatal_config(format!(
711                "sidecar child {} must not use another sidecar {} as primary_child_id",
712                child.id, sidecar_config.primary_child_id
713            )));
714        }
715    }
716
717    Ok(())
718}
719
720/// Emits warning diagnostics for role semantic conflicts.
721///
722/// # Arguments
723///
724/// - `child`: Child specification being inspected.
725///
726/// # Returns
727///
728/// This function does not return a value.
729fn emit_role_conflict_warnings(child: &ChildSpec) {
730    for conflict in semantic_conflicts_for_child(child) {
731        tracing::warn!(
732            child_id = %conflict.child_id,
733            task_role = %conflict.task_role,
734            conflicting_field = %conflict.conflicting_field,
735            user_value = %conflict.user_value,
736            expected_semantic = %conflict.expected_semantic,
737            reason = %conflict.reason,
738            "task role semantic conflict"
739        );
740    }
741}
742
743/// Validates dynamic supervisor policy.
744///
745/// # Arguments
746///
747/// - `policy`: Dynamic supervisor policy to validate.
748///
749/// # Returns
750///
751/// Returns `Ok(())` when the policy limit is coherent.
752fn validate_dynamic_policy(policy: DynamicSupervisorPolicy) -> Result<(), SupervisorError> {
753    if policy.child_limit == Some(0) {
754        return Err(SupervisorError::fatal_config(
755            "dynamic supervisor child_limit must be greater than zero",
756        ));
757    }
758    Ok(())
759}
760
761/// Validates supervision pipeline policy values.
762///
763/// # Arguments
764///
765/// - `spec`: Supervisor specification to validate.
766///
767/// # Returns
768///
769/// Returns `Ok(())` when pipeline policy values are usable.
770fn validate_pipeline_policy(spec: &SupervisorSpec) -> Result<(), SupervisorError> {
771    if spec.pipeline_journal_capacity == 0 {
772        return Err(SupervisorError::fatal_config(
773            "pipeline journal capacity must be greater than zero",
774        ));
775    }
776    if spec.pipeline_subscriber_capacity == 0 {
777        return Err(SupervisorError::fatal_config(
778            "pipeline subscriber capacity must be greater than zero",
779        ));
780    }
781    if spec.concurrent_restart_limit == 0 {
782        return Err(SupervisorError::fatal_config(
783            "concurrent restart limit must be greater than zero",
784        ));
785    }
786    if spec.restart_budget_config.window.is_zero() {
787        return Err(SupervisorError::fatal_config(
788            "restart budget window must be greater than zero",
789        ));
790    }
791    if spec.restart_budget_config.max_burst == 0 {
792        return Err(SupervisorError::fatal_config(
793            "restart budget max_burst must be greater than zero",
794        ));
795    }
796    if spec.restart_budget_config.recovery_rate_per_sec <= 0.0 {
797        return Err(SupervisorError::fatal_config(
798            "restart budget recovery_rate_per_sec must be greater than zero",
799        ));
800    }
801    Ok(())
802}
803
804/// Validates observability backpressure policy.
805///
806/// # Arguments
807///
808/// - `config`: Backpressure configuration to validate.
809///
810/// # Returns
811///
812/// Returns `Ok(())` when thresholds and capacities are coherent.
813fn validate_backpressure_config(config: &BackpressureConfig) -> Result<(), SupervisorError> {
814    if config.warn_threshold_pct == 0 || config.warn_threshold_pct > 100 {
815        return Err(SupervisorError::fatal_config(
816            "backpressure warn_threshold_pct must be between 1 and 100",
817        ));
818    }
819    if config.critical_threshold_pct == 0 || config.critical_threshold_pct > 100 {
820        return Err(SupervisorError::fatal_config(
821            "backpressure critical_threshold_pct must be between 1 and 100",
822        ));
823    }
824    if config.warn_threshold_pct >= config.critical_threshold_pct {
825        return Err(SupervisorError::fatal_config(
826            "backpressure warn_threshold_pct must be less than critical_threshold_pct",
827        ));
828    }
829    if config.window_secs == 0 {
830        return Err(SupervisorError::fatal_config(
831            "backpressure window_secs must be greater than zero",
832        ));
833    }
834    if config.audit_channel_capacity == 0 {
835        return Err(SupervisorError::fatal_config(
836            "backpressure audit_channel_capacity must be greater than zero",
837        ));
838    }
839    Ok(())
840}
841
/// Computes a channel capacity from the number of declared children.
///
/// # Arguments
///
/// - `child_count`: Number of children declared under the supervisor.
///
/// # Returns
///
/// Returns `child_count + 1`, clamped to `usize::MAX` on overflow, so the
/// result is never zero.
fn channel_capacity_for_children(child_count: usize) -> usize {
    match child_count.checked_add(1) {
        Some(capacity) => capacity,
        None => usize::MAX,
    }
}