// Source path: rust_supervisor/policy/task_role_defaults.rs

//! Task role defaults for supervised children.
//!
//! This module owns role classification, default policy bundles, effective
//! policy attribution, and semantic conflict diagnostics.

use crate::id::types::ChildId;
use crate::spec::child::{BackoffPolicy, RestartPolicy};
use crate::spec::supervisor::{EscalationPolicy, RestartLimit};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::fmt::{Display, Formatter};
use std::time::Duration;

14/// Task role classification for supervised children.
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
16#[serde(rename_all = "snake_case")]
17pub enum TaskRole {
18    /// Long-running service that should stay online.
19    Service,
20    /// Background worker with bounded retry semantics.
21    Worker,
22    /// One-shot job that must not auto-restart on success.
23    Job,
24    /// Auxiliary sidecar process attached to a primary service.
25    Sidecar,
26    /// Nested supervisor tree treated as a single unit.
27    Supervisor,
28}
29
30impl TaskRole {
31    /// Returns a stable low-cardinality role label.
32    ///
33    /// # Arguments
34    ///
35    /// This function has no arguments.
36    ///
37    /// # Returns
38    ///
39    /// Returns a snake_case static role label.
40    pub const fn as_str(self) -> &'static str {
41        match self {
42            Self::Service => "service",
43            Self::Worker => "worker",
44            Self::Job => "job",
45            Self::Sidecar => "sidecar",
46            Self::Supervisor => "supervisor",
47        }
48    }
49}
50
51impl Display for TaskRole {
52    /// Formats the role as a stable label.
53    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
54        formatter.write_str(self.as_str())
55    }
56}
57
58/// Configuration for sidecar attachment to a primary service.
59#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
60pub struct SidecarConfig {
61    /// Child ID of the primary service this sidecar attaches to.
62    pub primary_child_id: ChildId,
63    /// Whether lifecycle events are linked.
64    #[serde(default)]
65    pub linked_lifecycle: bool,
66}
67
68impl SidecarConfig {
69    /// Creates a sidecar binding configuration.
70    ///
71    /// # Arguments
72    ///
73    /// - `primary_child_id`: Child ID of the primary service.
74    /// - `linked_lifecycle`: Whether lifecycle operations are linked.
75    ///
76    /// # Returns
77    ///
78    /// Returns a [`SidecarConfig`] value.
79    pub fn new(primary_child_id: ChildId, linked_lifecycle: bool) -> Self {
80        Self {
81            primary_child_id,
82            linked_lifecycle,
83        }
84    }
85}
86
87/// Action taken when a child exits successfully.
88#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
89#[serde(rename_all = "snake_case")]
90pub enum OnSuccessAction {
91    /// Restart the child to keep it online.
92    Restart,
93    /// Stop the child permanently.
94    Stop,
95    /// Take no automatic action.
96    NoOp,
97}
98
99/// Action taken when a child exits with failure.
100#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
101#[serde(rename_all = "snake_case")]
102pub enum OnFailureAction {
103    /// Restart with backoff policy applied.
104    RestartWithBackoff,
105    /// Restart indefinitely.
106    RestartPermanent,
107    /// Stop and escalate to parent or shutdown tree.
108    StopAndEscalate,
109}
110
111/// Action taken when a child receives an explicit stop request.
112#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
113#[serde(rename_all = "snake_case")]
114pub enum OnManualStopAction {
115    /// Stop permanently until explicitly restarted.
116    StopForever,
117    /// Stop but allow a future explicit restart.
118    StopUntilExplicitRestart,
119}
120
121/// Action taken when a child exceeds its execution timeout.
122#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
123#[serde(rename_all = "snake_case")]
124pub enum OnTimeoutAction {
125    /// Restart with backoff policy applied.
126    RestartWithBackoff,
127    /// Stop and escalate to parent or shutdown tree.
128    StopAndEscalate,
129}
130
131/// Action taken when restart budget is exhausted.
132#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
133#[serde(rename_all = "snake_case")]
134pub enum OnBudgetExhaustedAction {
135    /// Stop and escalate to parent or shutdown tree.
136    StopAndEscalate,
137    /// Quarantine the child or scope without escalating.
138    Quarantine,
139}
140
141/// Default policy bundle bound to a specific task role.
142#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
143pub struct RoleDefaultPolicy {
144    /// Action on successful exit.
145    pub on_success_exit: OnSuccessAction,
146    /// Action on failure exit.
147    pub on_failure_exit: OnFailureAction,
148    /// Action on explicit manual stop.
149    pub on_manual_stop: OnManualStopAction,
150    /// Action on execution timeout.
151    pub on_timeout: OnTimeoutAction,
152    /// Action when restart budget is exhausted.
153    pub on_budget_exhausted: OnBudgetExhaustedAction,
154    /// Default restart limit.
155    pub default_restart_limit: Option<RestartLimit>,
156    /// Default escalation policy.
157    pub default_escalation_policy: Option<EscalationPolicy>,
158    /// Default backoff policy.
159    pub default_backoff_policy: Option<BackoffPolicy>,
160    /// Exit codes considered successful.
161    #[serde(default = "default_success_exit_codes")]
162    pub success_exit_codes: Vec<i32>,
163}
164
165/// Role-specific differences used to build a default policy.
166struct RoleDefaultPolicyDifferences {
167    /// Action on successful exit.
168    on_success_exit: OnSuccessAction,
169    /// Action on execution timeout.
170    on_timeout: OnTimeoutAction,
171    /// Maximum restart count inside the default restart limit window.
172    max_restarts: u32,
173}
174
175impl From<RoleDefaultPolicyDifferences> for RoleDefaultPolicy {
176    /// Converts role-specific differences into a complete default policy.
177    ///
178    /// # Arguments
179    ///
180    /// - `differences`: Role-specific policy fields.
181    ///
182    /// # Returns
183    ///
184    /// Returns a complete [`RoleDefaultPolicy`] with shared defaults applied.
185    fn from(differences: RoleDefaultPolicyDifferences) -> Self {
186        Self {
187            on_success_exit: differences.on_success_exit,
188            on_failure_exit: OnFailureAction::RestartWithBackoff,
189            on_manual_stop: OnManualStopAction::StopForever,
190            on_timeout: differences.on_timeout,
191            on_budget_exhausted: OnBudgetExhaustedAction::StopAndEscalate,
192            default_restart_limit: Some(bounded_restart_limit(differences.max_restarts)),
193            default_escalation_policy: Some(EscalationPolicy::EscalateToParent),
194            default_backoff_policy: Some(default_backoff_policy()),
195            success_exit_codes: default_success_exit_codes(),
196        }
197    }
198}
199
200impl RoleDefaultPolicy {
201    /// Returns the default policy pack for a task role.
202    ///
203    /// # Arguments
204    ///
205    /// - `role`: Task role used to select defaults.
206    ///
207    /// # Returns
208    ///
209    /// Returns a role-specific [`RoleDefaultPolicy`].
210    pub fn for_role(role: TaskRole) -> Self {
211        match role {
212            TaskRole::Service => service_default(),
213            TaskRole::Worker => worker_default(),
214            TaskRole::Job => job_default(),
215            TaskRole::Sidecar => sidecar_default(),
216            TaskRole::Supervisor => supervisor_default(),
217        }
218    }
219}
220
221/// Source used to build an effective policy.
222#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
223#[serde(rename_all = "snake_case")]
224pub enum PolicySource {
225    /// Policy came from an explicit role default.
226    RoleDefault,
227    /// Policy contains user overrides.
228    UserOverride,
229    /// Policy used the conservative fallback role.
230    FallbackDefault,
231}
232
233impl Display for PolicySource {
234    /// Formats the policy source as a stable label.
235    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
236        let label = match self {
237            Self::RoleDefault => "role_default",
238            Self::UserOverride => "user_override",
239            Self::FallbackDefault => "fallback_default",
240        };
241        formatter.write_str(label)
242    }
243}
244
245/// Severity classification for failure escalation bifurcation.
246///
247/// Ordering: Critical > Standard > Optional (highest to lowest severity).
248#[derive(
249    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
250)]
251pub enum SeverityClass {
252    /// Optional: failure follows noise-reduction path (no alert upgrade).
253    Optional,
254    /// Standard: follows the default TaskRole behavior.
255    Standard,
256    /// Critical: failure must trigger escalation path.
257    Critical,
258}
259
260/// Effective policy selected for one child.
261#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
262pub struct EffectivePolicy {
263    /// Effective task role after fallback handling.
264    pub task_role: TaskRole,
265    /// Policy pack selected for the effective role.
266    pub policy_pack: RoleDefaultPolicy,
267    /// Source of the effective policy.
268    pub source: PolicySource,
269    /// Whether the worker fallback default was used.
270    pub used_fallback: bool,
271    /// Fields explicitly overridden by the user.
272    pub overridden_fields: Vec<String>,
273    /// Severity classification for escalation bifurcation.
274    pub severity: SeverityClass,
275    /// Group name for group isolation (None = not grouped).
276    pub group_name: Option<String>,
277}
278
279impl EffectivePolicy {
280    /// Merges task role defaults with known user override markers.
281    ///
282    /// # Arguments
283    ///
284    /// - `role`: Optional declared task role.
285    /// - `overridden_fields`: Fields explicitly set by the user.
286    ///
287    /// # Returns
288    ///
289    /// Returns an [`EffectivePolicy`] with fallback attribution.
290    pub fn merge(role: Option<TaskRole>, overridden_fields: Vec<String>) -> Self {
291        let used_fallback = role.is_none();
292        let task_role = role.unwrap_or(TaskRole::Worker);
293        let source = if used_fallback {
294            PolicySource::FallbackDefault
295        } else if overridden_fields.is_empty() {
296            PolicySource::RoleDefault
297        } else {
298            PolicySource::UserOverride
299        };
300        let severity = Self::default_severity(task_role);
301        Self {
302            task_role,
303            policy_pack: RoleDefaultPolicy::for_role(task_role),
304            source,
305            used_fallback,
306            overridden_fields,
307            severity,
308            group_name: None,
309        }
310    }
311
312    /// Returns the default [`SeverityClass`] for a given [`TaskRole`].
313    fn default_severity(role: TaskRole) -> SeverityClass {
314        match role {
315            TaskRole::Service => SeverityClass::Critical,
316            TaskRole::Supervisor => SeverityClass::Critical,
317            TaskRole::Worker => SeverityClass::Standard,
318            TaskRole::Job => SeverityClass::Optional,
319            TaskRole::Sidecar => SeverityClass::Standard,
320        }
321    }
322
323    /// Builds an effective policy for a child specification.
324    ///
325    /// # Arguments
326    ///
327    /// - `child`: Child specification to inspect.
328    ///
329    /// # Returns
330    ///
331    /// Returns the effective role policy for the child.
332    pub fn for_child(child: &crate::spec::child::ChildSpec) -> Self {
333        let mut overridden = Vec::new();
334        if child.restart_policy != RestartPolicy::Transient {
335            overridden.push("restart_policy".to_string());
336        }
337        let effective_policy = Self::merge(child.task_role, overridden);
338        if child.task_role.is_none() {
339            tracing::warn!(
340                child_id = %child.id,
341                task_role = %effective_policy.task_role,
342                used_fallback_default = effective_policy.used_fallback,
343                effective_policy_source = %effective_policy.source,
344                "task role missing, falling back to worker default"
345            );
346        }
347        effective_policy
348    }
349}
350
351/// Describes one role semantic conflict.
352#[derive(Debug, Clone, PartialEq, Eq)]
353pub struct RoleSemanticConflict {
354    /// Child that owns the conflict.
355    pub child_id: ChildId,
356    /// Declared task role.
357    pub task_role: TaskRole,
358    /// Conflicting field name.
359    pub conflicting_field: String,
360    /// User-provided value.
361    pub user_value: String,
362    /// Role default expectation.
363    pub expected_semantic: String,
364    /// Human-readable reason.
365    pub reason: String,
366}
367
368/// Returns semantic conflicts for one child.
369///
370/// # Arguments
371///
372/// - `child`: Child specification to inspect.
373///
374/// # Returns
375///
376/// Returns a list of role semantic conflicts.
377pub fn semantic_conflicts_for_child(
378    child: &crate::spec::child::ChildSpec,
379) -> Vec<RoleSemanticConflict> {
380    let mut conflicts = Vec::new();
381    if child.task_role == Some(TaskRole::Job) && child.restart_policy == RestartPolicy::Permanent {
382        conflicts.push(RoleSemanticConflict {
383            child_id: child.id.clone(),
384            task_role: TaskRole::Job,
385            conflicting_field: "restart_policy".to_string(),
386            user_value: "permanent".to_string(),
387            expected_semantic: "job success should stop".to_string(),
388            reason: "Job role must not silently use permanent restart semantics".to_string(),
389        });
390    }
391    conflicts
392}
393
/// Returns default success exit codes.
///
/// Used both as the serde default for `RoleDefaultPolicy::success_exit_codes`
/// and when building the built-in role defaults.
///
/// # Arguments
///
/// This function has no arguments.
///
/// # Returns
///
/// Returns a vector containing exit code zero.
fn default_success_exit_codes() -> Vec<i32> {
    vec![0]
}

407/// Returns a bounded restart limit used by task role defaults.
408fn bounded_restart_limit(max_restarts: u32) -> RestartLimit {
409    RestartLimit::new(max_restarts, Duration::from_secs(60))
410}
411
412/// Returns a default backoff policy used by task role defaults.
413fn default_backoff_policy() -> BackoffPolicy {
414    BackoffPolicy::new(Duration::from_millis(50), Duration::from_secs(5), 0.2)
415}
416
417/// Returns service task role defaults.
418fn service_default() -> RoleDefaultPolicy {
419    RoleDefaultPolicyDifferences {
420        on_success_exit: OnSuccessAction::Restart,
421        on_timeout: OnTimeoutAction::RestartWithBackoff,
422        max_restarts: 10,
423    }
424    .into()
425}
426
427/// Returns worker task role defaults.
428fn worker_default() -> RoleDefaultPolicy {
429    RoleDefaultPolicyDifferences {
430        on_success_exit: OnSuccessAction::Stop,
431        on_timeout: OnTimeoutAction::RestartWithBackoff,
432        max_restarts: 3,
433    }
434    .into()
435}
436
437/// Returns job task role defaults.
438fn job_default() -> RoleDefaultPolicy {
439    RoleDefaultPolicyDifferences {
440        on_success_exit: OnSuccessAction::Stop,
441        on_timeout: OnTimeoutAction::StopAndEscalate,
442        max_restarts: 1,
443    }
444    .into()
445}
446
447/// Returns sidecar task role defaults.
448fn sidecar_default() -> RoleDefaultPolicy {
449    RoleDefaultPolicyDifferences {
450        on_success_exit: OnSuccessAction::Restart,
451        on_timeout: OnTimeoutAction::RestartWithBackoff,
452        max_restarts: 5,
453    }
454    .into()
455}
456
457/// Returns nested supervisor task role defaults.
458fn supervisor_default() -> RoleDefaultPolicy {
459    RoleDefaultPolicyDifferences {
460        on_success_exit: OnSuccessAction::Restart,
461        on_timeout: OnTimeoutAction::RestartWithBackoff,
462        max_restarts: 3,
463    }
464    .into()
465}