rust_supervisor/spec/child.rs
1//! Child declaration model.
2//!
3//! This module owns declarative child specifications and validates local child
4//! invariants before the runtime registers or starts work.
5
6use crate::error::types::SupervisorError;
7use crate::id::types::ChildId;
8use crate::policy::task_role_defaults::{SeverityClass, SidecarConfig, TaskRole};
9use crate::readiness::signal::ReadinessPolicy;
10use crate::task::factory::TaskFactory;
11use schemars::JsonSchema;
12use serde::{Deserialize, Serialize};
13use std::fmt::{Debug, Formatter};
14use std::sync::Arc;
15use std::time::Duration;
16
/// Kind of task represented by a child declaration.
///
/// Variants are serialized in `snake_case` (for example `async_worker`).
/// The kind also decides whether a task factory is expected: the two worker
/// kinds require one and the supervisor kind must not carry one (see
/// `validate_factory` in this module).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum TaskKind {
    /// Asynchronous worker that can be cancelled through its context.
    AsyncWorker,
    /// Blocking worker with explicit shutdown and escalation boundaries.
    BlockingWorker,
    /// Nested supervisor node.
    Supervisor,
}
28
29impl Default for TaskKind {
30 /// Returns the default task kind: [`AsyncWorker`](TaskKind::AsyncWorker).
31 fn default() -> Self {
32 Self::AsyncWorker
33 }
34}
35
/// Importance of a child to its parent supervisor.
///
/// Serialized in `snake_case` (`critical` or `optional`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum Criticality {
    /// The child is required for the supervisor to remain healthy.
    Critical,
    /// The child can fail without forcing parent shutdown.
    Optional,
}
45
46impl Default for Criticality {
47 /// Returns the default criticality: [`Optional`](Criticality::Optional).
48 fn default() -> Self {
49 Self::Optional
50 }
51}
52
/// Restart behavior attached to a child.
///
/// Serialized in `snake_case` (`permanent`, `transient`, `temporary`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum RestartPolicy {
    /// Restart regardless of the exit result.
    Permanent,
    /// Restart only when the task failed.
    Transient,
    /// Do not restart after any exit.
    Temporary,
}
64
65impl Default for RestartPolicy {
66 /// Returns the default restart policy: [`Permanent`](RestartPolicy::Permanent).
67 fn default() -> Self {
68 Self::Permanent
69 }
70}
71
/// Shutdown behavior attached to a child.
///
/// Holds the two time budgets of a stop sequence. The type itself applies no
/// validation; both fields are public and stored as given.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct ShutdownPolicy {
    /// Graceful stop budget for cooperative shutdown.
    pub graceful_timeout: Duration,
    /// Wait budget after an abort request.
    pub abort_wait: Duration,
}
80
81impl ShutdownPolicy {
82 /// Creates a shutdown policy.
83 ///
84 /// # Arguments
85 ///
86 /// - `graceful_timeout`: Cooperative shutdown budget.
87 /// - `abort_wait`: Wait budget after abort escalation.
88 ///
89 /// # Returns
90 ///
91 /// Returns a [`ShutdownPolicy`] value.
92 ///
93 /// # Examples
94 ///
95 /// ```
96 /// let policy = rust_supervisor::spec::child::ShutdownPolicy::new(
97 /// std::time::Duration::from_secs(1),
98 /// std::time::Duration::from_millis(100),
99 /// );
100 /// assert_eq!(policy.graceful_timeout.as_secs(), 1);
101 /// ```
102 pub fn new(graceful_timeout: Duration, abort_wait: Duration) -> Self {
103 Self {
104 graceful_timeout,
105 abort_wait,
106 }
107 }
108}
109
/// Health behavior attached to a child.
///
/// NOTE(review): the validation visible in this module does not compare
/// `stale_after` with `heartbeat_interval`; confirm whether another layer
/// rejects a staleness window shorter than the heartbeat interval.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct HealthPolicy {
    /// Expected heartbeat interval.
    pub heartbeat_interval: Duration,
    /// Maximum age for the last heartbeat before the child is stale.
    pub stale_after: Duration,
}
118
119impl HealthPolicy {
120 /// Creates a health policy.
121 ///
122 /// # Arguments
123 ///
124 /// - `heartbeat_interval`: Expected heartbeat interval.
125 /// - `stale_after`: Maximum heartbeat age.
126 ///
127 /// # Returns
128 ///
129 /// Returns a [`HealthPolicy`] value.
130 pub fn new(heartbeat_interval: Duration, stale_after: Duration) -> Self {
131 Self {
132 heartbeat_interval,
133 stale_after,
134 }
135 }
136}
137
/// Health check configuration for a child declaration.
///
/// Time values are whole seconds stored as `u64`. The `Default`
/// implementation yields a 10 second interval, a 5 second timeout and
/// 3 retries.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct HealthCheckConfig {
    /// Interval between health checks in seconds.
    pub check_interval_secs: u64,
    /// Timeout for each health check in seconds.
    pub timeout_secs: u64,
    /// Maximum retries before marking the child as unhealthy.
    pub max_retries: u32,
}
148
149impl Default for HealthCheckConfig {
150 /// Returns the default health check config: 10s interval, 5s timeout, 3 retries.
151 fn default() -> Self {
152 Self {
153 check_interval_secs: 10,
154 timeout_secs: 5,
155 max_retries: 3,
156 }
157 }
158}
159
/// Readiness check configuration for a child declaration.
///
/// Time values are whole seconds stored as `u64`. The `Default`
/// implementation yields a 5 second interval and a 3 second timeout.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct ReadinessConfig {
    /// Interval between readiness checks in seconds.
    pub check_interval_secs: u64,
    /// Timeout for each readiness check in seconds.
    pub timeout_secs: u64,
}
168
169impl Default for ReadinessConfig {
170 /// Returns the default readiness config: 5s interval, 3s timeout.
171 fn default() -> Self {
172 Self {
173 check_interval_secs: 5,
174 timeout_secs: 3,
175 }
176 }
177}
178
/// Resource limits for a child process.
///
/// Every limit is optional. NOTE(review): `None` presumably means "no limit
/// configured" — confirm against the code that applies these limits.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct ResourceLimits {
    /// Maximum memory in megabytes.
    pub max_memory_mb: Option<u64>,
    /// Maximum CPU usage as a percentage.
    ///
    /// NOTE(review): the `u8` type admits values up to 255 and no range check
    /// is visible in this module — confirm whether values above 100 are valid.
    pub max_cpu_percent: Option<u8>,
    /// Maximum number of open file descriptors.
    pub max_file_descriptors: Option<u64>,
}
189
/// Command permissions granted to a child.
///
/// The `Default` implementation denies shutdown and restart and allows only
/// the `"SIGTERM"` signal name.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct CommandPermissions {
    /// Whether the child may trigger supervisor shutdown.
    pub allow_shutdown: bool,
    /// Whether the child may request its own restart.
    pub allow_restart: bool,
    /// Signals the child is allowed to send, stored as signal names such as
    /// `"SIGTERM"`.
    pub allowed_signals: Vec<String>,
}
200
201impl Default for CommandPermissions {
202 /// Returns the default command permissions: no shutdown, no restart, SIGTERM only.
203 fn default() -> Self {
204 Self {
205 allow_shutdown: false,
206 allow_restart: false,
207 allowed_signals: vec!["SIGTERM".to_string()],
208 }
209 }
210}
211
/// Environment variable for a child.
///
/// A variable carries either a plain-text `value` or a `secret_ref`.
/// NOTE(review): `ChildSpec::validate` in this module does not inspect
/// environment entries, so the exclusivity described on the fields is not
/// enforced here — confirm where it is checked.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct EnvVar {
    /// Environment variable name.
    pub name: String,
    /// Plain-text value (mutually exclusive with `secret_ref`).
    pub value: Option<String>,
    /// Secret reference in `${SECRET_NAME}` format (mutually exclusive with `value`).
    pub secret_ref: Option<String>,
}
222
/// Secret reference for a child.
///
/// Identifies a secret by name and by its key path within the vault.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct SecretRef {
    /// Secret name used as an identifier.
    pub name: String,
    /// Key path within the vault.
    pub key: String,
    /// Whether the secret is required (vault offline treated as rejection when true).
    pub required: bool,
}
233
/// Backoff behavior attached to a child.
///
/// The type derives `PartialEq` but not `Eq` because `jitter_ratio` is an
/// `f64`. Construction performs no checks; `validate_backoff` in this module
/// rejects a policy whose `initial_delay` exceeds `max_delay` or whose
/// `jitter_ratio` lies outside `0.0..=1.0`.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct BackoffPolicy {
    /// Initial delay before the first restart.
    pub initial_delay: Duration,
    /// Maximum restart delay.
    pub max_delay: Duration,
    /// Jitter ratio between zero and one.
    pub jitter_ratio: f64,
}
244
245impl BackoffPolicy {
246 /// Creates a backoff policy.
247 ///
248 /// # Arguments
249 ///
250 /// - `initial_delay`: Initial restart delay.
251 /// - `max_delay`: Maximum restart delay.
252 /// - `jitter_ratio`: Jitter ratio between zero and one.
253 ///
254 /// # Returns
255 ///
256 /// Returns a [`BackoffPolicy`] value.
257 pub fn new(initial_delay: Duration, max_delay: Duration, jitter_ratio: f64) -> Self {
258 Self {
259 initial_delay,
260 max_delay,
261 jitter_ratio,
262 }
263 }
264}
265
/// Declarative specification for a child task or nested supervisor.
///
/// `Debug` is implemented by hand in this module so that the task factory is
/// left out of the formatted output. Fields marked `#[serde(default)]` may be
/// omitted from serialized input.
#[derive(Clone, Serialize, Deserialize, JsonSchema)]
pub struct ChildSpec {
    /// Stable child identifier.
    pub id: ChildId,
    /// Human-readable child name.
    pub name: String,
    /// Child task kind.
    pub kind: TaskKind,
    /// Optional factory for worker children.
    ///
    /// Skipped by both serde and schemars, so it is never part of serialized
    /// data or the generated schema. `validate` requires it for worker kinds
    /// and rejects it for the supervisor kind.
    #[serde(skip)]
    #[schemars(skip)]
    pub factory: Option<Arc<dyn TaskFactory>>,
    /// Restart policy for this child.
    pub restart_policy: RestartPolicy,
    /// Shutdown policy for this child.
    pub shutdown_policy: ShutdownPolicy,
    /// Health policy for this child.
    pub health_policy: HealthPolicy,
    /// Readiness policy for this child.
    pub readiness_policy: ReadinessPolicy,
    /// Backoff policy for this child.
    pub backoff_policy: BackoffPolicy,
    /// Child identifiers that must become ready before this child starts.
    pub dependencies: Vec<ChildId>,
    /// Low-cardinality tags used for grouping and diagnostics.
    ///
    /// `validate` rejects tags that are empty or contain only whitespace.
    pub tags: Vec<String>,
    /// Criticality used by parent policy decisions.
    pub criticality: Criticality,
    /// Optional role that selects default lifecycle policy semantics.
    #[serde(default)]
    pub task_role: Option<TaskRole>,
    /// Optional sidecar binding used when the role is [`TaskRole::Sidecar`].
    ///
    /// `validate` requires this to be present exactly when the role is
    /// [`TaskRole::Sidecar`].
    #[serde(default)]
    pub sidecar_config: Option<SidecarConfig>,
    /// Optional explicit severity classification that overrides the role default (US3).
    #[serde(default)]
    pub severity: Option<SeverityClass>,
    /// Optional group name for group-level isolation and budget tracking (US2).
    #[serde(default)]
    pub group: Option<String>,
    /// Optional health check configuration.
    #[serde(default)]
    pub health_check: Option<HealthCheckConfig>,
    /// Optional readiness check configuration.
    #[serde(default)]
    pub readiness: Option<ReadinessConfig>,
    /// Optional resource limits.
    #[serde(default)]
    pub resource_limits: Option<ResourceLimits>,
    /// Command permissions granted to this child.
    ///
    /// Falls back to `CommandPermissions::default()` when omitted from
    /// serialized input.
    #[serde(default)]
    pub command_permissions: CommandPermissions,
    /// Environment variables for this child.
    #[serde(default)]
    pub environment: Vec<EnvVar>,
    /// Secret references for this child.
    #[serde(default)]
    pub secrets: Vec<SecretRef>,
}
326
327impl Debug for ChildSpec {
328 /// Formats the child specification without printing the task factory.
329 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
330 formatter
331 .debug_struct("ChildSpec")
332 .field("id", &self.id)
333 .field("name", &self.name)
334 .field("kind", &self.kind)
335 .field("restart_policy", &self.restart_policy)
336 .field("shutdown_policy", &self.shutdown_policy)
337 .field("health_policy", &self.health_policy)
338 .field("readiness_policy", &self.readiness_policy)
339 .field("backoff_policy", &self.backoff_policy)
340 .field("dependencies", &self.dependencies)
341 .field("tags", &self.tags)
342 .field("criticality", &self.criticality)
343 .field("task_role", &self.task_role)
344 .field("sidecar_config", &self.sidecar_config)
345 .field("severity", &self.severity)
346 .field("group", &self.group)
347 .field("health_check", &self.health_check)
348 .field("readiness", &self.readiness)
349 .field("resource_limits", &self.resource_limits)
350 .field("command_permissions", &self.command_permissions)
351 .field("environment", &self.environment)
352 .field("secrets", &self.secrets)
353 .finish()
354 }
355}
356
357impl ChildSpec {
358 /// Creates a worker child specification.
359 ///
360 /// # Arguments
361 ///
362 /// - `id`: Stable child identifier.
363 /// - `name`: Human-readable child name.
364 /// - `kind`: Worker task kind.
365 /// - `factory`: Task factory used to build each child_start_count.
366 ///
367 /// # Returns
368 ///
369 /// Returns a [`ChildSpec`] with conservative policy values.
370 ///
371 /// # Examples
372 ///
373 /// ```
374 /// let factory = rust_supervisor::task::factory::service_fn(|_ctx| async {
375 /// rust_supervisor::task::factory::TaskResult::Succeeded
376 /// });
377 /// let spec = rust_supervisor::spec::child::ChildSpec::worker(
378 /// rust_supervisor::id::types::ChildId::new("worker"),
379 /// "worker",
380 /// rust_supervisor::spec::child::TaskKind::AsyncWorker,
381 /// std::sync::Arc::new(factory),
382 /// );
383 /// assert_eq!(spec.name, "worker");
384 /// ```
385 pub fn worker(
386 id: ChildId,
387 name: impl Into<String>,
388 kind: TaskKind,
389 factory: Arc<dyn TaskFactory>,
390 ) -> Self {
391 Self {
392 id,
393 name: name.into(),
394 kind,
395 factory: Some(factory),
396 restart_policy: RestartPolicy::Transient,
397 shutdown_policy: ShutdownPolicy::new(Duration::from_secs(5), Duration::from_secs(1)),
398 health_policy: HealthPolicy::new(Duration::from_secs(1), Duration::from_secs(3)),
399 readiness_policy: ReadinessPolicy::Immediate,
400 backoff_policy: BackoffPolicy::new(
401 Duration::from_millis(10),
402 Duration::from_secs(1),
403 0.0,
404 ),
405 dependencies: Vec::new(),
406 tags: Vec::new(),
407 criticality: Criticality::Critical,
408 task_role: Some(TaskRole::Worker),
409 sidecar_config: None,
410 severity: None,
411 group: None,
412 health_check: None,
413 readiness: None,
414 resource_limits: None,
415 command_permissions: CommandPermissions::default(),
416 environment: Vec::new(),
417 secrets: Vec::new(),
418 }
419 }
420
421 /// Validates local child specification invariants.
422 ///
423 /// # Arguments
424 ///
425 /// This function has no arguments.
426 ///
427 /// # Returns
428 ///
429 /// Returns `Ok(())` when the child can be registered.
430 pub fn validate(&self) -> Result<(), SupervisorError> {
431 validate_non_empty(&self.id.value, "child id")?;
432 validate_non_empty(&self.name, "child name")?;
433 validate_tags(&self.tags)?;
434 validate_backoff(self.backoff_policy)?;
435 validate_factory(self.kind, self.factory.is_some())?;
436 validate_sidecar_local(self)
437 }
438}
439
440/// Validates a non-empty string invariant.
441///
442/// # Arguments
443///
444/// - `value`: String value being validated.
445/// - `label`: Field label used in the error message.
446///
447/// # Returns
448///
449/// Returns `Ok(())` when the string is not empty.
450fn validate_non_empty(value: &str, label: &str) -> Result<(), SupervisorError> {
451 if value.trim().is_empty() {
452 Err(SupervisorError::fatal_config(format!(
453 "{label} must not be empty"
454 )))
455 } else {
456 Ok(())
457 }
458}
459
460/// Validates tag invariants.
461///
462/// # Arguments
463///
464/// - `tags`: Tags attached to the child.
465///
466/// # Returns
467///
468/// Returns `Ok(())` when every tag is non-empty.
469fn validate_tags(tags: &[String]) -> Result<(), SupervisorError> {
470 for tag in tags {
471 validate_non_empty(tag, "child tag")?;
472 }
473 Ok(())
474}
475
476/// Validates backoff invariants.
477///
478/// # Arguments
479///
480/// - `policy`: Backoff policy attached to the child.
481///
482/// # Returns
483///
484/// Returns `Ok(())` when delay and jitter values are valid.
485fn validate_backoff(policy: BackoffPolicy) -> Result<(), SupervisorError> {
486 if policy.initial_delay > policy.max_delay {
487 return Err(SupervisorError::fatal_config(
488 "initial backoff must not exceed max backoff",
489 ));
490 }
491 if !(0.0..=1.0).contains(&policy.jitter_ratio) {
492 return Err(SupervisorError::fatal_config(
493 "jitter ratio must be between zero and one",
494 ));
495 }
496 Ok(())
497}
498
499/// Validates factory presence for the child kind.
500///
501/// # Arguments
502///
503/// - `kind`: Child task kind.
504/// - `has_factory`: Whether a factory was supplied.
505///
506/// # Returns
507///
508/// Returns `Ok(())` when factory presence matches the task kind.
509fn validate_factory(kind: TaskKind, has_factory: bool) -> Result<(), SupervisorError> {
510 match (kind, has_factory) {
511 (TaskKind::Supervisor, true) => Err(SupervisorError::fatal_config(
512 "supervisor child must not own a task factory",
513 )),
514 (TaskKind::AsyncWorker | TaskKind::BlockingWorker, false) => Err(
515 SupervisorError::fatal_config("worker child requires a task factory"),
516 ),
517 _ => Ok(()),
518 }
519}
520
521/// Validates local sidecar fields without inspecting sibling children.
522///
523/// # Arguments
524///
525/// - `child`: Child specification to validate.
526///
527/// # Returns
528///
529/// Returns `Ok(())` when the local sidecar declaration is coherent.
530fn validate_sidecar_local(child: &ChildSpec) -> Result<(), SupervisorError> {
531 match (child.task_role, child.sidecar_config.as_ref()) {
532 (Some(TaskRole::Sidecar), None) => Err(SupervisorError::fatal_config(
533 "sidecar task_role requires sidecar_config",
534 )),
535 (role, Some(_)) if role != Some(TaskRole::Sidecar) => Err(SupervisorError::fatal_config(
536 "sidecar_config requires sidecar task_role",
537 )),
538 _ => Ok(()),
539 }
540}