use confique::Config;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::time::Duration;

use crate::id::types::ChildId;
use crate::policy::budget as runtime_budget;
use crate::policy::failure_window as runtime_failure_window;
use crate::policy::group::{GroupDependencyEdge, PropagationPolicy};
use crate::policy::meltdown::MeltdownPolicy;
use crate::policy::task_role_defaults::{SeverityClass, TaskRole};
use crate::spec::supervisor::{
    ChildStrategyOverride, DynamicSupervisorPolicy, EscalationPolicy,
    GroupConfig as RuntimeGroupConfig, GroupStrategy, RestartLimit, SupervisionStrategy,
};
22
23#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Config, JsonSchema)]
25pub struct RestartBudgetConfig {
26 #[config(default = 60)]
28 #[serde(default = "default_restart_budget_window_secs")]
29 pub window_secs: u64,
30 #[config(default = 10)]
32 #[serde(default = "default_restart_budget_max_burst")]
33 pub max_burst: u32,
34 #[config(default = 0.5)]
36 #[serde(default = "default_restart_budget_recovery_rate")]
37 pub recovery_rate_per_sec: f64,
38}
39
40impl RestartBudgetConfig {
41 pub fn to_runtime(&self) -> runtime_budget::RestartBudgetConfig {
51 runtime_budget::RestartBudgetConfig::new(
52 Duration::from_secs(self.window_secs),
53 self.max_burst,
54 self.recovery_rate_per_sec,
55 )
56 }
57}
58
59impl Default for RestartBudgetConfig {
60 fn default() -> Self {
62 Self {
63 window_secs: default_restart_budget_window_secs(),
64 max_burst: default_restart_budget_max_burst(),
65 recovery_rate_per_sec: default_restart_budget_recovery_rate(),
66 }
67 }
68}
69
70#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
72#[serde(rename_all = "snake_case")]
73pub enum FailureWindowMode {
74 TimeSliding,
76 CountSliding,
78}
79
80impl Default for FailureWindowMode {
81 fn default() -> Self {
83 Self::TimeSliding
84 }
85}
86
87#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
89pub struct FailureWindowConfig {
90 #[config(default = "time_sliding")]
92 #[serde(default)]
93 pub mode: FailureWindowMode,
94 #[config(default = 60)]
96 #[serde(default = "default_failure_window_secs")]
97 pub window_secs: u64,
98 #[config(default = 5)]
100 #[serde(default = "default_failure_window_max_count")]
101 pub max_count: usize,
102 #[config(default = 5)]
104 #[serde(default = "default_failure_window_threshold")]
105 pub threshold: usize,
106}
107
108impl FailureWindowConfig {
109 pub fn to_runtime(&self) -> runtime_failure_window::FailureWindowConfig {
119 match self.mode {
120 FailureWindowMode::TimeSliding => {
121 runtime_failure_window::FailureWindowConfig::time_sliding(
122 self.window_secs,
123 self.threshold,
124 )
125 }
126 FailureWindowMode::CountSliding => {
127 runtime_failure_window::FailureWindowConfig::count_sliding(
128 self.max_count,
129 self.threshold,
130 )
131 }
132 }
133 }
134}
135
136impl Default for FailureWindowConfig {
137 fn default() -> Self {
139 Self {
140 mode: FailureWindowMode::default(),
141 window_secs: default_failure_window_secs(),
142 max_count: default_failure_window_max_count(),
143 threshold: default_failure_window_threshold(),
144 }
145 }
146}
147
148#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
150pub struct MeltdownConfig {
151 #[config(default = 3)]
153 #[serde(default = "default_meltdown_child_max_restarts")]
154 pub child_max_restarts: u32,
155 #[config(default = 10)]
157 #[serde(default = "default_meltdown_child_window_secs")]
158 pub child_window_secs: u64,
159 #[config(default = 5)]
161 #[serde(default = "default_meltdown_group_max_failures")]
162 pub group_max_failures: u32,
163 #[config(default = 30)]
165 #[serde(default = "default_meltdown_group_window_secs")]
166 pub group_window_secs: u64,
167 #[config(default = 10)]
169 #[serde(default = "default_meltdown_supervisor_max_failures")]
170 pub supervisor_max_failures: u32,
171 #[config(default = 60)]
173 #[serde(default = "default_meltdown_supervisor_window_secs")]
174 pub supervisor_window_secs: u64,
175 #[config(default = 120)]
177 #[serde(default = "default_meltdown_reset_after_secs")]
178 pub reset_after_secs: u64,
179}
180
181impl MeltdownConfig {
182 pub fn to_runtime(&self) -> MeltdownPolicy {
192 MeltdownPolicy::new(
193 self.child_max_restarts,
194 Duration::from_secs(self.child_window_secs),
195 self.group_max_failures,
196 Duration::from_secs(self.group_window_secs),
197 self.supervisor_max_failures,
198 Duration::from_secs(self.supervisor_window_secs),
199 Duration::from_secs(self.reset_after_secs),
200 )
201 }
202}
203
204impl Default for MeltdownConfig {
205 fn default() -> Self {
207 Self {
208 child_max_restarts: default_meltdown_child_max_restarts(),
209 child_window_secs: default_meltdown_child_window_secs(),
210 group_max_failures: default_meltdown_group_max_failures(),
211 group_window_secs: default_meltdown_group_window_secs(),
212 supervisor_max_failures: default_meltdown_supervisor_max_failures(),
213 supervisor_window_secs: default_meltdown_supervisor_window_secs(),
214 reset_after_secs: default_meltdown_reset_after_secs(),
215 }
216 }
217}
218
219#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
221pub struct SupervisionPipelineConfig {
222 #[config(default = 100)]
224 #[serde(default = "default_pipeline_journal_capacity")]
225 pub journal_capacity: usize,
226 #[config(default = 10)]
228 #[serde(default = "default_pipeline_subscriber_capacity")]
229 pub subscriber_capacity: usize,
230 #[config(default = 5)]
232 #[serde(default = "default_concurrent_restart_limit")]
233 pub concurrent_restart_limit: u32,
234}
235
236impl Default for SupervisionPipelineConfig {
237 fn default() -> Self {
239 Self {
240 journal_capacity: default_pipeline_journal_capacity(),
241 subscriber_capacity: default_pipeline_subscriber_capacity(),
242 concurrent_restart_limit: default_concurrent_restart_limit(),
243 }
244 }
245}
246
247#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
249pub struct DynamicSupervisorConfig {
250 #[config(default = true)]
252 #[serde(default = "default_true")]
253 pub enabled: bool,
254 #[serde(default)]
256 pub child_limit: Option<usize>,
257}
258
259impl DynamicSupervisorConfig {
260 pub fn to_runtime(&self) -> DynamicSupervisorPolicy {
270 DynamicSupervisorPolicy {
271 enabled: self.enabled,
272 child_limit: self.child_limit,
273 }
274 }
275}
276
277impl Default for DynamicSupervisorConfig {
278 fn default() -> Self {
280 Self {
281 enabled: true,
282 child_limit: None,
283 }
284 }
285}
286
287#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
289pub struct RestartLimitConfig {
290 pub max_restarts: u32,
292 pub window_ms: u64,
294}
295
296impl RestartLimitConfig {
297 pub fn to_runtime(&self) -> RestartLimit {
307 RestartLimit::new(self.max_restarts, Duration::from_millis(self.window_ms))
308 }
309}
310
311#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Config, JsonSchema)]
313pub struct GroupConfig {
314 pub name: String,
316 #[config(default = [])]
318 #[serde(default)]
319 pub children: Vec<String>,
320 #[serde(default)]
322 pub budget: Option<RestartBudgetConfig>,
323}
324
325impl GroupConfig {
326 pub fn to_runtime(&self) -> RuntimeGroupConfig {
336 RuntimeGroupConfig::new(
337 self.name.clone(),
338 self.children.iter().map(ChildId::new).collect(),
339 self.budget.as_ref().map(RestartBudgetConfig::to_runtime),
340 )
341 }
342}
343
344#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Config, JsonSchema)]
346pub struct GroupStrategyConfig {
347 pub group: String,
349 pub strategy: SupervisionStrategy,
351 #[serde(default)]
353 pub restart_limit: Option<RestartLimitConfig>,
354 #[serde(default)]
356 pub escalation_policy: Option<EscalationPolicy>,
357}
358
359impl GroupStrategyConfig {
360 pub fn to_runtime(&self) -> GroupStrategy {
370 let mut strategy = GroupStrategy::new(self.group.clone(), self.strategy);
371 strategy.restart_limit = self
372 .restart_limit
373 .as_ref()
374 .map(RestartLimitConfig::to_runtime);
375 strategy.escalation_policy = self.escalation_policy;
376 strategy
377 }
378}
379
380#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Config, JsonSchema)]
382pub struct ChildStrategyOverrideConfig {
383 pub child_id: String,
385 pub strategy: SupervisionStrategy,
387 #[serde(default)]
389 pub restart_limit: Option<RestartLimitConfig>,
390 #[serde(default)]
392 pub escalation_policy: Option<EscalationPolicy>,
393}
394
395impl ChildStrategyOverrideConfig {
396 pub fn to_runtime(&self) -> ChildStrategyOverride {
406 let mut override_config =
407 ChildStrategyOverride::new(ChildId::new(&self.child_id), self.strategy);
408 override_config.restart_limit = self
409 .restart_limit
410 .as_ref()
411 .map(RestartLimitConfig::to_runtime);
412 override_config.escalation_policy = self.escalation_policy;
413 override_config
414 }
415}
416
417#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
419pub struct GroupDependencyConfig {
420 pub from_group: String,
422 pub to_group: String,
424 pub propagation: PropagationPolicy,
426}
427
428impl GroupDependencyConfig {
429 pub fn to_runtime(&self) -> GroupDependencyEdge {
439 GroupDependencyEdge {
440 from_group: self.from_group.clone(),
441 to_group: self.to_group.clone(),
442 propagation: self.propagation,
443 }
444 }
445}
446
447#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
449pub struct SeverityDefaultConfig {
450 pub task_role: TaskRole,
452 pub severity: SeverityClass,
454}
455
/// Serde default for [`RestartBudgetConfig::window_secs`]: `60` seconds.
///
/// Keep in sync with the `#[config(default = 60)]` literal on that field.
fn default_restart_budget_window_secs() -> u64 {
    60
}
460
/// Serde default for [`RestartBudgetConfig::max_burst`]: `10`.
///
/// Keep in sync with the `#[config(default = 10)]` literal on that field.
fn default_restart_budget_max_burst() -> u32 {
    10
}
465
/// Serde default for [`RestartBudgetConfig::recovery_rate_per_sec`]: `0.5`.
///
/// Keep in sync with the `#[config(default = 0.5)]` literal on that field.
fn default_restart_budget_recovery_rate() -> f64 {
    0.5
}
470
/// Serde default for [`FailureWindowConfig::window_secs`]: `60` seconds.
///
/// Keep in sync with the `#[config(default = 60)]` literal on that field.
fn default_failure_window_secs() -> u64 {
    60
}
475
/// Serde default for [`FailureWindowConfig::max_count`]: `5`.
///
/// Keep in sync with the `#[config(default = 5)]` literal on that field.
fn default_failure_window_max_count() -> usize {
    5
}
480
/// Serde default for [`FailureWindowConfig::threshold`]: `5`.
///
/// Keep in sync with the `#[config(default = 5)]` literal on that field.
fn default_failure_window_threshold() -> usize {
    5
}
485
/// Serde default for [`MeltdownConfig::child_max_restarts`]: `3`.
///
/// Keep in sync with the `#[config(default = 3)]` literal on that field.
fn default_meltdown_child_max_restarts() -> u32 {
    3
}
490
/// Serde default for [`MeltdownConfig::child_window_secs`]: `10` seconds.
///
/// Keep in sync with the `#[config(default = 10)]` literal on that field.
fn default_meltdown_child_window_secs() -> u64 {
    10
}
495
/// Serde default for [`MeltdownConfig::group_max_failures`]: `5`.
///
/// Keep in sync with the `#[config(default = 5)]` literal on that field.
fn default_meltdown_group_max_failures() -> u32 {
    5
}
500
/// Serde default for [`MeltdownConfig::group_window_secs`]: `30` seconds.
///
/// Keep in sync with the `#[config(default = 30)]` literal on that field.
fn default_meltdown_group_window_secs() -> u64 {
    30
}
505
/// Serde default for [`MeltdownConfig::supervisor_max_failures`]: `10`.
///
/// Keep in sync with the `#[config(default = 10)]` literal on that field.
fn default_meltdown_supervisor_max_failures() -> u32 {
    10
}
510
/// Serde default for [`MeltdownConfig::supervisor_window_secs`]: `60` seconds.
///
/// Keep in sync with the `#[config(default = 60)]` literal on that field.
fn default_meltdown_supervisor_window_secs() -> u64 {
    60
}
515
/// Serde default for [`MeltdownConfig::reset_after_secs`]: `120` seconds.
///
/// Keep in sync with the `#[config(default = 120)]` literal on that field.
fn default_meltdown_reset_after_secs() -> u64 {
    120
}
520
/// Serde default for [`SupervisionPipelineConfig::journal_capacity`]: `100`.
///
/// Keep in sync with the `#[config(default = 100)]` literal on that field.
fn default_pipeline_journal_capacity() -> usize {
    100
}
525
/// Serde default for [`SupervisionPipelineConfig::subscriber_capacity`]: `10`.
///
/// Keep in sync with the `#[config(default = 10)]` literal on that field.
fn default_pipeline_subscriber_capacity() -> usize {
    10
}
530
/// Serde default for [`SupervisionPipelineConfig::concurrent_restart_limit`]: `5`.
///
/// Keep in sync with the `#[config(default = 5)]` literal on that field.
fn default_concurrent_restart_limit() -> u32 {
    5
}
535
/// Serde default for boolean fields that are on unless disabled; in this file it backs
/// [`DynamicSupervisorConfig::enabled`].
fn default_true() -> bool {
    true
}