use confique::Config;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use crate::id::types::ChildId;
use crate::policy::budget as runtime_budget;
use crate::policy::failure_window as runtime_failure_window;
use crate::policy::group::{GroupDependencyEdge, PropagationPolicy};
use crate::policy::meltdown::MeltdownPolicy;
use crate::policy::task_role_defaults::{SeverityClass, TaskRole};
use crate::spec::supervisor::{
ChildStrategyOverride, DynamicSupervisorPolicy, EscalationPolicy,
GroupConfig as RuntimeGroupConfig, GroupStrategy, RestartLimit, SupervisionStrategy,
};
/// Declarative restart-budget settings, loadable through `serde` and `confique`.
///
/// Every field carries a default, so any of them may be omitted from the input.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Config, JsonSchema)]
pub struct RestartBudgetConfig {
/// Length of the budget window in seconds (default: 60).
#[config(default = 60)]
#[serde(default = "default_restart_budget_window_secs")]
pub window_secs: u64,
/// Maximum burst size (default: 10).
// NOTE(review): presumably counted in restarts — confirm against `runtime_budget`.
#[config(default = 10)]
#[serde(default = "default_restart_budget_max_burst")]
pub max_burst: u32,
/// Recovery rate per second (default: 0.5).
#[config(default = 0.5)]
#[serde(default = "default_restart_budget_recovery_rate")]
pub recovery_rate_per_sec: f64,
}
impl RestartBudgetConfig {
    /// Builds the runtime restart-budget configuration from these settings.
    ///
    /// `window_secs` is turned into a [`Duration`]; `max_burst` and
    /// `recovery_rate_per_sec` are forwarded unchanged.
    pub fn to_runtime(&self) -> runtime_budget::RestartBudgetConfig {
        let window = Duration::from_secs(self.window_secs);
        let burst = self.max_burst;
        let recovery_rate = self.recovery_rate_per_sec;
        runtime_budget::RestartBudgetConfig::new(window, burst, recovery_rate)
    }
}
impl Default for RestartBudgetConfig {
fn default() -> Self {
Self {
window_secs: default_restart_budget_window_secs(),
max_burst: default_restart_budget_max_burst(),
recovery_rate_per_sec: default_restart_budget_recovery_rate(),
}
}
}
/// Selects which kind of failure window a `FailureWindowConfig` describes.
///
/// Variants are (de)serialized in snake_case: `time_sliding` / `count_sliding`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum FailureWindowMode {
/// Time-based sliding window.
TimeSliding,
/// Count-based sliding window.
CountSliding,
}
impl Default for FailureWindowMode {
fn default() -> Self {
Self::TimeSliding
}
}
/// Declarative failure-window settings.
///
/// `mode` decides which of `window_secs` / `max_count` is handed to the
/// runtime by `to_runtime`; `threshold` is used in both modes.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
pub struct FailureWindowConfig {
/// Window kind (default: `time_sliding`).
#[config(default = "time_sliding")]
#[serde(default)]
pub mode: FailureWindowMode,
/// Window length in seconds; only read in `time_sliding` mode (default: 60).
#[config(default = 60)]
#[serde(default = "default_failure_window_secs")]
pub window_secs: u64,
/// Window size as a count; only read in `count_sliding` mode (default: 5).
#[config(default = 5)]
#[serde(default = "default_failure_window_max_count")]
pub max_count: usize,
/// Threshold passed to the runtime window in either mode (default: 5).
#[config(default = 5)]
#[serde(default = "default_failure_window_threshold")]
pub threshold: usize,
}
impl FailureWindowConfig {
    /// Builds the runtime failure-window configuration for the selected mode.
    ///
    /// The count-based mode forwards `max_count`, the time-based mode forwards
    /// `window_secs`; both forward `threshold`.
    pub fn to_runtime(&self) -> runtime_failure_window::FailureWindowConfig {
        let threshold = self.threshold;
        match self.mode {
            FailureWindowMode::CountSliding => {
                let max_count = self.max_count;
                runtime_failure_window::FailureWindowConfig::count_sliding(max_count, threshold)
            }
            FailureWindowMode::TimeSliding => {
                let window_secs = self.window_secs;
                runtime_failure_window::FailureWindowConfig::time_sliding(window_secs, threshold)
            }
        }
    }
}
impl Default for FailureWindowConfig {
fn default() -> Self {
Self {
mode: FailureWindowMode::default(),
window_secs: default_failure_window_secs(),
max_count: default_failure_window_max_count(),
threshold: default_failure_window_threshold(),
}
}
}
/// Declarative meltdown-policy settings at child, group and supervisor level.
///
/// All `*_secs` fields are whole seconds; `to_runtime` converts them to
/// `Duration` values for `MeltdownPolicy`.
// NOTE(review): how the limits are enforced is defined by `MeltdownPolicy`, not here — confirm there.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
pub struct MeltdownConfig {
/// Child-level restart limit (default: 3).
#[config(default = 3)]
#[serde(default = "default_meltdown_child_max_restarts")]
pub child_max_restarts: u32,
/// Child-level window in seconds (default: 10).
#[config(default = 10)]
#[serde(default = "default_meltdown_child_window_secs")]
pub child_window_secs: u64,
/// Group-level failure limit (default: 5).
#[config(default = 5)]
#[serde(default = "default_meltdown_group_max_failures")]
pub group_max_failures: u32,
/// Group-level window in seconds (default: 30).
#[config(default = 30)]
#[serde(default = "default_meltdown_group_window_secs")]
pub group_window_secs: u64,
/// Supervisor-level failure limit (default: 10).
#[config(default = 10)]
#[serde(default = "default_meltdown_supervisor_max_failures")]
pub supervisor_max_failures: u32,
/// Supervisor-level window in seconds (default: 60).
#[config(default = 60)]
#[serde(default = "default_meltdown_supervisor_window_secs")]
pub supervisor_window_secs: u64,
/// Reset interval in seconds (default: 120).
#[config(default = 120)]
#[serde(default = "default_meltdown_reset_after_secs")]
pub reset_after_secs: u64,
}
impl MeltdownConfig {
    /// Builds the runtime [`MeltdownPolicy`] from these settings.
    ///
    /// Counters are forwarded unchanged; every `*_secs` field is converted to
    /// a [`Duration`] first.
    pub fn to_runtime(&self) -> MeltdownPolicy {
        let child_window = Duration::from_secs(self.child_window_secs);
        let group_window = Duration::from_secs(self.group_window_secs);
        let supervisor_window = Duration::from_secs(self.supervisor_window_secs);
        let reset_after = Duration::from_secs(self.reset_after_secs);

        MeltdownPolicy::new(
            self.child_max_restarts,
            child_window,
            self.group_max_failures,
            group_window,
            self.supervisor_max_failures,
            supervisor_window,
            reset_after,
        )
    }
}
impl Default for MeltdownConfig {
fn default() -> Self {
Self {
child_max_restarts: default_meltdown_child_max_restarts(),
child_window_secs: default_meltdown_child_window_secs(),
group_max_failures: default_meltdown_group_max_failures(),
group_window_secs: default_meltdown_group_window_secs(),
supervisor_max_failures: default_meltdown_supervisor_max_failures(),
supervisor_window_secs: default_meltdown_supervisor_window_secs(),
reset_after_secs: default_meltdown_reset_after_secs(),
}
}
}
/// Declarative capacity and concurrency settings for the supervision pipeline.
///
/// Every field carries a default, so any of them may be omitted from the input.
// NOTE(review): the consumers of these values are not visible in this file — confirm units/semantics at the use site.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
pub struct SupervisionPipelineConfig {
/// Journal capacity (default: 100).
#[config(default = 100)]
#[serde(default = "default_pipeline_journal_capacity")]
pub journal_capacity: usize,
/// Subscriber capacity (default: 10).
#[config(default = 10)]
#[serde(default = "default_pipeline_subscriber_capacity")]
pub subscriber_capacity: usize,
/// Concurrent restart limit (default: 5).
#[config(default = 5)]
#[serde(default = "default_concurrent_restart_limit")]
pub concurrent_restart_limit: u32,
}
impl Default for SupervisionPipelineConfig {
fn default() -> Self {
Self {
journal_capacity: default_pipeline_journal_capacity(),
subscriber_capacity: default_pipeline_subscriber_capacity(),
concurrent_restart_limit: default_concurrent_restart_limit(),
}
}
}
/// Declarative settings that map onto `DynamicSupervisorPolicy`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
pub struct DynamicSupervisorConfig {
/// Whether the dynamic supervisor policy is enabled (default: `true`).
#[config(default = true)]
#[serde(default = "default_true")]
pub enabled: bool,
/// Optional child limit; `None` when omitted.
// NOTE(review): presumably `None` means "no limit" — confirm against `DynamicSupervisorPolicy`.
#[serde(default)]
pub child_limit: Option<usize>,
}
impl DynamicSupervisorConfig {
    /// Builds the runtime [`DynamicSupervisorPolicy`] by copying both fields
    /// across unchanged.
    pub fn to_runtime(&self) -> DynamicSupervisorPolicy {
        let enabled = self.enabled;
        let child_limit = self.child_limit;
        DynamicSupervisorPolicy {
            enabled,
            child_limit,
        }
    }
}
impl Default for DynamicSupervisorConfig {
fn default() -> Self {
Self {
enabled: true,
child_limit: None,
}
}
}
/// Declarative restart limit; both fields are required (no defaults declared).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
pub struct RestartLimitConfig {
/// Restart count passed to the runtime `RestartLimit`.
pub max_restarts: u32,
/// Window length in milliseconds (note: other configs in this file use seconds).
pub window_ms: u64,
}
impl RestartLimitConfig {
    /// Builds the runtime [`RestartLimit`], converting `window_ms` from
    /// milliseconds into a [`Duration`].
    pub fn to_runtime(&self) -> RestartLimit {
        let window = Duration::from_millis(self.window_ms);
        RestartLimit::new(self.max_restarts, window)
    }
}
/// Declarative description of a child group.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Config, JsonSchema)]
pub struct GroupConfig {
/// Group name (required).
pub name: String,
/// Child identifiers as strings; each becomes a `ChildId` in `to_runtime`
/// (default: empty list).
#[config(default = [])]
#[serde(default)]
pub children: Vec<String>,
/// Optional restart budget for the group; `None` when omitted.
#[serde(default)]
pub budget: Option<RestartBudgetConfig>,
}
impl GroupConfig {
    /// Builds the runtime group configuration.
    ///
    /// The name is cloned, each child string is wrapped with `ChildId::new`,
    /// and the budget (when present) is converted via its own `to_runtime`.
    pub fn to_runtime(&self) -> RuntimeGroupConfig {
        let group_name = self.name.clone();
        let child_ids = self.children.iter().map(ChildId::new).collect();
        let runtime_budget_cfg = self.budget.as_ref().map(|cfg| cfg.to_runtime());
        RuntimeGroupConfig::new(group_name, child_ids, runtime_budget_cfg)
    }
}
/// Declarative supervision strategy for a named group.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Config, JsonSchema)]
pub struct GroupStrategyConfig {
/// Name of the group this strategy is attached to (required).
pub group: String,
/// Supervision strategy for the group (required).
pub strategy: SupervisionStrategy,
/// Optional restart limit; `None` when omitted.
#[serde(default)]
pub restart_limit: Option<RestartLimitConfig>,
/// Optional escalation policy; `None` when omitted.
#[serde(default)]
pub escalation_policy: Option<EscalationPolicy>,
}
impl GroupStrategyConfig {
    /// Builds the runtime [`GroupStrategy`].
    ///
    /// Starts from `GroupStrategy::new` and then always overwrites
    /// `restart_limit` and `escalation_policy` with the configured values,
    /// including `None` when they were not configured.
    pub fn to_runtime(&self) -> GroupStrategy {
        let group_name = self.group.clone();
        let mut runtime = GroupStrategy::new(group_name, self.strategy);
        runtime.restart_limit = self.restart_limit.as_ref().map(|limit| limit.to_runtime());
        runtime.escalation_policy = self.escalation_policy;
        runtime
    }
}
/// Declarative per-child override of the supervision strategy.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Config, JsonSchema)]
pub struct ChildStrategyOverrideConfig {
/// Child identifier as a string; wrapped into a `ChildId` by `to_runtime` (required).
pub child_id: String,
/// Supervision strategy for this child (required).
pub strategy: SupervisionStrategy,
/// Optional restart limit; `None` when omitted.
#[serde(default)]
pub restart_limit: Option<RestartLimitConfig>,
/// Optional escalation policy; `None` when omitted.
#[serde(default)]
pub escalation_policy: Option<EscalationPolicy>,
}
impl ChildStrategyOverrideConfig {
    /// Builds the runtime [`ChildStrategyOverride`].
    ///
    /// Starts from `ChildStrategyOverride::new` and then always overwrites
    /// `restart_limit` and `escalation_policy` with the configured values,
    /// including `None` when they were not configured.
    pub fn to_runtime(&self) -> ChildStrategyOverride {
        let child = ChildId::new(&self.child_id);
        let mut runtime = ChildStrategyOverride::new(child, self.strategy);
        runtime.restart_limit = self.restart_limit.as_ref().map(|limit| limit.to_runtime());
        runtime.escalation_policy = self.escalation_policy;
        runtime
    }
}
/// Declarative dependency edge between two groups; all fields are required.
// NOTE(review): edge direction (which group depends on which) is defined by `GroupDependencyEdge` — confirm there.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
pub struct GroupDependencyConfig {
/// Name of the group at the `from` end of the edge.
pub from_group: String,
/// Name of the group at the `to` end of the edge.
pub to_group: String,
/// Propagation policy attached to the edge.
pub propagation: PropagationPolicy,
}
impl GroupDependencyConfig {
    /// Builds the runtime [`GroupDependencyEdge`], cloning both group names
    /// and copying the propagation policy.
    pub fn to_runtime(&self) -> GroupDependencyEdge {
        let from_group = self.from_group.clone();
        let to_group = self.to_group.clone();
        let propagation = self.propagation;
        GroupDependencyEdge {
            from_group,
            to_group,
            propagation,
        }
    }
}
/// Pairs a task role with a severity class; both fields are required.
// NOTE(review): presumably the default severity applied to that role — confirm at the use site.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Config, JsonSchema)]
pub struct SeverityDefaultConfig {
/// Task role the entry applies to.
pub task_role: TaskRole,
/// Severity class associated with the role.
pub severity: SeverityClass,
}
/// Fallback for `RestartBudgetConfig::window_secs`: 60 seconds.
fn default_restart_budget_window_secs() -> u64 {
    60_u64
}

/// Fallback for `RestartBudgetConfig::max_burst`: 10.
fn default_restart_budget_max_burst() -> u32 {
    10_u32
}

/// Fallback for `RestartBudgetConfig::recovery_rate_per_sec`: 0.5.
fn default_restart_budget_recovery_rate() -> f64 {
    0.5_f64
}
/// Fallback for `FailureWindowConfig::window_secs`: 60 seconds.
fn default_failure_window_secs() -> u64 {
    60_u64
}

/// Fallback for `FailureWindowConfig::max_count`: 5.
fn default_failure_window_max_count() -> usize {
    5_usize
}

/// Fallback for `FailureWindowConfig::threshold`: 5.
fn default_failure_window_threshold() -> usize {
    5_usize
}
/// Fallback for `MeltdownConfig::child_max_restarts`: 3.
fn default_meltdown_child_max_restarts() -> u32 {
    3_u32
}

/// Fallback for `MeltdownConfig::child_window_secs`: 10 seconds.
fn default_meltdown_child_window_secs() -> u64 {
    10_u64
}

/// Fallback for `MeltdownConfig::group_max_failures`: 5.
fn default_meltdown_group_max_failures() -> u32 {
    5_u32
}

/// Fallback for `MeltdownConfig::group_window_secs`: 30 seconds.
fn default_meltdown_group_window_secs() -> u64 {
    30_u64
}

/// Fallback for `MeltdownConfig::supervisor_max_failures`: 10.
fn default_meltdown_supervisor_max_failures() -> u32 {
    10_u32
}

/// Fallback for `MeltdownConfig::supervisor_window_secs`: 60 seconds.
fn default_meltdown_supervisor_window_secs() -> u64 {
    60_u64
}

/// Fallback for `MeltdownConfig::reset_after_secs`: 120 seconds.
fn default_meltdown_reset_after_secs() -> u64 {
    120_u64
}
/// Fallback for `SupervisionPipelineConfig::journal_capacity`: 100.
fn default_pipeline_journal_capacity() -> usize {
    100_usize
}

/// Fallback for `SupervisionPipelineConfig::subscriber_capacity`: 10.
fn default_pipeline_subscriber_capacity() -> usize {
    10_usize
}

/// Fallback for `SupervisionPipelineConfig::concurrent_restart_limit`: 5.
fn default_concurrent_restart_limit() -> u32 {
    5_u32
}
/// Serde fallback for boolean fields that default to enabled.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}