Skip to main content

nodedb_types/config/tuning/
scheduler.rs

//! Scheduler tuning — caps on concurrent jobs, per-job wall-clock
//! budget, and outbound HTTP timeouts applied by Event-Plane emitters.
3
4use serde::{Deserialize, Serialize};
5
/// Serde fallback for `SchedulerTuning::max_concurrent_jobs`: 32 jobs.
fn default_max_concurrent_jobs() -> usize {
    32
}
9
/// Serde fallback for `SchedulerTuning::job_timeout_secs`: 300 seconds.
fn default_job_timeout_secs() -> u64 {
    // Most scheduled bodies are retention deletes or incremental MV
    // refreshes. 300s gives ample headroom while preventing a runaway
    // job from holding `Arc<SharedState>` across the shutdown deadline.
    300
}
16
/// Serde fallback for `SchedulerTuning::webhook_timeout_secs`: 5 seconds.
fn default_webhook_timeout_secs() -> u64 {
    5
}
20
/// Serde fallback for `SchedulerTuning::siem_webhook_timeout_secs`: 10 seconds.
fn default_siem_webhook_timeout_secs() -> u64 {
    10
}
24
/// Serde fallback for `SchedulerTuning::otel_timeout_secs`: 5 seconds.
fn default_otel_timeout_secs() -> u64 {
    5
}
28
29/// Tuning knobs for the Event-Plane scheduler and outbound HTTP emitters.
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct SchedulerTuning {
32    /// Hard cap on concurrent in-flight scheduled jobs.
33    /// Jobs beyond the cap at a minute-boundary are rejected rather than
34    /// starving request-path Tokio workers with parallel SQL.
35    #[serde(default = "default_max_concurrent_jobs")]
36    pub max_concurrent_jobs: usize,
37
38    /// Wall-clock timeout applied to each scheduled job via
39    /// `ExecutionBudget`. Long-running SQL bodies that exceed this are
40    /// cancelled cooperatively at the next statement boundary.
41    #[serde(default = "default_job_timeout_secs")]
42    pub job_timeout_secs: u64,
43
44    /// Timeout (seconds) for a single alert-webhook POST.
45    #[serde(default = "default_webhook_timeout_secs")]
46    pub webhook_timeout_secs: u64,
47
48    /// Timeout (seconds) for a SIEM-webhook POST.
49    #[serde(default = "default_siem_webhook_timeout_secs")]
50    pub siem_webhook_timeout_secs: u64,
51
52    /// Timeout (seconds) for an OTLP trace-span export.
53    #[serde(default = "default_otel_timeout_secs")]
54    pub otel_timeout_secs: u64,
55}
56
57impl Default for SchedulerTuning {
58    fn default() -> Self {
59        Self {
60            max_concurrent_jobs: default_max_concurrent_jobs(),
61            job_timeout_secs: default_job_timeout_secs(),
62            webhook_timeout_secs: default_webhook_timeout_secs(),
63            siem_webhook_timeout_secs: default_siem_webhook_timeout_secs(),
64            otel_timeout_secs: default_otel_timeout_secs(),
65        }
66    }
67}
68
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` must report the built-in value for every field.
    #[test]
    fn defaults() {
        let t = SchedulerTuning::default();
        assert_eq!(t.max_concurrent_jobs, 32);
        assert_eq!(t.job_timeout_secs, 300);
        assert_eq!(t.webhook_timeout_secs, 5);
        assert_eq!(t.siem_webhook_timeout_secs, 10);
        assert_eq!(t.otel_timeout_secs, 5);
    }

    /// Keys present in the TOML override their defaults; every omitted key
    /// must fall back to its per-field serde default.
    #[test]
    fn partial_override() {
        let toml_str = r#"
max_concurrent_jobs = 8
webhook_timeout_secs = 15
"#;
        let t: SchedulerTuning =
            toml::from_str(toml_str).expect("partial scheduler tuning TOML should deserialize");
        // Overridden keys.
        assert_eq!(t.max_concurrent_jobs, 8);
        assert_eq!(t.webhook_timeout_secs, 15);
        // Omitted keys — all three, including the OTLP timeout.
        assert_eq!(t.siem_webhook_timeout_secs, 10);
        assert_eq!(t.job_timeout_secs, 300);
        assert_eq!(t.otel_timeout_secs, 5);
    }

    /// A document with no keys at all must deserialize to the same values
    /// as `SchedulerTuning::default()`.
    #[test]
    fn empty_document_uses_defaults() {
        let t: SchedulerTuning =
            toml::from_str("").expect("empty scheduler tuning TOML should deserialize");
        let d = SchedulerTuning::default();
        assert_eq!(t.max_concurrent_jobs, d.max_concurrent_jobs);
        assert_eq!(t.job_timeout_secs, d.job_timeout_secs);
        assert_eq!(t.webhook_timeout_secs, d.webhook_timeout_secs);
        assert_eq!(t.siem_webhook_timeout_secs, d.siem_webhook_timeout_secs);
        assert_eq!(t.otel_timeout_secs, d.otel_timeout_secs);
    }
}