Skip to main content

nodedb_types/config/tuning/
scheduler.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Scheduler tuning — caps on concurrent jobs, per-job wall-clock
4//! budget, and outbound HTTP timeouts applied by Event-Plane emitters.
5
6use serde::{Deserialize, Serialize};
7
/// Serde fallback for `SchedulerTuning::max_concurrent_jobs`.
fn default_max_concurrent_jobs() -> usize {
    const MAX_CONCURRENT_JOBS: usize = 32;
    MAX_CONCURRENT_JOBS
}
11
/// Serde fallback for `SchedulerTuning::job_timeout_secs`, in seconds.
fn default_job_timeout_secs() -> u64 {
    // Scheduled bodies are mostly retention deletes or incremental MV
    // refreshes, so five minutes is generous. The bound exists so that a
    // runaway job cannot keep `Arc<SharedState>` alive past the shutdown
    // deadline.
    const SECS_PER_MINUTE: u64 = 60;
    const BUDGET_MINUTES: u64 = 5;
    BUDGET_MINUTES * SECS_PER_MINUTE
}
18
/// Serde fallback for `SchedulerTuning::webhook_timeout_secs`, in seconds.
fn default_webhook_timeout_secs() -> u64 {
    const WEBHOOK_TIMEOUT_SECS: u64 = 5;
    WEBHOOK_TIMEOUT_SECS
}
22
/// Serde fallback for `SchedulerTuning::siem_webhook_timeout_secs`, in seconds.
fn default_siem_webhook_timeout_secs() -> u64 {
    const SIEM_WEBHOOK_TIMEOUT_SECS: u64 = 10;
    SIEM_WEBHOOK_TIMEOUT_SECS
}
26
/// Serde fallback for `SchedulerTuning::otel_timeout_secs`, in seconds.
fn default_otel_timeout_secs() -> u64 {
    const OTEL_TIMEOUT_SECS: u64 = 5;
    OTEL_TIMEOUT_SECS
}
30
31/// Tuning knobs for the Event-Plane scheduler and outbound HTTP emitters.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct SchedulerTuning {
34    /// Hard cap on concurrent in-flight scheduled jobs.
35    /// Jobs beyond the cap at a minute-boundary are rejected rather than
36    /// starving request-path Tokio workers with parallel SQL.
37    #[serde(default = "default_max_concurrent_jobs")]
38    pub max_concurrent_jobs: usize,
39
40    /// Wall-clock timeout applied to each scheduled job via
41    /// `ExecutionBudget`. Long-running SQL bodies that exceed this are
42    /// cancelled cooperatively at the next statement boundary.
43    #[serde(default = "default_job_timeout_secs")]
44    pub job_timeout_secs: u64,
45
46    /// Timeout (seconds) for a single alert-webhook POST.
47    #[serde(default = "default_webhook_timeout_secs")]
48    pub webhook_timeout_secs: u64,
49
50    /// Timeout (seconds) for a SIEM-webhook POST.
51    #[serde(default = "default_siem_webhook_timeout_secs")]
52    pub siem_webhook_timeout_secs: u64,
53
54    /// Timeout (seconds) for an OTLP trace-span export.
55    #[serde(default = "default_otel_timeout_secs")]
56    pub otel_timeout_secs: u64,
57}
58
59impl Default for SchedulerTuning {
60    fn default() -> Self {
61        Self {
62            max_concurrent_jobs: default_max_concurrent_jobs(),
63            job_timeout_secs: default_job_timeout_secs(),
64            webhook_timeout_secs: default_webhook_timeout_secs(),
65            siem_webhook_timeout_secs: default_siem_webhook_timeout_secs(),
66            otel_timeout_secs: default_otel_timeout_secs(),
67        }
68    }
69}
70
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default::default()` yields the built-in value for every knob.
    #[test]
    fn defaults() {
        let t = SchedulerTuning::default();
        assert_eq!(t.max_concurrent_jobs, 32);
        assert_eq!(t.job_timeout_secs, 300);
        assert_eq!(t.webhook_timeout_secs, 5);
        assert_eq!(t.siem_webhook_timeout_secs, 10);
        assert_eq!(t.otel_timeout_secs, 5);
    }

    /// Keys present in the document win; every omitted key keeps its
    /// built-in default.
    #[test]
    fn partial_override() {
        let toml_str = r#"
max_concurrent_jobs = 8
webhook_timeout_secs = 15
"#;
        let t: SchedulerTuning =
            toml::from_str(toml_str).expect("partial scheduler tuning TOML should deserialize");

        // Explicitly overridden keys.
        assert_eq!(t.max_concurrent_jobs, 8);
        assert_eq!(t.webhook_timeout_secs, 15);

        // Omitted keys fall back to their defaults, including the OTLP
        // timeout, which was previously left unchecked.
        assert_eq!(t.job_timeout_secs, 300);
        assert_eq!(t.siem_webhook_timeout_secs, 10);
        assert_eq!(t.otel_timeout_secs, 5);
    }

    /// An empty document deserializes to the same values as
    /// `Default::default()`, so the serde defaults and the `Default` impl
    /// cannot drift apart unnoticed.
    #[test]
    fn empty_document_matches_default() {
        let parsed: SchedulerTuning =
            toml::from_str("").expect("empty scheduler tuning TOML should deserialize");
        let expected = SchedulerTuning::default();

        assert_eq!(parsed.max_concurrent_jobs, expected.max_concurrent_jobs);
        assert_eq!(parsed.job_timeout_secs, expected.job_timeout_secs);
        assert_eq!(parsed.webhook_timeout_secs, expected.webhook_timeout_secs);
        assert_eq!(
            parsed.siem_webhook_timeout_secs,
            expected.siem_webhook_timeout_secs
        );
        assert_eq!(parsed.otel_timeout_secs, expected.otel_timeout_secs);
    }
}
97}