// devops_models/models/prometheus.rs

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::models::validation::{ConfigValidator, Diagnostic, Severity, YamlType};

// ═══════════════════════════════════════════════════════════════════════════
// Prometheus Configuration
// ═══════════════════════════════════════════════════════════════════════════

9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct PrometheusGlobal {
11    #[serde(default)]
12    pub scrape_interval: Option<String>,
13    #[serde(default)]
14    pub scrape_timeout: Option<String>,
15    #[serde(default)]
16    pub evaluation_interval: Option<String>,
17    #[serde(default)]
18    pub external_labels: HashMap<String, String>,
19}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct StaticConfig {
23    pub targets: Vec<String>,
24    #[serde(default)]
25    pub labels: HashMap<String, String>,
26}
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct ScrapeConfig {
30    pub job_name: String,
31    #[serde(default)]
32    pub static_configs: Vec<StaticConfig>,
33    #[serde(default)]
34    pub scrape_interval: Option<String>,
35    #[serde(default)]
36    pub scrape_timeout: Option<String>,
37    #[serde(default)]
38    pub metrics_path: Option<String>,
39    #[serde(default)]
40    pub scheme: Option<String>,
41    #[serde(default)]
42    pub honor_labels: Option<bool>,
43    #[serde(default)]
44    pub honor_timestamps: Option<bool>,
45    #[serde(default)]
46    pub params: Option<serde_json::Value>,
47    #[serde(default)]
48    pub basic_auth: Option<serde_json::Value>,
49    #[serde(default)]
50    pub bearer_token: Option<String>,
51    #[serde(default)]
52    pub bearer_token_file: Option<String>,
53    #[serde(default)]
54    pub tls_config: Option<serde_json::Value>,
55    #[serde(default)]
56    pub relabel_configs: Vec<serde_json::Value>,
57    #[serde(default)]
58    pub metric_relabel_configs: Vec<serde_json::Value>,
59    // Service discovery
60    #[serde(default)]
61    pub kubernetes_sd_configs: Vec<serde_json::Value>,
62    #[serde(default)]
63    pub consul_sd_configs: Vec<serde_json::Value>,
64    #[serde(default)]
65    pub dns_sd_configs: Vec<serde_json::Value>,
66    #[serde(default)]
67    pub file_sd_configs: Vec<serde_json::Value>,
68    #[serde(default)]
69    pub ec2_sd_configs: Vec<serde_json::Value>,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct AlertingAlertmanager {
74    #[serde(default)]
75    pub static_configs: Vec<StaticConfig>,
76    #[serde(default)]
77    pub scheme: Option<String>,
78    #[serde(default)]
79    pub path_prefix: Option<String>,
80    #[serde(default)]
81    pub timeout: Option<String>,
82    #[serde(default)]
83    pub tls_config: Option<serde_json::Value>,
84}
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct AlertingConfig {
88    #[serde(default)]
89    pub alertmanagers: Vec<AlertingAlertmanager>,
90}
91
92#[derive(Debug, Clone, Serialize, Deserialize)]
93pub struct PrometheusConfig {
94    #[serde(default)]
95    pub global: Option<PrometheusGlobal>,
96    #[serde(default)]
97    pub scrape_configs: Vec<ScrapeConfig>,
98    #[serde(default)]
99    pub rule_files: Vec<String>,
100    #[serde(default)]
101    pub alerting: Option<AlertingConfig>,
102    #[serde(default)]
103    pub remote_write: Vec<serde_json::Value>,
104    #[serde(default)]
105    pub remote_read: Vec<serde_json::Value>,
106    #[serde(default)]
107    pub storage: Option<serde_json::Value>,
108}
109
110impl PrometheusConfig {
111    pub fn from_value(data: serde_json::Value) -> Result<Self, String> {
112        serde_json::from_value(data)
113            .map_err(|e| format!("Failed to parse Prometheus config: {e}"))
114    }
115}
116
/// Parse a Prometheus duration string into whole seconds.
///
/// Accepts the units `y`, `w`, `d`, `h`, `m`, `s` and `ms`, either alone
/// (`"30s"`, `"5m"`) or combined largest-unit-first with each unit used at
/// most once (`"1h30m"`, `"1s500ms"`). The literal `"0"` is accepted as a zero
/// duration. A year counts as 365 days and a week as 7 days. Surrounding
/// whitespace is ignored.
///
/// Sub-second remainders are truncated, so `"500ms"` yields `Some(0)`.
///
/// Returns `None` for empty input, unknown or out-of-order units, a missing
/// number or unit, or a total that does not fit in a `u64` number of seconds.
fn parse_duration_secs(s: &str) -> Option<u64> {
    // Unit suffixes with their length in milliseconds, listed in the only
    // order in which they may appear (largest unit first).
    const UNITS: [(&str, u128); 7] = [
        ("y", 365 * 86_400_000),
        ("w", 7 * 86_400_000),
        ("d", 86_400_000),
        ("h", 3_600_000),
        ("m", 60_000),
        ("s", 1_000),
        ("ms", 1),
    ];

    let mut rest = s.trim();
    if rest == "0" {
        return Some(0);
    }
    if rest.is_empty() {
        return None;
    }

    // Accumulate in u128 milliseconds: a u64 count times the largest unit
    // cannot overflow u128, so no intermediate step can panic or wrap.
    let mut total_ms: u128 = 0;
    let mut next_unit = 0;
    while !rest.is_empty() {
        // Both prefixes consist of ASCII bytes only, so the split points are
        // always valid char boundaries.
        let digit_len = rest.bytes().take_while(u8::is_ascii_digit).count();
        let (digits, tail) = rest.split_at(digit_len);
        let unit_len = tail.bytes().take_while(u8::is_ascii_alphabetic).count();
        let (unit, remainder) = tail.split_at(unit_len);

        // An empty `digits` fails to parse and an empty `unit` matches no
        // suffix, so malformed segments fall out here.
        let value: u64 = digits.parse().ok()?;
        let index = UNITS.iter().position(|&(suffix, _)| suffix == unit)?;
        if index < next_unit {
            // Unit repeated or given out of order.
            return None;
        }
        next_unit = index + 1;

        total_ms += u128::from(value) * UNITS[index].1;
        rest = remainder;
    }

    let secs = total_ms / 1_000;
    if secs > u128::from(u64::MAX) {
        None
    } else {
        // Narrowing is lossless: the range was checked just above.
        Some(secs as u64)
    }
}

135impl ConfigValidator for PrometheusConfig {
136    fn yaml_type(&self) -> YamlType { YamlType::Prometheus }
137
138    fn validate_structure(&self) -> Vec<Diagnostic> {
139        let mut diags = Vec::new();
140        if self.scrape_configs.is_empty() {
141            diags.push(Diagnostic {
142                severity: Severity::Error,
143                message: "No scrape_configs defined — Prometheus won't scrape any targets".into(),
144                path: Some("scrape_configs".into()),
145            });
146        }
147        // Check that job names are unique
148        let mut job_names: HashMap<&str, usize> = HashMap::new();
149        for sc in &self.scrape_configs {
150            *job_names.entry(&sc.job_name).or_insert(0) += 1;
151        }
152        for (name, count) in &job_names {
153            if *count > 1 {
154                diags.push(Diagnostic {
155                    severity: Severity::Error,
156                    message: format!("Duplicate job_name '{}' found {} times", name, count),
157                    path: Some("scrape_configs > job_name".into()),
158                });
159            }
160        }
161        diags
162    }
163
164    fn validate_semantics(&self) -> Vec<Diagnostic> {
165        let mut diags = Vec::new();
166
167        // Global scrape_interval check
168        if let Some(global) = &self.global {
169            if let Some(interval) = &global.scrape_interval
170                && let Some(secs) = parse_duration_secs(interval)
171                    && secs < 5 {
172                        diags.push(Diagnostic {
173                            severity: Severity::Warning,
174                            message: format!("global.scrape_interval='{}' is very aggressive — may overload targets", interval),
175                            path: Some("global > scrape_interval".into()),
176                        });
177                    }
178            if let Some(timeout) = &global.scrape_timeout
179                && let (Some(t_secs), Some(i_secs)) = (
180                    parse_duration_secs(timeout),
181                    global.scrape_interval.as_ref().and_then(|i| parse_duration_secs(i)),
182                )
183                    && t_secs > i_secs {
184                        diags.push(Diagnostic {
185                            severity: Severity::Warning,
186                            message: format!("global.scrape_timeout ({}) > scrape_interval ({}) — scrapes may overlap", timeout, global.scrape_interval.as_deref().unwrap_or("?")),
187                            path: Some("global > scrape_timeout".into()),
188                        });
189                    }
190        }
191
192        // Per-job checks
193        for sc in &self.scrape_configs {
194            if sc.static_configs.is_empty()
195                && sc.kubernetes_sd_configs.is_empty()
196                && sc.consul_sd_configs.is_empty()
197                && sc.dns_sd_configs.is_empty()
198                && sc.file_sd_configs.is_empty()
199                && sc.ec2_sd_configs.is_empty()
200            {
201                diags.push(Diagnostic {
202                    severity: Severity::Warning,
203                    message: format!("Job '{}': no targets or service discovery configured", sc.job_name),
204                    path: Some(format!("scrape_configs > {}", sc.job_name)),
205                });
206            }
207        }
208
209        // Alerting check
210        if self.alerting.is_none() && !self.rule_files.is_empty() {
211            diags.push(Diagnostic {
212                severity: Severity::Warning,
213                message: "rule_files defined but no alerting config — alerts won't be delivered".into(),
214                path: Some("alerting".into()),
215            });
216        }
217
218        diags
219    }
220}
221
// ═══════════════════════════════════════════════════════════════════════════
// Alertmanager Configuration
// ═══════════════════════════════════════════════════════════════════════════

226#[derive(Debug, Clone, Serialize, Deserialize)]
227pub struct AlertmanagerGlobal {
228    #[serde(default)]
229    pub smtp_smarthost: Option<String>,
230    #[serde(default)]
231    pub smtp_from: Option<String>,
232    #[serde(default)]
233    pub smtp_auth_username: Option<String>,
234    #[serde(default)]
235    pub smtp_require_tls: Option<bool>,
236    #[serde(default)]
237    pub slack_api_url: Option<String>,
238    #[serde(default)]
239    pub pagerduty_url: Option<String>,
240    #[serde(default)]
241    pub resolve_timeout: Option<String>,
242}
243
244#[derive(Debug, Clone, Serialize, Deserialize)]
245pub struct AlertmanagerRoute {
246    #[serde(default)]
247    pub receiver: Option<String>,
248    #[serde(default)]
249    pub group_by: Vec<String>,
250    #[serde(default)]
251    pub group_wait: Option<String>,
252    #[serde(default)]
253    pub group_interval: Option<String>,
254    #[serde(default)]
255    pub repeat_interval: Option<String>,
256    #[serde(default, rename = "match")]
257    pub match_labels: Option<HashMap<String, String>>,
258    #[serde(default)]
259    pub match_re: Option<HashMap<String, String>>,
260    #[serde(default)]
261    pub matchers: Vec<String>,
262    #[serde(default)]
263    pub routes: Vec<AlertmanagerRoute>,
264    #[serde(default, rename = "continue")]
265    pub continue_matching: Option<bool>,
266    #[serde(default)]
267    pub mute_time_intervals: Vec<String>,
268}
269
270#[derive(Debug, Clone, Serialize, Deserialize)]
271pub struct AlertmanagerReceiver {
272    pub name: String,
273    #[serde(default)]
274    pub email_configs: Vec<serde_json::Value>,
275    #[serde(default)]
276    pub slack_configs: Vec<serde_json::Value>,
277    #[serde(default)]
278    pub pagerduty_configs: Vec<serde_json::Value>,
279    #[serde(default)]
280    pub webhook_configs: Vec<serde_json::Value>,
281    #[serde(default)]
282    pub opsgenie_configs: Vec<serde_json::Value>,
283    #[serde(default)]
284    pub victorops_configs: Vec<serde_json::Value>,
285    #[serde(default)]
286    pub pushover_configs: Vec<serde_json::Value>,
287}
288
289#[derive(Debug, Clone, Serialize, Deserialize)]
290pub struct AlertmanagerInhibitRule {
291    #[serde(default)]
292    pub source_match: Option<HashMap<String, String>>,
293    #[serde(default)]
294    pub source_match_re: Option<HashMap<String, String>>,
295    #[serde(default)]
296    pub source_matchers: Vec<String>,
297    #[serde(default)]
298    pub target_match: Option<HashMap<String, String>>,
299    #[serde(default)]
300    pub target_match_re: Option<HashMap<String, String>>,
301    #[serde(default)]
302    pub target_matchers: Vec<String>,
303    #[serde(default)]
304    pub equal: Vec<String>,
305}
306
307#[derive(Debug, Clone, Serialize, Deserialize)]
308pub struct AlertmanagerConfig {
309    #[serde(default)]
310    pub global: Option<AlertmanagerGlobal>,
311    pub route: AlertmanagerRoute,
312    #[serde(default)]
313    pub receivers: Vec<AlertmanagerReceiver>,
314    #[serde(default)]
315    pub inhibit_rules: Vec<AlertmanagerInhibitRule>,
316    #[serde(default)]
317    pub templates: Vec<String>,
318    #[serde(default)]
319    pub time_intervals: Vec<serde_json::Value>,
320    #[serde(default)]
321    pub mute_time_intervals: Vec<serde_json::Value>,
322}
323
324impl AlertmanagerConfig {
325    pub fn from_value(data: serde_json::Value) -> Result<Self, String> {
326        serde_json::from_value(data)
327            .map_err(|e| format!("Failed to parse Alertmanager config: {e}"))
328    }
329}
330
331impl ConfigValidator for AlertmanagerConfig {
332    fn yaml_type(&self) -> YamlType { YamlType::Alertmanager }
333
334    fn validate_structure(&self) -> Vec<Diagnostic> {
335        let mut diags = Vec::new();
336        if self.route.receiver.is_none() {
337            diags.push(Diagnostic {
338                severity: Severity::Error,
339                message: "Root route must have a 'receiver'".into(),
340                path: Some("route > receiver".into()),
341            });
342        }
343        if self.receivers.is_empty() {
344            diags.push(Diagnostic {
345                severity: Severity::Error,
346                message: "No receivers defined".into(),
347                path: Some("receivers".into()),
348            });
349        }
350        // Check that route.receiver references a defined receiver
351        let receiver_names: Vec<&str> = self.receivers.iter().map(|r| r.name.as_str()).collect();
352        if let Some(root_receiver) = &self.route.receiver
353            && !receiver_names.contains(&root_receiver.as_str()) {
354                diags.push(Diagnostic {
355                    severity: Severity::Error,
356                    message: format!("Root route receiver '{}' is not defined", root_receiver),
357                    path: Some("route > receiver".into()),
358                });
359            }
360        // Check sub-route receivers
361        check_route_receivers(&self.route, &receiver_names, &mut diags);
362        diags
363    }
364
365    fn validate_semantics(&self) -> Vec<Diagnostic> {
366        let mut diags = Vec::new();
367        // Check for "null" receivers (no notification configs)
368        for r in &self.receivers {
369            if r.email_configs.is_empty()
370                && r.slack_configs.is_empty()
371                && r.pagerduty_configs.is_empty()
372                && r.webhook_configs.is_empty()
373                && r.opsgenie_configs.is_empty()
374                && r.victorops_configs.is_empty()
375                && r.pushover_configs.is_empty()
376            {
377                diags.push(Diagnostic {
378                    severity: Severity::Warning,
379                    message: format!("Receiver '{}' has no notification channels configured — alerts will be silently dropped", r.name),
380                    path: Some(format!("receivers > {}", r.name)),
381                });
382            }
383        }
384        // group_by check
385        if self.route.group_by.is_empty() {
386            diags.push(Diagnostic {
387                severity: Severity::Info,
388                message: "Root route has no group_by — all alerts will be grouped together".into(),
389                path: Some("route > group_by".into()),
390            });
391        }
392        diags
393    }
394}
395
396fn check_route_receivers(route: &AlertmanagerRoute, receivers: &[&str], diags: &mut Vec<Diagnostic>) {
397    for sub in &route.routes {
398        if let Some(recv) = &sub.receiver
399            && !receivers.contains(&recv.as_str()) {
400                diags.push(Diagnostic {
401                    severity: Severity::Error,
402                    message: format!("Sub-route receiver '{}' is not defined", recv),
403                    path: Some("route > routes > receiver".into()),
404                });
405            }
406        check_route_receivers(sub, receivers, diags);
407    }
408}