Skip to main content

forge_core/observability/
alert.rs

1use std::str::FromStr;
2
3use serde::{Deserialize, Serialize};
4
5/// Alert severity level.
6#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
7#[serde(rename_all = "lowercase")]
8pub enum AlertSeverity {
9    /// Informational alert.
10    Info,
11    /// Warning alert.
12    Warning,
13    /// Critical alert.
14    Critical,
15}
16
/// Returned when a string cannot be parsed into an `AlertSeverity`.
///
/// The single public field holds the input that was rejected.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseAlertSeverityError(pub String);
20
21impl std::fmt::Display for ParseAlertSeverityError {
22    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23        write!(f, "invalid alert severity: {}", self.0)
24    }
25}
26
27impl std::error::Error for ParseAlertSeverityError {}
28
29impl FromStr for AlertSeverity {
30    type Err = ParseAlertSeverityError;
31
32    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
33        match s.to_lowercase().as_str() {
34            "info" | "informational" => Ok(Self::Info),
35            "warning" | "warn" => Ok(Self::Warning),
36            "critical" | "error" => Ok(Self::Critical),
37            _ => Err(ParseAlertSeverityError(s.to_string())),
38        }
39    }
40}
41
42impl std::fmt::Display for AlertSeverity {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        match self {
45            Self::Info => write!(f, "info"),
46            Self::Warning => write!(f, "warning"),
47            Self::Critical => write!(f, "critical"),
48        }
49    }
50}
51
52/// Alert status.
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
54#[serde(rename_all = "lowercase")]
55pub enum AlertStatus {
56    /// Alert is inactive.
57    Inactive,
58    /// Alert is pending (condition met, waiting for duration).
59    Pending,
60    /// Alert is firing.
61    Firing,
62    /// Alert was resolved.
63    Resolved,
64}
65
66impl std::fmt::Display for AlertStatus {
67    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
68        match self {
69            Self::Inactive => write!(f, "inactive"),
70            Self::Pending => write!(f, "pending"),
71            Self::Firing => write!(f, "firing"),
72            Self::Resolved => write!(f, "resolved"),
73        }
74    }
75}
76
77/// Alert condition expression.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct AlertCondition {
80    /// Condition expression (e.g., "rate(forge_http_errors[5m]) > 0.05").
81    pub expression: String,
82    /// Duration the condition must be true before firing.
83    pub for_duration: std::time::Duration,
84}
85
86impl AlertCondition {
87    /// Create a new condition.
88    pub fn new(expression: impl Into<String>, for_duration: std::time::Duration) -> Self {
89        Self {
90            expression: expression.into(),
91            for_duration,
92        }
93    }
94
95    /// Create a condition that fires immediately.
96    pub fn immediate(expression: impl Into<String>) -> Self {
97        Self::new(expression, std::time::Duration::ZERO)
98    }
99}
100
101/// Alert state tracking.
102#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct AlertState {
104    /// Current status.
105    pub status: AlertStatus,
106    /// When the condition first became true.
107    pub pending_since: Option<chrono::DateTime<chrono::Utc>>,
108    /// When the alert started firing.
109    pub firing_since: Option<chrono::DateTime<chrono::Utc>>,
110    /// When the alert was last resolved.
111    pub resolved_at: Option<chrono::DateTime<chrono::Utc>>,
112    /// Last evaluation time.
113    pub last_evaluation: Option<chrono::DateTime<chrono::Utc>>,
114    /// Last evaluation result.
115    pub last_value: Option<f64>,
116}
117
118impl Default for AlertState {
119    fn default() -> Self {
120        Self {
121            status: AlertStatus::Inactive,
122            pending_since: None,
123            firing_since: None,
124            resolved_at: None,
125            last_evaluation: None,
126            last_value: None,
127        }
128    }
129}
130
131impl AlertState {
132    /// Transition to pending state.
133    pub fn set_pending(&mut self) {
134        if self.status != AlertStatus::Pending && self.status != AlertStatus::Firing {
135            self.status = AlertStatus::Pending;
136            self.pending_since = Some(chrono::Utc::now());
137        }
138    }
139
140    /// Transition to firing state.
141    pub fn set_firing(&mut self) {
142        if self.status != AlertStatus::Firing {
143            self.status = AlertStatus::Firing;
144            self.firing_since = Some(chrono::Utc::now());
145        }
146    }
147
148    /// Transition to resolved state.
149    pub fn set_resolved(&mut self) {
150        if self.status == AlertStatus::Firing || self.status == AlertStatus::Pending {
151            self.status = AlertStatus::Resolved;
152            self.resolved_at = Some(chrono::Utc::now());
153            self.pending_since = None;
154            self.firing_since = None;
155        }
156    }
157
158    /// Transition to inactive state.
159    pub fn set_inactive(&mut self) {
160        self.status = AlertStatus::Inactive;
161        self.pending_since = None;
162        self.firing_since = None;
163    }
164
165    /// Update after evaluation.
166    pub fn update_evaluation(&mut self, value: f64) {
167        self.last_evaluation = Some(chrono::Utc::now());
168        self.last_value = Some(value);
169    }
170
171    /// Check if the alert should transition from pending to firing.
172    pub fn should_fire(&self, for_duration: std::time::Duration) -> bool {
173        if self.status != AlertStatus::Pending {
174            return false;
175        }
176
177        if let Some(pending_since) = self.pending_since {
178            let elapsed = chrono::Utc::now() - pending_since;
179            return elapsed >= chrono::Duration::from_std(for_duration).unwrap();
180        }
181
182        false
183    }
184}
185
186/// Alert definition.
187#[derive(Debug, Clone, Serialize, Deserialize)]
188pub struct Alert {
189    /// Alert name.
190    pub name: String,
191    /// Alert condition.
192    pub condition: AlertCondition,
193    /// Alert severity.
194    pub severity: AlertSeverity,
195    /// Notification channels.
196    pub notify: Vec<String>,
197    /// Alert description.
198    pub description: Option<String>,
199    /// Current state.
200    pub state: AlertState,
201}
202
203impl Alert {
204    /// Create a new alert.
205    pub fn new(
206        name: impl Into<String>,
207        condition: AlertCondition,
208        severity: AlertSeverity,
209    ) -> Self {
210        Self {
211            name: name.into(),
212            condition,
213            severity,
214            notify: Vec::new(),
215            description: None,
216            state: AlertState::default(),
217        }
218    }
219
220    /// Add a notification channel.
221    pub fn with_notify(mut self, channel: impl Into<String>) -> Self {
222        self.notify.push(channel.into());
223        self
224    }
225
226    /// Set the description.
227    pub fn with_description(mut self, description: impl Into<String>) -> Self {
228        self.description = Some(description.into());
229        self
230    }
231
232    /// Check if the alert is currently firing.
233    pub fn is_firing(&self) -> bool {
234        self.state.status == AlertStatus::Firing
235    }
236
237    /// Check if the alert needs notification.
238    pub fn needs_notification(&self) -> bool {
239        self.is_firing() && !self.notify.is_empty()
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Expression shared by the condition and creation tests.
    const ERROR_RATE_EXPR: &str = "rate(errors[5m]) > 0.05";

    /// Severities must compare from least to most severe.
    #[test]
    fn test_alert_severity_ordering() {
        let ascending = [
            AlertSeverity::Info,
            AlertSeverity::Warning,
            AlertSeverity::Critical,
        ];
        assert!(ascending.windows(2).all(|pair| pair[0] < pair[1]));
    }

    /// `AlertCondition::new` stores both arguments unchanged.
    #[test]
    fn test_alert_condition() {
        let five_minutes = Duration::from_secs(300);
        let condition = AlertCondition::new(ERROR_RATE_EXPR, five_minutes);

        assert_eq!(condition.expression, "rate(errors[5m]) > 0.05");
        assert_eq!(condition.for_duration, five_minutes);
    }

    /// Walk Inactive -> Pending -> Firing -> Resolved and check that each
    /// step sets the status and its timestamp.
    #[test]
    fn test_alert_state_transitions() {
        let mut state = AlertState::default();
        assert_eq!(state.status, AlertStatus::Inactive);

        state.set_pending();
        assert_eq!(state.status, AlertStatus::Pending);
        assert!(state.pending_since.is_some());

        state.set_firing();
        assert_eq!(state.status, AlertStatus::Firing);
        assert!(state.firing_since.is_some());

        state.set_resolved();
        assert_eq!(state.status, AlertStatus::Resolved);
        assert!(state.resolved_at.is_some());
    }

    /// The builder methods populate the alert; a fresh alert is not firing.
    #[test]
    fn test_alert_creation() {
        let condition = AlertCondition::new(ERROR_RATE_EXPR, Duration::from_secs(300));
        let alert = Alert::new("high_error_rate", condition, AlertSeverity::Critical)
            .with_notify("slack:#alerts")
            .with_description("Error rate exceeds 5%");

        assert_eq!(alert.name, "high_error_rate");
        assert_eq!(alert.severity, AlertSeverity::Critical);
        assert_eq!(alert.notify, vec!["slack:#alerts"]);
        assert!(!alert.is_firing());
    }
}