llm_shield_cloud/
observability.rs

1//! Cloud observability abstractions.
2//!
3//! Provides unified traits for metrics, logging, and tracing across cloud providers:
4//! - AWS: CloudWatch Metrics, CloudWatch Logs, X-Ray
5//! - GCP: Cloud Monitoring, Cloud Logging, Cloud Trace
6//! - Azure: Azure Monitor, Application Insights
7
8use crate::error::{CloudError, Result};
9use async_trait::async_trait;
10use std::collections::HashMap;
11use std::time::SystemTime;
12
/// Log severity level.
///
/// Variants are declared from least severe (`Trace`) to most severe
/// (`Fatal`); [`LogLevel::to_severity`] exposes that ordering as a number.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    /// Trace-level logging (most verbose).
    Trace,
    /// Debug-level logging.
    Debug,
    /// Info-level logging.
    Info,
    /// Warning-level logging.
    Warn,
    /// Error-level logging.
    Error,
    /// Fatal/critical-level logging.
    Fatal,
}

impl LogLevel {
    /// Returns the upper-case name of the level (for example `"INFO"`).
    ///
    /// The result is a `'static` string literal, so it is not tied to the
    /// lifetime of the `LogLevel` value it was obtained from and can be
    /// stored or returned freely by callers.
    pub fn as_str(&self) -> &'static str {
        match self {
            LogLevel::Trace => "TRACE",
            LogLevel::Debug => "DEBUG",
            LogLevel::Info => "INFO",
            LogLevel::Warn => "WARN",
            LogLevel::Error => "ERROR",
            LogLevel::Fatal => "FATAL",
        }
    }

    /// Converts log level to numeric severity (higher = more severe).
    ///
    /// `Trace` maps to 0 and each following level adds one, up to `Fatal`
    /// at 5. The mapping is spelled out explicitly so that it does not
    /// silently change if the variants are ever reordered.
    pub fn to_severity(&self) -> u8 {
        match self {
            LogLevel::Trace => 0,
            LogLevel::Debug => 1,
            LogLevel::Info => 2,
            LogLevel::Warn => 3,
            LogLevel::Error => 4,
            LogLevel::Fatal => 5,
        }
    }
}
55
56/// Structured log entry.
57#[derive(Debug, Clone)]
58pub struct LogEntry {
59    /// Timestamp of the log entry.
60    pub timestamp: SystemTime,
61
62    /// Log severity level.
63    pub level: LogLevel,
64
65    /// Log message.
66    pub message: String,
67
68    /// Optional labels/tags for the log entry.
69    pub labels: HashMap<String, String>,
70
71    /// Optional trace context (for correlation).
72    pub trace_id: Option<String>,
73
74    /// Optional span context (for correlation).
75    pub span_id: Option<String>,
76
77    /// Source location (file, line).
78    pub source: Option<String>,
79}
80
81impl LogEntry {
82    /// Creates a new log entry with the given level and message.
83    pub fn new(level: LogLevel, message: impl Into<String>) -> Self {
84        Self {
85            timestamp: SystemTime::now(),
86            level,
87            message: message.into(),
88            labels: HashMap::new(),
89            trace_id: None,
90            span_id: None,
91            source: None,
92        }
93    }
94
95    /// Adds a label to the log entry.
96    pub fn with_label(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
97        self.labels.insert(key.into(), value.into());
98        self
99    }
100
101    /// Sets the trace ID for correlation.
102    pub fn with_trace_id(mut self, trace_id: impl Into<String>) -> Self {
103        self.trace_id = Some(trace_id.into());
104        self
105    }
106
107    /// Sets the span ID for correlation.
108    pub fn with_span_id(mut self, span_id: impl Into<String>) -> Self {
109        self.span_id = Some(span_id.into());
110        self
111    }
112}
113
/// Metric data point.
///
/// Create one with [`Metric::new`] and attach dimensions or a unit through
/// the chainable `with_*` methods.
#[derive(Debug, Clone)]
pub struct Metric {
    /// Metric name.
    pub name: String,

    /// Metric value.
    pub value: f64,

    /// Timestamp of the metric, in whole seconds since the Unix epoch.
    pub timestamp: u64,

    /// Metric dimensions/labels.
    pub dimensions: HashMap<String, String>,

    /// Metric unit (e.g., "Count", "Seconds", "Bytes").
    pub unit: Option<String>,
}

impl Metric {
    /// Creates a new metric with the given name and value.
    ///
    /// The timestamp is the current system time expressed as whole seconds
    /// since the Unix epoch. If the system clock reports a time before the
    /// epoch, the timestamp is set to `0` rather than panicking.
    pub fn new(name: impl Into<String>, value: f64) -> Self {
        // `duration_since` fails when the clock is set before the epoch; a
        // metrics constructor must not bring the process down over clock
        // skew, so fall back to 0 in that case.
        let timestamp = SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|elapsed| elapsed.as_secs())
            .unwrap_or(0);

        Self {
            name: name.into(),
            value,
            timestamp,
            dimensions: HashMap::new(),
            unit: None,
        }
    }

    /// Adds a dimension to the metric.
    ///
    /// Adding a key that is already present replaces its previous value.
    pub fn with_dimension(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.dimensions.insert(key.into(), value.into());
        self
    }

    /// Sets the metric unit.
    pub fn with_unit(mut self, unit: impl Into<String>) -> Self {
        self.unit = Some(unit.into());
        self
    }
}
160
161/// Trace span representing a unit of work.
162#[derive(Debug, Clone)]
163pub struct Span {
164    /// Span name.
165    pub name: String,
166
167    /// Span ID (unique within a trace).
168    pub span_id: String,
169
170    /// Trace ID (groups related spans).
171    pub trace_id: String,
172
173    /// Parent span ID (if this is a child span).
174    pub parent_span_id: Option<String>,
175
176    /// When the span started.
177    pub start_time: SystemTime,
178
179    /// When the span ended (None if still active).
180    pub end_time: Option<SystemTime>,
181
182    /// Span attributes/tags.
183    pub attributes: HashMap<String, String>,
184
185    /// Span status (e.g., "OK", "ERROR").
186    pub status: Option<String>,
187}
188
189impl Span {
190    /// Creates a new span with the given name.
191    pub fn new(name: impl Into<String>, trace_id: impl Into<String>) -> Self {
192        Self {
193            name: name.into(),
194            span_id: uuid::Uuid::new_v4().to_string(),
195            trace_id: trace_id.into(),
196            parent_span_id: None,
197            start_time: SystemTime::now(),
198            end_time: None,
199            attributes: HashMap::new(),
200            status: None,
201        }
202    }
203
204    /// Sets the parent span ID.
205    pub fn with_parent(mut self, parent_span_id: impl Into<String>) -> Self {
206        self.parent_span_id = Some(parent_span_id.into());
207        self
208    }
209
210    /// Adds an attribute to the span.
211    pub fn with_attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
212        self.attributes.insert(key.into(), value.into());
213        self
214    }
215
216    /// Ends the span with the given status.
217    pub fn end_with_status(mut self, status: impl Into<String>) -> Self {
218        self.end_time = Some(SystemTime::now());
219        self.status = Some(status.into());
220        self
221    }
222
223    /// Gets the span duration if ended.
224    pub fn duration(&self) -> Option<std::time::Duration> {
225        self.end_time
226            .and_then(|end| end.duration_since(self.start_time).ok())
227    }
228}
229
230/// Unified trait for cloud metrics export.
231#[async_trait]
232pub trait CloudMetrics: Send + Sync {
233    /// Exports a batch of metrics to the cloud provider.
234    ///
235    /// # Arguments
236    ///
237    /// * `metrics` - Slice of metrics to export
238    ///
239    /// # Errors
240    ///
241    /// Returns `CloudError::MetricsExport` if the export operation fails.
242    async fn export_metrics(&self, metrics: &[Metric]) -> Result<()>;
243
244    /// Exports a single metric.
245    ///
246    /// # Arguments
247    ///
248    /// * `metric` - The metric to export
249    ///
250    /// # Errors
251    ///
252    /// Returns `CloudError::MetricsExport` if the export operation fails.
253    async fn export_metric(&self, metric: &Metric) -> Result<()> {
254        self.export_metrics(&[metric.clone()]).await
255    }
256}
257
258/// Unified trait for cloud logging.
259#[async_trait]
260pub trait CloudLogger: Send + Sync {
261    /// Writes a simple log message.
262    ///
263    /// # Arguments
264    ///
265    /// * `message` - The log message
266    /// * `level` - The log level
267    ///
268    /// # Errors
269    ///
270    /// Returns `CloudError::LogWrite` if the write operation fails.
271    async fn log(&self, message: &str, level: LogLevel) -> Result<()>;
272
273    /// Writes a structured log entry.
274    ///
275    /// # Arguments
276    ///
277    /// * `entry` - The structured log entry
278    ///
279    /// # Errors
280    ///
281    /// Returns `CloudError::LogWrite` if the write operation fails.
282    async fn log_structured(&self, entry: &LogEntry) -> Result<()>;
283
284    /// Writes a batch of log entries.
285    ///
286    /// # Arguments
287    ///
288    /// * `entries` - Slice of log entries to write
289    ///
290    /// # Errors
291    ///
292    /// Returns `CloudError::LogWrite` if the write operation fails.
293    async fn log_batch(&self, entries: &[LogEntry]) -> Result<()> {
294        // Default implementation writes one by one
295        for entry in entries {
296            self.log_structured(entry).await?;
297        }
298        Ok(())
299    }
300}
301
302/// Unified trait for distributed tracing.
303#[async_trait]
304pub trait CloudTracer: Send + Sync {
305    /// Starts a new trace span.
306    ///
307    /// # Arguments
308    ///
309    /// * `name` - The span name
310    ///
311    /// # Returns
312    ///
313    /// Returns a new span with a generated trace ID.
314    fn start_span(&self, name: &str) -> Span {
315        Span::new(name, uuid::Uuid::new_v4().to_string())
316    }
317
318    /// Starts a child span.
319    ///
320    /// # Arguments
321    ///
322    /// * `name` - The span name
323    /// * `parent` - The parent span
324    ///
325    /// # Returns
326    ///
327    /// Returns a new child span.
328    fn start_child_span(&self, name: &str, parent: &Span) -> Span {
329        Span::new(name, parent.trace_id.clone())
330            .with_parent(parent.span_id.clone())
331    }
332
333    /// Ends a span and exports it.
334    ///
335    /// # Arguments
336    ///
337    /// * `span` - The span to end and export
338    ///
339    /// # Errors
340    ///
341    /// Returns `CloudError::TraceExport` if the export operation fails.
342    async fn end_span(&self, span: Span) -> Result<()>;
343
344    /// Exports a batch of spans.
345    ///
346    /// # Arguments
347    ///
348    /// * `spans` - Slice of spans to export
349    ///
350    /// # Errors
351    ///
352    /// Returns `CloudError::TraceExport` if the export operation fails.
353    async fn export_spans(&self, spans: &[Span]) -> Result<()> {
354        // Default implementation exports one by one
355        for span in spans {
356            self.end_span(span.clone()).await?;
357        }
358        Ok(())
359    }
360}
361
#[cfg(test)]
mod tests {
    use super::*;

    /// Level names are rendered in upper case.
    #[test]
    fn test_log_level_as_str() {
        let cases = [(LogLevel::Info, "INFO"), (LogLevel::Error, "ERROR")];
        for &(level, expected) in &cases {
            assert_eq!(level.as_str(), expected);
        }
    }

    /// Each level is strictly more severe than the one listed after it.
    #[test]
    fn test_log_level_severity() {
        let most_to_least = [
            LogLevel::Fatal,
            LogLevel::Error,
            LogLevel::Warn,
            LogLevel::Info,
        ];
        for pair in most_to_least.windows(2) {
            assert!(pair[0].to_severity() > pair[1].to_severity());
        }
    }

    /// The log entry builder stores the label and both correlation IDs.
    #[test]
    fn test_log_entry_builder() {
        let entry = LogEntry::new(LogLevel::Info, "Test message")
            .with_label("service", "llm-shield")
            .with_trace_id("trace-123")
            .with_span_id("span-456");

        assert_eq!(entry.message, "Test message");
        assert_eq!(entry.level, LogLevel::Info);
        assert_eq!(
            entry.labels.get("service").map(String::as_str),
            Some("llm-shield")
        );
        assert_eq!(entry.trace_id.as_deref(), Some("trace-123"));
        assert_eq!(entry.span_id.as_deref(), Some("span-456"));
    }

    /// The metric builder stores name, value, dimensions and unit.
    #[test]
    fn test_metric_builder() {
        let metric = Metric::new("http_requests_total", 100.0)
            .with_dimension("method", "POST")
            .with_dimension("status", "200")
            .with_unit("Count");

        assert_eq!(metric.name, "http_requests_total");
        assert_eq!(metric.value, 100.0);
        assert_eq!(
            metric.dimensions.get("method").map(String::as_str),
            Some("POST")
        );
        assert_eq!(metric.unit.as_deref(), Some("Count"));
    }

    /// A fresh span is active, parentless and keeps its attributes.
    #[test]
    fn test_span_creation() {
        let span = Span::new("test_operation", "trace-abc")
            .with_attribute("http.method", "GET")
            .with_attribute("http.status_code", "200");

        assert_eq!(span.name, "test_operation");
        assert_eq!(span.trace_id, "trace-abc");
        assert!(span.parent_span_id.is_none());
        assert!(span.end_time.is_none());
        assert_eq!(span.attributes.len(), 2);
    }

    /// A child span shares the trace ID and points at its parent.
    #[test]
    fn test_span_child() {
        let parent = Span::new("parent", "trace-123");
        let child =
            Span::new("child", parent.trace_id.clone()).with_parent(parent.span_id.clone());

        assert_eq!(child.trace_id, parent.trace_id);
        assert_eq!(
            child.parent_span_id.as_deref(),
            Some(parent.span_id.as_str())
        );
    }

    /// Duration is only available once the span has been ended.
    #[test]
    fn test_span_duration() {
        let span = Span::new("test", "trace-1");
        assert!(span.duration().is_none());

        let ended_span = span.end_with_status("OK");
        assert!(ended_span.duration().is_some());
        assert_eq!(ended_span.status.as_deref(), Some("OK"));
    }
}