pulseengine_mcp_logging/
structured.rs

1//! Enhanced structured logging for better observability
2//!
3//! This module provides comprehensive structured logging with:
4//! - Contextual field enrichment
5//! - Performance metrics
6//! - Error classification
7//! - Business logic tracing
8//! - System health indicators
9
10use crate::sanitization::get_sanitizer;
11use serde_json::Value;
12use std::collections::HashMap;
13use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
14use tracing::{error, info, warn};
15use uuid::Uuid;
16
17/// Structured logging context with rich observability fields
18#[derive(Debug, Clone)]
19pub struct StructuredContext {
20    /// Request tracking
21    pub request_id: String,
22    pub parent_request_id: Option<String>,
23    pub correlation_id: String,
24
25    /// Service identification
26    pub service_name: String,
27    pub service_version: String,
28    pub instance_id: String,
29
30    /// Request metadata
31    pub tool_name: String,
32    pub tool_version: Option<String>,
33    pub client_id: Option<String>,
34    pub user_agent: Option<String>,
35    pub session_id: Option<String>,
36
37    /// Performance metrics
38    pub start_time: Instant,
39    pub start_timestamp: u64,
40
41    /// Loxone-specific context
42    pub loxone_host: Option<String>,
43    pub loxone_version: Option<String>,
44    pub room_name: Option<String>,
45    pub device_uuid: Option<String>,
46    pub device_type: Option<String>,
47
48    /// Custom fields for business logic
49    pub custom_fields: HashMap<String, Value>,
50}
51
52impl StructuredContext {
53    /// Create a new structured context
54    pub fn new(tool_name: String) -> Self {
55        let now = Instant::now();
56        let timestamp = SystemTime::now()
57            .duration_since(UNIX_EPOCH)
58            .unwrap_or_default()
59            .as_secs();
60
61        Self {
62            request_id: generate_request_id(),
63            parent_request_id: None,
64            correlation_id: generate_correlation_id(),
65            service_name: "loxone-mcp-server".to_string(),
66            service_version: env!("CARGO_PKG_VERSION").to_string(),
67            instance_id: generate_instance_id(),
68            tool_name,
69            tool_version: None,
70            client_id: None,
71            user_agent: None,
72            session_id: None,
73            start_time: now,
74            start_timestamp: timestamp,
75            loxone_host: None,
76            loxone_version: None,
77            room_name: None,
78            device_uuid: None,
79            device_type: None,
80            custom_fields: HashMap::new(),
81        }
82    }
83
84    /// Create a child context for sub-operations
85    pub fn child(&self, operation: &str) -> Self {
86        let mut child = Self::new(format!("{}::{}", self.tool_name, operation));
87        child.parent_request_id = Some(self.request_id.clone());
88        child.correlation_id = self.correlation_id.clone();
89        child.client_id = self.client_id.clone();
90        child.user_agent = self.user_agent.clone();
91        child.session_id = self.session_id.clone();
92        child.loxone_host = self.loxone_host.clone();
93        child.loxone_version = self.loxone_version.clone();
94        child
95    }
96
97    /// Add Loxone-specific context
98    pub fn with_loxone_context(mut self, host: String, version: Option<String>) -> Self {
99        self.loxone_host = Some(host);
100        self.loxone_version = version;
101        self
102    }
103
104    /// Add device context
105    pub fn with_device_context(
106        mut self,
107        device_uuid: String,
108        device_type: Option<String>,
109        room_name: Option<String>,
110    ) -> Self {
111        self.device_uuid = Some(device_uuid);
112        self.device_type = device_type;
113        self.room_name = room_name;
114        self
115    }
116
117    /// Add client context
118    pub fn with_client_context(
119        mut self,
120        client_id: String,
121        user_agent: Option<String>,
122        session_id: Option<String>,
123    ) -> Self {
124        self.client_id = Some(client_id);
125        self.user_agent = user_agent;
126        self.session_id = session_id;
127        self
128    }
129
130    /// Add custom field
131    pub fn with_field<K: ToString, V: Into<Value>>(mut self, key: K, value: V) -> Self {
132        self.custom_fields.insert(key.to_string(), value.into());
133        self
134    }
135
136    /// Get elapsed time since context creation
137    pub fn elapsed(&self) -> Duration {
138        self.start_time.elapsed()
139    }
140
141    /// Get elapsed time in milliseconds
142    pub fn elapsed_ms(&self) -> u64 {
143        self.elapsed().as_millis() as u64
144    }
145}
146
147/// Error classification for better observability
148#[derive(Debug, Clone)]
149pub enum ErrorClass {
150    /// Client errors (4xx equivalent)
151    Client { error_type: String, retryable: bool },
152    /// Server errors (5xx equivalent)
153    Server { error_type: String, retryable: bool },
154    /// Network/communication errors
155    Network { error_type: String, timeout: bool },
156    /// Authentication/authorization errors
157    Auth { error_type: String },
158    /// Business logic errors
159    Business { error_type: String, domain: String },
160}
161
162impl ErrorClass {
163    /// Classify an error using the ErrorClassification trait
164    pub fn from_error<E: crate::ErrorClassification>(error: &E) -> Self {
165        if error.is_auth_error() {
166            Self::Auth {
167                error_type: error.error_type().to_string(),
168            }
169        } else if error.is_connection_error() {
170            Self::Network {
171                error_type: error.error_type().to_string(),
172                timeout: error.is_timeout(),
173            }
174        } else if error.is_timeout() {
175            Self::Network {
176                error_type: error.error_type().to_string(),
177                timeout: true,
178            }
179        } else if error.is_retryable() {
180            Self::Server {
181                error_type: error.error_type().to_string(),
182                retryable: true,
183            }
184        } else {
185            Self::Client {
186                error_type: error.error_type().to_string(),
187                retryable: false,
188            }
189        }
190    }
191}
192
193/// Enhanced structured logger
194pub struct StructuredLogger;
195
196impl StructuredLogger {
197    /// Log request start with comprehensive context
198    pub fn log_request_start(ctx: &StructuredContext, params: &Value) {
199        let sanitized_params = sanitize_value(params);
200
201        info!(
202            // Core request tracking
203            request_id = %ctx.request_id,
204            parent_request_id = ?ctx.parent_request_id,
205            correlation_id = %ctx.correlation_id,
206
207            // Service identification
208            service_name = %ctx.service_name,
209            service_version = %ctx.service_version,
210            instance_id = %ctx.instance_id,
211
212            // Request metadata
213            tool_name = %ctx.tool_name,
214            tool_version = ?ctx.tool_version,
215            client_id = ?ctx.client_id,
216            user_agent = ?ctx.user_agent,
217            session_id = ?ctx.session_id,
218
219            // Timing
220            start_timestamp = ctx.start_timestamp,
221
222            // Loxone context
223            loxone_host = ?ctx.loxone_host,
224            loxone_version = ?ctx.loxone_version,
225            room_name = ?ctx.room_name,
226            device_uuid = ?ctx.device_uuid,
227            device_type = ?ctx.device_type,
228
229            // Request data
230            params = ?sanitized_params,
231
232            // Custom fields
233            custom_fields = ?ctx.custom_fields,
234
235            "MCP request started"
236        );
237    }
238
239    /// Log request completion with performance metrics
240    pub fn log_request_end<E: crate::ErrorClassification>(
241        ctx: &StructuredContext,
242        success: bool,
243        error: Option<&E>,
244        response_size: Option<usize>,
245    ) {
246        let duration_ms = ctx.elapsed_ms();
247        let end_timestamp = SystemTime::now()
248            .duration_since(UNIX_EPOCH)
249            .unwrap_or_default()
250            .as_secs();
251
252        let error_class = error.map(ErrorClass::from_error);
253
254        if success {
255            info!(
256                // Core tracking
257                request_id = %ctx.request_id,
258                correlation_id = %ctx.correlation_id,
259                tool_name = %ctx.tool_name,
260
261                // Performance metrics
262                duration_ms = duration_ms,
263                start_timestamp = ctx.start_timestamp,
264                end_timestamp = end_timestamp,
265                response_size_bytes = ?response_size,
266
267                // Context
268                loxone_host = ?ctx.loxone_host,
269                room_name = ?ctx.room_name,
270                device_uuid = ?ctx.device_uuid,
271
272                "MCP request completed successfully"
273            );
274        } else {
275            error!(
276                // Core tracking
277                request_id = %ctx.request_id,
278                correlation_id = %ctx.correlation_id,
279                tool_name = %ctx.tool_name,
280
281                // Performance metrics
282                duration_ms = duration_ms,
283                start_timestamp = ctx.start_timestamp,
284                end_timestamp = end_timestamp,
285
286                // Error classification (sanitized)
287                error_class = ?error_class,
288                error_message = ?error.map(|e| get_sanitizer().sanitize_error(e)),
289
290                // Context
291                loxone_host = ?ctx.loxone_host,
292                room_name = ?ctx.room_name,
293                device_uuid = ?ctx.device_uuid,
294
295                "MCP request failed"
296            );
297        }
298    }
299
300    /// Log performance warnings
301    pub fn log_slow_request(ctx: &StructuredContext, threshold_ms: u64) {
302        let duration_ms = ctx.elapsed_ms();
303        if duration_ms > threshold_ms {
304            warn!(
305                request_id = %ctx.request_id,
306                correlation_id = %ctx.correlation_id,
307                tool_name = %ctx.tool_name,
308                duration_ms = duration_ms,
309                threshold_ms = threshold_ms,
310
311                // Performance context
312                loxone_host = ?ctx.loxone_host,
313                device_uuid = ?ctx.device_uuid,
314                room_name = ?ctx.room_name,
315
316                "Slow MCP request detected"
317            );
318        }
319    }
320
321    /// Log Loxone API calls with timing
322    pub fn log_loxone_api_call(
323        ctx: &StructuredContext,
324        method: &str,
325        endpoint: &str,
326        duration_ms: u64,
327        status_code: Option<u16>,
328        error: Option<&str>,
329    ) {
330        if let Some(err) = error {
331            warn!(
332                request_id = %ctx.request_id,
333                correlation_id = %ctx.correlation_id,
334
335                // API call details
336                api_method = method,
337                api_endpoint = endpoint,
338                api_duration_ms = duration_ms,
339                api_status_code = ?status_code,
340                api_error = err,
341
342                // Context
343                loxone_host = ?ctx.loxone_host,
344
345                "Loxone API call failed"
346            );
347        } else {
348            info!(
349                request_id = %ctx.request_id,
350                correlation_id = %ctx.correlation_id,
351
352                // API call details
353                api_method = method,
354                api_endpoint = endpoint,
355                api_duration_ms = duration_ms,
356                api_status_code = ?status_code,
357
358                // Context
359                loxone_host = ?ctx.loxone_host,
360
361                "Loxone API call completed"
362            );
363        }
364    }
365
366    /// Log device control operations
367    pub fn log_device_operation(
368        ctx: &StructuredContext,
369        operation: &str,
370        device_uuid: &str,
371        device_name: Option<&str>,
372        room_name: Option<&str>,
373        success: bool,
374        error: Option<&str>,
375    ) {
376        if success {
377            info!(
378                request_id = %ctx.request_id,
379                correlation_id = %ctx.correlation_id,
380
381                // Device operation details
382                device_operation = operation,
383                device_uuid = device_uuid,
384                device_name = ?device_name,
385                device_room = ?room_name,
386
387                // Context
388                loxone_host = ?ctx.loxone_host,
389
390                "Device operation completed successfully"
391            );
392        } else {
393            error!(
394                request_id = %ctx.request_id,
395                correlation_id = %ctx.correlation_id,
396
397                // Device operation details
398                device_operation = operation,
399                device_uuid = device_uuid,
400                device_name = ?device_name,
401                device_room = ?room_name,
402                device_error = ?error,
403
404                // Context
405                loxone_host = ?ctx.loxone_host,
406
407                "Device operation failed"
408            );
409        }
410    }
411
412    /// Log system health metrics
413    pub fn log_health_metrics(
414        connection_status: bool,
415        api_latency_ms: Option<u64>,
416        active_requests: usize,
417        error_rate: f64,
418        memory_usage_mb: Option<f64>,
419    ) {
420        info!(
421            // Health indicators
422            system_healthy = connection_status,
423            api_latency_ms = ?api_latency_ms,
424            active_requests = active_requests,
425            error_rate_percent = error_rate * 100.0,
426            memory_usage_mb = ?memory_usage_mb,
427
428            // Service identification
429            service_name = "loxone-mcp-server",
430            service_version = env!("CARGO_PKG_VERSION"),
431
432            "System health metrics"
433        );
434    }
435
436    /// Create a structured tracing span
437    pub fn create_span(ctx: &StructuredContext) -> tracing::Span {
438        tracing::info_span!(
439            "mcp_request",
440            request_id = %ctx.request_id,
441            correlation_id = %ctx.correlation_id,
442            tool_name = %ctx.tool_name,
443            client_id = ?ctx.client_id,
444            loxone_host = ?ctx.loxone_host,
445            device_uuid = ?ctx.device_uuid,
446            room_name = ?ctx.room_name
447        )
448    }
449}
450
451/// Sanitize values for logging (remove sensitive data)
452fn sanitize_value(value: &Value) -> Value {
453    match value {
454        Value::Object(map) => {
455            let mut sanitized = serde_json::Map::new();
456            for (key, val) in map {
457                if is_sensitive_field(key) {
458                    sanitized.insert(key.clone(), Value::String("***".to_string()));
459                } else {
460                    sanitized.insert(key.clone(), sanitize_value(val));
461                }
462            }
463            Value::Object(sanitized)
464        }
465        Value::Array(arr) => Value::Array(arr.iter().map(sanitize_value).collect()),
466        _ => value.clone(),
467    }
468}
469
470/// Check if a field contains sensitive data
471fn is_sensitive_field(field: &str) -> bool {
472    let field_lower = field.to_lowercase();
473    field_lower.contains("password")
474        || field_lower.contains("secret")
475        || field_lower.contains("token")
476        || field_lower.contains("api_key")
477        || field_lower.contains("apikey")
478        || field_lower.contains("auth")
479        || field_lower.contains("credential")
480        || field_lower.contains("private_key")
481        || field_lower.contains("session")
482}
483
484/// Generate a unique request ID
485fn generate_request_id() -> String {
486    let uuid = Uuid::new_v4();
487    let bytes = uuid.as_bytes();
488    hex::encode(&bytes[..8]) // 16 character hex string
489}
490
491/// Generate a correlation ID for request tracing
492fn generate_correlation_id() -> String {
493    let uuid = Uuid::new_v4();
494    let bytes = uuid.as_bytes();
495    hex::encode(&bytes[..12]) // 24 character hex string
496}
497
498/// Generate a service instance ID
499fn generate_instance_id() -> String {
500    use std::sync::OnceLock;
501    static INSTANCE_ID: OnceLock<String> = OnceLock::new();
502
503    INSTANCE_ID
504        .get_or_init(|| {
505            let uuid = Uuid::new_v4();
506            let bytes = uuid.as_bytes();
507            hex::encode(&bytes[..6]) // 12 character hex string
508        })
509        .clone()
510}
511
512#[cfg(test)]
513mod tests {
514    use super::*;
515
516    #[test]
517    fn test_structured_context_creation() {
518        let ctx = StructuredContext::new("test_tool".to_string());
519        assert_eq!(ctx.tool_name, "test_tool");
520        assert_eq!(ctx.service_name, "loxone-mcp-server");
521        assert!(ctx.request_id.len() == 16);
522        assert!(ctx.correlation_id.len() == 24);
523    }
524
525    #[test]
526    fn test_child_context() {
527        let parent = StructuredContext::new("parent_tool".to_string());
528        let child = parent.child("sub_operation");
529
530        assert_eq!(child.tool_name, "parent_tool::sub_operation");
531        assert_eq!(child.correlation_id, parent.correlation_id);
532        assert_eq!(child.parent_request_id, Some(parent.request_id));
533    }
534
535    #[test]
536    fn test_context_enrichment() {
537        let ctx = StructuredContext::new("test_tool".to_string())
538            .with_loxone_context("192.168.1.100".to_string(), Some("12.0.0".to_string()))
539            .with_device_context(
540                "device-123".to_string(),
541                Some("Switch".to_string()),
542                Some("Living Room".to_string()),
543            )
544            .with_field("custom_field", "custom_value");
545
546        assert_eq!(ctx.loxone_host, Some("192.168.1.100".to_string()));
547        assert_eq!(ctx.device_uuid, Some("device-123".to_string()));
548        assert_eq!(
549            ctx.custom_fields.get("custom_field"),
550            Some(&Value::String("custom_value".to_string()))
551        );
552    }
553
554    #[test]
555    fn test_error_classification() {
556        // Create a mock error implementing ErrorClassification
557        #[derive(Debug)]
558        struct MockError;
559
560        impl std::fmt::Display for MockError {
561            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
562                write!(f, "Mock authentication error")
563            }
564        }
565
566        impl std::error::Error for MockError {}
567
568        impl crate::ErrorClassification for MockError {
569            fn error_type(&self) -> &str {
570                "auth_error"
571            }
572            fn is_retryable(&self) -> bool {
573                false
574            }
575            fn is_timeout(&self) -> bool {
576                false
577            }
578            fn is_auth_error(&self) -> bool {
579                true
580            }
581            fn is_connection_error(&self) -> bool {
582                false
583            }
584        }
585
586        let mock_error = MockError;
587        let error_class = ErrorClass::from_error(&mock_error);
588
589        matches!(error_class, ErrorClass::Auth { .. });
590    }
591
592    #[test]
593    fn test_sanitize_sensitive_data() {
594        let data = serde_json::json!({
595            "username": "test_user",
596            "password": "secret123",
597            "device_id": "dev123"
598        });
599
600        let sanitized = sanitize_value(&data);
601        assert_eq!(sanitized["username"], "test_user");
602        assert_eq!(sanitized["password"], "***");
603        assert_eq!(sanitized["device_id"], "dev123");
604    }
605}