pulseengine_mcp_logging/
structured.rs

1//! Enhanced structured logging for better observability
2//!
3//! This module provides comprehensive structured logging with:
4//! - Contextual field enrichment
5//! - Performance metrics
6//! - Error classification
7//! - Business logic tracing
8//! - System health indicators
9
10use crate::sanitization::get_sanitizer;
11use serde_json::Value;
12use std::collections::HashMap;
13use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
14use tracing::{error, info, warn};
15use uuid::Uuid;
16
17/// Structured logging context with rich observability fields
18#[derive(Debug, Clone)]
19pub struct StructuredContext {
20    /// Request tracking
21    pub request_id: String,
22    pub parent_request_id: Option<String>,
23    pub correlation_id: String,
24
25    /// Service identification
26    pub service_name: String,
27    pub service_version: String,
28    pub instance_id: String,
29
30    /// Request metadata
31    pub tool_name: String,
32    pub tool_version: Option<String>,
33    pub client_id: Option<String>,
34    pub user_agent: Option<String>,
35    pub session_id: Option<String>,
36
37    /// Performance metrics
38    pub start_time: Instant,
39    pub start_timestamp: u64,
40
41    /// Loxone-specific context
42    pub loxone_host: Option<String>,
43    pub loxone_version: Option<String>,
44    pub room_name: Option<String>,
45    pub device_uuid: Option<String>,
46    pub device_type: Option<String>,
47
48    /// Custom fields for business logic
49    pub custom_fields: HashMap<String, Value>,
50}
51
52impl StructuredContext {
53    /// Create a new structured context
54    pub fn new(tool_name: String) -> Self {
55        let now = Instant::now();
56        let timestamp = SystemTime::now()
57            .duration_since(UNIX_EPOCH)
58            .unwrap_or_default()
59            .as_secs();
60
61        Self {
62            request_id: generate_request_id(),
63            parent_request_id: None,
64            correlation_id: generate_correlation_id(),
65            service_name: "loxone-mcp-server".to_string(),
66            service_version: env!("CARGO_PKG_VERSION").to_string(),
67            instance_id: generate_instance_id(),
68            tool_name,
69            tool_version: None,
70            client_id: None,
71            user_agent: None,
72            session_id: None,
73            start_time: now,
74            start_timestamp: timestamp,
75            loxone_host: None,
76            loxone_version: None,
77            room_name: None,
78            device_uuid: None,
79            device_type: None,
80            custom_fields: HashMap::new(),
81        }
82    }
83
84    /// Create a child context for sub-operations
85    #[must_use]
86    pub fn child(&self, operation: &str) -> Self {
87        let mut child = Self::new(format!("{}::{}", self.tool_name, operation));
88        child.parent_request_id = Some(self.request_id.clone());
89        child.correlation_id.clone_from(&self.correlation_id);
90        child.client_id.clone_from(&self.client_id);
91        child.user_agent.clone_from(&self.user_agent);
92        child.session_id.clone_from(&self.session_id);
93        child.loxone_host.clone_from(&self.loxone_host);
94        child.loxone_version.clone_from(&self.loxone_version);
95        child
96    }
97
98    /// Add Loxone-specific context
99    #[must_use]
100    pub fn with_loxone_context(mut self, host: String, version: Option<String>) -> Self {
101        self.loxone_host = Some(host);
102        self.loxone_version = version;
103        self
104    }
105
106    /// Add device context
107    #[must_use]
108    pub fn with_device_context(
109        mut self,
110        device_uuid: String,
111        device_type: Option<String>,
112        room_name: Option<String>,
113    ) -> Self {
114        self.device_uuid = Some(device_uuid);
115        self.device_type = device_type;
116        self.room_name = room_name;
117        self
118    }
119
120    /// Add client context
121    #[must_use]
122    pub fn with_client_context(
123        mut self,
124        client_id: String,
125        user_agent: Option<String>,
126        session_id: Option<String>,
127    ) -> Self {
128        self.client_id = Some(client_id);
129        self.user_agent = user_agent;
130        self.session_id = session_id;
131        self
132    }
133
134    /// Add custom field
135    #[must_use]
136    pub fn with_field<K: ToString, V: Into<Value>>(mut self, key: &K, value: V) -> Self {
137        self.custom_fields.insert(key.to_string(), value.into());
138        self
139    }
140
141    /// Get elapsed time since context creation
142    pub fn elapsed(&self) -> Duration {
143        self.start_time.elapsed()
144    }
145
146    /// Get elapsed time in milliseconds
147    pub fn elapsed_ms(&self) -> u64 {
148        self.elapsed().as_millis().try_into().unwrap_or(u64::MAX)
149    }
150}
151
152/// Error classification for better observability
153#[derive(Debug, Clone)]
154pub enum ErrorClass {
155    /// Client errors (4xx equivalent)
156    Client { error_type: String, retryable: bool },
157    /// Server errors (5xx equivalent)
158    Server { error_type: String, retryable: bool },
159    /// Network/communication errors
160    Network { error_type: String, timeout: bool },
161    /// Authentication/authorization errors
162    Auth { error_type: String },
163    /// Business logic errors
164    Business { error_type: String, domain: String },
165}
166
167impl ErrorClass {
168    /// Classify an error using the `ErrorClassification` trait
169    pub fn from_error<E: crate::ErrorClassification>(error: &E) -> Self {
170        if error.is_auth_error() {
171            Self::Auth {
172                error_type: error.error_type().to_string(),
173            }
174        } else if error.is_connection_error() {
175            Self::Network {
176                error_type: error.error_type().to_string(),
177                timeout: error.is_timeout(),
178            }
179        } else if error.is_timeout() {
180            Self::Network {
181                error_type: error.error_type().to_string(),
182                timeout: true,
183            }
184        } else if error.is_retryable() {
185            Self::Server {
186                error_type: error.error_type().to_string(),
187                retryable: true,
188            }
189        } else {
190            Self::Client {
191                error_type: error.error_type().to_string(),
192                retryable: false,
193            }
194        }
195    }
196}
197
198/// Enhanced structured logger
199#[derive(Default)]
200pub struct StructuredLogger;
201
202impl StructuredLogger {
203    /// Create a new structured logger instance
204    pub fn new() -> Self {
205        Self
206    }
207    /// Log request start with comprehensive context
208    pub fn log_request_start(ctx: &StructuredContext, params: &Value) {
209        let sanitized_params = sanitize_value(params);
210
211        info!(
212            // Core request tracking
213            request_id = %ctx.request_id,
214            parent_request_id = ?ctx.parent_request_id,
215            correlation_id = %ctx.correlation_id,
216
217            // Service identification
218            service_name = %ctx.service_name,
219            service_version = %ctx.service_version,
220            instance_id = %ctx.instance_id,
221
222            // Request metadata
223            tool_name = %ctx.tool_name,
224            tool_version = ?ctx.tool_version,
225            client_id = ?ctx.client_id,
226            user_agent = ?ctx.user_agent,
227            session_id = ?ctx.session_id,
228
229            // Timing
230            start_timestamp = ctx.start_timestamp,
231
232            // Loxone context
233            loxone_host = ?ctx.loxone_host,
234            loxone_version = ?ctx.loxone_version,
235            room_name = ?ctx.room_name,
236            device_uuid = ?ctx.device_uuid,
237            device_type = ?ctx.device_type,
238
239            // Request data
240            params = ?sanitized_params,
241
242            // Custom fields
243            custom_fields = ?ctx.custom_fields,
244
245            "MCP request started"
246        );
247    }
248
249    /// Log request completion with performance metrics
250    pub fn log_request_end<E: crate::ErrorClassification>(
251        ctx: &StructuredContext,
252        success: bool,
253        error: Option<&E>,
254        response_size: Option<usize>,
255    ) {
256        let duration_ms = ctx.elapsed_ms();
257        let end_timestamp = SystemTime::now()
258            .duration_since(UNIX_EPOCH)
259            .unwrap_or_default()
260            .as_secs();
261
262        let error_class = error.map(ErrorClass::from_error);
263
264        if success {
265            info!(
266                // Core tracking
267                request_id = %ctx.request_id,
268                correlation_id = %ctx.correlation_id,
269                tool_name = %ctx.tool_name,
270
271                // Performance metrics
272                duration_ms = duration_ms,
273                start_timestamp = ctx.start_timestamp,
274                end_timestamp = end_timestamp,
275                response_size_bytes = ?response_size,
276
277                // Context
278                loxone_host = ?ctx.loxone_host,
279                room_name = ?ctx.room_name,
280                device_uuid = ?ctx.device_uuid,
281
282                "MCP request completed successfully"
283            );
284        } else {
285            error!(
286                // Core tracking
287                request_id = %ctx.request_id,
288                correlation_id = %ctx.correlation_id,
289                tool_name = %ctx.tool_name,
290
291                // Performance metrics
292                duration_ms = duration_ms,
293                start_timestamp = ctx.start_timestamp,
294                end_timestamp = end_timestamp,
295
296                // Error classification (sanitized)
297                error_class = ?error_class,
298                error_message = ?error.map(|e| get_sanitizer().sanitize_error(e)),
299
300                // Context
301                loxone_host = ?ctx.loxone_host,
302                room_name = ?ctx.room_name,
303                device_uuid = ?ctx.device_uuid,
304
305                "MCP request failed"
306            );
307        }
308    }
309
310    /// Log performance warnings
311    pub fn log_slow_request(ctx: &StructuredContext, threshold_ms: u64) {
312        let duration_ms = ctx.elapsed_ms();
313        if duration_ms > threshold_ms {
314            warn!(
315                request_id = %ctx.request_id,
316                correlation_id = %ctx.correlation_id,
317                tool_name = %ctx.tool_name,
318                duration_ms = duration_ms,
319                threshold_ms = threshold_ms,
320
321                // Performance context
322                loxone_host = ?ctx.loxone_host,
323                device_uuid = ?ctx.device_uuid,
324                room_name = ?ctx.room_name,
325
326                "Slow MCP request detected"
327            );
328        }
329    }
330
331    /// Log Loxone API calls with timing
332    pub fn log_loxone_api_call(
333        ctx: &StructuredContext,
334        method: &str,
335        endpoint: &str,
336        duration_ms: u64,
337        status_code: Option<u16>,
338        error: Option<&str>,
339    ) {
340        if let Some(err) = error {
341            warn!(
342                request_id = %ctx.request_id,
343                correlation_id = %ctx.correlation_id,
344
345                // API call details
346                api_method = method,
347                api_endpoint = endpoint,
348                api_duration_ms = duration_ms,
349                api_status_code = ?status_code,
350                api_error = err,
351
352                // Context
353                loxone_host = ?ctx.loxone_host,
354
355                "Loxone API call failed"
356            );
357        } else {
358            info!(
359                request_id = %ctx.request_id,
360                correlation_id = %ctx.correlation_id,
361
362                // API call details
363                api_method = method,
364                api_endpoint = endpoint,
365                api_duration_ms = duration_ms,
366                api_status_code = ?status_code,
367
368                // Context
369                loxone_host = ?ctx.loxone_host,
370
371                "Loxone API call completed"
372            );
373        }
374    }
375
376    /// Log device control operations
377    pub fn log_device_operation(
378        ctx: &StructuredContext,
379        operation: &str,
380        device_uuid: &str,
381        device_name: Option<&str>,
382        room_name: Option<&str>,
383        success: bool,
384        error: Option<&str>,
385    ) {
386        if success {
387            info!(
388                request_id = %ctx.request_id,
389                correlation_id = %ctx.correlation_id,
390
391                // Device operation details
392                device_operation = operation,
393                device_uuid = device_uuid,
394                device_name = ?device_name,
395                device_room = ?room_name,
396
397                // Context
398                loxone_host = ?ctx.loxone_host,
399
400                "Device operation completed successfully"
401            );
402        } else {
403            error!(
404                request_id = %ctx.request_id,
405                correlation_id = %ctx.correlation_id,
406
407                // Device operation details
408                device_operation = operation,
409                device_uuid = device_uuid,
410                device_name = ?device_name,
411                device_room = ?room_name,
412                device_error = ?error,
413
414                // Context
415                loxone_host = ?ctx.loxone_host,
416
417                "Device operation failed"
418            );
419        }
420    }
421
422    /// Log system health metrics
423    pub fn log_health_metrics(
424        connection_status: bool,
425        api_latency_ms: Option<u64>,
426        active_requests: usize,
427        error_rate: f64,
428        memory_usage_mb: Option<f64>,
429    ) {
430        info!(
431            // Health indicators
432            system_healthy = connection_status,
433            api_latency_ms = ?api_latency_ms,
434            active_requests = active_requests,
435            error_rate_percent = error_rate * 100.0,
436            memory_usage_mb = ?memory_usage_mb,
437
438            // Service identification
439            service_name = "loxone-mcp-server",
440            service_version = env!("CARGO_PKG_VERSION"),
441
442            "System health metrics"
443        );
444    }
445
446    /// Create a structured tracing span
447    pub fn create_span(ctx: &StructuredContext) -> tracing::Span {
448        tracing::info_span!(
449            "mcp_request",
450            request_id = %ctx.request_id,
451            correlation_id = %ctx.correlation_id,
452            tool_name = %ctx.tool_name,
453            client_id = ?ctx.client_id,
454            loxone_host = ?ctx.loxone_host,
455            device_uuid = ?ctx.device_uuid,
456            room_name = ?ctx.room_name
457        )
458    }
459}
460
461/// Sanitize values for logging (remove sensitive data)
462fn sanitize_value(value: &Value) -> Value {
463    match value {
464        Value::Object(map) => {
465            let mut sanitized = serde_json::Map::new();
466            for (key, val) in map {
467                if is_sensitive_field(key) {
468                    sanitized.insert(key.clone(), Value::String("***".to_string()));
469                } else {
470                    sanitized.insert(key.clone(), sanitize_value(val));
471                }
472            }
473            Value::Object(sanitized)
474        }
475        Value::Array(arr) => Value::Array(arr.iter().map(sanitize_value).collect()),
476        _ => value.clone(),
477    }
478}
479
480/// Check if a field contains sensitive data
481fn is_sensitive_field(field: &str) -> bool {
482    let field_lower = field.to_lowercase();
483    field_lower.contains("password")
484        || field_lower.contains("passwd")
485        || field_lower.contains("pass")
486        || field_lower.contains("pwd")
487        || field_lower.contains("secret")
488        || field_lower.contains("token")
489        || field_lower.contains("api_key")
490        || field_lower.contains("apikey")
491        || field_lower.contains("key")
492        || field_lower.contains("auth")
493        || field_lower.contains("credential")
494        || field_lower.contains("private_key")
495        || field_lower.contains("session")
496        || field_lower.contains("bearer")
497}
498
499/// Generate a unique request ID
500fn generate_request_id() -> String {
501    let uuid = Uuid::new_v4();
502    let bytes = uuid.as_bytes();
503    hex::encode(&bytes[..8]) // 16 character hex string
504}
505
506/// Generate a correlation ID for request tracing
507fn generate_correlation_id() -> String {
508    let uuid = Uuid::new_v4();
509    let bytes = uuid.as_bytes();
510    hex::encode(&bytes[..12]) // 24 character hex string
511}
512
513/// Generate a service instance ID
514fn generate_instance_id() -> String {
515    use std::sync::OnceLock;
516    static INSTANCE_ID: OnceLock<String> = OnceLock::new();
517
518    INSTANCE_ID
519        .get_or_init(|| {
520            let uuid = Uuid::new_v4();
521            let bytes = uuid.as_bytes();
522            hex::encode(&bytes[..6]) // 12 character hex string
523        })
524        .clone()
525}
526
527#[cfg(test)]
528#[path = "structured_tests.rs"]
529mod structured_tests;
530
531#[cfg(test)]
532mod tests {
533    use super::*;
534
535    #[test]
536    fn test_structured_context_creation() {
537        let ctx = StructuredContext::new("test_tool".to_string());
538        assert_eq!(ctx.tool_name, "test_tool");
539        assert_eq!(ctx.service_name, "loxone-mcp-server");
540        assert!(ctx.request_id.len() == 16);
541        assert!(ctx.correlation_id.len() == 24);
542    }
543
544    #[test]
545    fn test_child_context() {
546        let parent = StructuredContext::new("parent_tool".to_string());
547        let child = parent.child("sub_operation");
548
549        assert_eq!(child.tool_name, "parent_tool::sub_operation");
550        assert_eq!(child.correlation_id, parent.correlation_id);
551        assert_eq!(child.parent_request_id, Some(parent.request_id));
552    }
553
554    #[test]
555    fn test_context_enrichment() {
556        let ctx = StructuredContext::new("test_tool".to_string())
557            .with_loxone_context("192.168.1.100".to_string(), Some("12.0.0".to_string()))
558            .with_device_context(
559                "device-123".to_string(),
560                Some("Switch".to_string()),
561                Some("Living Room".to_string()),
562            )
563            .with_field(&"custom_field", "custom_value");
564
565        assert_eq!(ctx.loxone_host, Some("192.168.1.100".to_string()));
566        assert_eq!(ctx.device_uuid, Some("device-123".to_string()));
567        assert_eq!(
568            ctx.custom_fields.get("custom_field"),
569            Some(&Value::String("custom_value".to_string()))
570        );
571    }
572
573    #[test]
574    fn test_error_classification() {
575        // Create a mock error implementing ErrorClassification
576        #[derive(Debug)]
577        struct MockError;
578
579        impl std::fmt::Display for MockError {
580            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
581                write!(f, "Mock authentication error")
582            }
583        }
584
585        impl std::error::Error for MockError {}
586
587        impl crate::ErrorClassification for MockError {
588            fn error_type(&self) -> &'static str {
589                "auth_error"
590            }
591            fn is_retryable(&self) -> bool {
592                false
593            }
594            fn is_timeout(&self) -> bool {
595                false
596            }
597            fn is_auth_error(&self) -> bool {
598                true
599            }
600            fn is_connection_error(&self) -> bool {
601                false
602            }
603        }
604
605        let mock_error = MockError;
606        let error_class = ErrorClass::from_error(&mock_error);
607
608        matches!(error_class, ErrorClass::Auth { .. });
609    }
610
611    #[test]
612    fn test_sanitize_sensitive_data() {
613        let data = serde_json::json!({
614            "username": "test_user",
615            "password": "secret123",
616            "device_id": "dev123"
617        });
618
619        let sanitized = sanitize_value(&data);
620        assert_eq!(sanitized["username"], "test_user");
621        assert_eq!(sanitized["password"], "***");
622        assert_eq!(sanitized["device_id"], "dev123");
623    }
624}