pulseengine_mcp_logging/
sanitization.rs

1//! Log sanitization for production builds
2//!
3//! This module provides utilities for sanitizing sensitive information
4//! from log messages in production builds while preserving debugging
5//! capabilities in development.
6
7use regex::Regex;
8use std::sync::OnceLock;
9
10/// Regex patterns for detecting sensitive information
11static PASSWORD_REGEX: OnceLock<Regex> = OnceLock::new();
12static TOKEN_REGEX: OnceLock<Regex> = OnceLock::new();
13static API_KEY_REGEX: OnceLock<Regex> = OnceLock::new();
14static CREDENTIAL_REGEX: OnceLock<Regex> = OnceLock::new();
15static IP_REGEX: OnceLock<Regex> = OnceLock::new();
16static UUID_REGEX: OnceLock<Regex> = OnceLock::new();
17
18/// Initialize sanitization regex patterns
19fn init_sanitization_patterns() {
20    PASSWORD_REGEX.get_or_init(|| {
21        Regex::new(r#"(?i)(password|passwd|pwd)[\s]*[=:][\s]*['"]?([^'\s,}]+)"#)
22            .expect("Invalid password regex")
23    });
24
25    TOKEN_REGEX.get_or_init(|| {
26        Regex::new(r#"(?i)(token|bearer)[\s]*[=:][\s]*['"]?([a-zA-Z0-9._-]{10,})"#)
27            .expect("Invalid token regex")
28    });
29
30    API_KEY_REGEX.get_or_init(|| {
31        Regex::new(r#"(?i)(api[_-]?key|apikey|key)[\s]*[=:][\s]*['"]?([a-zA-Z0-9._-]{10,})"#)
32            .expect("Invalid API key regex")
33    });
34
35    CREDENTIAL_REGEX.get_or_init(|| {
36        Regex::new(r#"(?i)(credential|secret|auth)[\s]*[=:][\s]*['"]?([^'\s,}]+)"#)
37            .expect("Invalid credential regex")
38    });
39
40    IP_REGEX.get_or_init(|| {
41        Regex::new(r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b")
42            .expect("Invalid IP regex")
43    });
44
45    UUID_REGEX.get_or_init(|| {
46        Regex::new(
47            r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b",
48        )
49        .expect("Invalid UUID regex")
50    });
51}
52
/// Sanitization configuration.
///
/// Controls whether sanitization runs at all, which identifier kinds are
/// left untouched, and which placeholder replaces redacted secrets.
/// Derives `PartialEq`/`Eq` so configurations can be compared directly
/// (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SanitizationConfig {
    /// Enable sanitization (typically true in production).
    pub enabled: bool,

    /// Preserve IP addresses in logs (might be needed for debugging).
    pub preserve_ips: bool,

    /// Preserve UUIDs in logs (needed for Loxone device identification).
    pub preserve_uuids: bool,

    /// Replacement string for sensitive data.
    pub replacement: String,
}
68
69impl Default for SanitizationConfig {
70    fn default() -> Self {
71        Self {
72            enabled: cfg!(not(debug_assertions)), // Enable in release builds
73            preserve_ips: false,                  // Hide IPs in production
74            preserve_uuids: true,                 // Keep UUIDs for Loxone debugging
75            replacement: "[REDACTED]".to_string(),
76        }
77    }
78}
79
80/// Log sanitizer for removing sensitive information
81pub struct LogSanitizer {
82    config: SanitizationConfig,
83}
84
85impl LogSanitizer {
86    /// Create a new log sanitizer with default configuration
87    pub fn new() -> Self {
88        Self::with_config(SanitizationConfig::default())
89    }
90
91    /// Create a new log sanitizer with custom configuration
92    pub fn with_config(config: SanitizationConfig) -> Self {
93        init_sanitization_patterns();
94        Self { config }
95    }
96
97    /// Sanitize a log message by removing or redacting sensitive information
98    pub fn sanitize(&self, message: &str) -> String {
99        if !self.config.enabled {
100            return message.to_string();
101        }
102
103        let mut sanitized = message.to_string();
104
105        // Replace passwords
106        if let Some(regex) = PASSWORD_REGEX.get() {
107            sanitized = regex
108                .replace_all(&sanitized, |caps: &regex::Captures| {
109                    format!("{}={}", &caps[1], self.config.replacement)
110                })
111                .to_string();
112        }
113
114        // Replace tokens
115        if let Some(regex) = TOKEN_REGEX.get() {
116            sanitized = regex
117                .replace_all(&sanitized, |caps: &regex::Captures| {
118                    format!("{}={}", &caps[1], self.config.replacement)
119                })
120                .to_string();
121        }
122
123        // Replace API keys
124        if let Some(regex) = API_KEY_REGEX.get() {
125            sanitized = regex
126                .replace_all(&sanitized, |caps: &regex::Captures| {
127                    format!("{}={}", &caps[1], self.config.replacement)
128                })
129                .to_string();
130        }
131
132        // Replace credentials
133        if let Some(regex) = CREDENTIAL_REGEX.get() {
134            sanitized = regex
135                .replace_all(&sanitized, |caps: &regex::Captures| {
136                    format!("{}={}", &caps[1], self.config.replacement)
137                })
138                .to_string();
139        }
140
141        // Replace IP addresses if not preserved
142        if !self.config.preserve_ips {
143            if let Some(regex) = IP_REGEX.get() {
144                sanitized = regex.replace_all(&sanitized, "[IP_REDACTED]").to_string();
145            }
146        }
147
148        // Replace UUIDs if not preserved
149        if !self.config.preserve_uuids {
150            if let Some(regex) = UUID_REGEX.get() {
151                sanitized = regex.replace_all(&sanitized, "[UUID_REDACTED]").to_string();
152            }
153        }
154
155        sanitized
156    }
157
158    /// Sanitize error messages for production logging
159    pub fn sanitize_error(&self, error: &dyn std::error::Error) -> String {
160        let error_msg = error.to_string();
161
162        if !self.config.enabled {
163            return error_msg;
164        }
165
166        // In production, provide generic error messages for certain error types
167        if error_msg.contains("password") || error_msg.contains("credential") {
168            return "Authentication failed".to_string();
169        }
170
171        if error_msg.contains("connection") || error_msg.contains("timeout") {
172            return "Network connectivity issue".to_string();
173        }
174
175        if error_msg.contains("permission") || error_msg.contains("access") {
176            return "Access denied".to_string();
177        }
178
179        // For other errors, sanitize the message
180        self.sanitize(&error_msg)
181    }
182
183    /// Create a sanitized version of structured logging context
184    pub fn sanitize_context(&self, context: &serde_json::Value) -> serde_json::Value {
185        if !self.config.enabled {
186            return context.clone();
187        }
188
189        match context {
190            serde_json::Value::Object(map) => {
191                let mut sanitized_map = serde_json::Map::new();
192
193                for (key, value) in map {
194                    let sanitized_key = self.sanitize_field_name(key);
195                    let sanitized_value = if self.is_sensitive_field(&sanitized_key) {
196                        serde_json::Value::String(self.config.replacement.clone())
197                    } else {
198                        self.sanitize_context(value)
199                    };
200                    sanitized_map.insert(sanitized_key, sanitized_value);
201                }
202
203                serde_json::Value::Object(sanitized_map)
204            }
205            serde_json::Value::Array(arr) => {
206                let sanitized_arr: Vec<_> = arr.iter().map(|v| self.sanitize_context(v)).collect();
207                serde_json::Value::Array(sanitized_arr)
208            }
209            serde_json::Value::String(s) => serde_json::Value::String(self.sanitize(s)),
210            other => other.clone(),
211        }
212    }
213
214    /// Check if a field name indicates sensitive data
215    fn is_sensitive_field(&self, field_name: &str) -> bool {
216        let lower_name = field_name.to_lowercase();
217        matches!(
218            lower_name.as_str(),
219            "password"
220                | "passwd"
221                | "pwd"
222                | "token"
223                | "secret"
224                | "api_key"
225                | "apikey"
226                | "credential"
227                | "auth"
228                | "authorization"
229                | "client_secret"
230                | "private_key"
231                | "bearer"
232        )
233    }
234
235    /// Sanitize field names themselves if needed
236    fn sanitize_field_name(&self, field_name: &str) -> String {
237        // Keep field names as-is, just sanitize values
238        field_name.to_string()
239    }
240}
241
242impl Default for LogSanitizer {
243    fn default() -> Self {
244        Self::new()
245    }
246}
247
248/// Global sanitizer instance
249static GLOBAL_SANITIZER: OnceLock<LogSanitizer> = OnceLock::new();
250
251/// Get the global sanitizer instance
252pub fn get_sanitizer() -> &'static LogSanitizer {
253    GLOBAL_SANITIZER.get_or_init(LogSanitizer::new)
254}
255
256/// Initialize the global sanitizer with custom configuration
257pub fn init_sanitizer(config: SanitizationConfig) {
258    let _ = GLOBAL_SANITIZER.set(LogSanitizer::with_config(config));
259}
260
/// Convenient macro for sanitized logging.
///
/// Formats the arguments like `format!`, passes the result through the
/// global sanitizer, and emits it with the `tracing` macro named by `$level`
/// (`error`, `warn`, `info`, `debug`, ...). The calling crate must have
/// `tracing` available, because the expansion refers to `tracing::$level!`.
///
/// NOTE(review): the sanitizer is resolved as `$crate::sanitization`, which
/// assumes this file is mounted at the crate root as `sanitization`. The
/// previous `$crate::logging::sanitization` path does not match a file that
/// lives at `sanitization.rs` in the crate source root -- confirm against
/// `lib.rs`.
#[macro_export]
macro_rules! sanitized_log {
    ($level:ident, $($arg:tt)*) => {
        {
            let message = format!($($arg)*);
            let sanitized = $crate::sanitization::get_sanitizer().sanitize(&message);
            tracing::$level!("{}", sanitized);
        }
    };
}
272
/// Log a sanitized message at `error` level.
///
/// Forwards to `sanitized_log!` through `$crate::`, so callers only need
/// this macro in scope, not `sanitized_log!` as well.
#[macro_export]
macro_rules! sanitized_error {
    ($($arg:tt)*) => { $crate::sanitized_log!(error, $($arg)*) };
}
278
/// Log a sanitized message at `warn` level.
///
/// Forwards to `sanitized_log!` through `$crate::`, so callers only need
/// this macro in scope, not `sanitized_log!` as well.
#[macro_export]
macro_rules! sanitized_warn {
    ($($arg:tt)*) => { $crate::sanitized_log!(warn, $($arg)*) };
}
283
/// Log a sanitized message at `info` level.
///
/// Forwards to `sanitized_log!` through `$crate::`, so callers only need
/// this macro in scope, not `sanitized_log!` as well.
#[macro_export]
macro_rules! sanitized_info {
    ($($arg:tt)*) => { $crate::sanitized_log!(info, $($arg)*) };
}
288
/// Log a sanitized message at `debug` level.
///
/// Forwards to `sanitized_log!` through `$crate::`, so callers only need
/// this macro in scope, not `sanitized_log!` as well.
#[macro_export]
macro_rules! sanitized_debug {
    ($($arg:tt)*) => { $crate::sanitized_log!(debug, $($arg)*) };
}
293
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration with sanitization forced on, so the tests behave
    /// the same in debug and release builds.
    fn enabled_config() -> SanitizationConfig {
        SanitizationConfig {
            enabled: true,
            ..SanitizationConfig::default()
        }
    }

    /// Secret values in `password=` assignments are replaced.
    #[test]
    fn test_password_sanitization() {
        let sanitizer = LogSanitizer::with_config(enabled_config());

        let output = sanitizer.sanitize("Connecting with password=secret123 to server");

        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret123"));
    }

    /// Secret values in `api_key=` assignments are replaced.
    #[test]
    fn test_api_key_sanitization() {
        let sanitizer = LogSanitizer::with_config(enabled_config());

        let output = sanitizer.sanitize("API request with api_key=abc123def456 failed");

        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("abc123def456"));
    }

    /// IP addresses survive when `preserve_ips` is set.
    #[test]
    fn test_ip_preservation() {
        let sanitizer = LogSanitizer::with_config(SanitizationConfig {
            preserve_ips: true,
            ..enabled_config()
        });

        let output = sanitizer.sanitize("Connecting to 192.168.1.100:8080");

        assert!(output.contains("192.168.1.100"));
    }

    /// IP addresses are replaced when `preserve_ips` is cleared.
    #[test]
    fn test_ip_redaction() {
        let sanitizer = LogSanitizer::with_config(SanitizationConfig {
            preserve_ips: false,
            ..enabled_config()
        });

        let output = sanitizer.sanitize("Connecting to 192.168.1.100:8080");

        assert!(!output.contains("192.168.1.100"));
        assert!(output.contains("[IP_REDACTED]"));
    }

    /// UUIDs survive when `preserve_uuids` is set.
    #[test]
    fn test_uuid_preservation() {
        let sanitizer = LogSanitizer::with_config(SanitizationConfig {
            preserve_uuids: true,
            ..enabled_config()
        });

        let output =
            sanitizer.sanitize("Device 550e8400-e29b-41d4-a716-446655440000 state changed");

        assert!(output.contains("550e8400-e29b-41d4-a716-446655440000"));
    }

    /// A disabled sanitizer returns its input untouched.
    #[test]
    fn test_disabled_sanitization() {
        let sanitizer = LogSanitizer::with_config(SanitizationConfig {
            enabled: false,
            ..Default::default()
        });

        let input = "password=secret123 api_key=abc123";
        let output = sanitizer.sanitize(input);

        assert_eq!(input, output);
    }

    /// Errors mentioning passwords collapse to a generic message.
    #[test]
    fn test_error_sanitization() {
        let sanitizer = LogSanitizer::with_config(enabled_config());
        let failure = std::io::Error::new(
            std::io::ErrorKind::PermissionDenied,
            "password authentication failed",
        );

        let output = sanitizer.sanitize_error(&failure);

        assert_eq!("Authentication failed", output);
    }

    /// Sensitive JSON fields are replaced while other fields are kept.
    #[test]
    fn test_context_sanitization() {
        let sanitizer = LogSanitizer::with_config(enabled_config());
        let context = serde_json::json!({
            "user": "admin",
            "password": "secret123",
            "host": "192.168.1.100",
            "device_count": 42
        });

        let rendered = sanitizer.sanitize_context(&context).to_string();

        assert!(!rendered.contains("secret123"));
        assert!(rendered.contains("[REDACTED]"));
        assert!(rendered.contains("admin")); // Non-sensitive fields preserved
    }
}