pulseengine_mcp_logging/
sanitization.rs

1//! Log sanitization for production builds
2//!
3//! This module provides utilities for sanitizing sensitive information
4//! from log messages in production builds while preserving debugging
5//! capabilities in development.
6
7use regex::Regex;
8use std::sync::OnceLock;
9
10/// Regex patterns for detecting sensitive information
11static PASSWORD_REGEX: OnceLock<Regex> = OnceLock::new();
12static TOKEN_REGEX: OnceLock<Regex> = OnceLock::new();
13static API_KEY_REGEX: OnceLock<Regex> = OnceLock::new();
14static CREDENTIAL_REGEX: OnceLock<Regex> = OnceLock::new();
15static IP_REGEX: OnceLock<Regex> = OnceLock::new();
16static UUID_REGEX: OnceLock<Regex> = OnceLock::new();
17
18/// Initialize sanitization regex patterns
19fn init_sanitization_patterns() {
20    PASSWORD_REGEX.get_or_init(|| {
21        Regex::new(
22            r#"(?i)(["']?)(password|passwd|pwd|pass)(["']?)[\s]*[=:][\s]*["`']?([^'"`\s,}]+)"#,
23        )
24        .expect("Invalid password regex")
25    });
26
27    TOKEN_REGEX.get_or_init(|| {
28        Regex::new(r#"(?i)(?:(["']?)(token)(["']?)[\s]*[=:][\s]*['"]?([a-zA-Z0-9._-]+)|(bearer)[\s]+([a-zA-Z0-9._-]+))"#)
29            .expect("Invalid token regex")
30    });
31
32    API_KEY_REGEX.get_or_init(|| {
33        Regex::new(
34            r#"(?i)(["']?)(api[_-]?key|apikey|key)(["']?)[\s]*[=:][\s]*['"]?([a-zA-Z0-9._-]+)"#,
35        )
36        .expect("Invalid API key regex")
37    });
38
39    CREDENTIAL_REGEX.get_or_init(|| {
40        Regex::new(r#"(?i)(["']?)(credential|credentials|secret|auth)(["']?)[\s]*[=:][\s]*['"]?([^'"\s,}]+)"#)
41            .expect("Invalid credential regex")
42    });
43
44    IP_REGEX.get_or_init(|| {
45        Regex::new(r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b")
46            .expect("Invalid IP regex")
47    });
48
49    UUID_REGEX.get_or_init(|| {
50        Regex::new(
51            r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b",
52        )
53        .expect("Invalid UUID regex")
54    });
55}
56
/// Settings that decide what the log sanitizer redacts.
#[derive(Debug, Clone)]
pub struct SanitizationConfig {
    /// Master switch; when `false` the sanitizer returns its input unchanged.
    /// Typically `true` in production.
    pub enabled: bool,

    /// Leave IP addresses in the output (might be needed for debugging).
    pub preserve_ips: bool,

    /// Leave UUIDs in the output (needed for Loxone device identification).
    pub preserve_uuids: bool,

    /// Text written in place of each redacted secret value.
    pub replacement: String,
}

impl Default for SanitizationConfig {
    /// Defaults: sanitize only when `debug_assertions` is off, redact IPs,
    /// keep UUIDs, and use `[REDACTED]` as the replacement text.
    fn default() -> Self {
        // Builds compiled without `debug_assertions` get sanitization switched on.
        let release_build = !cfg!(debug_assertions);

        SanitizationConfig {
            replacement: String::from("[REDACTED]"),
            preserve_uuids: true,
            preserve_ips: false,
            enabled: release_build,
        }
    }
}
83
84/// Log sanitizer for removing sensitive information
85pub struct LogSanitizer {
86    config: SanitizationConfig,
87}
88
89impl LogSanitizer {
90    /// Create a new log sanitizer with default configuration
91    pub fn new() -> Self {
92        Self::with_config(SanitizationConfig::default())
93    }
94
95    /// Create a new log sanitizer with custom configuration
96    pub fn with_config(config: SanitizationConfig) -> Self {
97        init_sanitization_patterns();
98        Self { config }
99    }
100
101    /// Sanitize a log message by removing or redacting sensitive information
102    pub fn sanitize(&self, message: &str) -> String {
103        if !self.config.enabled {
104            return message.to_string();
105        }
106
107        let mut sanitized = message.to_string();
108
109        // Replace passwords
110        if let Some(regex) = PASSWORD_REGEX.get() {
111            sanitized = regex
112                .replace_all(&sanitized, |caps: &regex::Captures| {
113                    let full_match = &caps[0];
114                    let value = &caps[4];
115
116                    // Replace the value part while preserving the rest of the match
117                    full_match.replace(value, &self.config.replacement)
118                })
119                .to_string();
120        }
121
122        // Replace tokens
123        if let Some(regex) = TOKEN_REGEX.get() {
124            sanitized = regex
125                .replace_all(&sanitized, |caps: &regex::Captures| {
126                    let full_match = &caps[0];
127                    // Check which alternative matched
128                    if caps.get(4).is_some() {
129                        // token=value pattern
130                        let value = &caps[4];
131                        full_match.replace(value, &self.config.replacement)
132                    } else {
133                        // bearer value pattern
134                        let value = &caps[6];
135                        full_match.replace(value, &self.config.replacement)
136                    }
137                })
138                .to_string();
139        }
140
141        // Replace API keys
142        if let Some(regex) = API_KEY_REGEX.get() {
143            sanitized = regex
144                .replace_all(&sanitized, |caps: &regex::Captures| {
145                    let full_match = &caps[0];
146                    let value = &caps[4];
147                    full_match.replace(value, &self.config.replacement)
148                })
149                .to_string();
150        }
151
152        // Replace credentials
153        if let Some(regex) = CREDENTIAL_REGEX.get() {
154            sanitized = regex
155                .replace_all(&sanitized, |caps: &regex::Captures| {
156                    let full_match = &caps[0];
157                    let value = &caps[4];
158                    full_match.replace(value, &self.config.replacement)
159                })
160                .to_string();
161        }
162
163        // Replace IP addresses if not preserved
164        if !self.config.preserve_ips {
165            if let Some(regex) = IP_REGEX.get() {
166                sanitized = regex.replace_all(&sanitized, "[IP_REDACTED]").to_string();
167            }
168        }
169
170        // Replace UUIDs if not preserved
171        if !self.config.preserve_uuids {
172            if let Some(regex) = UUID_REGEX.get() {
173                sanitized = regex.replace_all(&sanitized, "[UUID_REDACTED]").to_string();
174            }
175        }
176
177        sanitized
178    }
179
180    /// Sanitize error messages for production logging
181    pub fn sanitize_error(&self, error: &dyn std::error::Error) -> String {
182        let error_msg = error.to_string();
183
184        if !self.config.enabled {
185            return error_msg;
186        }
187
188        // Always sanitize the error message first
189        self.sanitize(&error_msg)
190    }
191
192    /// Create a sanitized version of structured logging context
193    pub fn sanitize_context(&self, context: &serde_json::Value) -> serde_json::Value {
194        if !self.config.enabled {
195            return context.clone();
196        }
197
198        match context {
199            serde_json::Value::Object(map) => {
200                let mut sanitized_map = serde_json::Map::new();
201
202                for (key, value) in map {
203                    // Don't sanitize field names in JSON contexts, only values
204                    let sanitized_value = if Self::is_sensitive_field(key) {
205                        serde_json::Value::String(self.config.replacement.clone())
206                    } else {
207                        self.sanitize_context(value)
208                    };
209                    sanitized_map.insert(key.clone(), sanitized_value);
210                }
211
212                serde_json::Value::Object(sanitized_map)
213            }
214            serde_json::Value::Array(arr) => {
215                let sanitized_arr: Vec<_> = arr.iter().map(|v| self.sanitize_context(v)).collect();
216                serde_json::Value::Array(sanitized_arr)
217            }
218            serde_json::Value::String(s) => serde_json::Value::String(self.sanitize(s)),
219            other => other.clone(),
220        }
221    }
222
223    /// Check if a field name indicates sensitive data
224    fn is_sensitive_field(field_name: &str) -> bool {
225        let lower_name = field_name.to_lowercase();
226        // Check for exact matches first
227        if matches!(
228            lower_name.as_str(),
229            "password"
230                | "passwd"
231                | "pwd"
232                | "pass"
233                | "token"
234                | "secret"
235                | "api_key"
236                | "apikey"
237                | "key"
238                | "credential"
239                | "credentials"
240                | "auth"
241                | "authorization"
242                | "client_secret"
243                | "private_key"
244                | "bearer"
245                | "access_token"
246                | "refresh_token"
247                | "auth_token"
248        ) {
249            return true;
250        }
251
252        // Also check if field name contains sensitive keywords
253        lower_name.contains("password")
254            || lower_name.contains("passwd")
255            || lower_name.contains("token")
256            || lower_name.contains("secret")
257            || lower_name.contains("api_key")
258            || lower_name.contains("apikey")
259            || lower_name.contains("credential")
260            || lower_name.contains("auth")
261            || lower_name.contains("bearer")
262    }
263
264    /// Sanitize field names themselves if needed
265    #[cfg(test)]
266    fn sanitize_field_name(field_name: &str) -> String {
267        // If the field name is sensitive and longer than 2 chars, partially redact it
268        if Self::is_sensitive_field(field_name) && field_name.len() > 2 {
269            let chars: Vec<char> = field_name.chars().collect();
270            let first_char = chars[0];
271            let last_char = chars[chars.len() - 1];
272            let middle_len = chars.len() - 2;
273            format!("{}{}{}", first_char, "*".repeat(middle_len), last_char)
274        } else {
275            field_name.to_string()
276        }
277    }
278}
279
280impl Default for LogSanitizer {
281    fn default() -> Self {
282        Self::new()
283    }
284}
285
286/// Global sanitizer instance
287static GLOBAL_SANITIZER: OnceLock<LogSanitizer> = OnceLock::new();
288
289/// Get the global sanitizer instance
290pub fn get_sanitizer() -> &'static LogSanitizer {
291    GLOBAL_SANITIZER.get_or_init(LogSanitizer::new)
292}
293
294/// Initialize the global sanitizer with custom configuration
295pub fn init_sanitizer(config: SanitizationConfig) {
296    let _ = GLOBAL_SANITIZER.set(LogSanitizer::with_config(config));
297}
298
/// Format a message, sanitize it with the global sanitizer, and log it
/// through the `tracing` macro named by the first argument.
///
/// Usage: `sanitized_log!(info, "user {} logged in", name)` — a `tracing`
/// level macro name followed by `format!`-style arguments. The message is
/// formatted and sanitized before the `tracing` macro is invoked.
// NOTE(review): the expansion refers to `$crate::logging::sanitization::get_sanitizer`
// and to `tracing::<level>!` at the call site — confirm that this module path
// exists at the crate root and that callers have `tracing` in scope.
#[macro_export]
macro_rules! sanitized_log {
    ($level:ident, $($arg:tt)*) => {{
        let raw_text = format!($($arg)*);
        let clean_text = $crate::logging::sanitization::get_sanitizer().sanitize(&raw_text);
        tracing::$level!("{}", clean_text);
    }};
}
310
/// Sanitized logging at the `error` level.
///
/// Shorthand for `sanitized_log!(error, …)`. The inner macro is invoked via
/// `$crate::` so this wrapper also works when it is the only macro a caller
/// has brought into scope.
#[macro_export]
macro_rules! sanitized_error {
    ($($arg:tt)*) => { $crate::sanitized_log!(error, $($arg)*) };
}
316
/// Sanitized logging at the `warn` level.
///
/// Shorthand for `sanitized_log!(warn, …)`. The inner macro is invoked via
/// `$crate::` so this wrapper also works when it is the only macro a caller
/// has brought into scope.
#[macro_export]
macro_rules! sanitized_warn {
    ($($arg:tt)*) => { $crate::sanitized_log!(warn, $($arg)*) };
}
321
/// Sanitized logging at the `info` level.
///
/// Shorthand for `sanitized_log!(info, …)`. The inner macro is invoked via
/// `$crate::` so this wrapper also works when it is the only macro a caller
/// has brought into scope.
#[macro_export]
macro_rules! sanitized_info {
    ($($arg:tt)*) => { $crate::sanitized_log!(info, $($arg)*) };
}
326
/// Sanitized logging at the `debug` level.
///
/// Shorthand for `sanitized_log!(debug, …)`. The inner macro is invoked via
/// `$crate::` so this wrapper also works when it is the only macro a caller
/// has brought into scope.
#[macro_export]
macro_rules! sanitized_debug {
    ($($arg:tt)*) => { $crate::sanitized_log!(debug, $($arg)*) };
}
331
332#[cfg(test)]
333#[path = "sanitization_tests.rs"]
334mod sanitization_tests;
335
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration with sanitization forced on, so the tests do not
    /// depend on the build profile.
    fn enabled_defaults() -> SanitizationConfig {
        SanitizationConfig {
            enabled: true,
            ..SanitizationConfig::default()
        }
    }

    /// A password value after `password=` is replaced by the marker.
    #[test]
    fn test_password_sanitization() {
        let sanitizer = LogSanitizer::with_config(enabled_defaults());

        let output = sanitizer.sanitize("Connecting with password=secret123 to server");

        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret123"));
    }

    /// An API key value after `api_key=` is replaced by the marker.
    #[test]
    fn test_api_key_sanitization() {
        let sanitizer = LogSanitizer::with_config(enabled_defaults());

        let output = sanitizer.sanitize("API request with api_key=abc123def456 failed");

        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("abc123def456"));
    }

    /// With `preserve_ips` set, IP addresses stay in the output.
    #[test]
    fn test_ip_preservation() {
        let sanitizer = LogSanitizer::with_config(SanitizationConfig {
            preserve_ips: true,
            ..enabled_defaults()
        });

        let output = sanitizer.sanitize("Connecting to 192.168.1.100:8080");

        assert!(output.contains("192.168.1.100"));
    }

    /// Without `preserve_ips`, IP addresses are replaced by the IP marker.
    #[test]
    fn test_ip_redaction() {
        let sanitizer = LogSanitizer::with_config(SanitizationConfig {
            preserve_ips: false,
            ..enabled_defaults()
        });

        let output = sanitizer.sanitize("Connecting to 192.168.1.100:8080");

        assert!(!output.contains("192.168.1.100"));
        assert!(output.contains("[IP_REDACTED]"));
    }

    /// With `preserve_uuids` set, UUIDs stay in the output.
    #[test]
    fn test_uuid_preservation() {
        let sanitizer = LogSanitizer::with_config(SanitizationConfig {
            preserve_uuids: true,
            ..enabled_defaults()
        });

        let output = sanitizer.sanitize("Device 550e8400-e29b-41d4-a716-446655440000 state changed");

        assert!(output.contains("550e8400-e29b-41d4-a716-446655440000"));
    }

    /// A disabled sanitizer returns the message untouched.
    #[test]
    fn test_disabled_sanitization() {
        let sanitizer = LogSanitizer::with_config(SanitizationConfig {
            enabled: false,
            ..Default::default()
        });

        let input = "password=secret123 api_key=abc123";

        assert_eq!(input, sanitizer.sanitize(input));
    }

    /// Error display text goes through the same redaction as plain messages.
    #[test]
    fn test_error_sanitization() {
        let sanitizer = LogSanitizer::with_config(enabled_defaults());
        let failure = std::io::Error::new(
            std::io::ErrorKind::PermissionDenied,
            "password=secret123 authentication failed",
        );

        let output = sanitizer.sanitize_error(&failure);

        assert_eq!("password=[REDACTED] authentication failed", output);
    }

    /// Sensitive JSON fields are replaced while ordinary fields are kept.
    #[test]
    fn test_context_sanitization() {
        let sanitizer = LogSanitizer::with_config(enabled_defaults());
        let context = serde_json::json!({
            "user": "admin",
            "password": "secret123",
            "host": "192.168.1.100",
            "device_count": 42
        });

        let rendered = sanitizer.sanitize_context(&context).to_string();

        assert!(!rendered.contains("secret123"));
        assert!(rendered.contains("[REDACTED]"));
        assert!(rendered.contains("admin")); // Non-sensitive fields preserved
    }
}
452}