codex_memory/security/
pii.rs

1use crate::security::{PiiConfig, Result, SecurityError};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use tracing::{debug, warn};
6
/// PII detection and masking manager.
///
/// Owns the PII configuration together with the compiled detection patterns.
/// The pattern list is only populated when `config.enabled` is true (see
/// `PiiManager::new`); a disabled manager keeps it empty.
pub struct PiiManager {
    /// Settings that switch detection, masking and anonymization on or off.
    config: PiiConfig,
    /// Compiled detectors, applied in insertion order by `detect_pii`.
    patterns: Vec<PiiPattern>,
}
12
/// PII pattern definition: one compiled detector plus how its matches are
/// masked and how serious they are.
#[derive(Debug, Clone)]
pub struct PiiPattern {
    /// Identifier reported in `PiiMatch::pattern_name` (e.g. "email", "ssn",
    /// or "custom" for patterns supplied through the configuration).
    pub name: String,
    /// Compiled regular expression used to find occurrences in the content.
    pub regex: Regex,
    /// Character substituted for the hidden part of a match.
    pub mask_char: char,
    /// Severity assigned to every match produced by this pattern.
    pub severity: PiiSeverity,
}
21
22/// PII severity levels
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub enum PiiSeverity {
25    Low,      // Public information that might be personal
26    Medium,   // Sensitive personal information
27    High,     // Highly sensitive information (SSN, credit cards)
28    Critical, // Extremely sensitive (passwords, tokens)
29}
30
/// PII detection result returned by `PiiManager::detect_pii`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiDetectionResult {
    /// Every match found, across all patterns. When non-empty the list is
    /// sorted by descending `start` offset.
    pub found_patterns: Vec<PiiMatch>,
    /// The scanned content with each match replaced by its masked form; equal
    /// to the input when nothing was found or detection is disabled.
    pub masked_content: String,
    /// Highest severity among the matches (`Low` when there are none).
    pub severity: PiiSeverity,
    /// True when `severity` is `High` or `Critical`.
    pub requires_action: bool,
}
39
/// Individual PII match.
///
/// NOTE(review): `matched_text` holds the raw, unmasked value and this type
/// derives `Debug` and `Serialize`, so logging or serializing a match exposes
/// the very data being protected — confirm that callers never do so.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiMatch {
    /// Name of the pattern that produced this match.
    pub pattern_name: String,
    /// Severity copied from the producing pattern.
    pub severity: PiiSeverity,
    /// Start of the match in the scanned content, as reported by the regex
    /// match (a byte offset into the string, not a character index).
    pub start: usize,
    /// End of the match in the scanned content (exclusive byte offset).
    pub end: usize,
    /// The exact text that matched (unmasked).
    pub matched_text: String,
    /// Masked rendering of `matched_text` produced with the pattern's mask
    /// character.
    pub masked_text: String,
}
50
/// PII statistics.
///
/// A plain counter container; nothing in the visible part of this file
/// creates or updates it, so the exact update rules live with its users.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiStatistics {
    /// Number of scans performed.
    pub total_scans: u64,
    /// Number of matches found across all scans.
    pub total_matches: u64,
    /// Match counts keyed by a string — presumably the pattern name; verify
    /// against the code that fills this map.
    pub matches_by_type: HashMap<String, u64>,
    /// Number of matches with `High` severity.
    pub high_severity_matches: u64,
    /// Number of matches with `Critical` severity.
    pub critical_matches: u64,
}
60
61impl PiiManager {
62    pub fn new(config: PiiConfig) -> Result<Self> {
63        let mut manager = Self {
64            config,
65            patterns: Vec::new(),
66        };
67
68        if manager.config.enabled {
69            manager.initialize_patterns()?;
70        }
71
72        Ok(manager)
73    }
74
75    fn initialize_patterns(&mut self) -> Result<()> {
76        // Email addresses
77        self.add_pattern(
78            "email",
79            r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
80            '*',
81            PiiSeverity::Medium,
82        )?;
83
84        // Social Security Numbers (US)
85        self.add_pattern(
86            "ssn",
87            r"\b\d{3}-\d{2}-\d{4}\b|\b\d{9}\b",
88            'X',
89            PiiSeverity::High,
90        )?;
91
92        // Credit card numbers (basic pattern)
93        self.add_pattern(
94            "credit_card",
95            r"\b(?:\d{4}[-\s]?){3}\d{4}\b",
96            '*',
97            PiiSeverity::High,
98        )?;
99
100        // Phone numbers (US format)
101        self.add_pattern(
102            "phone",
103            r"\b(?:\+1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b",
104            'X',
105            PiiSeverity::Medium,
106        )?;
107
108        // IPv4 addresses
109        self.add_pattern(
110            "ipv4",
111            r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
112            'X',
113            PiiSeverity::Low,
114        )?;
115
116        // API keys (generic patterns)
117        // Pattern matches "api_key:", "api-key=", "secret_key ", etc. followed by 20+ alphanumeric chars
118        self.add_pattern(
119            "api_key",
120            r"(?i)(api[_-]?key|access[_-]?token|secret[_-]?key)[\s:=]+[\w-]{20,}",
121            '*',
122            PiiSeverity::Critical,
123        )?;
124
125        // Passwords in URLs or code
126        self.add_pattern(
127            "password",
128            r"(?i)(password|pwd|pass)[\s:=]+\S{4,}",
129            '*',
130            PiiSeverity::Critical,
131        )?;
132
133        // JWT tokens
134        self.add_pattern(
135            "jwt_token",
136            r"eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+",
137            '*',
138            PiiSeverity::Critical,
139        )?;
140
141        // Bitcoin addresses
142        self.add_pattern(
143            "bitcoin",
144            r"\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b|bc1[a-z0-9]{39,59}\b",
145            'X',
146            PiiSeverity::Medium,
147        )?;
148
149        // Bank account numbers (generic)
150        self.add_pattern("bank_account", r"\b\d{8,17}\b", 'X', PiiSeverity::High)?;
151
152        // Driver's license numbers (US format)
153        self.add_pattern(
154            "drivers_license",
155            r"\b[A-Z]{1,2}\d{6,8}\b|\b\d{8,9}\b",
156            'X',
157            PiiSeverity::High,
158        )?;
159
160        // Add custom patterns from config (clone to avoid borrow checker issues)
161        let custom_patterns = self.config.detect_patterns.clone();
162        for pattern in custom_patterns {
163            self.add_pattern("custom", &pattern, '*', PiiSeverity::Medium)?;
164        }
165
166        debug!("Initialized {} PII detection patterns", self.patterns.len());
167        Ok(())
168    }
169
170    fn add_pattern(
171        &mut self,
172        name: &str,
173        pattern: &str,
174        mask_char: char,
175        severity: PiiSeverity,
176    ) -> Result<()> {
177        let regex = Regex::new(pattern).map_err(|e| SecurityError::ValidationError {
178            message: format!("Invalid PII regex pattern '{pattern}': {e}"),
179        })?;
180
181        self.patterns.push(PiiPattern {
182            name: name.to_string(),
183            regex,
184            mask_char,
185            severity,
186        });
187
188        Ok(())
189    }
190
191    /// Detect PII in text content
192    pub fn detect_pii(&self, content: &str) -> PiiDetectionResult {
193        if !self.config.enabled {
194            return PiiDetectionResult {
195                found_patterns: Vec::new(),
196                masked_content: content.to_string(),
197                severity: PiiSeverity::Low,
198                requires_action: false,
199            };
200        }
201        
202        // Debug: log patterns being used
203        debug!("Detecting PII in content with {} patterns", self.patterns.len());
204
205        let mut found_patterns = Vec::new();
206        let mut masked_content = content.to_string();
207        let mut max_severity = PiiSeverity::Low;
208
209        // Apply each pattern
210        for pattern in &self.patterns {
211            for mat in pattern.regex.find_iter(content) {
212                let start = mat.start();
213                let end = mat.end();
214                let matched_text = mat.as_str().to_string();
215
216                // Create masked version
217                let masked_text = self.create_mask(&matched_text, pattern.mask_char);
218
219                // Update max severity
220                max_severity = self.max_severity(&max_severity, &pattern.severity);
221
222                found_patterns.push(PiiMatch {
223                    pattern_name: pattern.name.clone(),
224                    severity: pattern.severity.clone(),
225                    start,
226                    end,
227                    matched_text: matched_text.clone(),
228                    masked_text: masked_text.clone(),
229                });
230            }
231        }
232
233        // Apply masking if enabled
234        if !found_patterns.is_empty() {
235            // Sort matches by start position in reverse order to avoid position shifts
236            found_patterns.sort_by(|a, b| b.start.cmp(&a.start));
237
238            for pii_match in &found_patterns {
239                masked_content
240                    .replace_range(pii_match.start..pii_match.end, &pii_match.masked_text);
241            }
242
243            // Log PII detection
244            warn!(
245                "PII detected: {} matches, max severity: {:?}",
246                found_patterns.len(),
247                max_severity
248            );
249        }
250
251        let requires_action = matches!(max_severity, PiiSeverity::High | PiiSeverity::Critical);
252
253        PiiDetectionResult {
254            found_patterns,
255            masked_content,
256            severity: max_severity,
257            requires_action,
258        }
259    }
260
261    /// Mask sensitive content for logging
262    pub fn mask_for_logging(&self, content: &str) -> String {
263        if !self.config.enabled || !self.config.mask_in_logs {
264            return content.to_string();
265        }
266
267        let result = self.detect_pii(content);
268        result.masked_content
269    }
270
271    /// Mask sensitive content for API responses
272    pub fn mask_for_response(&self, content: &str) -> String {
273        if !self.config.enabled || !self.config.mask_in_responses {
274            return content.to_string();
275        }
276
277        let result = self.detect_pii(content);
278        result.masked_content
279    }
280
281    /// Check if content should be anonymized for storage
282    pub fn should_anonymize(&self, content: &str) -> bool {
283        if !self.config.enabled || !self.config.anonymize_storage {
284            return false;
285        }
286
287        let result = self.detect_pii(content);
288        result.requires_action
289    }
290
291    /// Anonymize content for storage
292    pub fn anonymize_for_storage(&self, content: &str) -> String {
293        if !self.config.enabled || !self.config.anonymize_storage {
294            return content.to_string();
295        }
296
297        let result = self.detect_pii(content);
298
299        if result.requires_action {
300            // For high-severity PII, replace with generic placeholders
301            // We need to work with the original content and replace based on positions
302            let mut anonymized = content.to_string();
303            
304            // Sort by position in reverse to avoid position shifts
305            let mut high_severity_matches: Vec<_> = result.found_patterns
306                .iter()
307                .filter(|m| matches!(m.severity, PiiSeverity::High | PiiSeverity::Critical))
308                .collect();
309            high_severity_matches.sort_by(|a, b| b.start.cmp(&a.start));
310
311            for pii_match in high_severity_matches {
312                let placeholder = match pii_match.pattern_name.as_str() {
313                    "email" => "[EMAIL]",
314                    "ssn" => "[SSN]",
315                    "credit_card" => "[CREDIT_CARD]",
316                    "phone" => "[PHONE]",
317                    "api_key" => "[API_KEY]",
318                    "password" => "[PASSWORD]",
319                    "jwt_token" => "[JWT_TOKEN]",
320                    "bank_account" => "[BANK_ACCOUNT]",
321                    "drivers_license" => "[DRIVERS_LICENSE]",
322                    _ => "[PII]",
323                };
324
325                anonymized.replace_range(pii_match.start..pii_match.end, placeholder);
326            }
327
328            anonymized
329        } else {
330            result.masked_content
331        }
332    }
333
334    fn create_mask(&self, text: &str, mask_char: char) -> String {
335        if text.len() <= 4 {
336            // For short strings, mask everything except first character
337            let mut masked = String::new();
338            for (i, _) in text.char_indices() {
339                if i == 0 {
340                    masked.push(text.chars().next().unwrap_or(mask_char));
341                } else {
342                    masked.push(mask_char);
343                }
344            }
345            masked
346        } else {
347            // For longer strings, show first 2 and last 2 characters
348            let chars: Vec<char> = text.chars().collect();
349            let mut masked = String::new();
350
351            for (i, &ch) in chars.iter().enumerate() {
352                if i < 2 || i >= chars.len() - 2 {
353                    masked.push(ch);
354                } else {
355                    masked.push(mask_char);
356                }
357            }
358
359            masked
360        }
361    }
362
363    fn max_severity(&self, a: &PiiSeverity, b: &PiiSeverity) -> PiiSeverity {
364        match (a, b) {
365            (PiiSeverity::Critical, _) | (_, PiiSeverity::Critical) => PiiSeverity::Critical,
366            (PiiSeverity::High, _) | (_, PiiSeverity::High) => PiiSeverity::High,
367            (PiiSeverity::Medium, _) | (_, PiiSeverity::Medium) => PiiSeverity::Medium,
368            _ => PiiSeverity::Low,
369        }
370    }
371
372    pub fn is_enabled(&self) -> bool {
373        self.config.enabled
374    }
375
376    pub fn get_pattern_count(&self) -> usize {
377        self.patterns.len()
378    }
379}
380
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration with detection switched on.
    fn enabled_config() -> PiiConfig {
        PiiConfig {
            enabled: true,
            ..PiiConfig::default()
        }
    }

    /// Enabled configuration without custom patterns, so only the built-in
    /// detectors run and match counts are predictable.
    fn builtin_only_config() -> PiiConfig {
        PiiConfig {
            detect_patterns: Vec::new(),
            ..enabled_config()
        }
    }

    /// Builds a manager, failing the test if any pattern does not compile.
    fn manager_with(config: PiiConfig) -> PiiManager {
        PiiManager::new(config).expect("PII patterns must compile")
    }

    #[test]
    fn test_pii_manager_creation() {
        let manager = manager_with(PiiConfig::default());
        assert!(!manager.is_enabled()); // disabled by default
    }

    #[test]
    fn test_pii_manager_enabled() {
        let manager = manager_with(enabled_config());

        assert!(manager.is_enabled());
        assert!(manager.get_pattern_count() > 0);
    }

    #[test]
    fn test_email_detection() {
        let manager = manager_with(builtin_only_config());

        let text = "Please contact john.doe@example.com for support.";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "email");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::Medium
        ));
        assert_ne!(result.masked_content, text); // Should be masked
        // Only the address changes: first two and last two characters stay.
        assert_eq!(
            result.masked_content,
            "Please contact jo****************om for support."
        );
    }

    #[test]
    fn test_ssn_detection() {
        let manager = manager_with(builtin_only_config());

        let result = manager.detect_pii("My SSN is 123-45-6789.");

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "ssn");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::High
        ));
        assert!(result.requires_action);
    }

    #[test]
    fn test_credit_card_detection() {
        let manager = manager_with(builtin_only_config());

        let result = manager.detect_pii("Credit card: 4532-1234-5678-9012");

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "credit_card");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::High
        ));
    }

    #[test]
    fn test_api_key_detection() {
        let manager = manager_with(builtin_only_config());

        let result = manager.detect_pii("api_key: sk-1234567890abcdef1234567890abcdef");

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "api_key");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::Critical
        ));
        assert!(result.requires_action);
    }

    #[test]
    fn test_multiple_pii_detection() {
        let manager = manager_with(builtin_only_config());

        let text = "Contact john@example.com or call 555-123-4567 about SSN 123-45-6789.";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 3);

        // Should detect email, phone, and SSN
        let pattern_names: Vec<&str> = result
            .found_patterns
            .iter()
            .map(|m| m.pattern_name.as_str())
            .collect();

        for expected in ["email", "phone", "ssn"] {
            assert!(
                pattern_names.contains(&expected),
                "missing pattern: {expected}"
            );
        }

        // Max severity should be High (from SSN)
        assert!(matches!(result.severity, PiiSeverity::High));
        assert!(result.requires_action);
    }

    #[test]
    fn test_masking_for_logging() {
        let manager = manager_with(PiiConfig {
            mask_in_logs: true,
            ..enabled_config()
        });

        let text = "User email: john.doe@example.com";
        let masked = manager.mask_for_logging(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("john.doe@example.com"));
    }

    #[test]
    fn test_masking_for_response() {
        let manager = manager_with(PiiConfig {
            mask_in_responses: true,
            ..enabled_config()
        });

        let text = "Phone: 555-123-4567";
        let masked = manager.mask_for_response(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("555-123-4567"));
    }

    #[test]
    fn test_anonymization_for_storage() {
        let manager = manager_with(PiiConfig {
            anonymize_storage: true,
            ..builtin_only_config()
        });

        let text = "SSN: 123-45-6789 and email: john@example.com";
        let anonymized = manager.anonymize_for_storage(text);

        // High-severity PII (SSN) should be replaced with a placeholder
        assert!(anonymized.contains("[SSN]"));
        // ...and the raw value must be gone
        assert!(!anonymized.contains("123-45-6789"));
    }

    #[test]
    fn test_should_anonymize() {
        let manager = manager_with(PiiConfig {
            anonymize_storage: true,
            ..enabled_config()
        });

        // High-severity PII should trigger anonymization
        assert!(manager.should_anonymize("SSN: 123-45-6789"));

        // Low-severity PII should not trigger anonymization
        assert!(!manager.should_anonymize("IP: 192.168.1.1"));

        // No PII should not trigger anonymization
        assert!(!manager.should_anonymize("This is normal text"));
    }

    #[test]
    fn test_custom_patterns() {
        // Every field spelled out on purpose: nothing here depends on defaults.
        let config = PiiConfig {
            enabled: true,
            detect_patterns: vec![
                r"\bcustom-\d{6}\b".to_string(), // Custom pattern
            ],
            mask_in_logs: true,
            mask_in_responses: false,
            anonymize_storage: false,
        };

        let manager = manager_with(config);

        let result = manager.detect_pii("Reference number: custom-123456");

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "custom");
    }

    #[test]
    fn test_disabled_pii_detection() {
        let manager = manager_with(PiiConfig {
            enabled: false,
            ..PiiConfig::default()
        });

        let text = "SSN: 123-45-6789 and email: john@example.com";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 0);
        assert_eq!(result.masked_content, text);
        assert!(!result.requires_action);
    }

    #[test]
    fn test_mask_creation() {
        let manager = manager_with(enabled_config());

        // Short strings keep only the first character
        assert_eq!(manager.create_mask("abc", '*'), "a**");

        // Longer strings keep the first two and last two characters
        assert_eq!(manager.create_mask("1234567890", 'X'), "12XXXXXX90");

        // Email (20 chars): first 2 and last 2 shown, the middle 16 masked
        assert_eq!(
            manager.create_mask("john.doe@example.com", '*'),
            "jo****************om"
        );
    }

    #[test]
    fn test_severity_comparison() {
        let manager = manager_with(enabled_config());

        assert!(matches!(
            manager.max_severity(&PiiSeverity::Low, &PiiSeverity::High),
            PiiSeverity::High
        ));
        assert!(matches!(
            manager.max_severity(&PiiSeverity::Critical, &PiiSeverity::Medium),
            PiiSeverity::Critical
        ));
        assert!(matches!(
            manager.max_severity(&PiiSeverity::Low, &PiiSeverity::Low),
            PiiSeverity::Low
        ));
    }
}