codex_memory/security/
pii.rs

1use crate::security::{PiiConfig, Result, SecurityError};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use tracing::{debug, warn};
6
/// PII detection and masking manager.
///
/// Pairs the PII configuration with the compiled detection patterns. The
/// pattern list is only populated when the manager is constructed with
/// detection enabled; otherwise it stays empty.
pub struct PiiManager {
    /// Settings that switch detection on and select where masking applies.
    config: PiiConfig,
    /// Compiled detection patterns, in the order they were registered.
    patterns: Vec<PiiPattern>,
}
12
/// PII pattern definition: a compiled regular expression plus the metadata
/// used when one of its matches is reported and masked.
#[derive(Debug, Clone)]
pub struct PiiPattern {
    /// Label copied into `PiiMatch::pattern_name` for every match.
    pub name: String,
    /// Compiled expression used to scan content.
    pub regex: Regex,
    /// Character substituted for the hidden part of a match.
    pub mask_char: char,
    /// Severity assigned to every match of this pattern.
    pub severity: PiiSeverity,
}
21
/// PII severity levels, listed from least to most sensitive.
///
/// A detection result whose overall severity is `High` or `Critical` is
/// flagged as requiring action.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PiiSeverity {
    /// Public information that might be personal.
    Low,
    /// Sensitive personal information.
    Medium,
    /// Highly sensitive information (SSN, credit cards).
    High,
    /// Extremely sensitive (passwords, tokens).
    Critical,
}
30
/// PII detection result for one scanned piece of content.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiDetectionResult {
    /// Every match found, ordered by descending start offset.
    pub found_patterns: Vec<PiiMatch>,
    /// The scanned content with matches masked; equal to the input when
    /// nothing matched or detection is disabled.
    pub masked_content: String,
    /// Highest severity among the matches; `Low` when there are none.
    pub severity: PiiSeverity,
    /// `true` when `severity` is `High` or `Critical`.
    pub requires_action: bool,
}
39
/// Individual PII match produced by a single pattern.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiMatch {
    /// Name of the pattern that produced the match.
    pub pattern_name: String,
    /// Severity of the pattern that produced the match.
    pub severity: PiiSeverity,
    /// Byte offset in the scanned content where the match begins.
    pub start: usize,
    /// Byte offset in the scanned content just past the end of the match.
    pub end: usize,
    /// The exact text that matched. NOTE(review): this is the raw sensitive
    /// value and the struct is serializable — confirm it is never emitted.
    pub matched_text: String,
    /// `matched_text` rendered with the pattern's mask character.
    pub masked_text: String,
}
50
/// PII statistics.
///
/// NOTE(review): nothing in this file creates or updates this type; the field
/// descriptions below are inferred from the names and should be confirmed
/// against whatever code populates it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiStatistics {
    /// Presumably the number of scans performed — TODO confirm.
    pub total_scans: u64,
    /// Presumably the number of matches across all scans — TODO confirm.
    pub total_matches: u64,
    /// Presumably match counts keyed by pattern name — TODO confirm.
    pub matches_by_type: HashMap<String, u64>,
    /// Presumably the number of `High` severity matches — TODO confirm.
    pub high_severity_matches: u64,
    /// Presumably the number of `Critical` severity matches — TODO confirm.
    pub critical_matches: u64,
}
60
61impl PiiManager {
62    pub fn new(config: PiiConfig) -> Result<Self> {
63        let mut manager = Self {
64            config,
65            patterns: Vec::new(),
66        };
67
68        if manager.config.enabled {
69            manager.initialize_patterns()?;
70        }
71
72        Ok(manager)
73    }
74
75    fn initialize_patterns(&mut self) -> Result<()> {
76        // Email addresses
77        self.add_pattern(
78            "email",
79            r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
80            '*',
81            PiiSeverity::Medium,
82        )?;
83
84        // Social Security Numbers (US)
85        self.add_pattern(
86            "ssn",
87            r"\b\d{3}-\d{2}-\d{4}\b|\b\d{9}\b",
88            'X',
89            PiiSeverity::High,
90        )?;
91
92        // Credit card numbers (basic pattern)
93        self.add_pattern(
94            "credit_card",
95            r"\b(?:\d{4}[-\s]?){3}\d{4}\b",
96            '*',
97            PiiSeverity::High,
98        )?;
99
100        // Phone numbers (US format)
101        self.add_pattern(
102            "phone",
103            r"\b(?:\+1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b",
104            'X',
105            PiiSeverity::Medium,
106        )?;
107
108        // IPv4 addresses
109        self.add_pattern(
110            "ipv4",
111            r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
112            'X',
113            PiiSeverity::Low,
114        )?;
115
116        // API keys (generic patterns)
117        // Pattern matches "api_key:", "api-key=", "secret_key ", etc. followed by 20+ alphanumeric chars
118        self.add_pattern(
119            "api_key",
120            r"(?i)(api[_-]?key|access[_-]?token|secret[_-]?key)[\s:=]+[\w-]{20,}",
121            '*',
122            PiiSeverity::Critical,
123        )?;
124
125        // Passwords in URLs or code
126        self.add_pattern(
127            "password",
128            r"(?i)(password|pwd|pass)[\s:=]+\S{4,}",
129            '*',
130            PiiSeverity::Critical,
131        )?;
132
133        // JWT tokens
134        self.add_pattern(
135            "jwt_token",
136            r"eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+",
137            '*',
138            PiiSeverity::Critical,
139        )?;
140
141        // Bitcoin addresses
142        self.add_pattern(
143            "bitcoin",
144            r"\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b|bc1[a-z0-9]{39,59}\b",
145            'X',
146            PiiSeverity::Medium,
147        )?;
148
149        // Bank account numbers (generic)
150        self.add_pattern("bank_account", r"\b\d{8,17}\b", 'X', PiiSeverity::High)?;
151
152        // Driver's license numbers (US format)
153        self.add_pattern(
154            "drivers_license",
155            r"\b[A-Z]{1,2}\d{6,8}\b|\b\d{8,9}\b",
156            'X',
157            PiiSeverity::High,
158        )?;
159
160        // Add custom patterns from config (clone to avoid borrow checker issues)
161        let custom_patterns = self.config.detect_patterns.clone();
162        for pattern in custom_patterns {
163            self.add_pattern("custom", &pattern, '*', PiiSeverity::Medium)?;
164        }
165
166        debug!("Initialized {} PII detection patterns", self.patterns.len());
167        Ok(())
168    }
169
170    fn add_pattern(
171        &mut self,
172        name: &str,
173        pattern: &str,
174        mask_char: char,
175        severity: PiiSeverity,
176    ) -> Result<()> {
177        let regex = Regex::new(pattern).map_err(|e| SecurityError::ValidationError {
178            message: format!("Invalid PII regex pattern '{pattern}': {e}"),
179        })?;
180
181        self.patterns.push(PiiPattern {
182            name: name.to_string(),
183            regex,
184            mask_char,
185            severity,
186        });
187
188        Ok(())
189    }
190
191    /// Detect PII in text content
192    pub fn detect_pii(&self, content: &str) -> PiiDetectionResult {
193        if !self.config.enabled {
194            return PiiDetectionResult {
195                found_patterns: Vec::new(),
196                masked_content: content.to_string(),
197                severity: PiiSeverity::Low,
198                requires_action: false,
199            };
200        }
201
202        // Debug: log patterns being used
203        debug!(
204            "Detecting PII in content with {} patterns",
205            self.patterns.len()
206        );
207
208        let mut found_patterns = Vec::new();
209        let mut masked_content = content.to_string();
210        let mut max_severity = PiiSeverity::Low;
211
212        // Apply each pattern
213        for pattern in &self.patterns {
214            for mat in pattern.regex.find_iter(content) {
215                let start = mat.start();
216                let end = mat.end();
217                let matched_text = mat.as_str().to_string();
218
219                // Create masked version
220                let masked_text = self.create_mask(&matched_text, pattern.mask_char);
221
222                // Update max severity
223                max_severity = self.max_severity(&max_severity, &pattern.severity);
224
225                found_patterns.push(PiiMatch {
226                    pattern_name: pattern.name.clone(),
227                    severity: pattern.severity.clone(),
228                    start,
229                    end,
230                    matched_text: matched_text.clone(),
231                    masked_text: masked_text.clone(),
232                });
233            }
234        }
235
236        // Apply masking if enabled
237        if !found_patterns.is_empty() {
238            // Sort matches by start position in reverse order to avoid position shifts
239            found_patterns.sort_by(|a, b| b.start.cmp(&a.start));
240
241            for pii_match in &found_patterns {
242                masked_content
243                    .replace_range(pii_match.start..pii_match.end, &pii_match.masked_text);
244            }
245
246            // Log PII detection
247            warn!(
248                "PII detected: {} matches, max severity: {:?}",
249                found_patterns.len(),
250                max_severity
251            );
252        }
253
254        let requires_action = matches!(max_severity, PiiSeverity::High | PiiSeverity::Critical);
255
256        PiiDetectionResult {
257            found_patterns,
258            masked_content,
259            severity: max_severity,
260            requires_action,
261        }
262    }
263
264    /// Mask sensitive content for logging
265    pub fn mask_for_logging(&self, content: &str) -> String {
266        if !self.config.enabled || !self.config.mask_in_logs {
267            return content.to_string();
268        }
269
270        let result = self.detect_pii(content);
271        result.masked_content
272    }
273
274    /// Mask sensitive content for API responses
275    pub fn mask_for_response(&self, content: &str) -> String {
276        if !self.config.enabled || !self.config.mask_in_responses {
277            return content.to_string();
278        }
279
280        let result = self.detect_pii(content);
281        result.masked_content
282    }
283
284    /// Check if content should be anonymized for storage
285    pub fn should_anonymize(&self, content: &str) -> bool {
286        if !self.config.enabled || !self.config.anonymize_storage {
287            return false;
288        }
289
290        let result = self.detect_pii(content);
291        result.requires_action
292    }
293
294    /// Anonymize content for storage
295    pub fn anonymize_for_storage(&self, content: &str) -> String {
296        if !self.config.enabled || !self.config.anonymize_storage {
297            return content.to_string();
298        }
299
300        let result = self.detect_pii(content);
301
302        if result.requires_action {
303            // For high-severity PII, replace with generic placeholders
304            // We need to work with the original content and replace based on positions
305            let mut anonymized = content.to_string();
306
307            // Sort by position in reverse to avoid position shifts
308            let mut high_severity_matches: Vec<_> = result
309                .found_patterns
310                .iter()
311                .filter(|m| matches!(m.severity, PiiSeverity::High | PiiSeverity::Critical))
312                .collect();
313            high_severity_matches.sort_by(|a, b| b.start.cmp(&a.start));
314
315            for pii_match in high_severity_matches {
316                let placeholder = match pii_match.pattern_name.as_str() {
317                    "email" => "[EMAIL]",
318                    "ssn" => "[SSN]",
319                    "credit_card" => "[CREDIT_CARD]",
320                    "phone" => "[PHONE]",
321                    "api_key" => "[API_KEY]",
322                    "password" => "[PASSWORD]",
323                    "jwt_token" => "[JWT_TOKEN]",
324                    "bank_account" => "[BANK_ACCOUNT]",
325                    "drivers_license" => "[DRIVERS_LICENSE]",
326                    _ => "[PII]",
327                };
328
329                anonymized.replace_range(pii_match.start..pii_match.end, placeholder);
330            }
331
332            anonymized
333        } else {
334            result.masked_content
335        }
336    }
337
338    fn create_mask(&self, text: &str, mask_char: char) -> String {
339        if text.len() <= 4 {
340            // For short strings, mask everything except first character
341            let mut masked = String::new();
342            for (i, _) in text.char_indices() {
343                if i == 0 {
344                    masked.push(text.chars().next().unwrap_or(mask_char));
345                } else {
346                    masked.push(mask_char);
347                }
348            }
349            masked
350        } else {
351            // For longer strings, show first 2 and last 2 characters
352            let chars: Vec<char> = text.chars().collect();
353            let mut masked = String::new();
354
355            for (i, &ch) in chars.iter().enumerate() {
356                if i < 2 || i >= chars.len() - 2 {
357                    masked.push(ch);
358                } else {
359                    masked.push(mask_char);
360                }
361            }
362
363            masked
364        }
365    }
366
367    fn max_severity(&self, a: &PiiSeverity, b: &PiiSeverity) -> PiiSeverity {
368        match (a, b) {
369            (PiiSeverity::Critical, _) | (_, PiiSeverity::Critical) => PiiSeverity::Critical,
370            (PiiSeverity::High, _) | (_, PiiSeverity::High) => PiiSeverity::High,
371            (PiiSeverity::Medium, _) | (_, PiiSeverity::Medium) => PiiSeverity::Medium,
372            _ => PiiSeverity::Low,
373        }
374    }
375
376    pub fn is_enabled(&self) -> bool {
377        self.config.enabled
378    }
379
380    pub fn get_pattern_count(&self) -> usize {
381        self.patterns.len()
382    }
383}
384
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a manager, panicking if a pattern fails to compile.
    fn build(config: PiiConfig) -> PiiManager {
        PiiManager::new(config).unwrap()
    }

    /// Enabled configuration that keeps the default custom patterns.
    fn enabled_config() -> PiiConfig {
        PiiConfig {
            enabled: true,
            ..PiiConfig::default()
        }
    }

    /// Enabled configuration without custom patterns, so match counts reflect
    /// the built-in patterns only.
    fn builtin_only_config() -> PiiConfig {
        PiiConfig {
            enabled: true,
            detect_patterns: Vec::new(),
            ..PiiConfig::default()
        }
    }

    #[test]
    fn test_pii_manager_creation() {
        let manager = build(PiiConfig::default());

        // Detection is off in the default configuration.
        assert!(!manager.is_enabled());
    }

    #[test]
    fn test_pii_manager_enabled() {
        let manager = build(enabled_config());

        assert!(manager.is_enabled());
        assert!(manager.get_pattern_count() > 0);
    }

    #[test]
    fn test_email_detection() {
        let manager = build(builtin_only_config());
        let text = "Please contact john.doe@example.com for support.";

        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        let hit = &result.found_patterns[0];
        assert_eq!(hit.pattern_name, "email");
        assert!(matches!(hit.severity, PiiSeverity::Medium));
        // The masked content must differ from the input.
        assert_ne!(result.masked_content, text);
    }

    #[test]
    fn test_ssn_detection() {
        let manager = build(builtin_only_config());

        let result = manager.detect_pii("My SSN is 123-45-6789.");

        assert_eq!(result.found_patterns.len(), 1);
        let hit = &result.found_patterns[0];
        assert_eq!(hit.pattern_name, "ssn");
        assert!(matches!(hit.severity, PiiSeverity::High));
        assert!(result.requires_action);
    }

    #[test]
    fn test_credit_card_detection() {
        let manager = build(builtin_only_config());

        let result = manager.detect_pii("Credit card: 4532-1234-5678-9012");

        assert_eq!(result.found_patterns.len(), 1);
        let hit = &result.found_patterns[0];
        assert_eq!(hit.pattern_name, "credit_card");
        assert!(matches!(hit.severity, PiiSeverity::High));
    }

    #[test]
    fn test_api_key_detection() {
        let manager = build(builtin_only_config());

        let result = manager.detect_pii("api_key: sk-1234567890abcdef1234567890abcdef");

        // Diagnostic output to show what matched if an assertion fails.
        println!(
            "API key test - found {} patterns",
            result.found_patterns.len()
        );
        result.found_patterns.iter().for_each(|found| {
            println!("  Found: {} - {}", found.pattern_name, found.matched_text);
        });

        assert_eq!(result.found_patterns.len(), 1);
        let hit = &result.found_patterns[0];
        assert_eq!(hit.pattern_name, "api_key");
        assert!(matches!(hit.severity, PiiSeverity::Critical));
        assert!(result.requires_action);
    }

    #[test]
    fn test_multiple_pii_detection() {
        let manager = build(builtin_only_config());
        let text = "Contact john@example.com or call 555-123-4567 about SSN 123-45-6789.";

        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 3);

        // The email, the phone number and the SSN must each be reported.
        let reported = |name: &str| {
            result
                .found_patterns
                .iter()
                .any(|found| found.pattern_name == name)
        };
        assert!(reported("email"));
        assert!(reported("phone"));
        assert!(reported("ssn"));

        // The SSN makes the overall severity High.
        assert!(matches!(result.severity, PiiSeverity::High));
        assert!(result.requires_action);
    }

    #[test]
    fn test_masking_for_logging() {
        let manager = build(PiiConfig {
            enabled: true,
            mask_in_logs: true,
            ..PiiConfig::default()
        });
        let text = "User email: john.doe@example.com";

        let masked = manager.mask_for_logging(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("john.doe@example.com"));
    }

    #[test]
    fn test_masking_for_response() {
        let manager = build(PiiConfig {
            enabled: true,
            mask_in_responses: true,
            ..PiiConfig::default()
        });
        let text = "Phone: 555-123-4567";

        let masked = manager.mask_for_response(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("555-123-4567"));
    }

    #[test]
    fn test_anonymization_for_storage() {
        let manager = build(PiiConfig {
            enabled: true,
            anonymize_storage: true,
            detect_patterns: Vec::new(),
            ..PiiConfig::default()
        });

        let anonymized =
            manager.anonymize_for_storage("SSN: 123-45-6789 and email: john@example.com");

        // The high-severity SSN is swapped for its placeholder...
        assert!(anonymized.contains("[SSN]"));
        // ...and the raw number no longer appears anywhere in the output.
        assert!(!anonymized.contains("123-45-6789"));
    }

    #[test]
    fn test_should_anonymize() {
        let manager = build(PiiConfig {
            enabled: true,
            anonymize_storage: true,
            ..PiiConfig::default()
        });

        // High-severity PII triggers anonymization.
        assert!(manager.should_anonymize("SSN: 123-45-6789"));

        // Low-severity PII does not.
        assert!(!manager.should_anonymize("IP: 192.168.1.1"));

        // Neither does text without any PII.
        assert!(!manager.should_anonymize("This is normal text"));
    }

    #[test]
    fn test_custom_patterns() {
        let custom_pattern = r"\bcustom-\d{6}\b".to_string();
        let manager = build(PiiConfig {
            enabled: true,
            detect_patterns: vec![custom_pattern],
            mask_in_logs: true,
            mask_in_responses: false,
            anonymize_storage: false,
        });

        let result = manager.detect_pii("Reference number: custom-123456");

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "custom");
    }

    #[test]
    fn test_disabled_pii_detection() {
        let manager = build(PiiConfig {
            enabled: false,
            ..PiiConfig::default()
        });
        let text = "SSN: 123-45-6789 and email: john@example.com";

        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 0);
        assert_eq!(result.masked_content, text);
        assert!(!result.requires_action);
    }

    #[test]
    fn test_mask_creation() {
        let manager = build(enabled_config());

        // Short strings keep only their first character.
        assert_eq!(manager.create_mask("abc", '*'), "a**");

        // Longer strings keep their first two and last two characters.
        assert_eq!(manager.create_mask("1234567890", 'X'), "12XXXXXX90");

        // A 20-character email keeps "jo" and "om" and masks the 16 between.
        assert_eq!(
            manager.create_mask("john.doe@example.com", '*'),
            "jo****************om"
        );
    }

    #[test]
    fn test_severity_comparison() {
        let manager = build(enabled_config());

        let low_vs_high = manager.max_severity(&PiiSeverity::Low, &PiiSeverity::High);
        assert!(matches!(low_vs_high, PiiSeverity::High));

        let critical_vs_medium = manager.max_severity(&PiiSeverity::Critical, &PiiSeverity::Medium);
        assert!(matches!(critical_vs_medium, PiiSeverity::Critical));

        let low_vs_low = manager.max_severity(&PiiSeverity::Low, &PiiSeverity::Low);
        assert!(matches!(low_vs_low, PiiSeverity::Low));
    }
}
669}