codex_memory/security/
pii.rs

1use crate::security::{PiiConfig, Result, SecurityError};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use tracing::{debug, warn};
6
/// PII detection and masking manager.
///
/// Holds the PII configuration together with the compiled detection patterns
/// and exposes detection, masking and anonymization helpers on top of them.
pub struct PiiManager {
    // Settings that gate detection (`enabled`) and decide where masking or
    // anonymization is applied (logs, responses, storage).
    config: PiiConfig,
    // Compiled detection patterns; left empty when the manager is created
    // with detection disabled.
    patterns: Vec<PiiPattern>,
}
12
/// PII pattern definition: one compiled detector plus how its hits are
/// masked and how sensitive they are considered.
#[derive(Debug, Clone)]
pub struct PiiPattern {
    /// Identifier copied into `PiiMatch::pattern_name` for every hit.
    pub name: String,
    /// Compiled regular expression used to find occurrences.
    pub regex: Regex,
    /// Character written in place of each hidden character when masking.
    pub mask_char: char,
    /// Severity assigned to every match produced by this pattern.
    pub severity: PiiSeverity,
}
21
22/// PII severity levels
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub enum PiiSeverity {
25    Low,      // Public information that might be personal
26    Medium,   // Sensitive personal information
27    High,     // Highly sensitive information (SSN, credit cards)
28    Critical, // Extremely sensitive (passwords, tokens)
29}
30
/// PII detection result: everything one scan of a piece of content produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiDetectionResult {
    /// Every match found, ordered by start offset from last to first.
    /// Empty when nothing matched or detection is disabled.
    pub found_patterns: Vec<PiiMatch>,
    /// The scanned content with detected PII masked; equal to the input when
    /// nothing matched or detection is disabled.
    pub masked_content: String,
    /// Highest severity among the matches; `Low` when there are none.
    pub severity: PiiSeverity,
    /// `true` when `severity` is `High` or `Critical`.
    pub requires_action: bool,
}
39
/// Individual PII match: one occurrence of one pattern in the scanned content.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiMatch {
    /// Name of the pattern that produced this match (e.g. `"email"`, `"ssn"`,
    /// or `"custom"` for configured patterns).
    pub pattern_name: String,
    /// Severity of the pattern that produced this match.
    pub severity: PiiSeverity,
    /// Byte offset in the scanned content where the match starts.
    pub start: usize,
    /// Byte offset in the scanned content just past the end of the match.
    pub end: usize,
    /// The raw matched text. NOTE: this is the unmasked sensitive value, so
    /// logging or serializing a `PiiMatch` exposes it.
    pub matched_text: String,
    /// `matched_text` with its interior characters replaced by the pattern's
    /// mask character.
    pub masked_text: String,
}
50
/// PII statistics: aggregate counters about scans and matches.
///
/// NOTE(review): nothing in this file creates or updates a `PiiStatistics`;
/// the field descriptions below are inferred from the field names and should
/// be confirmed against whatever code maintains these counters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiStatistics {
    /// Presumably the number of scans performed.
    pub total_scans: u64,
    /// Presumably the number of matches found across all scans.
    pub total_matches: u64,
    /// Presumably match counts keyed by pattern name — TODO confirm the key.
    pub matches_by_type: HashMap<String, u64>,
    /// Presumably the number of matches with `High` severity.
    pub high_severity_matches: u64,
    /// Presumably the number of matches with `Critical` severity.
    pub critical_matches: u64,
}
60
61impl PiiManager {
62    pub fn new(config: PiiConfig) -> Result<Self> {
63        let mut manager = Self {
64            config,
65            patterns: Vec::new(),
66        };
67
68        if manager.config.enabled {
69            manager.initialize_patterns()?;
70        }
71
72        Ok(manager)
73    }
74
75    fn initialize_patterns(&mut self) -> Result<()> {
76        // Email addresses
77        self.add_pattern(
78            "email",
79            r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
80            '*',
81            PiiSeverity::Medium,
82        )?;
83
84        // Social Security Numbers (US)
85        self.add_pattern(
86            "ssn",
87            r"\b\d{3}-\d{2}-\d{4}\b|\b\d{9}\b",
88            'X',
89            PiiSeverity::High,
90        )?;
91
92        // Credit card numbers (basic pattern)
93        self.add_pattern(
94            "credit_card",
95            r"\b(?:\d{4}[-\s]?){3}\d{4}\b",
96            '*',
97            PiiSeverity::High,
98        )?;
99
100        // Phone numbers (US format)
101        self.add_pattern(
102            "phone",
103            r"\b(?:\+1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b",
104            'X',
105            PiiSeverity::Medium,
106        )?;
107
108        // IPv4 addresses
109        self.add_pattern(
110            "ipv4",
111            r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
112            'X',
113            PiiSeverity::Low,
114        )?;
115
116        // API keys (generic patterns)
117        self.add_pattern(
118            "api_key",
119            r"(?i)(api[_-]?key|access[_-]?token|secret[_-]?key)[\s:=]+[a-zA-Z0-9+/=]{20,}",
120            '*',
121            PiiSeverity::Critical,
122        )?;
123
124        // Passwords in URLs or code
125        self.add_pattern(
126            "password",
127            r"(?i)(password|pwd|pass)[\s:=]+\S{4,}",
128            '*',
129            PiiSeverity::Critical,
130        )?;
131
132        // JWT tokens
133        self.add_pattern(
134            "jwt_token",
135            r"eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+",
136            '*',
137            PiiSeverity::Critical,
138        )?;
139
140        // Bitcoin addresses
141        self.add_pattern(
142            "bitcoin",
143            r"\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b|bc1[a-z0-9]{39,59}\b",
144            'X',
145            PiiSeverity::Medium,
146        )?;
147
148        // Bank account numbers (generic)
149        self.add_pattern("bank_account", r"\b\d{8,17}\b", 'X', PiiSeverity::High)?;
150
151        // Driver's license numbers (US format)
152        self.add_pattern(
153            "drivers_license",
154            r"\b[A-Z]{1,2}\d{6,8}\b|\b\d{8,9}\b",
155            'X',
156            PiiSeverity::High,
157        )?;
158
159        // Add custom patterns from config (clone to avoid borrow checker issues)
160        let custom_patterns = self.config.detect_patterns.clone();
161        for pattern in custom_patterns {
162            self.add_pattern("custom", &pattern, '*', PiiSeverity::Medium)?;
163        }
164
165        debug!("Initialized {} PII detection patterns", self.patterns.len());
166        Ok(())
167    }
168
169    fn add_pattern(
170        &mut self,
171        name: &str,
172        pattern: &str,
173        mask_char: char,
174        severity: PiiSeverity,
175    ) -> Result<()> {
176        let regex = Regex::new(pattern).map_err(|e| SecurityError::ValidationError {
177            message: format!("Invalid PII regex pattern '{pattern}': {e}"),
178        })?;
179
180        self.patterns.push(PiiPattern {
181            name: name.to_string(),
182            regex,
183            mask_char,
184            severity,
185        });
186
187        Ok(())
188    }
189
190    /// Detect PII in text content
191    pub fn detect_pii(&self, content: &str) -> PiiDetectionResult {
192        if !self.config.enabled {
193            return PiiDetectionResult {
194                found_patterns: Vec::new(),
195                masked_content: content.to_string(),
196                severity: PiiSeverity::Low,
197                requires_action: false,
198            };
199        }
200
201        let mut found_patterns = Vec::new();
202        let mut masked_content = content.to_string();
203        let mut max_severity = PiiSeverity::Low;
204
205        // Apply each pattern
206        for pattern in &self.patterns {
207            for mat in pattern.regex.find_iter(content) {
208                let start = mat.start();
209                let end = mat.end();
210                let matched_text = mat.as_str().to_string();
211
212                // Create masked version
213                let masked_text = self.create_mask(&matched_text, pattern.mask_char);
214
215                // Update max severity
216                max_severity = self.max_severity(&max_severity, &pattern.severity);
217
218                found_patterns.push(PiiMatch {
219                    pattern_name: pattern.name.clone(),
220                    severity: pattern.severity.clone(),
221                    start,
222                    end,
223                    matched_text: matched_text.clone(),
224                    masked_text: masked_text.clone(),
225                });
226            }
227        }
228
229        // Apply masking if enabled
230        if !found_patterns.is_empty() {
231            // Sort matches by start position in reverse order to avoid position shifts
232            found_patterns.sort_by(|a, b| b.start.cmp(&a.start));
233
234            for pii_match in &found_patterns {
235                masked_content
236                    .replace_range(pii_match.start..pii_match.end, &pii_match.masked_text);
237            }
238
239            // Log PII detection
240            warn!(
241                "PII detected: {} matches, max severity: {:?}",
242                found_patterns.len(),
243                max_severity
244            );
245        }
246
247        let requires_action = matches!(max_severity, PiiSeverity::High | PiiSeverity::Critical);
248
249        PiiDetectionResult {
250            found_patterns,
251            masked_content,
252            severity: max_severity,
253            requires_action,
254        }
255    }
256
257    /// Mask sensitive content for logging
258    pub fn mask_for_logging(&self, content: &str) -> String {
259        if !self.config.enabled || !self.config.mask_in_logs {
260            return content.to_string();
261        }
262
263        let result = self.detect_pii(content);
264        result.masked_content
265    }
266
267    /// Mask sensitive content for API responses
268    pub fn mask_for_response(&self, content: &str) -> String {
269        if !self.config.enabled || !self.config.mask_in_responses {
270            return content.to_string();
271        }
272
273        let result = self.detect_pii(content);
274        result.masked_content
275    }
276
277    /// Check if content should be anonymized for storage
278    pub fn should_anonymize(&self, content: &str) -> bool {
279        if !self.config.enabled || !self.config.anonymize_storage {
280            return false;
281        }
282
283        let result = self.detect_pii(content);
284        result.requires_action
285    }
286
287    /// Anonymize content for storage
288    pub fn anonymize_for_storage(&self, content: &str) -> String {
289        if !self.config.enabled || !self.config.anonymize_storage {
290            return content.to_string();
291        }
292
293        let result = self.detect_pii(content);
294
295        if result.requires_action {
296            // For high-severity PII, replace with generic placeholders
297            let mut anonymized = result.masked_content;
298
299            for pii_match in &result.found_patterns {
300                if matches!(
301                    pii_match.severity,
302                    PiiSeverity::High | PiiSeverity::Critical
303                ) {
304                    let placeholder = match pii_match.pattern_name.as_str() {
305                        "email" => "[EMAIL]",
306                        "ssn" => "[SSN]",
307                        "credit_card" => "[CREDIT_CARD]",
308                        "phone" => "[PHONE]",
309                        "api_key" => "[API_KEY]",
310                        "password" => "[PASSWORD]",
311                        "jwt_token" => "[JWT_TOKEN]",
312                        "bank_account" => "[BANK_ACCOUNT]",
313                        "drivers_license" => "[DRIVERS_LICENSE]",
314                        _ => "[PII]",
315                    };
316
317                    anonymized = anonymized.replace(&pii_match.masked_text, placeholder);
318                }
319            }
320
321            anonymized
322        } else {
323            result.masked_content
324        }
325    }
326
327    fn create_mask(&self, text: &str, mask_char: char) -> String {
328        if text.len() <= 4 {
329            // For short strings, mask everything except first character
330            let mut masked = String::new();
331            for (i, _) in text.char_indices() {
332                if i == 0 {
333                    masked.push(text.chars().next().unwrap_or(mask_char));
334                } else {
335                    masked.push(mask_char);
336                }
337            }
338            masked
339        } else {
340            // For longer strings, show first 2 and last 2 characters
341            let chars: Vec<char> = text.chars().collect();
342            let mut masked = String::new();
343
344            for (i, &ch) in chars.iter().enumerate() {
345                if i < 2 || i >= chars.len() - 2 {
346                    masked.push(ch);
347                } else {
348                    masked.push(mask_char);
349                }
350            }
351
352            masked
353        }
354    }
355
356    fn max_severity(&self, a: &PiiSeverity, b: &PiiSeverity) -> PiiSeverity {
357        match (a, b) {
358            (PiiSeverity::Critical, _) | (_, PiiSeverity::Critical) => PiiSeverity::Critical,
359            (PiiSeverity::High, _) | (_, PiiSeverity::High) => PiiSeverity::High,
360            (PiiSeverity::Medium, _) | (_, PiiSeverity::Medium) => PiiSeverity::Medium,
361            _ => PiiSeverity::Low,
362        }
363    }
364
365    pub fn is_enabled(&self) -> bool {
366        self.config.enabled
367    }
368
369    pub fn get_pattern_count(&self) -> usize {
370        self.patterns.len()
371    }
372}
373
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a manager from `config`, panicking if a pattern fails to compile.
    fn manager_with(config: PiiConfig) -> PiiManager {
        PiiManager::new(config).unwrap()
    }

    /// Builds a manager with detection enabled and every other setting left at
    /// its default.
    fn enabled_manager() -> PiiManager {
        manager_with(PiiConfig {
            enabled: true,
            ..PiiConfig::default()
        })
    }

    #[test]
    fn test_pii_manager_creation() {
        let manager = manager_with(PiiConfig::default());
        assert!(!manager.is_enabled()); // disabled by default
    }

    #[test]
    fn test_pii_manager_enabled() {
        let manager = enabled_manager();
        assert!(manager.is_enabled());
        assert!(manager.get_pattern_count() > 0);
    }

    #[test]
    fn test_email_detection() {
        let manager = enabled_manager();

        let text = "Please contact john.doe@example.com for support.";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "email");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::Medium
        ));
        assert_ne!(result.masked_content, text); // Should be masked
    }

    #[test]
    fn test_ssn_detection() {
        let manager = enabled_manager();

        let text = "My SSN is 123-45-6789.";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "ssn");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::High
        ));
        assert!(result.requires_action);
    }

    #[test]
    fn test_credit_card_detection() {
        let manager = enabled_manager();

        let text = "Credit card: 4532-1234-5678-9012";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "credit_card");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::High
        ));
    }

    #[test]
    fn test_api_key_detection() {
        let manager = enabled_manager();

        let text = "api_key: sk-1234567890abcdef1234567890abcdef";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "api_key");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::Critical
        ));
        assert!(result.requires_action);
    }

    #[test]
    fn test_multiple_pii_detection() {
        let manager = enabled_manager();

        let text = "Contact john@example.com or call 555-123-4567 about SSN 123-45-6789.";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 3);

        // Should detect email, phone, and SSN
        let pattern_names: Vec<&str> = result
            .found_patterns
            .iter()
            .map(|m| m.pattern_name.as_str())
            .collect();

        assert!(pattern_names.contains(&"email"));
        assert!(pattern_names.contains(&"phone"));
        assert!(pattern_names.contains(&"ssn"));

        // Max severity should be High (from SSN)
        assert!(matches!(result.severity, PiiSeverity::High));
        assert!(result.requires_action);
    }

    #[test]
    fn test_masking_for_logging() {
        let manager = manager_with(PiiConfig {
            enabled: true,
            mask_in_logs: true,
            ..PiiConfig::default()
        });

        let text = "User email: john.doe@example.com";
        let masked = manager.mask_for_logging(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("john.doe@example.com"));
    }

    #[test]
    fn test_masking_for_response() {
        let manager = manager_with(PiiConfig {
            enabled: true,
            mask_in_responses: true,
            ..PiiConfig::default()
        });

        let text = "Phone: 555-123-4567";
        let masked = manager.mask_for_response(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("555-123-4567"));
    }

    #[test]
    fn test_anonymization_for_storage() {
        let manager = manager_with(PiiConfig {
            enabled: true,
            anonymize_storage: true,
            ..PiiConfig::default()
        });

        let text = "SSN: 123-45-6789 and email: john@example.com";
        let anonymized = manager.anonymize_for_storage(text);

        // High-severity PII (SSN) should be replaced with placeholder
        assert!(anonymized.contains("[SSN]"));
        assert!(!anonymized.contains("123-45-6789"));

        // Medium-severity PII (email) should be masked but not replaced
        assert!(!anonymized.contains("john@example.com"));
        assert!(!anonymized.contains("[EMAIL]"));
    }

    #[test]
    fn test_should_anonymize() {
        let manager = manager_with(PiiConfig {
            enabled: true,
            anonymize_storage: true,
            ..PiiConfig::default()
        });

        // High-severity PII should trigger anonymization
        assert!(manager.should_anonymize("SSN: 123-45-6789"));

        // Low-severity PII should not trigger anonymization
        assert!(!manager.should_anonymize("IP: 192.168.1.1"));

        // No PII should not trigger anonymization
        assert!(!manager.should_anonymize("This is normal text"));
    }

    #[test]
    fn test_custom_patterns() {
        let config = PiiConfig {
            enabled: true,
            detect_patterns: vec![
                r"\bcustom-\d{6}\b".to_string(), // Custom pattern
            ],
            mask_in_logs: true,
            mask_in_responses: false,
            anonymize_storage: false,
        };

        let manager = manager_with(config);

        let text = "Reference number: custom-123456";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "custom");
    }

    #[test]
    fn test_disabled_pii_detection() {
        let manager = manager_with(PiiConfig {
            enabled: false,
            ..PiiConfig::default()
        });

        let text = "SSN: 123-45-6789 and email: john@example.com";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 0);
        assert_eq!(result.masked_content, text);
        assert!(!result.requires_action);
    }

    #[test]
    fn test_mask_creation() {
        let manager = enabled_manager();

        // Test short string masking
        let short_mask = manager.create_mask("abc", '*');
        assert_eq!(short_mask, "a**");

        // Test longer string masking
        let long_mask = manager.create_mask("1234567890", 'X');
        assert_eq!(long_mask, "12XXXXXX90");

        // Test email masking: 20 characters keep the first two and last two,
        // leaving 16 masked characters in between.
        let email_mask = manager.create_mask("john.doe@example.com", '*');
        assert_eq!(email_mask, format!("jo{}om", "*".repeat(16)));
    }

    #[test]
    fn test_severity_comparison() {
        let manager = enabled_manager();

        assert!(matches!(
            manager.max_severity(&PiiSeverity::Low, &PiiSeverity::High),
            PiiSeverity::High
        ));
        assert!(matches!(
            manager.max_severity(&PiiSeverity::Critical, &PiiSeverity::Medium),
            PiiSeverity::Critical
        ));
        assert!(matches!(
            manager.max_severity(&PiiSeverity::Low, &PiiSeverity::Low),
            PiiSeverity::Low
        ));
    }
}