1use crate::security::{PiiConfig, Result, SecurityError};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use tracing::{debug, warn};
6
/// Detects and masks personally identifiable information (PII) in text.
///
/// Bundles the PII configuration with the compiled detection patterns that
/// are run against content.
pub struct PiiManager {
    /// Settings controlling whether detection is enabled and which outputs
    /// (logs, responses, storage) are masked.
    config: PiiConfig,
    /// Compiled detection patterns; stays empty when the configuration is
    /// disabled, because patterns are only initialized for enabled managers.
    patterns: Vec<PiiPattern>,
}
12
/// A single compiled PII detection rule.
#[derive(Debug, Clone)]
pub struct PiiPattern {
    /// Label reported for every match of this rule (e.g. `"email"`, `"ssn"`,
    /// or `"custom"` for patterns taken from the configuration).
    pub name: String,
    /// Compiled regular expression that locates the sensitive text.
    pub regex: Regex,
    /// Character substituted for the hidden part of a match when masking.
    pub mask_char: char,
    /// Severity assigned to every match of this rule.
    pub severity: PiiSeverity,
}
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
24pub enum PiiSeverity {
25 Low, Medium, High, Critical, }
30
/// Outcome of scanning one piece of content for PII.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiDetectionResult {
    /// Every match found, ordered by descending start offset; empty when
    /// nothing was detected or detection is disabled.
    pub found_patterns: Vec<PiiMatch>,
    /// The scanned content with detected PII masked; equal to the input when
    /// nothing was detected or detection is disabled.
    pub masked_content: String,
    /// Highest severity among the matches (`Low` when there are none).
    pub severity: PiiSeverity,
    /// `true` when `severity` is `High` or `Critical`.
    pub requires_action: bool,
}
39
/// One occurrence of PII located in scanned content.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiMatch {
    /// Name of the pattern that produced this match.
    pub pattern_name: String,
    /// Severity of the pattern that produced this match.
    pub severity: PiiSeverity,
    /// Offset in the scanned content where the match begins, as reported by
    /// the regex engine.
    pub start: usize,
    /// Offset in the scanned content just past the end of the match.
    pub end: usize,
    /// The raw matched text. This is the unmasked sensitive value, so take
    /// care before logging or serializing it.
    pub matched_text: String,
    /// The matched text with its middle replaced by the pattern's mask
    /// character.
    pub masked_text: String,
}
50
/// Aggregate counters about PII detection.
///
/// NOTE(review): nothing in the visible source constructs or updates this
/// type; the field descriptions below are inferred from their names and
/// should be confirmed against whatever code fills them in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiStatistics {
    /// Presumably the number of scans performed.
    pub total_scans: u64,
    /// Presumably the number of matches found across all scans.
    pub total_matches: u64,
    /// Presumably match counts keyed by pattern name.
    pub matches_by_type: HashMap<String, u64>,
    /// Presumably the number of matches with `High` severity.
    pub high_severity_matches: u64,
    /// Presumably the number of matches with `Critical` severity.
    pub critical_matches: u64,
}
60
61impl PiiManager {
62 pub fn new(config: PiiConfig) -> Result<Self> {
63 let mut manager = Self {
64 config,
65 patterns: Vec::new(),
66 };
67
68 if manager.config.enabled {
69 manager.initialize_patterns()?;
70 }
71
72 Ok(manager)
73 }
74
75 fn initialize_patterns(&mut self) -> Result<()> {
76 self.add_pattern(
78 "email",
79 r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
80 '*',
81 PiiSeverity::Medium,
82 )?;
83
84 self.add_pattern(
86 "ssn",
87 r"\b\d{3}-\d{2}-\d{4}\b|\b\d{9}\b",
88 'X',
89 PiiSeverity::High,
90 )?;
91
92 self.add_pattern(
94 "credit_card",
95 r"\b(?:\d{4}[-\s]?){3}\d{4}\b",
96 '*',
97 PiiSeverity::High,
98 )?;
99
100 self.add_pattern(
102 "phone",
103 r"\b(?:\+1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b",
104 'X',
105 PiiSeverity::Medium,
106 )?;
107
108 self.add_pattern(
110 "ipv4",
111 r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
112 'X',
113 PiiSeverity::Low,
114 )?;
115
116 self.add_pattern(
119 "api_key",
120 r"(?i)(api[_-]?key|access[_-]?token|secret[_-]?key)[\s:=]+[\w-]{20,}",
121 '*',
122 PiiSeverity::Critical,
123 )?;
124
125 self.add_pattern(
127 "password",
128 r"(?i)(password|pwd|pass)[\s:=]+\S{4,}",
129 '*',
130 PiiSeverity::Critical,
131 )?;
132
133 self.add_pattern(
135 "jwt_token",
136 r"eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+",
137 '*',
138 PiiSeverity::Critical,
139 )?;
140
141 self.add_pattern(
143 "bitcoin",
144 r"\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b|bc1[a-z0-9]{39,59}\b",
145 'X',
146 PiiSeverity::Medium,
147 )?;
148
149 self.add_pattern("bank_account", r"\b\d{8,17}\b", 'X', PiiSeverity::High)?;
151
152 self.add_pattern(
154 "drivers_license",
155 r"\b[A-Z]{1,2}\d{6,8}\b|\b\d{8,9}\b",
156 'X',
157 PiiSeverity::High,
158 )?;
159
160 let custom_patterns = self.config.detect_patterns.clone();
162 for pattern in custom_patterns {
163 self.add_pattern("custom", &pattern, '*', PiiSeverity::Medium)?;
164 }
165
166 debug!("Initialized {} PII detection patterns", self.patterns.len());
167 Ok(())
168 }
169
170 fn add_pattern(
171 &mut self,
172 name: &str,
173 pattern: &str,
174 mask_char: char,
175 severity: PiiSeverity,
176 ) -> Result<()> {
177 let regex = Regex::new(pattern).map_err(|e| SecurityError::ValidationError {
178 message: format!("Invalid PII regex pattern '{pattern}': {e}"),
179 })?;
180
181 self.patterns.push(PiiPattern {
182 name: name.to_string(),
183 regex,
184 mask_char,
185 severity,
186 });
187
188 Ok(())
189 }
190
191 pub fn detect_pii(&self, content: &str) -> PiiDetectionResult {
193 if !self.config.enabled {
194 return PiiDetectionResult {
195 found_patterns: Vec::new(),
196 masked_content: content.to_string(),
197 severity: PiiSeverity::Low,
198 requires_action: false,
199 };
200 }
201
202 debug!(
204 "Detecting PII in content with {} patterns",
205 self.patterns.len()
206 );
207
208 let mut found_patterns = Vec::new();
209 let mut masked_content = content.to_string();
210 let mut max_severity = PiiSeverity::Low;
211
212 for pattern in &self.patterns {
214 for mat in pattern.regex.find_iter(content) {
215 let start = mat.start();
216 let end = mat.end();
217 let matched_text = mat.as_str().to_string();
218
219 let masked_text = self.create_mask(&matched_text, pattern.mask_char);
221
222 max_severity = self.max_severity(&max_severity, &pattern.severity);
224
225 found_patterns.push(PiiMatch {
226 pattern_name: pattern.name.clone(),
227 severity: pattern.severity.clone(),
228 start,
229 end,
230 matched_text: matched_text.clone(),
231 masked_text: masked_text.clone(),
232 });
233 }
234 }
235
236 if !found_patterns.is_empty() {
238 found_patterns.sort_by(|a, b| b.start.cmp(&a.start));
240
241 for pii_match in &found_patterns {
242 masked_content
243 .replace_range(pii_match.start..pii_match.end, &pii_match.masked_text);
244 }
245
246 warn!(
248 "PII detected: {} matches, max severity: {:?}",
249 found_patterns.len(),
250 max_severity
251 );
252 }
253
254 let requires_action = matches!(max_severity, PiiSeverity::High | PiiSeverity::Critical);
255
256 PiiDetectionResult {
257 found_patterns,
258 masked_content,
259 severity: max_severity,
260 requires_action,
261 }
262 }
263
264 pub fn mask_for_logging(&self, content: &str) -> String {
266 if !self.config.enabled || !self.config.mask_in_logs {
267 return content.to_string();
268 }
269
270 let result = self.detect_pii(content);
271 result.masked_content
272 }
273
274 pub fn mask_for_response(&self, content: &str) -> String {
276 if !self.config.enabled || !self.config.mask_in_responses {
277 return content.to_string();
278 }
279
280 let result = self.detect_pii(content);
281 result.masked_content
282 }
283
284 pub fn should_anonymize(&self, content: &str) -> bool {
286 if !self.config.enabled || !self.config.anonymize_storage {
287 return false;
288 }
289
290 let result = self.detect_pii(content);
291 result.requires_action
292 }
293
294 pub fn anonymize_for_storage(&self, content: &str) -> String {
296 if !self.config.enabled || !self.config.anonymize_storage {
297 return content.to_string();
298 }
299
300 let result = self.detect_pii(content);
301
302 if result.requires_action {
303 let mut anonymized = content.to_string();
306
307 let mut high_severity_matches: Vec<_> = result
309 .found_patterns
310 .iter()
311 .filter(|m| matches!(m.severity, PiiSeverity::High | PiiSeverity::Critical))
312 .collect();
313 high_severity_matches.sort_by(|a, b| b.start.cmp(&a.start));
314
315 for pii_match in high_severity_matches {
316 let placeholder = match pii_match.pattern_name.as_str() {
317 "email" => "[EMAIL]",
318 "ssn" => "[SSN]",
319 "credit_card" => "[CREDIT_CARD]",
320 "phone" => "[PHONE]",
321 "api_key" => "[API_KEY]",
322 "password" => "[PASSWORD]",
323 "jwt_token" => "[JWT_TOKEN]",
324 "bank_account" => "[BANK_ACCOUNT]",
325 "drivers_license" => "[DRIVERS_LICENSE]",
326 _ => "[PII]",
327 };
328
329 anonymized.replace_range(pii_match.start..pii_match.end, placeholder);
330 }
331
332 anonymized
333 } else {
334 result.masked_content
335 }
336 }
337
338 fn create_mask(&self, text: &str, mask_char: char) -> String {
339 if text.len() <= 4 {
340 let mut masked = String::new();
342 for (i, _) in text.char_indices() {
343 if i == 0 {
344 masked.push(text.chars().next().unwrap_or(mask_char));
345 } else {
346 masked.push(mask_char);
347 }
348 }
349 masked
350 } else {
351 let chars: Vec<char> = text.chars().collect();
353 let mut masked = String::new();
354
355 for (i, &ch) in chars.iter().enumerate() {
356 if i < 2 || i >= chars.len() - 2 {
357 masked.push(ch);
358 } else {
359 masked.push(mask_char);
360 }
361 }
362
363 masked
364 }
365 }
366
367 fn max_severity(&self, a: &PiiSeverity, b: &PiiSeverity) -> PiiSeverity {
368 match (a, b) {
369 (PiiSeverity::Critical, _) | (_, PiiSeverity::Critical) => PiiSeverity::Critical,
370 (PiiSeverity::High, _) | (_, PiiSeverity::High) => PiiSeverity::High,
371 (PiiSeverity::Medium, _) | (_, PiiSeverity::Medium) => PiiSeverity::Medium,
372 _ => PiiSeverity::Low,
373 }
374 }
375
376 pub fn is_enabled(&self) -> bool {
377 self.config.enabled
378 }
379
380 pub fn get_pattern_count(&self) -> usize {
381 self.patterns.len()
382 }
383}
384
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a manager from the default configuration with detection turned
    /// on, after `adjust` has applied any further configuration changes.
    fn enabled_manager(adjust: impl FnOnce(&mut PiiConfig)) -> PiiManager {
        let mut config = PiiConfig::default();
        config.enabled = true;
        adjust(&mut config);
        PiiManager::new(config).unwrap()
    }

    /// Enabled manager restricted to the built-in patterns.
    fn builtin_only_manager() -> PiiManager {
        enabled_manager(|config| config.detect_patterns.clear())
    }

    /// The default configuration yields a disabled manager.
    #[test]
    fn test_pii_manager_creation() {
        let manager = PiiManager::new(PiiConfig::default()).unwrap();
        assert!(!manager.is_enabled());
    }

    /// Enabling detection registers at least one pattern.
    #[test]
    fn test_pii_manager_enabled() {
        let manager = enabled_manager(|_| {});

        assert!(manager.is_enabled());
        assert!(manager.get_pattern_count() > 0);
    }

    /// An email address is reported once, as Medium, and gets masked.
    #[test]
    fn test_email_detection() {
        let manager = builtin_only_manager();
        let text = "Please contact john.doe@example.com for support.";

        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        let hit = &result.found_patterns[0];
        assert_eq!(hit.pattern_name, "email");
        assert!(matches!(hit.severity, PiiSeverity::Medium));
        assert_ne!(result.masked_content, text);
    }

    /// A dashed SSN is reported once, as High, and requires action.
    #[test]
    fn test_ssn_detection() {
        let manager = builtin_only_manager();

        let result = manager.detect_pii("My SSN is 123-45-6789.");

        assert_eq!(result.found_patterns.len(), 1);
        let hit = &result.found_patterns[0];
        assert_eq!(hit.pattern_name, "ssn");
        assert!(matches!(hit.severity, PiiSeverity::High));
        assert!(result.requires_action);
    }

    /// A dashed card number is reported once, as High.
    #[test]
    fn test_credit_card_detection() {
        let manager = builtin_only_manager();

        let result = manager.detect_pii("Credit card: 4532-1234-5678-9012");

        assert_eq!(result.found_patterns.len(), 1);
        let hit = &result.found_patterns[0];
        assert_eq!(hit.pattern_name, "credit_card");
        assert!(matches!(hit.severity, PiiSeverity::High));
    }

    /// A labelled API key is reported once, as Critical, and requires action.
    #[test]
    fn test_api_key_detection() {
        let manager = builtin_only_manager();

        let result = manager.detect_pii("api_key: sk-1234567890abcdef1234567890abcdef");

        // Diagnostic output to make an unexpected match count easy to debug.
        println!(
            "API key test - found {} patterns",
            result.found_patterns.len()
        );
        result.found_patterns.iter().for_each(|found| {
            println!(" Found: {} - {}", found.pattern_name, found.matched_text);
        });

        assert_eq!(result.found_patterns.len(), 1);
        let hit = &result.found_patterns[0];
        assert_eq!(hit.pattern_name, "api_key");
        assert!(matches!(hit.severity, PiiSeverity::Critical));
        assert!(result.requires_action);
    }

    /// Email, phone and SSN in one text give three matches; the overall
    /// severity is that of the SSN.
    #[test]
    fn test_multiple_pii_detection() {
        let manager = builtin_only_manager();

        let result = manager
            .detect_pii("Contact john@example.com or call 555-123-4567 about SSN 123-45-6789.");

        assert_eq!(result.found_patterns.len(), 3);

        for expected in ["email", "phone", "ssn"] {
            assert!(result
                .found_patterns
                .iter()
                .any(|m| m.pattern_name.as_str() == expected));
        }

        assert!(matches!(result.severity, PiiSeverity::High));
        assert!(result.requires_action);
    }

    /// With `mask_in_logs` on, the raw email no longer appears in the output.
    #[test]
    fn test_masking_for_logging() {
        let manager = enabled_manager(|config| config.mask_in_logs = true);
        let text = "User email: john.doe@example.com";

        let masked = manager.mask_for_logging(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("john.doe@example.com"));
    }

    /// With `mask_in_responses` on, the raw phone number no longer appears.
    #[test]
    fn test_masking_for_response() {
        let manager = enabled_manager(|config| config.mask_in_responses = true);
        let text = "Phone: 555-123-4567";

        let masked = manager.mask_for_response(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("555-123-4567"));
    }

    /// Storage anonymization swaps the SSN for its placeholder.
    #[test]
    fn test_anonymization_for_storage() {
        let manager = enabled_manager(|config| {
            config.anonymize_storage = true;
            config.detect_patterns.clear();
        });

        let anonymized =
            manager.anonymize_for_storage("SSN: 123-45-6789 and email: john@example.com");

        assert!(anonymized.contains("[SSN]"));
        assert!(!anonymized.contains("123-45-6789"));
    }

    /// Only content holding High/Critical PII asks for anonymization.
    #[test]
    fn test_should_anonymize() {
        let manager = enabled_manager(|config| config.anonymize_storage = true);

        assert!(manager.should_anonymize("SSN: 123-45-6789"));
        assert!(!manager.should_anonymize("IP: 192.168.1.1"));
        assert!(!manager.should_anonymize("This is normal text"));
    }

    /// Patterns supplied through the configuration are reported as "custom".
    #[test]
    fn test_custom_patterns() {
        let config = PiiConfig {
            enabled: true,
            detect_patterns: vec![r"\bcustom-\d{6}\b".to_string()],
            mask_in_logs: true,
            mask_in_responses: false,
            anonymize_storage: false,
        };
        let manager = PiiManager::new(config).unwrap();

        let result = manager.detect_pii("Reference number: custom-123456");

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "custom");
    }

    /// A disabled manager reports nothing and returns the content untouched.
    #[test]
    fn test_disabled_pii_detection() {
        let mut config = PiiConfig::default();
        config.enabled = false;
        let manager = PiiManager::new(config).unwrap();
        let text = "SSN: 123-45-6789 and email: john@example.com";

        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 0);
        assert_eq!(result.masked_content, text);
        assert!(!result.requires_action);
    }

    /// Short texts keep one leading character; longer ones keep two
    /// characters at each end.
    #[test]
    fn test_mask_creation() {
        let manager = enabled_manager(|_| {});

        assert_eq!(manager.create_mask("abc", '*'), "a**");
        assert_eq!(manager.create_mask("1234567890", 'X'), "12XXXXXX90");
        assert_eq!(
            manager.create_mask("john.doe@example.com", '*'),
            "jo****************om"
        );
    }

    /// `max_severity` returns the more severe of its two arguments.
    #[test]
    fn test_severity_comparison() {
        let manager = enabled_manager(|_| {});

        let low_vs_high = manager.max_severity(&PiiSeverity::Low, &PiiSeverity::High);
        assert!(matches!(low_vs_high, PiiSeverity::High));

        let critical_vs_medium =
            manager.max_severity(&PiiSeverity::Critical, &PiiSeverity::Medium);
        assert!(matches!(critical_vs_medium, PiiSeverity::Critical));

        let low_vs_low = manager.max_severity(&PiiSeverity::Low, &PiiSeverity::Low);
        assert!(matches!(low_vs_low, PiiSeverity::Low));
    }
}