1use crate::security::{PiiConfig, Result, SecurityError};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use tracing::{debug, warn};
6
7pub struct PiiManager {
9 config: PiiConfig,
10 patterns: Vec<PiiPattern>,
11}
12
13#[derive(Debug, Clone)]
15pub struct PiiPattern {
16 pub name: String,
17 pub regex: Regex,
18 pub mask_char: char,
19 pub severity: PiiSeverity,
20}
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
24pub enum PiiSeverity {
25 Low, Medium, High, Critical, }
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct PiiDetectionResult {
34 pub found_patterns: Vec<PiiMatch>,
35 pub masked_content: String,
36 pub severity: PiiSeverity,
37 pub requires_action: bool,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct PiiMatch {
43 pub pattern_name: String,
44 pub severity: PiiSeverity,
45 pub start: usize,
46 pub end: usize,
47 pub matched_text: String,
48 pub masked_text: String,
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct PiiStatistics {
54 pub total_scans: u64,
55 pub total_matches: u64,
56 pub matches_by_type: HashMap<String, u64>,
57 pub high_severity_matches: u64,
58 pub critical_matches: u64,
59}
60
61impl PiiManager {
62 pub fn new(config: PiiConfig) -> Result<Self> {
63 let mut manager = Self {
64 config,
65 patterns: Vec::new(),
66 };
67
68 if manager.config.enabled {
69 manager.initialize_patterns()?;
70 }
71
72 Ok(manager)
73 }
74
75 fn initialize_patterns(&mut self) -> Result<()> {
76 self.add_pattern(
78 "email",
79 r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
80 '*',
81 PiiSeverity::Medium,
82 )?;
83
84 self.add_pattern(
86 "ssn",
87 r"\b\d{3}-\d{2}-\d{4}\b|\b\d{9}\b",
88 'X',
89 PiiSeverity::High,
90 )?;
91
92 self.add_pattern(
94 "credit_card",
95 r"\b(?:\d{4}[-\s]?){3}\d{4}\b",
96 '*',
97 PiiSeverity::High,
98 )?;
99
100 self.add_pattern(
102 "phone",
103 r"\b(?:\+1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b",
104 'X',
105 PiiSeverity::Medium,
106 )?;
107
108 self.add_pattern(
110 "ipv4",
111 r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
112 'X',
113 PiiSeverity::Low,
114 )?;
115
116 self.add_pattern(
118 "api_key",
119 r"(?i)(api[_-]?key|access[_-]?token|secret[_-]?key)[\s:=]+[a-zA-Z0-9+/=]{20,}",
120 '*',
121 PiiSeverity::Critical,
122 )?;
123
124 self.add_pattern(
126 "password",
127 r"(?i)(password|pwd|pass)[\s:=]+\S{4,}",
128 '*',
129 PiiSeverity::Critical,
130 )?;
131
132 self.add_pattern(
134 "jwt_token",
135 r"eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+",
136 '*',
137 PiiSeverity::Critical,
138 )?;
139
140 self.add_pattern(
142 "bitcoin",
143 r"\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b|bc1[a-z0-9]{39,59}\b",
144 'X',
145 PiiSeverity::Medium,
146 )?;
147
148 self.add_pattern("bank_account", r"\b\d{8,17}\b", 'X', PiiSeverity::High)?;
150
151 self.add_pattern(
153 "drivers_license",
154 r"\b[A-Z]{1,2}\d{6,8}\b|\b\d{8,9}\b",
155 'X',
156 PiiSeverity::High,
157 )?;
158
159 let custom_patterns = self.config.detect_patterns.clone();
161 for pattern in custom_patterns {
162 self.add_pattern("custom", &pattern, '*', PiiSeverity::Medium)?;
163 }
164
165 debug!("Initialized {} PII detection patterns", self.patterns.len());
166 Ok(())
167 }
168
169 fn add_pattern(
170 &mut self,
171 name: &str,
172 pattern: &str,
173 mask_char: char,
174 severity: PiiSeverity,
175 ) -> Result<()> {
176 let regex = Regex::new(pattern).map_err(|e| SecurityError::ValidationError {
177 message: format!("Invalid PII regex pattern '{pattern}': {e}"),
178 })?;
179
180 self.patterns.push(PiiPattern {
181 name: name.to_string(),
182 regex,
183 mask_char,
184 severity,
185 });
186
187 Ok(())
188 }
189
190 pub fn detect_pii(&self, content: &str) -> PiiDetectionResult {
192 if !self.config.enabled {
193 return PiiDetectionResult {
194 found_patterns: Vec::new(),
195 masked_content: content.to_string(),
196 severity: PiiSeverity::Low,
197 requires_action: false,
198 };
199 }
200
201 let mut found_patterns = Vec::new();
202 let mut masked_content = content.to_string();
203 let mut max_severity = PiiSeverity::Low;
204
205 for pattern in &self.patterns {
207 for mat in pattern.regex.find_iter(content) {
208 let start = mat.start();
209 let end = mat.end();
210 let matched_text = mat.as_str().to_string();
211
212 let masked_text = self.create_mask(&matched_text, pattern.mask_char);
214
215 max_severity = self.max_severity(&max_severity, &pattern.severity);
217
218 found_patterns.push(PiiMatch {
219 pattern_name: pattern.name.clone(),
220 severity: pattern.severity.clone(),
221 start,
222 end,
223 matched_text: matched_text.clone(),
224 masked_text: masked_text.clone(),
225 });
226 }
227 }
228
229 if !found_patterns.is_empty() {
231 found_patterns.sort_by(|a, b| b.start.cmp(&a.start));
233
234 for pii_match in &found_patterns {
235 masked_content
236 .replace_range(pii_match.start..pii_match.end, &pii_match.masked_text);
237 }
238
239 warn!(
241 "PII detected: {} matches, max severity: {:?}",
242 found_patterns.len(),
243 max_severity
244 );
245 }
246
247 let requires_action = matches!(max_severity, PiiSeverity::High | PiiSeverity::Critical);
248
249 PiiDetectionResult {
250 found_patterns,
251 masked_content,
252 severity: max_severity,
253 requires_action,
254 }
255 }
256
257 pub fn mask_for_logging(&self, content: &str) -> String {
259 if !self.config.enabled || !self.config.mask_in_logs {
260 return content.to_string();
261 }
262
263 let result = self.detect_pii(content);
264 result.masked_content
265 }
266
267 pub fn mask_for_response(&self, content: &str) -> String {
269 if !self.config.enabled || !self.config.mask_in_responses {
270 return content.to_string();
271 }
272
273 let result = self.detect_pii(content);
274 result.masked_content
275 }
276
277 pub fn should_anonymize(&self, content: &str) -> bool {
279 if !self.config.enabled || !self.config.anonymize_storage {
280 return false;
281 }
282
283 let result = self.detect_pii(content);
284 result.requires_action
285 }
286
287 pub fn anonymize_for_storage(&self, content: &str) -> String {
289 if !self.config.enabled || !self.config.anonymize_storage {
290 return content.to_string();
291 }
292
293 let result = self.detect_pii(content);
294
295 if result.requires_action {
296 let mut anonymized = result.masked_content;
298
299 for pii_match in &result.found_patterns {
300 if matches!(
301 pii_match.severity,
302 PiiSeverity::High | PiiSeverity::Critical
303 ) {
304 let placeholder = match pii_match.pattern_name.as_str() {
305 "email" => "[EMAIL]",
306 "ssn" => "[SSN]",
307 "credit_card" => "[CREDIT_CARD]",
308 "phone" => "[PHONE]",
309 "api_key" => "[API_KEY]",
310 "password" => "[PASSWORD]",
311 "jwt_token" => "[JWT_TOKEN]",
312 "bank_account" => "[BANK_ACCOUNT]",
313 "drivers_license" => "[DRIVERS_LICENSE]",
314 _ => "[PII]",
315 };
316
317 anonymized = anonymized.replace(&pii_match.masked_text, placeholder);
318 }
319 }
320
321 anonymized
322 } else {
323 result.masked_content
324 }
325 }
326
327 fn create_mask(&self, text: &str, mask_char: char) -> String {
328 if text.len() <= 4 {
329 let mut masked = String::new();
331 for (i, _) in text.char_indices() {
332 if i == 0 {
333 masked.push(text.chars().next().unwrap_or(mask_char));
334 } else {
335 masked.push(mask_char);
336 }
337 }
338 masked
339 } else {
340 let chars: Vec<char> = text.chars().collect();
342 let mut masked = String::new();
343
344 for (i, &ch) in chars.iter().enumerate() {
345 if i < 2 || i >= chars.len() - 2 {
346 masked.push(ch);
347 } else {
348 masked.push(mask_char);
349 }
350 }
351
352 masked
353 }
354 }
355
356 fn max_severity(&self, a: &PiiSeverity, b: &PiiSeverity) -> PiiSeverity {
357 match (a, b) {
358 (PiiSeverity::Critical, _) | (_, PiiSeverity::Critical) => PiiSeverity::Critical,
359 (PiiSeverity::High, _) | (_, PiiSeverity::High) => PiiSeverity::High,
360 (PiiSeverity::Medium, _) | (_, PiiSeverity::Medium) => PiiSeverity::Medium,
361 _ => PiiSeverity::Low,
362 }
363 }
364
365 pub fn is_enabled(&self) -> bool {
366 self.config.enabled
367 }
368
369 pub fn get_pattern_count(&self) -> usize {
370 self.patterns.len()
371 }
372}
373
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration with detection switched on.
    fn enabled_config() -> PiiConfig {
        PiiConfig {
            enabled: true,
            ..PiiConfig::default()
        }
    }

    /// Manager built from [`enabled_config`].
    fn enabled_manager() -> PiiManager {
        PiiManager::new(enabled_config()).unwrap()
    }

    #[test]
    fn test_pii_manager_creation() {
        let manager = PiiManager::new(PiiConfig::default()).unwrap();
        // Detection is off in the default configuration.
        assert!(!manager.is_enabled());
    }

    #[test]
    fn test_pii_manager_enabled() {
        let manager = enabled_manager();
        assert!(manager.is_enabled());
        assert!(manager.get_pattern_count() > 0);
    }

    #[test]
    fn test_email_detection() {
        let manager = enabled_manager();

        let text = "Please contact john.doe@example.com for support.";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "email");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::Medium
        ));
        // The masked output must differ from the input.
        assert_ne!(result.masked_content, text);
    }

    #[test]
    fn test_ssn_detection() {
        let manager = enabled_manager();

        let text = "My SSN is 123-45-6789.";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "ssn");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::High
        ));
        assert!(result.requires_action);
    }

    #[test]
    fn test_credit_card_detection() {
        let manager = enabled_manager();

        let text = "Credit card: 4532-1234-5678-9012";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "credit_card");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::High
        ));
    }

    #[test]
    fn test_api_key_detection() {
        let manager = enabled_manager();

        // NOTE(review): the key value contains a hyphen ("sk-..."), so this
        // test only passes when the api_key pattern's value class accepts
        // '-'; confirm the pattern definition if this assertion fails.
        let text = "api_key: sk-1234567890abcdef1234567890abcdef";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "api_key");
        assert!(matches!(
            result.found_patterns[0].severity,
            PiiSeverity::Critical
        ));
        assert!(result.requires_action);
    }

    #[test]
    fn test_multiple_pii_detection() {
        let manager = enabled_manager();

        let text = "Contact john@example.com or call 555-123-4567 about SSN 123-45-6789.";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 3);

        let pattern_names: Vec<&str> = result
            .found_patterns
            .iter()
            .map(|m| m.pattern_name.as_str())
            .collect();

        assert!(pattern_names.contains(&"email"));
        assert!(pattern_names.contains(&"phone"));
        assert!(pattern_names.contains(&"ssn"));

        // The SSN is the most severe match, so it decides the overall level.
        assert!(matches!(result.severity, PiiSeverity::High));
        assert!(result.requires_action);
    }

    #[test]
    fn test_masking_for_logging() {
        let config = PiiConfig {
            mask_in_logs: true,
            ..enabled_config()
        };
        let manager = PiiManager::new(config).unwrap();

        let text = "User email: john.doe@example.com";
        let masked = manager.mask_for_logging(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("john.doe@example.com"));
    }

    #[test]
    fn test_masking_for_response() {
        let config = PiiConfig {
            mask_in_responses: true,
            ..enabled_config()
        };
        let manager = PiiManager::new(config).unwrap();

        let text = "Phone: 555-123-4567";
        let masked = manager.mask_for_response(text);

        assert_ne!(masked, text);
        assert!(!masked.contains("555-123-4567"));
    }

    #[test]
    fn test_anonymization_for_storage() {
        let config = PiiConfig {
            anonymize_storage: true,
            ..enabled_config()
        };
        let manager = PiiManager::new(config).unwrap();

        let text = "SSN: 123-45-6789 and email: john@example.com";
        let anonymized = manager.anonymize_for_storage(text);

        // High-severity data is replaced by a placeholder...
        assert!(anonymized.contains("[SSN]"));
        // ...and the raw value must be gone.
        assert!(!anonymized.contains("123-45-6789"));
    }

    #[test]
    fn test_should_anonymize() {
        let config = PiiConfig {
            anonymize_storage: true,
            ..enabled_config()
        };
        let manager = PiiManager::new(config).unwrap();

        // High severity requires anonymization.
        assert!(manager.should_anonymize("SSN: 123-45-6789"));

        // Low severity does not.
        assert!(!manager.should_anonymize("IP: 192.168.1.1"));

        // Neither does text without any PII.
        assert!(!manager.should_anonymize("This is normal text"));
    }

    #[test]
    fn test_custom_patterns() {
        let config = PiiConfig {
            enabled: true,
            detect_patterns: vec![r"\bcustom-\d{6}\b".to_string()],
            mask_in_logs: true,
            mask_in_responses: false,
            anonymize_storage: false,
        };

        let manager = PiiManager::new(config).unwrap();

        let text = "Reference number: custom-123456";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 1);
        assert_eq!(result.found_patterns[0].pattern_name, "custom");
    }

    #[test]
    fn test_disabled_pii_detection() {
        let config = PiiConfig {
            enabled: false,
            ..PiiConfig::default()
        };
        let manager = PiiManager::new(config).unwrap();

        let text = "SSN: 123-45-6789 and email: john@example.com";
        let result = manager.detect_pii(text);

        assert_eq!(result.found_patterns.len(), 0);
        assert_eq!(result.masked_content, text);
        assert!(!result.requires_action);
    }

    #[test]
    fn test_mask_creation() {
        let manager = enabled_manager();

        // Short input: only the first character stays visible.
        let short_mask = manager.create_mask("abc", '*');
        assert_eq!(short_mask, "a**");

        // Long input: first two and last two characters stay visible.
        let long_mask = manager.create_mask("1234567890", 'X');
        assert_eq!(long_mask, "12XXXXXX90");

        // The address has 20 characters, so 20 - 2 - 2 = 16 are masked.
        // (The previous expectation listed only 13 mask characters.)
        let email_mask = manager.create_mask("john.doe@example.com", '*');
        assert_eq!(email_mask, format!("jo{}om", "*".repeat(16)));
    }

    #[test]
    fn test_severity_comparison() {
        let manager = enabled_manager();

        assert!(matches!(
            manager.max_severity(&PiiSeverity::Low, &PiiSeverity::High),
            PiiSeverity::High
        ));
        assert!(matches!(
            manager.max_severity(&PiiSeverity::Critical, &PiiSeverity::Medium),
            PiiSeverity::Critical
        ));
        assert!(matches!(
            manager.max_severity(&PiiSeverity::Low, &PiiSeverity::Low),
            PiiSeverity::Low
        ));
    }
}