source_map_php/
sanitizer.rs1use regex::Regex;
2
3#[derive(Debug, Clone)]
4pub struct Sanitizer {
5 secret_patterns: Vec<Regex>,
6 phi_patterns: Vec<Regex>,
7}
8
9impl Default for Sanitizer {
10 fn default() -> Self {
11 Self {
12 secret_patterns: vec![
13 Regex::new(r"-----BEGIN (RSA|EC|OPENSSH|PRIVATE) KEY-----").unwrap(),
14 Regex::new(r"(?i)\b(password|passwd|secret|token|api[-_]?key)\b\s*[:=]").unwrap(),
15 Regex::new(r"\beyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+\b").unwrap(),
16 Regex::new(r"(?i)\b(mysql|pgsql|postgres|redis|mongodb)://\S+").unwrap(),
17 ],
18 phi_patterns: vec![
19 Regex::new(r"(?i)\b\d{3}-\d{2}-\d{4}\b").unwrap(),
20 Regex::new(r"(?i)\b(?:mrn|medical record number|health card)\b").unwrap(),
21 Regex::new(r"\b\d{10,}\b").unwrap(),
22 Regex::new(r"(?i)\b\d{4}-\d{2}-\d{2}\b").unwrap(),
23 Regex::new(r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b").unwrap(),
24 Regex::new(r"(?i)\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b").unwrap(),
25 ],
26 }
27 }
28}
29
30impl Sanitizer {
31 pub fn sanitize_text(&self, input: &str) -> Option<String> {
32 let trimmed = input.trim();
33 if trimmed.is_empty() {
34 return None;
35 }
36 if self
37 .secret_patterns
38 .iter()
39 .chain(self.phi_patterns.iter())
40 .any(|pattern| pattern.is_match(trimmed))
41 {
42 return None;
43 }
44 Some(trimmed.to_string())
45 }
46
47 pub fn sanitize_many<'a>(&self, values: impl IntoIterator<Item = &'a str>) -> Vec<String> {
48 values
49 .into_iter()
50 .filter_map(|value| self.sanitize_text(value))
51 .collect()
52 }
53}
54
#[cfg(test)]
mod tests {
    use super::Sanitizer;

    /// A credential assignment, an email address, and an MRN label followed
    /// by a long digit run must each be rejected.
    #[test]
    fn drops_secrets_and_phi() {
        let sanitizer = Sanitizer::default();
        assert_eq!(sanitizer.sanitize_text("password=secret"), None);
        assert_eq!(sanitizer.sanitize_text("patient@example.com"), None);
        assert_eq!(sanitizer.sanitize_text("MRN 1234567890"), None);
    }

    /// Ordinary prose with no sensitive markers is returned unchanged.
    #[test]
    fn keeps_safe_rule_comments() {
        let sanitizer = Sanitizer::default();
        let comment = "Consent becomes immutable after signing.";
        let kept = sanitizer.sanitize_text(comment);
        assert_eq!(kept.as_deref(), Some(comment));
    }
}