Skip to main content

source_map_php/
sanitizer.rs

1use regex::Regex;
2
3#[derive(Debug, Clone)]
4pub struct Sanitizer {
5    secret_patterns: Vec<Regex>,
6    phi_patterns: Vec<Regex>,
7}
8
9impl Default for Sanitizer {
10    fn default() -> Self {
11        Self {
12            secret_patterns: vec![
13                Regex::new(r"-----BEGIN (RSA|EC|OPENSSH|PRIVATE) KEY-----").unwrap(),
14                Regex::new(r"(?i)\b(password|passwd|secret|token|api[-_]?key)\b\s*[:=]").unwrap(),
15                Regex::new(r"\beyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+\b").unwrap(),
16                Regex::new(r"(?i)\b(mysql|pgsql|postgres|redis|mongodb)://\S+").unwrap(),
17            ],
18            phi_patterns: vec![
19                Regex::new(r"(?i)\b\d{3}-\d{2}-\d{4}\b").unwrap(),
20                Regex::new(r"(?i)\b(?:mrn|medical record number|health card)\b").unwrap(),
21                Regex::new(r"\b\d{10,}\b").unwrap(),
22                Regex::new(r"(?i)\b\d{4}-\d{2}-\d{2}\b").unwrap(),
23                Regex::new(r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b").unwrap(),
24                Regex::new(r"(?i)\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b").unwrap(),
25            ],
26        }
27    }
28}
29
30impl Sanitizer {
31    pub fn sanitize_text(&self, input: &str) -> Option<String> {
32        let trimmed = input.trim();
33        if trimmed.is_empty() {
34            return None;
35        }
36        if self
37            .secret_patterns
38            .iter()
39            .chain(self.phi_patterns.iter())
40            .any(|pattern| pattern.is_match(trimmed))
41        {
42            return None;
43        }
44        Some(trimmed.to_string())
45    }
46
47    pub fn sanitize_many<'a>(&self, values: impl IntoIterator<Item = &'a str>) -> Vec<String> {
48        values
49            .into_iter()
50            .filter_map(|value| self.sanitize_text(value))
51            .collect()
52    }
53}
54
#[cfg(test)]
mod tests {
    use super::Sanitizer;

    /// Inputs containing a credential assignment, an e-mail address, or a
    /// medical record reference must all be rejected.
    #[test]
    fn drops_secrets_and_phi() {
        let sanitizer = Sanitizer::default();
        let sensitive = ["password=secret", "patient@example.com", "MRN 1234567890"];

        for sample in sensitive.iter() {
            assert!(
                sanitizer.sanitize_text(sample).is_none(),
                "{:?} should have been dropped",
                sample
            );
        }
    }

    /// Ordinary prose with no sensitive markers passes through unchanged.
    #[test]
    fn keeps_safe_rule_comments() {
        let comment = "Consent becomes immutable after signing.";
        let kept = Sanitizer::default().sanitize_text(comment);

        assert_eq!(kept.as_deref(), Some(comment));
    }
}