use regex::Regex;
use std::sync::LazyLock;
/// Returns a regex that cannot match any input.
///
/// Used as the fallback value when one of the detection patterns in this
/// file fails to compile, so the corresponding static still holds a usable
/// [`Regex`] that simply reports no matches.
///
/// # Panics
///
/// Panics if the hard-coded fallback pattern itself fails to compile.
fn never_matching_regex() -> Regex {
    // A negated class over both `\s` and `\S` excludes every character, so
    // there is nothing left for the class to match.
    const NEVER_MATCHES: &str = r"[^\s\S]";

    let compiled = Regex::new(NEVER_MATCHES);
    compiled.expect("static never-matching regex pattern is always valid")
}
pub static SSN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\b\d{3}-\d{2}-\d{4}\b").unwrap_or_else(|e| {
tracing::error!("Failed to compile SSN regex: {}", e);
never_matching_regex()
})
});
/// Lazily compiled pattern for email addresses.
///
/// Shape of a match, bounded by word boundaries on both sides:
///
/// * local part: one or more of ASCII letters, digits, `.`, `_`, `%`, `+`, `-`
/// * a literal `@`
/// * domain: one or more of ASCII letters, digits, `.`, `-`
/// * a literal `.` followed by a top-level domain of at least two ASCII letters
///
/// This is a pragmatic detector, not a full RFC 5322 validator.
///
/// If compilation fails, the error is logged through `tracing` and a regex
/// that never matches is stored instead.
pub static EMAIL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    // Inside a character class `|` is a literal character, not alternation,
    // so the top-level-domain class lists only the two letter ranges.
    Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b").unwrap_or_else(|e| {
        tracing::error!("Failed to compile email regex: {}", e);
        never_matching_regex()
    })
});
pub static PHONE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\b\d{3}-\d{3}-\d{4}\b").unwrap_or_else(|e| {
tracing::error!("Failed to compile phone regex: {}", e);
never_matching_regex()
})
});
pub static CREDIT_CARD_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b").unwrap_or_else(|e| {
tracing::error!("Failed to compile credit card regex: {}", e);
never_matching_regex()
})
});
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------- SSN

    /// Well-formed `ddd-dd-dddd` strings match regardless of digit values.
    #[test]
    fn test_ssn_pattern_valid() {
        for sample in ["123-45-6789", "000-00-0000", "999-99-9999"] {
            assert!(SSN_PATTERN.is_match(sample), "expected SSN match in {sample:?}");
        }
    }

    /// An SSN embedded in surrounding text is still found.
    #[test]
    fn test_ssn_pattern_in_text() {
        for sample in ["My SSN is 123-45-6789 and more text", "SSN: 123-45-6789"] {
            assert!(SSN_PATTERN.is_match(sample), "expected SSN match in {sample:?}");
        }
    }

    /// Missing separators or wrong group lengths are rejected.
    #[test]
    fn test_ssn_pattern_invalid() {
        let rejected = [
            "123456789",
            "12-345-6789",
            "1234-56-789",
            "123-45-678",
            "123-45-67890",
        ];
        for sample in rejected {
            assert!(!SSN_PATTERN.is_match(sample), "unexpected SSN match in {sample:?}");
        }
    }

    /// Word characters glued to either end prevent a match.
    #[test]
    fn test_ssn_pattern_boundary() {
        let glued = "a123-45-6789b";
        assert!(!SSN_PATTERN.is_match(glued));
    }

    // -------------------------------------------------------------- Email

    /// Common address shapes, including subdomains and multi-part TLDs.
    #[test]
    fn test_email_pattern_valid() {
        let accepted = [
            "test@example.com",
            "user.name@domain.org",
            "user+tag@domain.co.uk",
            "user123@sub.domain.com",
        ];
        for sample in accepted {
            assert!(EMAIL_PATTERN.is_match(sample), "expected email match in {sample:?}");
        }
    }

    /// Punctuation allowed in the local part.
    #[test]
    fn test_email_pattern_special_chars() {
        let accepted = [
            "user.name+tag@domain.com",
            "user%tag@domain.com",
            "user_name@domain.com",
            "user-name@domain.com",
        ];
        for sample in accepted {
            assert!(EMAIL_PATTERN.is_match(sample), "expected email match in {sample:?}");
        }
    }

    /// An address embedded in surrounding text is still found.
    #[test]
    fn test_email_pattern_in_text() {
        let accepted = [
            "Contact me at user@example.com for more info",
            "Email: admin@test.org",
        ];
        for sample in accepted {
            assert!(EMAIL_PATTERN.is_match(sample), "expected email match in {sample:?}");
        }
    }

    /// Inputs missing a local part, a domain, or a top-level domain.
    #[test]
    fn test_email_pattern_invalid() {
        let rejected = ["not an email", "@domain.com", "user@", "user@domain"];
        for sample in rejected {
            assert!(!EMAIL_PATTERN.is_match(sample), "unexpected email match in {sample:?}");
        }
    }

    // -------------------------------------------------------------- Phone

    /// Well-formed `ddd-ddd-dddd` strings match regardless of digit values.
    #[test]
    fn test_phone_pattern_valid() {
        for sample in ["123-456-7890", "000-000-0000", "999-999-9999"] {
            assert!(PHONE_PATTERN.is_match(sample), "expected phone match in {sample:?}");
        }
    }

    /// A phone number embedded in surrounding text is still found.
    #[test]
    fn test_phone_pattern_in_text() {
        for sample in ["Call me at 123-456-7890 anytime", "Phone: 123-456-7890"] {
            assert!(PHONE_PATTERN.is_match(sample), "expected phone match in {sample:?}");
        }
    }

    /// Undelimited digits, wrong grouping, and non-hyphen formats are rejected.
    #[test]
    fn test_phone_pattern_invalid() {
        let rejected = [
            "12345678901",
            "1234567890",
            "12-3456-7890",
            "(123) 456-7890",
            "123.456.7890",
        ];
        for sample in rejected {
            assert!(!PHONE_PATTERN.is_match(sample), "unexpected phone match in {sample:?}");
        }
    }

    /// Word characters glued to either end prevent a match.
    #[test]
    fn test_phone_pattern_boundary() {
        let glued = "a123-456-7890b";
        assert!(!PHONE_PATTERN.is_match(glued));
    }

    // -------------------------------------------------------- Credit card

    /// Groups separated by hyphens.
    #[test]
    fn test_credit_card_pattern_with_dashes() {
        let accepted = [
            "1234-5678-9012-3456",
            "0000-0000-0000-0000",
            "9999-9999-9999-9999",
        ];
        for sample in accepted {
            assert!(
                CREDIT_CARD_PATTERN.is_match(sample),
                "expected card match in {sample:?}"
            );
        }
    }

    /// Sixteen contiguous digits with no separators.
    #[test]
    fn test_credit_card_pattern_no_dashes() {
        for sample in ["1234567890123456", "0000000000000000"] {
            assert!(
                CREDIT_CARD_PATTERN.is_match(sample),
                "expected card match in {sample:?}"
            );
        }
    }

    /// Groups separated by single spaces.
    #[test]
    fn test_credit_card_pattern_with_spaces() {
        let spaced = "1234 5678 9012 3456";
        assert!(CREDIT_CARD_PATTERN.is_match(spaced));
    }

    /// A card number embedded in surrounding text is still found.
    #[test]
    fn test_credit_card_pattern_in_text() {
        let accepted = [
            "Card number: 1234-5678-9012-3456 expires 12/25",
            "Pay with 1234567890123456",
        ];
        for sample in accepted {
            assert!(
                CREDIT_CARD_PATTERN.is_match(sample),
                "expected card match in {sample:?}"
            );
        }
    }

    /// Too few groups, or one digit too many, are rejected.
    #[test]
    fn test_credit_card_pattern_invalid() {
        let rejected = ["123", "1234-5678", "1234-5678-9012", "12345678901234567"];
        for sample in rejected {
            assert!(
                !CREDIT_CARD_PATTERN.is_match(sample),
                "unexpected card match in {sample:?}"
            );
        }
    }

    // ------------------------------------------------------ Cross-pattern

    /// Each static yields a working regex that matches a canonical sample.
    #[test]
    fn test_all_patterns_compile() {
        assert!(SSN_PATTERN.is_match("123-45-6789"));
        assert!(EMAIL_PATTERN.is_match("test@example.com"));
        assert!(PHONE_PATTERN.is_match("123-456-7890"));
        assert!(CREDIT_CARD_PATTERN.is_match("1234-5678-9012-3456"));
    }

    /// Several kinds of PII in one string are each detected.
    #[test]
    fn test_patterns_in_mixed_content() {
        let mixed = "Contact John at john@example.com or 123-456-7890. SSN: 123-45-6789";

        assert!(EMAIL_PATTERN.is_match(mixed));
        assert!(PHONE_PATTERN.is_match(mixed));
        assert!(SSN_PATTERN.is_match(mixed));
    }

    /// Ordinary prose triggers none of the patterns.
    #[test]
    fn test_no_false_positives_in_normal_text() {
        let plain = "This is normal text without any PII data";

        assert!(!SSN_PATTERN.is_match(plain));
        assert!(!EMAIL_PATTERN.is_match(plain));
        assert!(!PHONE_PATTERN.is_match(plain));
        assert!(!CREDIT_CARD_PATTERN.is_match(plain));
    }
}