#![allow(clippy::expect_used)]
use regex::Regex;
/// Returns a copy of `text` with every recognised piece of PII replaced by a
/// bracketed placeholder.
///
/// The passes run in a fixed order — email, SSN, credit card, phone, IPv4
/// address, API key, token — and each pass operates on the output of the
/// previous one, so earlier patterns take precedence over later ones when they
/// could match overlapping text.
///
/// Placeholders used: `[EMAIL]`, `[SSN]`, `[CREDIT_CARD]`, `[PHONE]`,
/// `[IP_ADDRESS]`, `[API_KEY]`, `[TOKEN]`.
pub fn mask_pii(text: &str) -> String {
    // Order matters: see the doc comment above.
    let passes = [
        (&EMAIL_REGEX, "[EMAIL]"),
        (&SSN_REGEX, "[SSN]"),
        (&CREDIT_CARD_REGEX, "[CREDIT_CARD]"),
        (&PHONE_REGEX, "[PHONE]"),
        (&IPV4_REGEX, "[IP_ADDRESS]"),
        (&API_KEY_REGEX, "[API_KEY]"),
        (&TOKEN_REGEX, "[TOKEN]"),
    ];

    let mut masked = text.to_string();
    for (pattern, placeholder) in passes {
        // `replace_all` yields `Cow::Borrowed` when nothing matched; in that
        // case the current buffer is already correct and re-allocating it
        // would be pure overhead.
        let rewritten = match pattern.replace_all(&masked, placeholder) {
            std::borrow::Cow::Owned(updated) => Some(updated),
            std::borrow::Cow::Borrowed(_) => None,
        };
        if let Some(updated) = rewritten {
            masked = updated;
        }
    }
    masked
}
/// Reports whether `text` matches at least one of the PII patterns.
///
/// Patterns are tried in the order email, SSN, phone, credit card, IPv4
/// address, API key, token; evaluation stops at the first match.
pub fn contains_pii(text: &str) -> bool {
    // References to the lazy statics (not the regexes themselves) so that a
    // pattern is only compiled if evaluation actually reaches it.
    let detectors = [
        &EMAIL_REGEX,
        &SSN_REGEX,
        &PHONE_REGEX,
        &CREDIT_CARD_REGEX,
        &IPV4_REGEX,
        &API_KEY_REGEX,
        &TOKEN_REGEX,
    ];
    detectors.iter().any(|detector| detector.is_match(text))
}
/// Matches email addresses: a local part of letters, digits and `._%+-`,
/// an `@`, a domain of letters, digits, dots and hyphens, and a final
/// dot-separated label of at least two ASCII letters.
///
/// Compiled on first use; panics with "invalid email regex" if the pattern
/// fails to compile.
static EMAIL_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    // The final class is `[A-Za-z]` with no `|`: inside a character class a
    // pipe is a literal character, not alternation, so including it would
    // accept text such as `user@host.c|m` as an email address.
    Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b").expect("invalid email regex")
});
/// Matches nine digits grouped 3-2-4, where each group boundary may carry a
/// single hyphen or whitespace character, and the whole run sits between word
/// boundaries. A bare nine-digit number therefore also matches.
///
/// Compiled on first use; panics with "invalid SSN regex" if the pattern
/// fails to compile.
static SSN_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    const PATTERN: &str = r"\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b";
    Regex::new(PATTERN).expect("invalid SSN regex")
});
/// Matches phone numbers in one of three shapes:
///
/// 1. an optional `1` / `+1` prefix, a three-digit group (optionally wrapped
///    in parentheses), three digits, then four digits, with an optional `-`,
///    `.` or whitespace between the parts and a trailing word boundary;
/// 2. `ddd` + mandatory `-`, `.` or whitespace + `dddd`, between word
///    boundaries;
/// 3. a bare run of exactly ten digits between word boundaries.
///
/// NOTE(review): the first shape has no leading `\b`, so it can begin in the
/// middle of a longer digit run — confirm whether that is intended.
///
/// Compiled on first use; panics with "invalid phone regex" if the pattern
/// fails to compile.
static PHONE_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    const PATTERN: &str =
        r"(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b|\b\d{3}[-.\s]\d{4}\b|\b\d{10}\b";
    Regex::new(PATTERN).expect("invalid phone regex")
});
/// Matches card-number-like digit runs between word boundaries:
///
/// * 4-6-5 grouping (15 digits) with optional hyphen/whitespace separators;
/// * 4-4-4-4 grouping (16 digits) with optional hyphen/whitespace separators;
/// * a bare run of 15 or 16 digits.
///
/// No checksum validation is performed; this is purely a shape match.
///
/// Compiled on first use; panics with "invalid credit card regex" if the
/// pattern fails to compile.
static CREDIT_CARD_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    const PATTERN: &str =
        r"\b(?:\d{4}[-\s]?\d{6}[-\s]?\d{5}|\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}|\d{15,16})\b";
    Regex::new(PATTERN).expect("invalid credit card regex")
});
/// Matches dotted-quad IPv4 addresses between word boundaries. Each of the
/// four octets is limited to 0–255 by the alternation
/// `25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?`.
///
/// Compiled on first use; panics with "invalid IPv4 regex" if the pattern
/// fails to compile.
static IPV4_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    const PATTERN: &str = r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b";
    Regex::new(PATTERN).expect("invalid IPv4 regex")
});
/// Matches API-key-like strings, case-insensitively (`(?i)`), between word
/// boundaries. Accepted shapes:
///
/// * `sk_live_` or `pk_test_` followed by at least 24 alphanumerics;
/// * `ghp_` or `gho_` followed by exactly 36 alphanumerics;
/// * `AKIA` followed by exactly 16 letters/digits;
/// * `api key` / `api-key` / `api_key` / `apikey`, then `:` or `=`, optional
///   whitespace, an optional quote, and a value of at least 20 characters
///   drawn from letters, digits, `_` and `-`, then an optional closing quote.
///
/// Because the whole pattern is case-insensitive, the upper-case-only classes
/// in the source (e.g. `AKIA[A-Z0-9]{16}`) also accept lower-case letters.
///
/// Compiled on first use; panics with "invalid API key regex" if the pattern
/// fails to compile.
static API_KEY_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    const PATTERN: &str = r#"(?i)\b(?:sk_live_[a-zA-Z0-9]{24,}|pk_test_[a-zA-Z0-9]{24,}|ghp_[a-zA-Z0-9]{36}|gho_[a-zA-Z0-9]{36}|AKIA[A-Z0-9]{16}|api[-_]?key[:=]\s*['"]?[a-zA-Z0-9_\-]{20,}['"]?)\b"#;
    Regex::new(PATTERN).expect("invalid API key regex")
});
/// Matches three dot-separated segments made of letters, digits, `_` and `-`,
/// between word boundaries: the first segment is at least 40 characters long,
/// the second and third at least 6 each.
///
/// NOTE(review): presumably aimed at JWT-style bearer tokens — confirm the
/// intended token formats.
///
/// Compiled on first use; panics with "invalid token regex" if the pattern
/// fails to compile.
static TOKEN_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    const PATTERN: &str = r"\b[A-Za-z0-9_\-]{40,}\.[A-Za-z0-9_\-]{6,}\.[A-Za-z0-9_\-]{6,}\b";
    Regex::new(PATTERN).expect("invalid token regex")
});
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(input, expected)` pair masks to exactly `expected`.
    #[track_caller]
    fn assert_masked(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(mask_pii(input), expected);
        }
    }

    /// Asserts that masking leaves `input` exactly as it was.
    #[track_caller]
    fn assert_untouched(input: &str) {
        assert_eq!(mask_pii(input), input);
    }

    #[test]
    fn test_mask_email() {
        assert_masked(&[(
            "Contact john.doe+tag@example.com for details",
            "Contact [EMAIL] for details",
        )]);
    }

    #[test]
    fn test_mask_multiple_emails() {
        assert_masked(&[(
            "Email alice@test.com or bob@example.org",
            "Email [EMAIL] or [EMAIL]",
        )]);
    }

    #[test]
    fn test_mask_ssn() {
        assert_masked(&[("SSN: 123-45-6789", "SSN: [SSN]")]);
    }

    #[test]
    fn test_mask_ssn_variations() {
        assert_masked(&[
            ("123-45-6789", "[SSN]"),
            ("123 45 6789", "[SSN]"),
            ("123456789", "[SSN]"),
        ]);
    }

    #[test]
    fn test_mask_phone() {
        assert_masked(&[("Call me at (555) 123-4567", "Call me at [PHONE]")]);
    }

    #[test]
    fn test_mask_phone_variations() {
        assert_masked(&[
            ("555-123-4567", "[PHONE]"),
            ("+1-555-123-4567", "[PHONE]"),
            ("555.123.4567", "[PHONE]"),
            ("5551234567", "[PHONE]"),
        ]);
    }

    #[test]
    fn test_mask_credit_card() {
        assert_masked(&[("Card: 4532-1234-5678-9010", "Card: [CREDIT_CARD]")]);
    }

    #[test]
    fn test_mask_credit_card_variations() {
        assert_masked(&[
            ("4532 1234 5678 9010", "[CREDIT_CARD]"),
            ("4532123456789010", "[CREDIT_CARD]"),
            ("3782-822463-10005", "[CREDIT_CARD]"),
        ]);
    }

    #[test]
    fn test_mask_ip_address() {
        assert_masked(&[("Server at 192.168.1.1", "Server at [IP_ADDRESS]")]);
    }

    #[test]
    fn test_mask_api_key() {
        assert_masked(&[(
            "Use key: api_key=abcdefghij1234567890xyz",
            "Use key: [API_KEY]",
        )]);
    }

    #[test]
    fn test_mask_multiple_pii_types() {
        assert_masked(&[(
            "Contact john@example.com at 555-123-4567. SSN: 123-45-6789",
            "Contact [EMAIL] at [PHONE]. SSN: [SSN]",
        )]);
    }

    #[test]
    fn test_no_false_positives_on_normal_text() {
        assert_untouched("The year 2024 has 365 days.");
    }

    #[test]
    fn test_contains_pii() {
        // Inputs that must be flagged.
        assert!(contains_pii("Email: john@example.com"));
        assert!(contains_pii("SSN: 123-45-6789"));
        assert!(contains_pii("Call 555-1234"));

        // Inputs that must pass through as clean.
        assert!(!contains_pii("No PII here"));
        assert!(!contains_pii("Just numbers: 12345"));
    }

    #[test]
    fn test_preserves_non_pii_numbers() {
        assert_untouched("Invoice #12345 for $100.00");
    }

    #[test]
    fn test_preserves_dates() {
        assert_untouched("Meeting on 2024-01-15");
    }
}