use regex::Regex;
use std::collections::HashMap;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PIIType {
Email,
CreditCard,
IPAddress,
MACAddress,
URL,
Custom(String),
}
impl PIIType {
pub fn as_str(&self) -> &str {
match self {
PIIType::Email => "EMAIL",
PIIType::CreditCard => "CREDIT_CARD",
PIIType::IPAddress => "IP_ADDRESS",
PIIType::MACAddress => "MAC_ADDRESS",
PIIType::URL => "URL",
PIIType::Custom(name) => name,
}
}
}
#[derive(Debug, Clone)]
pub struct PIIMatch {
pub start: usize,
pub end: usize,
pub matched_text: String,
pub pii_type: PIIType,
}
pub struct PIIDetector {
pii_type: PIIType,
regex: Option<Regex>,
}
impl PIIDetector {
pub fn new(pii_type: PIIType) -> Self {
let regex = match &pii_type {
PIIType::Email => {
Some(Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap())
}
PIIType::CreditCard => Some(Regex::new(r"\b(?:\d{4}[-\s]?){3}\d{4}\b").unwrap()),
PIIType::IPAddress => Some(
Regex::new(
r"\b(?:(?:\d{1,3}\.){3}\d{1,3}|(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4})\b",
)
.unwrap(),
),
PIIType::MACAddress => {
Some(Regex::new(r"\b(?:[0-9a-fA-F]{2}[:-]){5}[0-9a-fA-F]{2}\b").unwrap())
}
PIIType::URL => Some(Regex::new(r"https?://[^\s]+").unwrap()),
PIIType::Custom(_) => None,
};
Self { pii_type, regex }
}
pub fn with_custom_pattern(pii_type: PIIType, pattern: &str) -> Result<Self, regex::Error> {
let regex = Regex::new(pattern)?;
Ok(Self {
pii_type,
regex: Some(regex),
})
}
pub fn detect(&self, text: &str) -> Vec<PIIMatch> {
let mut matches = Vec::new();
if let Some(ref regex) = self.regex {
for cap in regex.captures_iter(text) {
if let Some(m) = cap.get(0) {
let matched_text = m.as_str().to_string();
if matches!(self.pii_type, PIIType::CreditCard) {
if !Self::validate_luhn(
&matched_text.replace(|c: char| !c.is_ascii_digit(), ""),
) {
continue;
}
}
matches.push(PIIMatch {
start: m.start(),
end: m.end(),
matched_text,
pii_type: self.pii_type.clone(),
});
}
}
}
matches
}
fn validate_luhn(card_number: &str) -> bool {
let digits: Vec<u32> = card_number.chars().filter_map(|c| c.to_digit(10)).collect();
if digits.len() < 13 || digits.len() > 19 {
return false;
}
let sum: u32 = digits
.iter()
.rev()
.enumerate()
.map(|(i, &digit)| {
if i % 2 == 1 {
let doubled = digit * 2;
if doubled > 9 {
doubled - 9
} else {
doubled
}
} else {
digit
}
})
.sum();
sum % 10 == 0
}
pub fn pii_type(&self) -> &PIIType {
&self.pii_type
}
}
pub fn detect_all_pii(text: &str) -> HashMap<PIIType, Vec<PIIMatch>> {
let mut results = HashMap::new();
let detectors = vec![
PIIDetector::new(PIIType::Email),
PIIDetector::new(PIIType::CreditCard),
PIIDetector::new(PIIType::IPAddress),
PIIDetector::new(PIIType::MACAddress),
PIIDetector::new(PIIType::URL),
];
for detector in detectors {
let matches = detector.detect(text);
if !matches.is_empty() {
results.insert(detector.pii_type().clone(), matches);
}
}
results
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_email_detection() {
let detector = PIIDetector::new(PIIType::Email);
let text = "Contact me at one@example.com or two@example.com";
let matches = detector.detect(text);
assert_eq!(matches.len(), 2);
assert!(matches.iter().any(|m| m.matched_text == "one@example.com"));
}
#[test]
fn test_credit_card_detection() {
let detector = PIIDetector::new(PIIType::CreditCard);
let text = "My card is 4532-1234-5678-9010";
let _matches = detector.detect(text);
}
#[test]
fn test_ip_detection() {
let detector = PIIDetector::new(PIIType::IPAddress);
let text = "Server at 192.168.1.1 is down";
let matches = detector.detect(text);
assert!(!matches.is_empty());
assert!(matches
.iter()
.any(|m| m.matched_text.contains("192.168.1.1")));
}
#[test]
fn test_url_detection() {
let detector = PIIDetector::new(PIIType::URL);
let text = "Visit https://example.com for more info";
let matches = detector.detect(text);
assert!(!matches.is_empty());
assert!(matches.iter().any(|m| m.matched_text.contains("https://")));
}
#[test]
fn test_luhn_validation() {
assert!(PIIDetector::validate_luhn("4532015112830366"));
assert!(!PIIDetector::validate_luhn("1234567890123456"));
}
}