use crate::error::PrivacyError;
use regex::Regex;
use serde::Serialize;
/// Category of personally identifiable information reported by
/// `PiiClassifier::classify`.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)]
#[non_exhaustive]
pub enum PiiClassification {
/// The input matched the classifier's email pattern.
Email,
/// The input matched the classifier's phone-number pattern.
PhoneNumber,
/// The input matched the classifier's IPv4 pattern.
IpAddress,
/// The input matched the classifier's IMEI (15-digit) pattern.
DeviceIdentifier,
/// The input matched a caller-registered pattern; the payload is the name
/// that pattern was registered under.
Custom(String),
/// No built-in or custom pattern matched the input.
None,
}
/// A caller-registered pattern paired with the label reported when it matches.
struct CustomPattern {
/// Label returned inside `PiiClassification::Custom` when `regex` matches.
name: String,
/// Compiled form of the pattern supplied at registration time.
regex: Regex,
}
/// Regex-based detector that assigns a `PiiClassification` to a string.
///
/// Holds one compiled pattern per built-in category plus any number of
/// caller-registered custom patterns.
pub struct PiiClassifier {
/// Compiled pattern used to detect email addresses.
email_re: Regex,
/// Compiled pattern used to detect phone numbers.
phone_re: Regex,
/// Compiled pattern used to detect IPv4 addresses.
ipv4_re: Regex,
/// Compiled pattern used to detect IMEI-style device identifiers.
imei_re: Regex,
/// Caller-registered patterns, kept in registration order.
custom_patterns: Vec<CustomPattern>,
}
impl PiiClassifier {
    /// Creates a classifier with the built-in email, phone, IPv4 and IMEI
    /// patterns and no custom patterns.
    ///
    /// Every call compiles the four built-in patterns, so construct the
    /// classifier once and reuse it rather than building it per input.
    ///
    /// # Panics
    ///
    /// Panics only if one of the hard-coded patterns fails to compile, which
    /// would be a programming error in this function rather than a runtime
    /// condition.
    #[must_use]
    pub fn new() -> Self {
        Self {
            email_re: Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
                .expect("built-in email pattern must compile"),
            // A leading `+`, then at least eight digits/spaces/hyphens that
            // start and end on a digit.
            phone_re: Regex::new(r"\+\d[\d\s\-]{6,}\d")
                .expect("built-in phone pattern must compile"),
            // Each octet is limited to 0-255 (leading zeros tolerated) so that
            // strings such as `999.999.999.999` are not reported as addresses.
            // ASCII digits only: `[0-9]` rather than the Unicode-aware `\d`.
            ipv4_re: Regex::new(
                r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
            )
            .expect("built-in IPv4 pattern must compile"),
            // Exactly fifteen consecutive digits delimited by word boundaries.
            imei_re: Regex::new(r"\b\d{15}\b").expect("built-in IMEI pattern must compile"),
            custom_patterns: Vec::new(),
        }
    }

    /// Registers an additional pattern that `classify` reports as
    /// `PiiClassification::Custom(name)`.
    ///
    /// Custom patterns are consulted only after every built-in pattern has
    /// failed to match, and in the order in which they were registered.
    ///
    /// # Errors
    ///
    /// Returns `PrivacyError::InvalidPattern` carrying the regex compiler's
    /// message when `pattern` is not a valid regular expression. The
    /// classifier is left unchanged in that case.
    pub fn add_custom_pattern(&mut self, name: &str, pattern: &str) -> Result<(), PrivacyError> {
        let regex =
            Regex::new(pattern).map_err(|e| PrivacyError::InvalidPattern(e.to_string()))?;
        self.custom_patterns.push(CustomPattern {
            name: name.to_string(),
            regex,
        });
        Ok(())
    }

    /// Returns the first category whose pattern matches anywhere in `input`.
    ///
    /// The built-in patterns are unanchored, so a match on any substring of
    /// `input` counts. Precedence is fixed: email, device identifier (IMEI),
    /// phone number, IPv4 address, then custom patterns in registration
    /// order. When nothing matches, `PiiClassification::None` is returned.
    ///
    /// Because the IMEI check runs before the phone check, a `+` followed by
    /// exactly fifteen digits is reported as `DeviceIdentifier`.
    #[must_use]
    pub fn classify(&self, input: &str) -> PiiClassification {
        if self.email_re.is_match(input) {
            return PiiClassification::Email;
        }
        if self.imei_re.is_match(input) {
            return PiiClassification::DeviceIdentifier;
        }
        if self.phone_re.is_match(input) {
            return PiiClassification::PhoneNumber;
        }
        if self.ipv4_re.is_match(input) {
            return PiiClassification::IpAddress;
        }

        // Fall back to caller-registered patterns; the first one that matches
        // wins.
        self.custom_patterns
            .iter()
            .find(|custom| custom.regex.is_match(input))
            .map_or(PiiClassification::None, |custom| {
                PiiClassification::Custom(custom.name.clone())
            })
    }
}
impl Default for PiiClassifier {
fn default() -> Self {
Self::new()
}
}