use crate::recognizers::regex::RegexRecognizer;
use crate::recognizers::validator::{
iban_check, imei_check, itin_check, luhn_check, routing_check, ssn_check, tax_id_check,
ValidatorRecognizer,
};
use crate::recognizers::Recognizer;
use crate::types::EntityType;
use std::sync::Arc;
/// Builds the built-in list of recognizers, in a fixed registration order:
/// the network/contact regex recognizers first, then the validator-backed
/// recognizers, then the remaining identifier regex recognizers.
///
/// Each constructor call passes, in order: a recognizer name, the
/// [`EntityType`] it reports, the regex pattern, a numeric weight
/// (presumably a confidence score — confirm against the constructors), and a
/// short label. Validator recognizers additionally receive a check function
/// wrapped in an [`Arc`].
///
/// A recognizer whose constructor returns `Err` is skipped rather than
/// aborting the whole list, so the returned vector may hold fewer entries
/// than are registered here.
pub fn default_recognizers() -> Vec<Box<dyn Recognizer>> {
    /// Boxes and appends `built` when construction succeeded; an `Err` is
    /// deliberately dropped so one bad definition cannot disable the rest.
    fn register<R, E>(list: &mut Vec<Box<dyn Recognizer>>, built: Result<R, E>)
    where
        R: Recognizer + 'static,
    {
        if let Ok(recognizer) = built {
            list.push(Box::new(recognizer));
        }
    }

    // 21 recognizers are registered below; reserve once up front.
    let mut recognizers: Vec<Box<dyn Recognizer>> = Vec::with_capacity(21);

    // --- Network and contact identifiers (regex only) ---
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_email",
            EntityType::Email,
            r"(?i)[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}",
            0.8,
            "email",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_url",
            EntityType::Url,
            r"(?i)\bhttps?://[A-Z0-9.-]+\.[A-Z]{2,}(?:/[^\s]*)?",
            0.7,
            "url",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_domain",
            EntityType::Domain,
            r"(?i)\b(?:[A-Z0-9-]+\.)+[A-Z]{2,}\b",
            0.5,
            "domain",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_hostname",
            EntityType::Hostname,
            r"(?i)\b[a-z0-9][a-z0-9-]*(?:\.[a-z0-9][a-z0-9-]*)+\b",
            0.5,
            "hostname",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_phone",
            EntityType::Phone,
            r"\+?[0-9][0-9\s\-()]{7,}[0-9]",
            0.6,
            "phone",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_ip",
            EntityType::IpAddress,
            r"\b(?:\d{1,3}\.){3}\d{1,3}\b",
            0.7,
            "ipv4",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_ipv6",
            EntityType::Ipv6,
            r"\b(?:[0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}\b",
            0.7,
            "ipv6",
        ),
    );

    // --- Identifiers with a validator function applied to regex candidates ---
    register(
        &mut recognizers,
        ValidatorRecognizer::new(
            "validator_credit_card",
            EntityType::CreditCard,
            r"(?:\d[ -]*?){13,19}",
            0.9,
            "luhn",
            Arc::new(luhn_check),
        ),
    );
    register(
        &mut recognizers,
        ValidatorRecognizer::new(
            "validator_iban",
            EntityType::Iban,
            r"\b[A-Z]{2}\d{2}[A-Z0-9]{11,30}\b",
            0.8,
            "iban",
            Arc::new(iban_check),
        ),
    );
    register(
        &mut recognizers,
        ValidatorRecognizer::new(
            "validator_ssn",
            EntityType::Ssn,
            r"\b\d{3}[- ]?\d{2}[- ]?\d{4}\b",
            0.8,
            "ssn",
            Arc::new(ssn_check),
        ),
    );
    register(
        &mut recognizers,
        ValidatorRecognizer::new(
            "validator_itin",
            EntityType::Itin,
            r"\b9\d{2}[- ]?\d{2}[- ]?\d{4}\b",
            0.8,
            "itin",
            Arc::new(itin_check),
        ),
    );
    register(
        &mut recognizers,
        ValidatorRecognizer::new(
            "validator_tax_id",
            EntityType::TaxId,
            r"\b\d{2}-\d{7}\b",
            0.75,
            "tax_id",
            Arc::new(tax_id_check),
        ),
    );
    register(
        &mut recognizers,
        ValidatorRecognizer::new(
            "validator_routing_number",
            EntityType::RoutingNumber,
            r"\b\d{9}\b",
            0.8,
            "routing_number",
            Arc::new(routing_check),
        ),
    );
    register(
        &mut recognizers,
        ValidatorRecognizer::new(
            "validator_imei",
            EntityType::Imei,
            r"\b\d{15}\b",
            0.75,
            "imei",
            Arc::new(imei_check),
        ),
    );

    // --- Keyword-anchored and fixed-format identifiers (regex only) ---
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_passport",
            EntityType::Passport,
            r"(?i)\b(?:passport|pass)\s*(?:no\.?|number)?\s*[:#-]?\s*[A-Z0-9]{6,9}\b",
            0.6,
            "passport",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_driver_license",
            EntityType::DriverLicense,
            r"(?i)\b(?:driver(?:'s)?\s*license|dl)\s*[:#-]?\s*[A-Z0-9]{4,12}\b",
            0.6,
            "driver_license",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_bank_account",
            EntityType::BankAccount,
            r"(?i)\b(?:account|acct)\s*(?:number|no\.?)?\s*[:#-]?\s*\d{6,17}\b",
            0.5,
            "bank_account",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_crypto_address",
            EntityType::CryptoAddress,
            // No global `(?i)` here: case folding would re-admit `l`, `I` and
            // `O` into the Base58 class of the last alternative, which exists
            // precisely to exclude them. Case-insensitivity is instead spelled
            // out on the `0x…` and `bc1…` alternatives only.
            r"\b(?:0[xX][a-fA-F0-9]{40}|[bB][cC]1[0-9a-zA-Z]{25,71}|[13][a-km-zA-HJ-NP-Z1-9]{25,34})\b",
            0.7,
            "crypto_address",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_mac_address",
            EntityType::MacAddress,
            r"\b(?:[0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}\b",
            0.7,
            "mac_address",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_uuid",
            EntityType::Uuid,
            r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}\b",
            0.7,
            "uuid",
        ),
    );
    register(
        &mut recognizers,
        RegexRecognizer::new(
            "regex_vin",
            EntityType::Vin,
            r"\b[A-HJ-NPR-Z0-9]{17}\b",
            0.6,
            "vin",
        ),
    );

    recognizers
}