use crate::recognizers::Recognizer;
use crate::types::{Detection, DetectionExplanation, EntityType, NlpArtifacts};
use regex::Regex;
use std::sync::Arc;
/// A recognizer that pairs a regular expression with a validation predicate.
///
/// The regex proposes candidate spans; a candidate is reported only when the
/// validator closure returns `true` for the matched text.
#[derive(Clone)]
pub struct ValidatorRecognizer {
/// Recognizer name; returned by `Recognizer::name` and copied into every detection.
name: String,
/// The single entity type this recognizer reports.
entity: EntityType,
/// Compiled pattern used to locate candidate matches in the input text.
regex: Regex,
/// Score assigned unchanged to every detection that passes validation.
score: f32,
/// Predicate invoked with each matched substring; `false` discards the match.
/// Held in an `Arc` so clones of the recognizer share the same closure.
validator: Arc<dyn Fn(&str) -> bool + Send + Sync>,
/// Label recorded in `DetectionExplanation::Validator` for each detection.
validator_name: String,
}
impl ValidatorRecognizer {
    /// Builds a recognizer from a regex `pattern` and a `validator` predicate.
    ///
    /// * `name` - recognizer name reported on every detection.
    /// * `entity` - the entity type emitted for validated matches.
    /// * `pattern` - regular expression source used to find candidates.
    /// * `score` - score attached to each validated match.
    /// * `validator_name` - label stored in the detection explanation.
    /// * `validator` - predicate called with the matched text.
    ///
    /// # Errors
    ///
    /// Returns the `regex::Error` produced when `pattern` fails to compile.
    pub fn new(
        name: impl Into<String>,
        entity: EntityType,
        pattern: &str,
        score: f32,
        validator_name: impl Into<String>,
        validator: Arc<dyn Fn(&str) -> bool + Send + Sync>,
    ) -> Result<Self, regex::Error> {
        let name = name.into();
        let regex = Regex::new(pattern)?;
        let validator_name = validator_name.into();
        Ok(Self {
            name,
            entity,
            regex,
            score,
            validator,
            validator_name,
        })
    }
}
impl Recognizer for ValidatorRecognizer {
    /// Returns the name this recognizer was constructed with.
    fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Returns a one-element slice holding the configured entity type.
    fn supported_entities(&self) -> &[EntityType] {
        std::slice::from_ref(&self.entity)
    }

    /// Scans `text` with the regex and keeps only matches accepted by the validator.
    ///
    /// Every returned detection carries the configured score and a
    /// `DetectionExplanation::Validator` whose `passed` flag is `true`, because
    /// matches that fail validation are dropped rather than reported.
    fn analyze(&self, text: &str, _artifacts: &NlpArtifacts) -> Vec<Detection> {
        let mut detections = Vec::new();
        for found in self.regex.find_iter(text) {
            // `as_str` yields the same slice as `text[start..end]`.
            if !(self.validator)(found.as_str()) {
                continue;
            }
            detections.push(Detection {
                entity_type: self.entity.clone(),
                start: found.start(),
                end: found.end(),
                score: self.score,
                recognizer: self.name.clone(),
                explanation: DetectionExplanation::Validator {
                    validator: self.validator_name.clone(),
                    passed: true,
                },
            });
        }
        detections
    }
}
/// Validates `value` with the Luhn checksum.
///
/// Non-digit characters are ignored. Inputs with fewer than 12 digits are
/// rejected outright; otherwise every second digit counting from the right
/// (starting with the second-to-last) is doubled, doubled values above 9 are
/// reduced by 9, and the total must be divisible by 10.
pub fn luhn_check(value: &str) -> bool {
    let digits = strip_digits(value);
    if digits.len() < 12 {
        return false;
    }
    let checksum: u32 = digits
        .iter()
        .rev()
        .enumerate()
        .map(|(position, &digit)| {
            // Odd positions from the right are the ones that get doubled.
            if position % 2 == 0 {
                digit
            } else if digit * 2 > 9 {
                digit * 2 - 9
            } else {
                digit * 2
            }
        })
        .sum();
    checksum % 10 == 0
}
/// Validates an IBAN: overall shape plus the mod-97 checksum.
///
/// Characters other than ASCII letters and digits (spaces, dashes, ...) are
/// ignored and letters are upper-cased before checking. The cleaned value must:
///
/// * be 15 to 34 characters long,
/// * start with two letters (country code) followed by two digits (check digits),
/// * leave a remainder of 1 modulo 97 once the first four characters are moved
///   to the end and every letter is replaced by its number (`A` = 10 ... `Z` = 35).
///
/// Returns `true` only when all three conditions hold.
pub fn iban_check(value: &str) -> bool {
    // Non-ASCII characters only produce bytes >= 0x80, so filtering bytes is
    // equivalent to filtering chars here.
    let cleaned: Vec<u8> = value
        .bytes()
        .filter(u8::is_ascii_alphanumeric)
        .map(|byte| byte.to_ascii_uppercase())
        .collect();
    if !(15..=34).contains(&cleaned.len()) {
        return false;
    }

    let (head, bban) = cleaned.split_at(4);
    // Without this guard an all-digit string whose mod-97 remainder happens to
    // be 1 would be accepted as an IBAN.
    let well_formed = head[..2].iter().all(u8::is_ascii_uppercase)
        && head[2..].iter().all(u8::is_ascii_digit);
    if !well_formed {
        return false;
    }

    // Running remainder over BBAN + country code + check digits. A letter
    // expands to two decimal digits, hence the shift by 100 instead of 10.
    let remainder = bban.iter().chain(head).fold(0u32, |acc, &byte| {
        if byte.is_ascii_digit() {
            (acc * 10 + u32::from(byte - b'0')) % 97
        } else {
            (acc * 100 + u32::from(byte - b'A') + 10) % 97
        }
    });
    remainder == 1
}
/// Validates a nine-digit routing number with the 3-7-1 weighted checksum.
///
/// Non-digit characters are ignored. The value must contain exactly nine
/// digits, and the sum of each digit multiplied by the repeating weights
/// 3, 7, 1 must be divisible by 10.
pub fn routing_check(value: &str) -> bool {
    const WEIGHT_PATTERN: [u32; 3] = [3, 7, 1];

    let digits = strip_digits(value);
    if digits.len() != 9 {
        return false;
    }
    let mut checksum = 0u32;
    for (digit, weight) in digits.iter().zip(WEIGHT_PATTERN.iter().cycle()) {
        checksum += digit * weight;
    }
    checksum % 10 == 0
}
/// Checks the structural rules of a nine-digit SSN-style number.
///
/// Non-digit characters are ignored. The digits are split into a three-digit
/// area, a two-digit group and a four-digit serial. The value is rejected when
/// it does not have exactly nine digits, when the area is 000, 666 or 900 and
/// above, or when the group or the serial is all zeros.
pub fn ssn_check(value: &str) -> bool {
    let digits = strip_digits(value);
    if digits.len() != 9 {
        return false;
    }
    // Reads a run of decimal digits as one number.
    let as_number = |part: &[u32]| part.iter().fold(0u32, |acc, &digit| acc * 10 + digit);

    let area = as_number(&digits[..3]);
    let group = as_number(&digits[3..5]);
    let serial = as_number(&digits[5..]);

    let area_allowed = area != 0 && area != 666 && area < 900;
    area_allowed && group != 0 && serial != 0
}
/// Checks the structural rules of a US Individual Taxpayer Identification Number.
///
/// Non-digit characters are ignored. The value must contain exactly nine
/// digits, begin with `9`, and have a fourth/fifth digit pair in one of the
/// ranges the IRS assigns to ITINs: 50-65, 70-88, 90-92 or 94-99.
pub fn itin_check(value: &str) -> bool {
    let digits = strip_digits(value);
    if digits.len() != 9 || digits[0] != 9 {
        return false;
    }
    let group = digits[3] * 10 + digits[4];
    // 50-65 belongs to the IRS-assigned ranges as well; omitting it rejects
    // valid ITINs.
    matches!(group, 50..=65 | 70..=88 | 90..=92 | 94..=99)
}
/// Checks a nine-digit tax identifier for a non-zero two-digit prefix.
///
/// Non-digit characters are ignored. Returns `true` when the value has exactly
/// nine digits and its first two digits are not both zero.
pub fn tax_id_check(value: &str) -> bool {
    let digits = strip_digits(value);
    // The length test short-circuits, so the slice below is always in bounds.
    digits.len() == 9 && digits[..2].iter().any(|&digit| digit != 0)
}
/// Validates a 15-digit IMEI-style number.
///
/// Non-digit characters are ignored. Returns `true` when the value contains
/// exactly 15 digits and passes `luhn_check`.
pub fn imei_check(value: &str) -> bool {
    let digit_count = strip_digits(value).len();
    digit_count == 15 && luhn_check(value)
}
/// Collects the ASCII decimal digits of `value`, in order, as numbers 0-9.
///
/// Every other character (separators, letters, non-ASCII digits) is dropped.
fn strip_digits(value: &str) -> Vec<u32> {
    // Non-ASCII characters encode to bytes >= 0x80, so scanning bytes selects
    // exactly the same characters as scanning chars.
    value
        .bytes()
        .filter(u8::is_ascii_digit)
        .map(|byte| u32::from(byte - b'0'))
        .collect()
}