use crate::config::PolicyConfig;
use crate::context::ContextEnhancer;
use crate::decision::resolve;
use crate::error::PiiResult;
use crate::nlp::NlpEngine;
use crate::recognizers::Recognizer;
use crate::types::{AnalyzeResult, Detection, EntityType, Language};
/// Entity-detection pipeline that bundles an NLP engine, a list of
/// recognizers, a list of context enhancers, and a policy configuration.
///
/// All collaborators are supplied through [`Analyzer::new`] and are only
/// read (never replaced) by [`Analyzer::analyze`].
pub struct Analyzer {
/// NLP backend; `analyze` calls it first to produce the artifacts that are
/// handed to every recognizer and enhancer.
nlp: Box<dyn NlpEngine>,
/// Recognizers queried in order; each one contributes candidate detections.
recognizers: Vec<Box<dyn Recognizer>>,
/// Enhancers applied in order to the combined candidate list; each receives
/// the list mutably.
enhancers: Vec<Box<dyn ContextEnhancer>>,
/// Policy consulted for which entity types are enabled and for the
/// threshold passed to the resolution step.
policy: PolicyConfig,
}
impl Analyzer {
pub fn new(
nlp: Box<dyn NlpEngine>,
recognizers: Vec<Box<dyn Recognizer>>,
enhancers: Vec<Box<dyn ContextEnhancer>>,
policy: PolicyConfig,
) -> Self {
Self {
nlp,
recognizers,
enhancers,
policy,
}
}
pub fn analyze(&self, text: &str, language: &Language) -> PiiResult<AnalyzeResult> {
let artifacts = self.nlp.analyze(text, language)?;
let mut candidates = Vec::new();
for recognizer in &self.recognizers {
let mut detected = recognizer.analyze(text, &artifacts);
detected.retain(|det| self.policy.is_enabled(&det.entity_type));
candidates.extend(detected);
}
for enhancer in &self.enhancers {
enhancer.enhance(&mut candidates, text, &artifacts);
}
let resolved = resolve(candidates, &|det: &Detection| {
self.policy.threshold_for(&det.entity_type)
});
Ok(AnalyzeResult {
language: artifacts.language.clone(),
entities: resolved,
capabilities: artifacts.capabilities.clone(),
})
}
}
/// Returns the built-in threshold value for the given entity type.
///
/// The table has three tiers:
/// * `0.5` — URL, domain, hostname, and every `Custom` entity;
/// * `0.6` — e-mail, phone, IPv4/IPv6 address, passport, driver license,
///   bank account, crypto address, MAC address, UUID, and VIN;
/// * `0.7` — credit card, IBAN, SSN, ITIN, tax id, routing number, IMEI,
///   person, location, and organization.
///
/// The match is intentionally exhaustive (no wildcard arm), so adding a new
/// `EntityType` variant forces a decision about its threshold here.
pub fn default_threshold(entity: &EntityType) -> f32 {
    match entity {
        EntityType::Url
        | EntityType::Domain
        | EntityType::Hostname
        | EntityType::Custom(_) => 0.5,

        EntityType::Email
        | EntityType::Phone
        | EntityType::IpAddress
        | EntityType::Ipv6
        | EntityType::Passport
        | EntityType::DriverLicense
        | EntityType::BankAccount
        | EntityType::CryptoAddress
        | EntityType::MacAddress
        | EntityType::Uuid
        | EntityType::Vin => 0.6,

        EntityType::CreditCard
        | EntityType::Iban
        | EntityType::Ssn
        | EntityType::Itin
        | EntityType::TaxId
        | EntityType::RoutingNumber
        | EntityType::Imei
        | EntityType::Person
        | EntityType::Location
        | EntityType::Organization => 0.7,
    }
}