use crate::ScannerBuilder;
use crate::{EntityType, Locale, PiiEntity};
/// A pluggable detector that scans text and reports [`PiiEntity`] findings.
///
/// Implementors must be `Send + Sync` (supertrait bound). [`RecognizerRegistry`]
/// stores implementations as `Box<dyn Recognizer>` trait objects.
pub trait Recognizer: Send + Sync {
/// Returns this recognizer's identifier string.
///
/// NOTE(review): the test recognizer in this file copies this value into
/// `PiiEntity::recognizer_id`; presumably real implementations do the same —
/// confirm.
fn id(&self) -> &str;
/// Returns the entity type associated with this recognizer.
///
/// [`RecognizerRegistry::scan_entity_type`] compares this value against the
/// requested type to decide whether the recognizer runs.
fn entity_type(&self) -> EntityType;
/// Returns the locales this recognizer declares support for.
///
/// [`RecognizerRegistry::scan_locale`] runs a recognizer when this slice is
/// empty, contains the requested locale, or contains `Locale::Universal`.
fn supported_locales(&self) -> &[Locale];
/// Scans `text` and returns the findings produced for it.
fn scan(&self, text: &str) -> Vec<PiiEntity>;
/// Reports whether `_candidate` is acceptable.
///
/// The default implementation ignores its argument and accepts everything.
///
/// NOTE(review): nothing in this file calls `validate`; presumably
/// implementors override it to add extra checks — confirm against callers.
fn validate(&self, _candidate: &str) -> bool {
true
}
}
/// Collection of boxed [`Recognizer`] trait objects that can be scanned as a
/// group.
///
/// The derived `Default` yields a registry with no recognizers.
#[derive(Default)]
pub struct RecognizerRegistry {
// Recognizers are appended on registration; the scan methods visit them in
// this stored order.
recognizers: Vec<Box<dyn Recognizer>>,
}
impl RecognizerRegistry {
    /// Creates a registry that holds no recognizers.
    #[must_use]
    pub fn new() -> Self {
        Self {
            recognizers: Vec::new(),
        }
    }

    /// Boxes `recognizer` and appends it to the registry.
    pub fn register<R: Recognizer + 'static>(&mut self, recognizer: R) {
        self.register_boxed(Box::new(recognizer));
    }

    /// Appends an already boxed recognizer to the registry.
    pub fn register_boxed(&mut self, recognizer: Box<dyn Recognizer>) {
        self.recognizers.push(recognizer);
    }

    /// Returns how many recognizers have been registered.
    #[must_use]
    pub fn len(&self) -> usize {
        self.recognizers.len()
    }

    /// Returns `true` when no recognizer has been registered.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.recognizers.is_empty()
    }

    /// Runs every registered recognizer over `text` and concatenates their
    /// findings in registration order.
    #[must_use]
    pub fn scan_all(&self, text: &str) -> Vec<PiiEntity> {
        self.scan_where(text, |_| true)
    }

    /// Runs only the recognizers whose declared locales are accepted by
    /// `supports_locale` for `locale`, concatenating their findings.
    #[must_use]
    pub fn scan_locale(&self, text: &str, locale: &Locale) -> Vec<PiiEntity> {
        self.scan_where(text, |recognizer| {
            supports_locale(recognizer.supported_locales(), locale)
        })
    }

    /// Runs only the recognizers whose `entity_type()` equals `entity_type`,
    /// concatenating their findings.
    #[must_use]
    pub fn scan_entity_type(&self, text: &str, entity_type: &EntityType) -> Vec<PiiEntity> {
        self.scan_where(text, |recognizer| {
            recognizer.entity_type() == *entity_type
        })
    }

    /// Consumes the registry and hands it to `ScannerBuilder::from_registry`.
    #[must_use]
    pub fn into_scanner_builder(self) -> ScannerBuilder {
        ScannerBuilder::from_registry(self)
    }

    /// Scans `text` with each recognizer accepted by `keep`, in registration
    /// order, and gathers all findings into one vector.
    ///
    /// `keep` is evaluated immediately before the corresponding recognizer's
    /// `scan`, one recognizer at a time.
    fn scan_where(&self, text: &str, keep: impl Fn(&dyn Recognizer) -> bool) -> Vec<PiiEntity> {
        let mut findings = Vec::new();
        for entry in &self.recognizers {
            let recognizer: &dyn Recognizer = entry.as_ref();
            if keep(recognizer) {
                findings.extend(recognizer.scan(text));
            }
        }
        findings
    }
}
/// Decides whether a recognizer declaring `supported` should run for
/// `requested`.
///
/// Returns `true` when `supported` is empty, when it contains `requested`, or
/// when it contains `Locale::Universal`; otherwise returns `false`.
fn supports_locale(supported: &[Locale], requested: &Locale) -> bool {
    // An empty declaration places no restriction on the locale.
    if supported.is_empty() {
        return true;
    }
    for candidate in supported {
        if candidate == requested || *candidate == Locale::Universal {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Confidence, Span};

    /// Minimal recognizer for exercising the registry: it reports the whole
    /// input as a single `Email` finding and declares the locales it is
    /// constructed with.
    struct TestRecognizer {
        locale: Vec<Locale>,
    }

    impl Recognizer for TestRecognizer {
        fn id(&self) -> &str {
            "test_v1"
        }

        fn entity_type(&self) -> EntityType {
            EntityType::Email
        }

        fn supported_locales(&self) -> &[Locale] {
            &self.locale
        }

        fn scan(&self, text: &str) -> Vec<PiiEntity> {
            vec![PiiEntity {
                entity_type: EntityType::Email,
                span: Span::new(0, text.len()),
                text: text.to_string(),
                confidence: Confidence::new(0.9).unwrap(),
                recognizer_id: self.id().to_string(),
            }]
        }
    }

    /// Builds a registry containing one `TestRecognizer` with the given
    /// declared locales.
    fn registry_with_locales(locale: Vec<Locale>) -> RecognizerRegistry {
        let mut registry = RecognizerRegistry::new();
        registry.register(TestRecognizer { locale });
        registry
    }

    #[test]
    fn test_registry_new_is_empty() {
        assert!(RecognizerRegistry::new().is_empty());
    }

    #[test]
    fn test_registry_register_increases_len() {
        let mut registry = RecognizerRegistry::new();
        registry.register(TestRecognizer { locale: vec![] });
        assert_eq!(registry.len(), 1);
    }

    #[test]
    fn test_registry_register_boxed_increases_len() {
        let mut registry = RecognizerRegistry::new();
        registry.register_boxed(Box::new(TestRecognizer { locale: vec![] }));
        assert_eq!(registry.len(), 1);
        assert!(!registry.is_empty());
    }

    #[test]
    fn test_registry_scan_all_returns_findings() {
        let registry = registry_with_locales(vec![]);
        assert_eq!(registry.scan_all("hello").len(), 1);
    }

    #[test]
    fn test_registry_scan_all_concatenates_findings_from_every_recognizer() {
        let mut registry = registry_with_locales(vec![Locale::US]);
        registry.register(TestRecognizer {
            locale: vec![Locale::NL],
        });
        // `scan_all` ignores locales, so both recognizers contribute.
        assert_eq!(registry.scan_all("hello").len(), 2);
    }

    #[test]
    fn test_registry_scan_locale_filters_by_requested_locale() {
        let registry = registry_with_locales(vec![Locale::US]);
        assert_eq!(registry.scan_locale("hello", &Locale::NL).len(), 0);
    }

    #[test]
    fn test_registry_scan_locale_includes_matching_locale() {
        // Positive counterpart to the filtering test: without it, a
        // `scan_locale` that always returned nothing would still pass.
        let registry = registry_with_locales(vec![Locale::US]);
        assert_eq!(registry.scan_locale("hello", &Locale::US).len(), 1);
    }

    #[test]
    fn test_registry_scan_locale_includes_universal_recognizer() {
        let registry = registry_with_locales(vec![Locale::Universal]);
        assert_eq!(registry.scan_locale("hello", &Locale::NL).len(), 1);
    }

    #[test]
    fn test_registry_scan_locale_includes_recognizer_without_locales() {
        // An empty locale list places no restriction on the requested locale.
        let registry = registry_with_locales(vec![]);
        assert_eq!(registry.scan_locale("hello", &Locale::NL).len(), 1);
    }

    #[test]
    fn test_registry_scan_universal_locale_excludes_specific_locale() {
        let registry = registry_with_locales(vec![Locale::US]);
        assert_eq!(registry.scan_locale("hello", &Locale::Universal).len(), 0);
    }

    #[test]
    fn test_registry_scan_entity_type_includes_matching_type() {
        let registry = registry_with_locales(vec![]);
        assert_eq!(
            registry
                .scan_entity_type("hello", &EntityType::Email)
                .len(),
            1
        );
    }
}