use crate::recognizers::Recognizer;
use crate::types::{Detection, DetectionExplanation, EntityType, NlpArtifacts};
use regex::Regex;
/// A recognizer that reports every match of one compiled regular expression
/// as a detection of a single entity type with a fixed score.
#[derive(Debug, Clone)]
pub struct RegexRecognizer {
    /// Recognizer name; returned by `name()` and copied into each detection.
    name: String,
    /// The one entity type this recognizer reports.
    entity: EntityType,
    /// Pattern compiled once at construction and reused for every analysis.
    regex: Regex,
    /// Score assigned to every detection produced by this recognizer.
    score: f32,
    /// Label for the pattern, carried in each detection's explanation.
    pattern_name: String,
}
impl RegexRecognizer {
    /// Builds a recognizer for `entity` from the regular expression `pattern`.
    ///
    /// `name` identifies the recognizer, `score` is attached to every
    /// detection it produces, and `pattern_name` labels the pattern in the
    /// detection explanation.
    ///
    /// # Errors
    ///
    /// Returns the `regex::Error` from `Regex::new` when `pattern` fails to
    /// compile.
    pub fn new(
        name: impl Into<String>,
        entity: EntityType,
        pattern: &str,
        score: f32,
        pattern_name: impl Into<String>,
    ) -> Result<Self, regex::Error> {
        let name = name.into();
        // Compile up front so an invalid pattern is rejected at construction time.
        let regex = Regex::new(pattern)?;
        let pattern_name = pattern_name.into();

        Ok(Self {
            name,
            entity,
            regex,
            score,
            pattern_name,
        })
    }
}
impl Recognizer for RegexRecognizer {
    /// Returns the name this recognizer was constructed with.
    fn name(&self) -> &str {
        &self.name
    }

    /// Returns a one-element slice borrowing the configured entity type.
    fn supported_entities(&self) -> &[EntityType] {
        std::slice::from_ref(&self.entity)
    }

    /// Scans `text` for non-overlapping matches of the compiled pattern and
    /// returns one detection per non-empty match.
    ///
    /// Each detection carries the configured entity type, score, recognizer
    /// name and pattern name. `start` and `end` are the byte offsets reported
    /// by the regex match, not character indices. The NLP artifacts are not
    /// used by this recognizer.
    fn analyze(&self, text: &str, _artifacts: &NlpArtifacts) -> Vec<Detection> {
        self.regex
            .find_iter(text)
            // A pattern that can match the empty string (e.g. `\d*`) yields
            // zero-width matches; such a span covers no text, so skip it.
            .filter(|m| m.start() < m.end())
            .map(|m| Detection {
                entity_type: self.entity.clone(),
                start: m.start(),
                end: m.end(),
                score: self.score,
                recognizer: self.name.clone(),
                explanation: DetectionExplanation::Regex {
                    pattern_name: self.pattern_name.clone(),
                },
            })
            .collect()
    }
}