use crate::anonymizers::{AnonymizerConfig, AnonymizerRegistry};
use crate::recognizers::{pattern::PatternRecognizer, RecognizerRegistry};
use crate::types::{AnalysisMetadata, AnalysisResult, AnonymizedResult, EntityType};
use anyhow::Result;
use std::sync::Arc;
use std::time::Instant;
/// Engine that combines a recognizer registry (entity detection) with an
/// anonymizer registry (rewriting of detected entities).
#[derive(Debug, Clone)]
pub struct AnalyzerEngine {
/// Registry queried by the `analyze*` methods to detect entities in text.
recognizer_registry: RecognizerRegistry,
/// Registry used by the `anonymize*` methods to anonymize detected entities.
anonymizer_registry: AnonymizerRegistry,
/// Language used whenever a caller passes `None` as the language.
default_language: String,
/// Optional version label copied verbatim into each result's metadata.
model_version: Option<String>,
}
impl AnalyzerEngine {
pub fn new() -> Self {
let mut recognizer_registry = RecognizerRegistry::new();
let pattern_recognizer = Arc::new(PatternRecognizer::new());
recognizer_registry.add_recognizer(pattern_recognizer);
Self {
recognizer_registry,
anonymizer_registry: AnonymizerRegistry::new(),
default_language: "en".to_string(),
model_version: None,
}
}
pub fn builder() -> AnalyzerEngineBuilder {
AnalyzerEngineBuilder::new()
}
pub fn with_language(mut self, language: impl Into<String>) -> Self {
self.default_language = language.into();
self
}
pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
self.model_version = Some(version.into());
self
}
pub fn recognizer_registry(&self) -> &RecognizerRegistry {
&self.recognizer_registry
}
pub fn recognizer_registry_mut(&mut self) -> &mut RecognizerRegistry {
&mut self.recognizer_registry
}
pub fn anonymizer_registry(&self) -> &AnonymizerRegistry {
&self.anonymizer_registry
}
pub fn anonymizer_registry_mut(&mut self) -> &mut AnonymizerRegistry {
&mut self.anonymizer_registry
}
pub fn analyze(&self, text: &str, language: Option<&str>) -> Result<AnalysisResult> {
let start = Instant::now();
let lang = language.unwrap_or(&self.default_language);
let detected_entities = self.recognizer_registry.analyze(text, lang)?;
let processing_time_ms = start.elapsed().as_millis() as u64;
Ok(AnalysisResult {
original_text: None,
detected_entities,
anonymized: None,
metadata: AnalysisMetadata {
recognizers_used: self.recognizer_registry.recognizers().len(),
processing_time_ms,
language: lang.to_string(),
model_version: self.model_version.clone(),
},
})
}
pub fn analyze_with_entities(
&self,
text: &str,
entity_types: &[EntityType],
language: Option<&str>,
) -> Result<AnalysisResult> {
let start = Instant::now();
let lang = language.unwrap_or(&self.default_language);
let detected_entities =
self.recognizer_registry
.analyze_with_entities(text, lang, entity_types)?;
let processing_time_ms = start.elapsed().as_millis() as u64;
Ok(AnalysisResult {
original_text: None,
detected_entities,
anonymized: None,
metadata: AnalysisMetadata {
recognizers_used: self.recognizer_registry.recognizers().len(),
processing_time_ms,
language: lang.to_string(),
model_version: self.model_version.clone(),
},
})
}
pub fn anonymize(
&self,
text: &str,
language: Option<&str>,
config: &AnonymizerConfig,
) -> Result<AnonymizedResult> {
let lang = language.unwrap_or(&self.default_language);
let analysis = self.analyze(text, Some(lang))?;
self.anonymizer_registry
.anonymize(text, analysis.detected_entities, config)
}
pub fn analyze_and_anonymize(
&self,
text: &str,
language: Option<&str>,
config: &AnonymizerConfig,
) -> Result<AnalysisResult> {
let start = Instant::now();
let lang = language.unwrap_or(&self.default_language);
let mut result = self.analyze(text, Some(lang))?;
let anonymized =
self.anonymizer_registry
.anonymize(text, result.detected_entities.clone(), config)?;
result.anonymized = Some(anonymized);
result.metadata.processing_time_ms = start.elapsed().as_millis() as u64;
Ok(result)
}
}
impl Default for AnalyzerEngine {
fn default() -> Self {
Self::new()
}
}
/// Builder that assembles an [`AnalyzerEngine`] from explicitly supplied parts.
///
/// Derives `Debug` and `Clone` to match [`AnalyzerEngine`], which holds the
/// same field types.
#[derive(Debug, Clone)]
pub struct AnalyzerEngineBuilder {
    /// Recognizer registry handed to the built engine.
    recognizer_registry: RecognizerRegistry,
    /// Anonymizer registry handed to the built engine.
    anonymizer_registry: AnonymizerRegistry,
    /// Default language of the built engine.
    default_language: String,
    /// Optional model version of the built engine.
    model_version: Option<String>,
}
impl AnalyzerEngineBuilder {
    /// Creates a builder with `RecognizerRegistry::new()`,
    /// `AnonymizerRegistry::new()`, `"en"` as the default language and no
    /// model version.
    ///
    /// Unlike [`AnalyzerEngine::new`], no `PatternRecognizer` is added to the
    /// recognizer registry here; supply a populated registry through
    /// [`AnalyzerEngineBuilder::with_recognizer_registry`] if one is needed.
    #[must_use]
    pub fn new() -> Self {
        Self {
            recognizer_registry: RecognizerRegistry::new(),
            anonymizer_registry: AnonymizerRegistry::new(),
            default_language: "en".to_string(),
            model_version: None,
        }
    }

    /// Replaces the recognizer registry.
    #[must_use]
    pub fn with_recognizer_registry(mut self, registry: RecognizerRegistry) -> Self {
        self.recognizer_registry = registry;
        self
    }

    /// Replaces the anonymizer registry.
    #[must_use]
    pub fn with_anonymizer_registry(mut self, registry: AnonymizerRegistry) -> Self {
        self.anonymizer_registry = registry;
        self
    }

    /// Sets the default language of the engine being built.
    #[must_use]
    pub fn with_language(mut self, language: impl Into<String>) -> Self {
        self.default_language = language.into();
        self
    }

    /// Sets the model version of the engine being built.
    #[must_use]
    pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
        self.model_version = Some(version.into());
        self
    }

    /// Consumes the builder and moves its fields into an [`AnalyzerEngine`].
    #[must_use]
    pub fn build(self) -> AnalyzerEngine {
        AnalyzerEngine {
            recognizer_registry: self.recognizer_registry,
            anonymizer_registry: self.anonymizer_registry,
            default_language: self.default_language,
            model_version: self.model_version,
        }
    }
}
impl Default for AnalyzerEngineBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::anonymizers::AnonymizationStrategy;

    /// `new` defaults to English and registers at least one recognizer.
    #[test]
    fn test_analyzer_engine_new() {
        let engine = AnalyzerEngine::new();
        assert_eq!(engine.default_language, "en");
        assert!(!engine.recognizer_registry.recognizers().is_empty());
    }

    /// Analysis with no explicit language uses the default and fills metadata.
    #[test]
    fn test_analyze() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";
        let result = engine.analyze(text, None).unwrap();
        assert!(result.detected_entities.len() >= 2);
        assert_eq!(result.metadata.language, "en");
        // `processing_time_ms` holds whole milliseconds and may be 0 for a
        // fast run, so it is deliberately not asserted to be positive.
        assert_eq!(
            result.metadata.recognizers_used,
            engine.recognizer_registry.recognizers().len()
        );
    }

    /// Restricting entity types yields only entities of those types.
    #[test]
    fn test_analyze_with_entities() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";
        let result = engine
            .analyze_with_entities(text, &[EntityType::EmailAddress], None)
            .unwrap();
        assert!(result
            .detected_entities
            .iter()
            .all(|e| e.entity_type == EntityType::EmailAddress));
    }

    /// The replace strategy substitutes a placeholder for a detected email.
    #[test]
    fn test_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };
        let result = engine.anonymize(text, None, &config).unwrap();
        assert!(result.text.contains("[EMAIL_ADDRESS]"));
    }

    /// The combined call returns both the detections and the anonymized text.
    #[test]
    fn test_analyze_and_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, SSN: 123-45-6789";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };
        let result = engine.analyze_and_anonymize(text, None, &config).unwrap();
        assert!(result.detected_entities.len() >= 2);
        let anonymized = result
            .anonymized
            .expect("analyze_and_anonymize should populate `anonymized`");
        assert!(anonymized.text.contains("[EMAIL_ADDRESS]"));
        assert!(anonymized.text.contains("[US_SSN]"));
    }

    /// Builder settings are carried over into the built engine.
    #[test]
    fn test_builder() {
        let engine = AnalyzerEngine::builder()
            .with_language("es")
            .with_model_version("v1.0.0")
            .build();
        assert_eq!(engine.default_language, "es");
        assert_eq!(engine.model_version, Some("v1.0.0".to_string()));
    }
}