1use crate::anonymizers::{AnonymizerConfig, AnonymizerRegistry};
6use crate::recognizers::{pattern::PatternRecognizer, RecognizerRegistry};
7use crate::types::{AnalysisMetadata, AnalysisResult, AnonymizedResult, EntityType};
8use anyhow::Result;
9use std::sync::Arc;
10use std::time::Instant;
11
12#[derive(Debug, Clone)]
14pub struct AnalyzerEngine {
15 recognizer_registry: RecognizerRegistry,
16 anonymizer_registry: AnonymizerRegistry,
17 default_language: String,
18 model_version: Option<String>,
19}
20
21impl AnalyzerEngine {
22 pub fn new() -> Self {
24 let mut recognizer_registry = RecognizerRegistry::new();
25
26 let pattern_recognizer = Arc::new(PatternRecognizer::new());
28 recognizer_registry.add_recognizer(pattern_recognizer);
29
30 Self {
31 recognizer_registry,
32 anonymizer_registry: AnonymizerRegistry::new(),
33 default_language: "en".to_string(),
34 model_version: None,
35 }
36 }
37
38 pub fn builder() -> AnalyzerEngineBuilder {
40 AnalyzerEngineBuilder::new()
41 }
42
43 pub fn with_language(mut self, language: impl Into<String>) -> Self {
45 self.default_language = language.into();
46 self
47 }
48
49 pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
51 self.model_version = Some(version.into());
52 self
53 }
54
55 pub fn recognizer_registry(&self) -> &RecognizerRegistry {
57 &self.recognizer_registry
58 }
59
60 pub fn recognizer_registry_mut(&mut self) -> &mut RecognizerRegistry {
62 &mut self.recognizer_registry
63 }
64
65 pub fn anonymizer_registry(&self) -> &AnonymizerRegistry {
67 &self.anonymizer_registry
68 }
69
70 pub fn anonymizer_registry_mut(&mut self) -> &mut AnonymizerRegistry {
72 &mut self.anonymizer_registry
73 }
74
75 pub fn analyze(&self, text: &str, language: Option<&str>) -> Result<AnalysisResult> {
77 let start = Instant::now();
78 let lang = language.unwrap_or(&self.default_language);
79
80 let detected_entities = self.recognizer_registry.analyze(text, lang)?;
81
82 let processing_time_ms = start.elapsed().as_millis() as u64;
83
84 Ok(AnalysisResult {
85 original_text: None,
86 detected_entities,
87 anonymized: None,
88 metadata: AnalysisMetadata {
89 recognizers_used: self.recognizer_registry.recognizers().len(),
90 processing_time_ms,
91 language: lang.to_string(),
92 model_version: self.model_version.clone(),
93 },
94 })
95 }
96
97 pub fn analyze_with_entities(
99 &self,
100 text: &str,
101 entity_types: &[EntityType],
102 language: Option<&str>,
103 ) -> Result<AnalysisResult> {
104 let start = Instant::now();
105 let lang = language.unwrap_or(&self.default_language);
106
107 let detected_entities =
108 self.recognizer_registry
109 .analyze_with_entities(text, lang, entity_types)?;
110
111 let processing_time_ms = start.elapsed().as_millis() as u64;
112
113 Ok(AnalysisResult {
114 original_text: None,
115 detected_entities,
116 anonymized: None,
117 metadata: AnalysisMetadata {
118 recognizers_used: self.recognizer_registry.recognizers().len(),
119 processing_time_ms,
120 language: lang.to_string(),
121 model_version: self.model_version.clone(),
122 },
123 })
124 }
125
126 pub fn anonymize(
128 &self,
129 text: &str,
130 language: Option<&str>,
131 config: &AnonymizerConfig,
132 ) -> Result<AnonymizedResult> {
133 let lang = language.unwrap_or(&self.default_language);
134
135 let analysis = self.analyze(text, Some(lang))?;
137
138 self.anonymizer_registry
140 .anonymize(text, analysis.detected_entities, config)
141 }
142
143 pub fn analyze_and_anonymize(
145 &self,
146 text: &str,
147 language: Option<&str>,
148 config: &AnonymizerConfig,
149 ) -> Result<AnalysisResult> {
150 let start = Instant::now();
151 let lang = language.unwrap_or(&self.default_language);
152
153 let mut result = self.analyze(text, Some(lang))?;
155
156 let anonymized =
158 self.anonymizer_registry
159 .anonymize(text, result.detected_entities.clone(), config)?;
160
161 result.anonymized = Some(anonymized);
162 result.metadata.processing_time_ms = start.elapsed().as_millis() as u64;
163
164 Ok(result)
165 }
166}
167
168impl Default for AnalyzerEngine {
169 fn default() -> Self {
170 Self::new()
171 }
172}
173
174pub struct AnalyzerEngineBuilder {
176 recognizer_registry: RecognizerRegistry,
177 anonymizer_registry: AnonymizerRegistry,
178 default_language: String,
179 model_version: Option<String>,
180}
181
182impl AnalyzerEngineBuilder {
183 pub fn new() -> Self {
184 Self {
185 recognizer_registry: RecognizerRegistry::new(),
186 anonymizer_registry: AnonymizerRegistry::new(),
187 default_language: "en".to_string(),
188 model_version: None,
189 }
190 }
191
192 pub fn with_recognizer_registry(mut self, registry: RecognizerRegistry) -> Self {
193 self.recognizer_registry = registry;
194 self
195 }
196
197 pub fn with_anonymizer_registry(mut self, registry: AnonymizerRegistry) -> Self {
198 self.anonymizer_registry = registry;
199 self
200 }
201
202 pub fn with_language(mut self, language: impl Into<String>) -> Self {
203 self.default_language = language.into();
204 self
205 }
206
207 pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
208 self.model_version = Some(version.into());
209 self
210 }
211
212 pub fn build(self) -> AnalyzerEngine {
213 AnalyzerEngine {
214 recognizer_registry: self.recognizer_registry,
215 anonymizer_registry: self.anonymizer_registry,
216 default_language: self.default_language,
217 model_version: self.model_version,
218 }
219 }
220}
221
222impl Default for AnalyzerEngineBuilder {
223 fn default() -> Self {
224 Self::new()
225 }
226}
227
#[cfg(test)]
mod tests {
    use super::*;
    use crate::anonymizers::AnonymizationStrategy;

    /// `new` yields an English-default engine with at least one recognizer.
    #[test]
    fn test_analyzer_engine_new() {
        let engine = AnalyzerEngine::new();
        assert_eq!(engine.default_language, "en");
        assert!(!engine.recognizer_registry.recognizers().is_empty());
    }

    /// `analyze` detects both entities and reports the default language.
    #[test]
    fn test_analyze() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";

        let result = engine.analyze(text, None).unwrap();

        assert!(result.detected_entities.len() >= 2);
        assert_eq!(result.metadata.language, "en");
        // `processing_time_ms` is deliberately not asserted to be positive:
        // it is measured in whole milliseconds and analysing a short string
        // routinely finishes in under 1 ms, which made that check flaky.
        assert!(result.metadata.recognizers_used > 0);
    }

    /// Restricting entity types yields only entities of the requested type.
    #[test]
    fn test_analyze_with_entities() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";

        let result = engine
            .analyze_with_entities(text, &[EntityType::EmailAddress], None)
            .unwrap();

        assert!(result
            .detected_entities
            .iter()
            .all(|e| e.entity_type == EntityType::EmailAddress));
    }

    /// The `Replace` strategy substitutes the entity placeholder into the text.
    #[test]
    fn test_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };

        let result = engine.anonymize(text, None, &config).unwrap();

        assert!(result.text.contains("[EMAIL_ADDRESS]"));
    }

    /// The combined call returns the detections and the anonymized text.
    #[test]
    fn test_analyze_and_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, SSN: 123-45-6789";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };

        let result = engine.analyze_and_anonymize(text, None, &config).unwrap();

        assert!(result.detected_entities.len() >= 2);
        assert!(result.anonymized.is_some());

        let anonymized = result.anonymized.unwrap();
        assert!(anonymized.text.contains("[EMAIL_ADDRESS]"));
        assert!(anonymized.text.contains("[US_SSN]"));
    }

    /// Builder settings are carried over into the built engine.
    #[test]
    fn test_builder() {
        let engine = AnalyzerEngine::builder()
            .with_language("es")
            .with_model_version("v1.0.0")
            .build();

        assert_eq!(engine.default_language, "es");
        assert_eq!(engine.model_version, Some("v1.0.0".to_string()));
    }
}