1use crate::anonymizers::{AnonymizerConfig, AnonymizerRegistry};
7use crate::recognizers::{pattern::PatternRecognizer, RecognizerRegistry};
8use crate::types::{AnalysisMetadata, AnalysisResult, AnonymizedResult, EntityType};
9use anyhow::Result;
10use std::sync::Arc;
11use std::time::Instant;
12
13#[derive(Debug, Clone)]
15pub struct AnalyzerEngine {
16 recognizer_registry: RecognizerRegistry,
17 anonymizer_registry: AnonymizerRegistry,
18 default_language: String,
19 model_version: Option<String>,
20}
21
22impl AnalyzerEngine {
23 pub fn new() -> Self {
25 let mut recognizer_registry = RecognizerRegistry::new();
26
27 let pattern_recognizer = Arc::new(PatternRecognizer::new());
29 recognizer_registry.add_recognizer(pattern_recognizer);
30
31 Self {
32 recognizer_registry,
33 anonymizer_registry: AnonymizerRegistry::new(),
34 default_language: "en".to_string(),
35 model_version: None,
36 }
37 }
38
39 pub fn builder() -> AnalyzerEngineBuilder {
41 AnalyzerEngineBuilder::new()
42 }
43
44 pub fn with_language(mut self, language: impl Into<String>) -> Self {
46 self.default_language = language.into();
47 self
48 }
49
50 pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
52 self.model_version = Some(version.into());
53 self
54 }
55
56 pub fn recognizer_registry(&self) -> &RecognizerRegistry {
58 &self.recognizer_registry
59 }
60
61 pub fn recognizer_registry_mut(&mut self) -> &mut RecognizerRegistry {
63 &mut self.recognizer_registry
64 }
65
66 pub fn anonymizer_registry(&self) -> &AnonymizerRegistry {
68 &self.anonymizer_registry
69 }
70
71 pub fn anonymizer_registry_mut(&mut self) -> &mut AnonymizerRegistry {
73 &mut self.anonymizer_registry
74 }
75
76 pub fn analyze(&self, text: &str, language: Option<&str>) -> Result<AnalysisResult> {
78 let start = Instant::now();
79 let lang = language.unwrap_or(&self.default_language);
80
81 let detected_entities = self.recognizer_registry.analyze(text, lang)?;
82
83 let processing_time_ms = start.elapsed().as_millis() as u64;
84
85 Ok(AnalysisResult {
86 original_text: None,
87 detected_entities,
88 anonymized: None,
89 metadata: AnalysisMetadata {
90 recognizers_used: self.recognizer_registry.recognizers().len(),
91 processing_time_ms,
92 language: lang.to_string(),
93 model_version: self.model_version.clone(),
94 },
95 })
96 }
97
98 pub fn analyze_with_entities(
100 &self,
101 text: &str,
102 entity_types: &[EntityType],
103 language: Option<&str>,
104 ) -> Result<AnalysisResult> {
105 let start = Instant::now();
106 let lang = language.unwrap_or(&self.default_language);
107
108 let detected_entities =
109 self.recognizer_registry
110 .analyze_with_entities(text, lang, entity_types)?;
111
112 let processing_time_ms = start.elapsed().as_millis() as u64;
113
114 Ok(AnalysisResult {
115 original_text: None,
116 detected_entities,
117 anonymized: None,
118 metadata: AnalysisMetadata {
119 recognizers_used: self.recognizer_registry.recognizers().len(),
120 processing_time_ms,
121 language: lang.to_string(),
122 model_version: self.model_version.clone(),
123 },
124 })
125 }
126
127 pub fn anonymize(
129 &self,
130 text: &str,
131 language: Option<&str>,
132 config: &AnonymizerConfig,
133 ) -> Result<AnonymizedResult> {
134 let lang = language.unwrap_or(&self.default_language);
135
136 let analysis = self.analyze(text, Some(lang))?;
138
139 self.anonymizer_registry
141 .anonymize(text, analysis.detected_entities, config)
142 }
143
144 pub fn analyze_and_anonymize(
146 &self,
147 text: &str,
148 language: Option<&str>,
149 config: &AnonymizerConfig,
150 ) -> Result<AnalysisResult> {
151 let start = Instant::now();
152 let lang = language.unwrap_or(&self.default_language);
153
154 let mut result = self.analyze(text, Some(lang))?;
156
157 let anonymized =
159 self.anonymizer_registry
160 .anonymize(text, result.detected_entities.clone(), config)?;
161
162 result.anonymized = Some(anonymized);
163 result.metadata.processing_time_ms = start.elapsed().as_millis() as u64;
164
165 Ok(result)
166 }
167}
168
169impl Default for AnalyzerEngine {
170 fn default() -> Self {
171 Self::new()
172 }
173}
174
175pub struct AnalyzerEngineBuilder {
177 recognizer_registry: RecognizerRegistry,
178 anonymizer_registry: AnonymizerRegistry,
179 default_language: String,
180 model_version: Option<String>,
181}
182
183impl AnalyzerEngineBuilder {
184 pub fn new() -> Self {
185 Self {
186 recognizer_registry: RecognizerRegistry::new(),
187 anonymizer_registry: AnonymizerRegistry::new(),
188 default_language: "en".to_string(),
189 model_version: None,
190 }
191 }
192
193 pub fn with_recognizer_registry(mut self, registry: RecognizerRegistry) -> Self {
194 self.recognizer_registry = registry;
195 self
196 }
197
198 pub fn with_anonymizer_registry(mut self, registry: AnonymizerRegistry) -> Self {
199 self.anonymizer_registry = registry;
200 self
201 }
202
203 pub fn with_language(mut self, language: impl Into<String>) -> Self {
204 self.default_language = language.into();
205 self
206 }
207
208 pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
209 self.model_version = Some(version.into());
210 self
211 }
212
213 pub fn build(self) -> AnalyzerEngine {
214 AnalyzerEngine {
215 recognizer_registry: self.recognizer_registry,
216 anonymizer_registry: self.anonymizer_registry,
217 default_language: self.default_language,
218 model_version: self.model_version,
219 }
220 }
221}
222
223impl Default for AnalyzerEngineBuilder {
224 fn default() -> Self {
225 Self::new()
226 }
227}
228
#[cfg(test)]
mod tests {
    use super::*;
    use crate::anonymizers::AnonymizationStrategy;

    /// `new` defaults to English and registers at least one recognizer.
    #[test]
    fn test_analyzer_engine_new() {
        let engine = AnalyzerEngine::new();
        assert_eq!(engine.default_language, "en");
        assert!(!engine.recognizer_registry.recognizers().is_empty());
    }

    /// Analysis without an explicit language uses the default and finds both
    /// the e-mail address and the phone number.
    #[test]
    fn test_analyze() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";

        let result = engine.analyze(text, None).unwrap();

        assert!(result.detected_entities.len() >= 2);
        assert_eq!(result.metadata.language, "en");
        // No assertion on `processing_time_ms`: a short input can be analyzed
        // in under one millisecond, which legitimately reports 0.
        assert!(result.metadata.recognizers_used > 0);
    }

    /// Restricting the entity types yields only entities of those types.
    #[test]
    fn test_analyze_with_entities() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";

        let result = engine
            .analyze_with_entities(text, &[EntityType::EmailAddress], None)
            .unwrap();

        assert!(result
            .detected_entities
            .iter()
            .all(|e| e.entity_type == EntityType::EmailAddress));
    }

    /// The replace strategy substitutes the entity-type placeholder.
    #[test]
    fn test_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };

        let result = engine.anonymize(text, None, &config).unwrap();

        assert!(result.text.contains("[EMAIL_ADDRESS]"));
    }

    /// The combined call returns both the detected entities and the
    /// anonymized text.
    #[test]
    fn test_analyze_and_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, SSN: 123-45-6789";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };

        let result = engine.analyze_and_anonymize(text, None, &config).unwrap();

        assert!(result.detected_entities.len() >= 2);
        assert!(result.anonymized.is_some());

        let anonymized = result.anonymized.unwrap();
        assert!(anonymized.text.contains("[EMAIL_ADDRESS]"));
        assert!(anonymized.text.contains("[US_SSN]"));
    }

    /// Builder settings are carried over to the built engine.
    #[test]
    fn test_builder() {
        let engine = AnalyzerEngine::builder()
            .with_language("es")
            .with_model_version("v1.0.0")
            .build();

        assert_eq!(engine.default_language, "es");
        assert_eq!(engine.model_version, Some("v1.0.0".to_string()));
    }
}