Skip to main content

redact_core/engine/
mod.rs

// Copyright (c) 2026 Censgate LLC.
// Licensed under the Business Source License 1.1 (BUSL-1.1).
// See the LICENSE file in the project root for license details,
// including the Additional Use Grant, Change Date, and Change License.
5
6use crate::anonymizers::{AnonymizerConfig, AnonymizerRegistry};
7use crate::recognizers::{pattern::PatternRecognizer, RecognizerRegistry};
8use crate::types::{AnalysisMetadata, AnalysisResult, AnonymizedResult, EntityType};
9use anyhow::Result;
10use std::sync::Arc;
11use std::time::Instant;
12
13/// Main analyzer engine that coordinates recognition and anonymization
14#[derive(Debug, Clone)]
15pub struct AnalyzerEngine {
16    recognizer_registry: RecognizerRegistry,
17    anonymizer_registry: AnonymizerRegistry,
18    default_language: String,
19    model_version: Option<String>,
20}
21
22impl AnalyzerEngine {
23    /// Create a new analyzer engine with default recognizers
24    pub fn new() -> Self {
25        let mut recognizer_registry = RecognizerRegistry::new();
26
27        // Add default pattern recognizer
28        let pattern_recognizer = Arc::new(PatternRecognizer::new());
29        recognizer_registry.add_recognizer(pattern_recognizer);
30
31        Self {
32            recognizer_registry,
33            anonymizer_registry: AnonymizerRegistry::new(),
34            default_language: "en".to_string(),
35            model_version: None,
36        }
37    }
38
39    /// Create a builder for custom configuration
40    pub fn builder() -> AnalyzerEngineBuilder {
41        AnalyzerEngineBuilder::new()
42    }
43
44    /// Set the default language
45    pub fn with_language(mut self, language: impl Into<String>) -> Self {
46        self.default_language = language.into();
47        self
48    }
49
50    /// Set the model version (for NER)
51    pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
52        self.model_version = Some(version.into());
53        self
54    }
55
56    /// Get the recognizer registry
57    pub fn recognizer_registry(&self) -> &RecognizerRegistry {
58        &self.recognizer_registry
59    }
60
61    /// Get mutable access to the recognizer registry
62    pub fn recognizer_registry_mut(&mut self) -> &mut RecognizerRegistry {
63        &mut self.recognizer_registry
64    }
65
66    /// Get the anonymizer registry
67    pub fn anonymizer_registry(&self) -> &AnonymizerRegistry {
68        &self.anonymizer_registry
69    }
70
71    /// Get mutable access to the anonymizer registry
72    pub fn anonymizer_registry_mut(&mut self) -> &mut AnonymizerRegistry {
73        &mut self.anonymizer_registry
74    }
75
76    /// Analyze text and detect PII entities
77    pub fn analyze(&self, text: &str, language: Option<&str>) -> Result<AnalysisResult> {
78        let start = Instant::now();
79        let lang = language.unwrap_or(&self.default_language);
80
81        let detected_entities = self.recognizer_registry.analyze(text, lang)?;
82
83        let processing_time_ms = start.elapsed().as_millis() as u64;
84
85        Ok(AnalysisResult {
86            original_text: None,
87            detected_entities,
88            anonymized: None,
89            metadata: AnalysisMetadata {
90                recognizers_used: self.recognizer_registry.recognizers().len(),
91                processing_time_ms,
92                language: lang.to_string(),
93                model_version: self.model_version.clone(),
94            },
95        })
96    }
97
98    /// Analyze text with specific entity types
99    pub fn analyze_with_entities(
100        &self,
101        text: &str,
102        entity_types: &[EntityType],
103        language: Option<&str>,
104    ) -> Result<AnalysisResult> {
105        let start = Instant::now();
106        let lang = language.unwrap_or(&self.default_language);
107
108        let detected_entities =
109            self.recognizer_registry
110                .analyze_with_entities(text, lang, entity_types)?;
111
112        let processing_time_ms = start.elapsed().as_millis() as u64;
113
114        Ok(AnalysisResult {
115            original_text: None,
116            detected_entities,
117            anonymized: None,
118            metadata: AnalysisMetadata {
119                recognizers_used: self.recognizer_registry.recognizers().len(),
120                processing_time_ms,
121                language: lang.to_string(),
122                model_version: self.model_version.clone(),
123            },
124        })
125    }
126
127    /// Anonymize text based on detected entities
128    pub fn anonymize(
129        &self,
130        text: &str,
131        language: Option<&str>,
132        config: &AnonymizerConfig,
133    ) -> Result<AnonymizedResult> {
134        let lang = language.unwrap_or(&self.default_language);
135
136        // First, analyze to detect entities
137        let analysis = self.analyze(text, Some(lang))?;
138
139        // Then anonymize
140        self.anonymizer_registry
141            .anonymize(text, analysis.detected_entities, config)
142    }
143
144    /// Analyze and anonymize in one call
145    pub fn analyze_and_anonymize(
146        &self,
147        text: &str,
148        language: Option<&str>,
149        config: &AnonymizerConfig,
150    ) -> Result<AnalysisResult> {
151        let start = Instant::now();
152        let lang = language.unwrap_or(&self.default_language);
153
154        // Analyze
155        let mut result = self.analyze(text, Some(lang))?;
156
157        // Anonymize
158        let anonymized =
159            self.anonymizer_registry
160                .anonymize(text, result.detected_entities.clone(), config)?;
161
162        result.anonymized = Some(anonymized);
163        result.metadata.processing_time_ms = start.elapsed().as_millis() as u64;
164
165        Ok(result)
166    }
167}
168
169impl Default for AnalyzerEngine {
170    fn default() -> Self {
171        Self::new()
172    }
173}
174
175/// Builder for AnalyzerEngine
176pub struct AnalyzerEngineBuilder {
177    recognizer_registry: RecognizerRegistry,
178    anonymizer_registry: AnonymizerRegistry,
179    default_language: String,
180    model_version: Option<String>,
181}
182
183impl AnalyzerEngineBuilder {
184    pub fn new() -> Self {
185        Self {
186            recognizer_registry: RecognizerRegistry::new(),
187            anonymizer_registry: AnonymizerRegistry::new(),
188            default_language: "en".to_string(),
189            model_version: None,
190        }
191    }
192
193    pub fn with_recognizer_registry(mut self, registry: RecognizerRegistry) -> Self {
194        self.recognizer_registry = registry;
195        self
196    }
197
198    pub fn with_anonymizer_registry(mut self, registry: AnonymizerRegistry) -> Self {
199        self.anonymizer_registry = registry;
200        self
201    }
202
203    pub fn with_language(mut self, language: impl Into<String>) -> Self {
204        self.default_language = language.into();
205        self
206    }
207
208    pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
209        self.model_version = Some(version.into());
210        self
211    }
212
213    pub fn build(self) -> AnalyzerEngine {
214        AnalyzerEngine {
215            recognizer_registry: self.recognizer_registry,
216            anonymizer_registry: self.anonymizer_registry,
217            default_language: self.default_language,
218            model_version: self.model_version,
219        }
220    }
221}
222
223impl Default for AnalyzerEngineBuilder {
224    fn default() -> Self {
225        Self::new()
226    }
227}
228
#[cfg(test)]
mod tests {
    use super::*;
    use crate::anonymizers::AnonymizationStrategy;

    /// A fresh engine defaults to English and has at least one recognizer.
    #[test]
    fn test_analyzer_engine_new() {
        let engine = AnalyzerEngine::new();
        assert_eq!(engine.default_language, "en");
        assert!(!engine.recognizer_registry.recognizers().is_empty());
    }

    /// Plain analysis finds both entities and fills in the metadata.
    #[test]
    fn test_analyze() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";

        let result = engine.analyze(text, None).unwrap();

        assert!(result.detected_entities.len() >= 2);
        assert_eq!(result.metadata.language, "en");
        // `processing_time_ms` is truncated to whole milliseconds, so a fast
        // run legitimately reports 0; asserting `> 0` would be flaky. Check a
        // deterministic metadata field instead.
        assert_eq!(
            result.metadata.recognizers_used,
            engine.recognizer_registry.recognizers().len()
        );
    }

    /// Restricting the entity types yields only entities of those types.
    #[test]
    fn test_analyze_with_entities() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";

        let result = engine
            .analyze_with_entities(text, &[EntityType::EmailAddress], None)
            .unwrap();

        assert!(result
            .detected_entities
            .iter()
            .all(|e| e.entity_type == EntityType::EmailAddress));
    }

    /// The replace strategy substitutes a placeholder for the email address.
    #[test]
    fn test_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };

        let result = engine.anonymize(text, None, &config).unwrap();

        assert!(result.text.contains("[EMAIL_ADDRESS]"));
    }

    /// The combined call returns both the entities and the anonymized text.
    #[test]
    fn test_analyze_and_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, SSN: 123-45-6789";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };

        let result = engine.analyze_and_anonymize(text, None, &config).unwrap();

        assert!(result.detected_entities.len() >= 2);

        let anonymized = result
            .anonymized
            .expect("analyze_and_anonymize always sets `anonymized` on success");
        assert!(anonymized.text.contains("[EMAIL_ADDRESS]"));
        assert!(anonymized.text.contains("[US_SSN]"));
    }

    /// Builder settings are carried over into the built engine.
    #[test]
    fn test_builder() {
        let engine = AnalyzerEngine::builder()
            .with_language("es")
            .with_model_version("v1.0.0")
            .build();

        assert_eq!(engine.default_language, "es");
        assert_eq!(engine.model_version, Some("v1.0.0".to_string()));
    }
}