// File: redact_core/engine/mod.rs

// Copyright 2026 Censgate LLC.
// Licensed under the Apache License, Version 2.0. See the LICENSE file
// in the project root for license information.
4
5use crate::anonymizers::{AnonymizerConfig, AnonymizerRegistry};
6use crate::recognizers::{pattern::PatternRecognizer, RecognizerRegistry};
7use crate::types::{AnalysisMetadata, AnalysisResult, AnonymizedResult, EntityType};
8use anyhow::Result;
9use std::sync::Arc;
10use std::time::Instant;
11
12/// Main analyzer engine that coordinates recognition and anonymization
13#[derive(Debug, Clone)]
14pub struct AnalyzerEngine {
15    recognizer_registry: RecognizerRegistry,
16    anonymizer_registry: AnonymizerRegistry,
17    default_language: String,
18    model_version: Option<String>,
19}
20
21impl AnalyzerEngine {
22    /// Create a new analyzer engine with default recognizers
23    pub fn new() -> Self {
24        let mut recognizer_registry = RecognizerRegistry::new();
25
26        // Add default pattern recognizer
27        let pattern_recognizer = Arc::new(PatternRecognizer::new());
28        recognizer_registry.add_recognizer(pattern_recognizer);
29
30        Self {
31            recognizer_registry,
32            anonymizer_registry: AnonymizerRegistry::new(),
33            default_language: "en".to_string(),
34            model_version: None,
35        }
36    }
37
38    /// Create a builder for custom configuration
39    pub fn builder() -> AnalyzerEngineBuilder {
40        AnalyzerEngineBuilder::new()
41    }
42
43    /// Set the default language
44    pub fn with_language(mut self, language: impl Into<String>) -> Self {
45        self.default_language = language.into();
46        self
47    }
48
49    /// Set the model version (for NER)
50    pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
51        self.model_version = Some(version.into());
52        self
53    }
54
55    /// Get the recognizer registry
56    pub fn recognizer_registry(&self) -> &RecognizerRegistry {
57        &self.recognizer_registry
58    }
59
60    /// Get mutable access to the recognizer registry
61    pub fn recognizer_registry_mut(&mut self) -> &mut RecognizerRegistry {
62        &mut self.recognizer_registry
63    }
64
65    /// Get the anonymizer registry
66    pub fn anonymizer_registry(&self) -> &AnonymizerRegistry {
67        &self.anonymizer_registry
68    }
69
70    /// Get mutable access to the anonymizer registry
71    pub fn anonymizer_registry_mut(&mut self) -> &mut AnonymizerRegistry {
72        &mut self.anonymizer_registry
73    }
74
75    /// Analyze text and detect PII entities
76    pub fn analyze(&self, text: &str, language: Option<&str>) -> Result<AnalysisResult> {
77        let start = Instant::now();
78        let lang = language.unwrap_or(&self.default_language);
79
80        let detected_entities = self.recognizer_registry.analyze(text, lang)?;
81
82        let processing_time_ms = start.elapsed().as_millis() as u64;
83
84        Ok(AnalysisResult {
85            original_text: None,
86            detected_entities,
87            anonymized: None,
88            metadata: AnalysisMetadata {
89                recognizers_used: self.recognizer_registry.recognizers().len(),
90                processing_time_ms,
91                language: lang.to_string(),
92                model_version: self.model_version.clone(),
93            },
94        })
95    }
96
97    /// Analyze text with specific entity types
98    pub fn analyze_with_entities(
99        &self,
100        text: &str,
101        entity_types: &[EntityType],
102        language: Option<&str>,
103    ) -> Result<AnalysisResult> {
104        let start = Instant::now();
105        let lang = language.unwrap_or(&self.default_language);
106
107        let detected_entities =
108            self.recognizer_registry
109                .analyze_with_entities(text, lang, entity_types)?;
110
111        let processing_time_ms = start.elapsed().as_millis() as u64;
112
113        Ok(AnalysisResult {
114            original_text: None,
115            detected_entities,
116            anonymized: None,
117            metadata: AnalysisMetadata {
118                recognizers_used: self.recognizer_registry.recognizers().len(),
119                processing_time_ms,
120                language: lang.to_string(),
121                model_version: self.model_version.clone(),
122            },
123        })
124    }
125
126    /// Anonymize text based on detected entities
127    pub fn anonymize(
128        &self,
129        text: &str,
130        language: Option<&str>,
131        config: &AnonymizerConfig,
132    ) -> Result<AnonymizedResult> {
133        let lang = language.unwrap_or(&self.default_language);
134
135        // First, analyze to detect entities
136        let analysis = self.analyze(text, Some(lang))?;
137
138        // Then anonymize
139        self.anonymizer_registry
140            .anonymize(text, analysis.detected_entities, config)
141    }
142
143    /// Analyze and anonymize in one call
144    pub fn analyze_and_anonymize(
145        &self,
146        text: &str,
147        language: Option<&str>,
148        config: &AnonymizerConfig,
149    ) -> Result<AnalysisResult> {
150        let start = Instant::now();
151        let lang = language.unwrap_or(&self.default_language);
152
153        // Analyze
154        let mut result = self.analyze(text, Some(lang))?;
155
156        // Anonymize
157        let anonymized =
158            self.anonymizer_registry
159                .anonymize(text, result.detected_entities.clone(), config)?;
160
161        result.anonymized = Some(anonymized);
162        result.metadata.processing_time_ms = start.elapsed().as_millis() as u64;
163
164        Ok(result)
165    }
166}
167
168impl Default for AnalyzerEngine {
169    fn default() -> Self {
170        Self::new()
171    }
172}
173
174/// Builder for AnalyzerEngine
175pub struct AnalyzerEngineBuilder {
176    recognizer_registry: RecognizerRegistry,
177    anonymizer_registry: AnonymizerRegistry,
178    default_language: String,
179    model_version: Option<String>,
180}
181
182impl AnalyzerEngineBuilder {
183    pub fn new() -> Self {
184        Self {
185            recognizer_registry: RecognizerRegistry::new(),
186            anonymizer_registry: AnonymizerRegistry::new(),
187            default_language: "en".to_string(),
188            model_version: None,
189        }
190    }
191
192    pub fn with_recognizer_registry(mut self, registry: RecognizerRegistry) -> Self {
193        self.recognizer_registry = registry;
194        self
195    }
196
197    pub fn with_anonymizer_registry(mut self, registry: AnonymizerRegistry) -> Self {
198        self.anonymizer_registry = registry;
199        self
200    }
201
202    pub fn with_language(mut self, language: impl Into<String>) -> Self {
203        self.default_language = language.into();
204        self
205    }
206
207    pub fn with_model_version(mut self, version: impl Into<String>) -> Self {
208        self.model_version = Some(version.into());
209        self
210    }
211
212    pub fn build(self) -> AnalyzerEngine {
213        AnalyzerEngine {
214            recognizer_registry: self.recognizer_registry,
215            anonymizer_registry: self.anonymizer_registry,
216            default_language: self.default_language,
217            model_version: self.model_version,
218        }
219    }
220}
221
222impl Default for AnalyzerEngineBuilder {
223    fn default() -> Self {
224        Self::new()
225    }
226}
227
#[cfg(test)]
mod tests {
    use super::*;
    use crate::anonymizers::AnonymizationStrategy;

    /// A freshly constructed engine defaults to English and has at least one
    /// recognizer registered.
    #[test]
    fn test_analyzer_engine_new() {
        let engine = AnalyzerEngine::new();
        assert_eq!(engine.default_language, "en");
        assert!(!engine.recognizer_registry.recognizers().is_empty());
    }

    /// Plain analysis detects both entities and fills in the metadata.
    #[test]
    fn test_analyze() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";

        let result = engine.analyze(text, None).unwrap();

        assert!(result.detected_entities.len() >= 2);
        assert_eq!(result.metadata.language, "en");
        // Elapsed time is reported in whole milliseconds, so a fast run
        // legitimately yields 0. Check deterministic metadata instead of
        // asserting on wall-clock time.
        assert_eq!(
            result.metadata.recognizers_used,
            engine.recognizer_registry.recognizers().len()
        );
        assert!(result.anonymized.is_none());
    }

    /// Restricting analysis to one entity type yields only that type.
    #[test]
    fn test_analyze_with_entities() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, Phone: (555) 123-4567";

        let result = engine
            .analyze_with_entities(text, &[EntityType::EmailAddress], None)
            .unwrap();

        assert!(result
            .detected_entities
            .iter()
            .all(|e| e.entity_type == EntityType::EmailAddress));
    }

    /// The `Replace` strategy substitutes the entity-type placeholder.
    #[test]
    fn test_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };

        let result = engine.anonymize(text, None, &config).unwrap();

        assert!(result.text.contains("[EMAIL_ADDRESS]"));
    }

    /// The combined call returns both the detected entities and the
    /// anonymized text.
    #[test]
    fn test_analyze_and_anonymize() {
        let engine = AnalyzerEngine::new();
        let text = "Email: john@example.com, SSN: 123-45-6789";
        let config = AnonymizerConfig {
            strategy: AnonymizationStrategy::Replace,
            ..Default::default()
        };

        let result = engine.analyze_and_anonymize(text, None, &config).unwrap();

        assert!(result.detected_entities.len() >= 2);
        assert!(result.anonymized.is_some());

        let anonymized = result.anonymized.unwrap();
        assert!(anonymized.text.contains("[EMAIL_ADDRESS]"));
        assert!(anonymized.text.contains("[US_SSN]"));
    }

    /// Builder settings are carried over to the built engine.
    #[test]
    fn test_builder() {
        let engine = AnalyzerEngine::builder()
            .with_language("es")
            .with_model_version("v1.0.0")
            .build();

        assert_eq!(engine.default_language, "es");
        assert_eq!(engine.model_version, Some("v1.0.0".to_string()));
    }
}