cloakpipe_core/detector/
mod.rs1pub mod patterns;
10pub mod financial;
11pub mod custom;
12
13#[cfg(feature = "ner")]
14pub mod ner;
15
16use crate::{DetectedEntity, config::DetectionConfig};
17use anyhow::Result;
18
19pub struct Detector {
21 pattern_detector: patterns::PatternDetector,
22 financial_detector: financial::FinancialDetector,
23 custom_detector: custom::CustomDetector,
24 #[cfg(feature = "ner")]
25 ner_detector: Option<ner::NerDetector>,
26 preserve_list: Vec<String>,
28 force_list: Vec<String>,
30}
31
32impl Detector {
33 pub fn from_config(config: &DetectionConfig) -> Result<Self> {
35 Ok(Self {
36 pattern_detector: patterns::PatternDetector::new(config)?,
37 financial_detector: financial::FinancialDetector::new(config)?,
38 custom_detector: custom::CustomDetector::new(config)?,
39 #[cfg(feature = "ner")]
40 ner_detector: if config.ner.enabled {
41 Some(ner::NerDetector::new(&config.ner)?)
42 } else {
43 None
44 },
45 preserve_list: config.overrides.preserve.clone(),
46 force_list: config.overrides.force.clone(),
47 })
48 }
49
50 pub fn detect(&self, text: &str) -> Result<Vec<DetectedEntity>> {
53 let mut entities = Vec::new();
54
55 entities.extend(self.pattern_detector.detect(text)?);
57
58 entities.extend(self.financial_detector.detect(text)?);
60
61 #[cfg(feature = "ner")]
63 if let Some(ref ner) = self.ner_detector {
64 entities.extend(ner.detect(text)?);
65 }
66
67 entities.extend(self.custom_detector.detect(text)?);
69
70 entities.retain(|e| !self.preserve_list.contains(&e.original));
72
73 for forced in &self.force_list {
75 if let Some(start) = text.find(forced.as_str()) {
76 entities.push(DetectedEntity {
77 original: forced.clone(),
78 start,
79 end: start + forced.len(),
80 category: crate::EntityCategory::Custom("FORCED".into()),
81 confidence: 1.0,
82 source: crate::DetectionSource::Custom,
83 });
84 }
85 }
86
87 entities.sort_by_key(|e| e.start);
89 entities = Self::deduplicate_spans(entities);
90
91 Ok(entities)
92 }
93
94 fn deduplicate_spans(entities: Vec<DetectedEntity>) -> Vec<DetectedEntity> {
96 let mut result: Vec<DetectedEntity> = Vec::new();
97 for entity in entities {
98 if let Some(last) = result.last() {
99 if entity.start < last.end {
100 if entity.confidence > last.confidence {
102 result.pop();
103 result.push(entity);
104 }
105 continue;
106 }
107 }
108 result.push(entity);
109 }
110 result
111 }
112}