redact_core/anonymizers/
mod.rs1pub mod encrypt;
7pub mod hash;
8pub mod mask;
9pub mod registry;
10pub mod replace;
11
12pub use registry::AnonymizerRegistry;
13
14use crate::types::{AnonymizedResult, RecognizerResult};
15use anyhow::Result;
16use serde::{Deserialize, Serialize};
17use std::fmt::Debug;
18
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
21#[serde(rename_all = "lowercase")]
22pub enum AnonymizationStrategy {
23 #[default]
25 Replace,
26 Mask,
28 Hash,
30 Encrypt,
32 Redact,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct AnonymizerConfig {
39 pub strategy: AnonymizationStrategy,
41
42 #[serde(default = "default_mask_char")]
44 pub mask_char: char,
45
46 #[serde(default)]
48 pub mask_start_chars: usize,
49
50 #[serde(default)]
52 pub mask_end_chars: usize,
53
54 #[serde(skip_serializing_if = "Option::is_none")]
56 pub encryption_key: Option<String>,
57
58 #[serde(skip_serializing_if = "Option::is_none")]
60 pub hash_salt: Option<String>,
61
62 #[serde(default)]
64 pub preserve_format: bool,
65}
66
/// Serde fallback for `AnonymizerConfig::mask_char` when the field is absent.
fn default_mask_char() -> char {
    const ASTERISK: char = '*';
    ASTERISK
}
70
71impl Default for AnonymizerConfig {
72 fn default() -> Self {
73 Self {
74 strategy: AnonymizationStrategy::Replace,
75 mask_char: '*',
76 mask_start_chars: 0,
77 mask_end_chars: 0,
78 encryption_key: None,
79 hash_salt: None,
80 preserve_format: false,
81 }
82 }
83}
84
85pub trait Anonymizer: Send + Sync + Debug {
87 fn name(&self) -> &str;
89
90 fn anonymize(
92 &self,
93 text: &str,
94 entities: Vec<RecognizerResult>,
95 config: &AnonymizerConfig,
96 ) -> Result<AnonymizedResult>;
97}
98
99pub fn apply_anonymization(
101 text: &str,
102 entities: &[RecognizerResult],
103 replacement_fn: impl Fn(&RecognizerResult, &str) -> String,
104) -> String {
105 if entities.is_empty() {
106 return text.to_string();
107 }
108
109 let mut result = String::with_capacity(text.len());
110 let mut last_end = 0;
111
112 let mut sorted_entities = entities.to_vec();
114 sorted_entities.sort_by_key(|e| e.start);
115
116 for entity in sorted_entities {
117 if entity.start > last_end {
119 result.push_str(&text[last_end..entity.start]);
120 }
121
122 let original = if entity.end <= text.len() {
124 &text[entity.start..entity.end]
125 } else {
126 ""
127 };
128
129 result.push_str(&replacement_fn(&entity, original));
131
132 last_end = entity.end;
133 }
134
135 if last_end < text.len() {
137 result.push_str(&text[last_end..]);
138 }
139
140 result
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Renders an entity as its type name in square brackets, ignoring the
    /// matched text.
    fn bracketed_label(entity: &RecognizerResult, _matched: &str) -> String {
        format!("[{}]", entity.entity_type.as_str())
    }

    /// Two separated entities are each replaced and the surrounding text is kept.
    #[test]
    fn test_apply_anonymization() {
        let email = RecognizerResult::new(EntityType::EmailAddress, 7, 23, 0.9, "test");
        let phone = RecognizerResult::new(EntityType::PhoneNumber, 32, 40, 0.8, "test");

        let anonymized = apply_anonymization(
            "Email: john@example.com, Phone: 555-1234",
            &[email, phone],
            bracketed_label,
        );

        assert_eq!(anonymized, "Email: [EMAIL_ADDRESS], Phone: [PHONE_NUMBER]");
    }

    /// With no entities the input text comes back unchanged.
    #[test]
    fn test_apply_anonymization_empty() {
        let input = "No PII here";

        let anonymized = apply_anonymization(input, &[], bracketed_label);

        assert_eq!(anonymized, input);
    }

    /// Entities that touch end-to-start are both replaced.
    #[test]
    fn test_apply_anonymization_adjacent() {
        let first = RecognizerResult::new(EntityType::Person, 0, 1, 0.9, "test");
        let second = RecognizerResult::new(EntityType::Person, 1, 2, 0.9, "test");

        let anonymized = apply_anonymization("AB", &[first, second], |_, _| "X".to_string());

        assert_eq!(anonymized, "XX");
    }
}