redact_core/anonymizers/
mod.rs1pub mod encrypt;
6pub mod hash;
7pub mod mask;
8pub mod registry;
9pub mod replace;
10
11pub use registry::AnonymizerRegistry;
12
13use crate::types::{AnonymizedResult, RecognizerResult};
14use anyhow::Result;
15use serde::{Deserialize, Serialize};
16use std::fmt::Debug;
17
18#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
20#[serde(rename_all = "lowercase")]
21pub enum AnonymizationStrategy {
22 #[default]
24 Replace,
25 Mask,
27 Hash,
29 Encrypt,
31 Redact,
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct AnonymizerConfig {
38 pub strategy: AnonymizationStrategy,
40
41 #[serde(default = "default_mask_char")]
43 pub mask_char: char,
44
45 #[serde(default)]
47 pub mask_start_chars: usize,
48
49 #[serde(default)]
51 pub mask_end_chars: usize,
52
53 #[serde(skip_serializing_if = "Option::is_none")]
55 pub encryption_key: Option<String>,
56
57 #[serde(skip_serializing_if = "Option::is_none")]
59 pub hash_salt: Option<String>,
60
61 #[serde(default)]
63 pub preserve_format: bool,
64}
65
/// Serde fallback for the `mask_char` field of `AnonymizerConfig`: an asterisk.
fn default_mask_char() -> char {
    const FALLBACK_MASK: char = '*';
    FALLBACK_MASK
}
69
70impl Default for AnonymizerConfig {
71 fn default() -> Self {
72 Self {
73 strategy: AnonymizationStrategy::Replace,
74 mask_char: '*',
75 mask_start_chars: 0,
76 mask_end_chars: 0,
77 encryption_key: None,
78 hash_salt: None,
79 preserve_format: false,
80 }
81 }
82}
83
84pub trait Anonymizer: Send + Sync + Debug {
86 fn name(&self) -> &str;
88
89 fn anonymize(
91 &self,
92 text: &str,
93 entities: Vec<RecognizerResult>,
94 config: &AnonymizerConfig,
95 ) -> Result<AnonymizedResult>;
96}
97
98pub fn apply_anonymization(
100 text: &str,
101 entities: &[RecognizerResult],
102 replacement_fn: impl Fn(&RecognizerResult, &str) -> String,
103) -> String {
104 if entities.is_empty() {
105 return text.to_string();
106 }
107
108 let mut result = String::with_capacity(text.len());
109 let mut last_end = 0;
110
111 let mut sorted_entities = entities.to_vec();
113 sorted_entities.sort_by_key(|e| e.start);
114
115 for entity in sorted_entities {
116 if entity.start > last_end {
118 result.push_str(&text[last_end..entity.start]);
119 }
120
121 let original = if entity.end <= text.len() {
123 &text[entity.start..entity.end]
124 } else {
125 ""
126 };
127
128 result.push_str(&replacement_fn(&entity, original));
130
131 last_end = entity.end;
132 }
133
134 if last_end < text.len() {
136 result.push_str(&text[last_end..]);
137 }
138
139 result
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Two separated entities are each replaced by a bracketed type label,
    /// and the surrounding text is kept verbatim.
    #[test]
    fn test_apply_anonymization() {
        let input = "Email: john@example.com, Phone: 555-1234";
        let email = RecognizerResult::new(EntityType::EmailAddress, 7, 23, 0.9, "test");
        let phone = RecognizerResult::new(EntityType::PhoneNumber, 32, 40, 0.8, "test");
        let detected = vec![email, phone];

        let output = apply_anonymization(input, &detected, |entity, _| {
            format!("[{}]", entity.entity_type.as_str())
        });

        assert_eq!(output, "Email: [EMAIL_ADDRESS], Phone: [PHONE_NUMBER]");
    }

    /// With no entities the input comes back unchanged.
    #[test]
    fn test_apply_anonymization_empty() {
        let input = "No PII here";
        let detected: Vec<RecognizerResult> = Vec::new();

        let output = apply_anonymization(input, &detected, |entity, _| {
            format!("[{}]", entity.entity_type.as_str())
        });

        assert_eq!(output, input);
    }

    /// Back-to-back spans (one ends where the next starts) are both replaced.
    #[test]
    fn test_apply_anonymization_adjacent() {
        let input = "AB";
        let first = RecognizerResult::new(EntityType::Person, 0, 1, 0.9, "test");
        let second = RecognizerResult::new(EntityType::Person, 1, 2, 0.9, "test");
        let detected = vec![first, second];

        let output = apply_anonymization(input, &detected, |_, _| "X".to_string());

        assert_eq!(output, "XX");
    }
}