redact_core/anonymizers/
encrypt.rs1use super::{apply_anonymization, Anonymizer, AnonymizerConfig};
6use crate::types::{AnonymizedResult, RecognizerResult, Token};
7use aes_gcm::{
8 aead::{Aead, KeyInit},
9 Aes256Gcm, Nonce,
10};
11use anyhow::{anyhow, Result};
12use pbkdf2::pbkdf2_hmac;
13use rand::RngExt;
14use sha2::Sha256;
15use uuid::Uuid;
16
/// Anonymizer that swaps detected entities for opaque tokens and keeps the
/// original values only in password-encrypted (AES-256-GCM) form.
///
/// The encryption key is derived from the caller-supplied password with
/// PBKDF2-HMAC-SHA256; the number of rounds is configurable per instance.
#[derive(Clone, Debug)]
pub struct EncryptAnonymizer {
    /// Number of PBKDF2 rounds used when deriving the key from the password.
    key_derivation_iterations: u32,
}
22
23impl EncryptAnonymizer {
24 pub fn new() -> Self {
25 Self {
26 key_derivation_iterations: 100_000,
27 }
28 }
29
30 pub fn with_iterations(mut self, iterations: u32) -> Self {
31 self.key_derivation_iterations = iterations;
32 self
33 }
34
35 fn derive_key(&self, password: &str, salt: &[u8]) -> [u8; 32] {
37 let mut key = [0u8; 32];
38 pbkdf2_hmac::<Sha256>(
39 password.as_bytes(),
40 salt,
41 self.key_derivation_iterations,
42 &mut key,
43 );
44 key
45 }
46
47 fn encrypt_value(&self, value: &str, password: &str) -> Result<(String, Vec<u8>)> {
49 let mut rng = rand::rng();
51 let salt: [u8; 16] = rng.random();
52
53 let key_bytes = self.derive_key(password, &salt);
55 let cipher = Aes256Gcm::new((&key_bytes).into());
56
57 let nonce_bytes: [u8; 12] = rng.random();
59 let nonce = Nonce::from_slice(&nonce_bytes);
60
61 let ciphertext = cipher
63 .encrypt(nonce, value.as_bytes())
64 .map_err(|e| anyhow!("Encryption failed: {}", e))?;
65
66 let mut encrypted = Vec::new();
68 encrypted.extend_from_slice(&salt);
69 encrypted.extend_from_slice(&nonce_bytes);
70 encrypted.extend_from_slice(&ciphertext);
71
72 let encoded = base64_encode(&encrypted);
74
75 Ok((encoded, encrypted))
76 }
77
78 pub fn decrypt_value(&self, encrypted: &[u8], password: &str) -> Result<String> {
80 if encrypted.len() < 28 {
81 return Err(anyhow!("Invalid encrypted data"));
83 }
84
85 let salt = &encrypted[0..16];
87 let nonce_bytes = &encrypted[16..28];
88 let ciphertext = &encrypted[28..];
89
90 let key_bytes = self.derive_key(password, salt);
92 let cipher = Aes256Gcm::new((&key_bytes).into());
93 let nonce = Nonce::from_slice(nonce_bytes);
94
95 let plaintext = cipher
97 .decrypt(nonce, ciphertext)
98 .map_err(|e| anyhow!("Decryption failed: {}", e))?;
99
100 String::from_utf8(plaintext).map_err(|e| anyhow!("Invalid UTF-8: {}", e))
101 }
102}
103
104impl Default for EncryptAnonymizer {
105 fn default() -> Self {
106 Self::new()
107 }
108}
109
110impl Anonymizer for EncryptAnonymizer {
111 fn name(&self) -> &str {
112 "EncryptAnonymizer"
113 }
114
115 fn anonymize(
116 &self,
117 text: &str,
118 entities: Vec<RecognizerResult>,
119 config: &AnonymizerConfig,
120 ) -> Result<AnonymizedResult> {
121 let password = config
122 .encryption_key
123 .as_ref()
124 .ok_or_else(|| anyhow!("Encryption key not provided"))?;
125
126 let mut tokens = Vec::new();
128 let entity_map: std::collections::HashMap<(usize, usize), String> = entities
129 .iter()
130 .map(|entity| {
131 let token_id = Uuid::new_v4().to_string();
132 let original = &text[entity.start..entity.end];
133
134 let encrypted = self
136 .encrypt_value(original, password)
137 .unwrap_or_else(|_| (base64_encode(original.as_bytes()), vec![]));
138
139 tokens.push(Token {
141 token_id: token_id.clone(),
142 original_value: encrypted.0,
143 entity_type: entity.entity_type.clone(),
144 start: entity.start,
145 end: entity.end,
146 expires_at: None,
147 });
148
149 ((entity.start, entity.end), format!("<TOKEN_{}>", token_id))
150 })
151 .collect();
152
153 let anonymized_text = apply_anonymization(text, &entities, |entity, _original| {
154 entity_map
155 .get(&(entity.start, entity.end))
156 .cloned()
157 .unwrap_or_else(|| format!("<TOKEN_{}>", Uuid::new_v4()))
158 });
159
160 Ok(AnonymizedResult {
161 text: anonymized_text,
162 entities,
163 tokens: Some(tokens),
164 })
165 }
166}
167
/// Encodes `bytes` as standard-alphabet base64 (`A-Z a-z 0-9 + /`) with `=`
/// padding.
///
/// Input is consumed in groups of up to three bytes; each group yields exactly
/// four output characters, with `=` filling the positions that have no
/// corresponding input byte.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Four output characters per (possibly partial) three-byte group.
    let mut out = String::with_capacity((bytes.len() + 2) / 3 * 4);

    for group in bytes.chunks(3) {
        // Pack the group into the top of a 24-bit word; bytes missing from a
        // short final group stay zero.
        let packed = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (i, &byte)| acc | (u32::from(byte) << (16 - 8 * i)));

        // Looks up the alphabet character for the 6-bit field at `shift`.
        let sextet = |shift: u32| ALPHABET[((packed >> shift) & 0x3F) as usize] as char;

        out.push(sextet(18));
        out.push(sextet(12));
        out.push(if group.len() > 1 { sextet(6) } else { '=' });
        out.push(if group.len() > 2 { sextet(0) } else { '=' });
    }

    out
}
198
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Anonymizing one e-mail entity yields a token placeholder in the text
    /// and exactly one token record.
    #[test]
    fn test_encrypt_anonymizer() {
        let anonymizer = EncryptAnonymizer::new();
        let text = "Email: john@example.com";
        let entities = vec![RecognizerResult::new(
            EntityType::EmailAddress,
            7,
            23,
            0.9,
            "test",
        )];
        let config = AnonymizerConfig {
            encryption_key: Some("test_password".to_string()),
            ..Default::default()
        };

        let result = anonymizer.anonymize(text, entities, &config).unwrap();

        assert!(result.text.contains("<TOKEN_"));
        assert!(result.tokens.is_some());
        assert_eq!(result.tokens.unwrap().len(), 1);
    }

    /// A value encrypted with a password decrypts back to itself with the
    /// same password.
    #[test]
    fn test_encrypt_decrypt() {
        let anonymizer = EncryptAnonymizer::new();
        let password = "test_password";
        let original = "sensitive_data";

        let (encrypted, encrypted_bytes) = anonymizer.encrypt_value(original, password).unwrap();

        assert!(!encrypted.is_empty());
        assert_ne!(encrypted, original);

        let decrypted = anonymizer
            .decrypt_value(&encrypted_bytes, password)
            .unwrap();
        assert_eq!(decrypted, original);
    }

    /// Decrypting with a different password must fail rather than return
    /// garbage.
    #[test]
    fn test_decrypt_with_wrong_password() {
        // A low round count keeps the test fast; it is irrelevant to the
        // behaviour being checked.
        let anonymizer = EncryptAnonymizer::new().with_iterations(1_000);

        let (_, encrypted_bytes) = anonymizer
            .encrypt_value("sensitive_data", "right_password")
            .unwrap();

        assert!(anonymizer
            .decrypt_value(&encrypted_bytes, "wrong_password")
            .is_err());
    }

    /// Input shorter than the 28-byte salt + nonce header is rejected.
    #[test]
    fn test_decrypt_truncated_input() {
        let anonymizer = EncryptAnonymizer::new();

        assert!(anonymizer.decrypt_value(&[], "test_password").is_err());
        assert!(anonymizer
            .decrypt_value(&[0u8; 27], "test_password")
            .is_err());
    }

    /// Anonymizing without an encryption key is an error.
    #[test]
    fn test_encrypt_without_key() {
        let anonymizer = EncryptAnonymizer::new();
        let text = "Email: john@example.com";
        let entities = vec![RecognizerResult::new(
            EntityType::EmailAddress,
            7,
            23,
            0.9,
            "test",
        )];
        let config = AnonymizerConfig::default();

        let result = anonymizer.anonymize(text, entities, &config);

        assert!(result.is_err());
    }

    /// The hand-rolled encoder matches the RFC 4648 test vectors, including
    /// both padding cases.
    #[test]
    fn test_base64_encode_vectors() {
        assert_eq!(base64_encode(b""), "");
        assert_eq!(base64_encode(b"f"), "Zg==");
        assert_eq!(base64_encode(b"fo"), "Zm8=");
        assert_eq!(base64_encode(b"foo"), "Zm9v");
        assert_eq!(base64_encode(b"foobar"), "Zm9vYmFy");
    }
}