Skip to main content

redact_core/anonymizers/
encrypt.rs

1// Copyright 2026 Censgate LLC.
2// Licensed under the Apache License, Version 2.0. See the LICENSE file
3// in the project root for license information.
4
5use super::{apply_anonymization, Anonymizer, AnonymizerConfig};
6use crate::types::{AnonymizedResult, RecognizerResult, Token};
7use aes_gcm::{
8    aead::{Aead, KeyInit},
9    Aes256Gcm, Nonce,
10};
11use anyhow::{anyhow, Result};
12use pbkdf2::pbkdf2_hmac;
13use rand::RngExt;
14use sha2::Sha256;
15use uuid::Uuid;
16
/// Anonymizer that swaps sensitive values for tokens whose payload is an
/// encrypted copy of the original, so the substitution can later be reversed.
#[derive(Clone, Debug)]
pub struct EncryptAnonymizer {
    /// Iteration count handed to the password-based key derivation.
    key_derivation_iterations: u32,
}
22
23impl EncryptAnonymizer {
24    pub fn new() -> Self {
25        Self {
26            key_derivation_iterations: 100_000,
27        }
28    }
29
30    pub fn with_iterations(mut self, iterations: u32) -> Self {
31        self.key_derivation_iterations = iterations;
32        self
33    }
34
35    /// Derive encryption key from password
36    fn derive_key(&self, password: &str, salt: &[u8]) -> [u8; 32] {
37        let mut key = [0u8; 32];
38        pbkdf2_hmac::<Sha256>(
39            password.as_bytes(),
40            salt,
41            self.key_derivation_iterations,
42            &mut key,
43        );
44        key
45    }
46
47    /// Encrypt a value
48    fn encrypt_value(&self, value: &str, password: &str) -> Result<(String, Vec<u8>)> {
49        // Generate cryptographically secure random salt
50        let mut rng = rand::rng();
51        let salt: [u8; 16] = rng.random();
52
53        // Derive key
54        let key_bytes = self.derive_key(password, &salt);
55        let cipher = Aes256Gcm::new((&key_bytes).into());
56
57        // Generate cryptographically secure random nonce
58        let nonce_bytes: [u8; 12] = rng.random();
59        let nonce = Nonce::from_slice(&nonce_bytes);
60
61        // Encrypt
62        let ciphertext = cipher
63            .encrypt(nonce, value.as_bytes())
64            .map_err(|e| anyhow!("Encryption failed: {}", e))?;
65
66        // Combine salt + nonce + ciphertext
67        let mut encrypted = Vec::new();
68        encrypted.extend_from_slice(&salt);
69        encrypted.extend_from_slice(&nonce_bytes);
70        encrypted.extend_from_slice(&ciphertext);
71
72        // Encode to base64
73        let encoded = base64_encode(&encrypted);
74
75        Ok((encoded, encrypted))
76    }
77
78    /// Decrypt a value
79    pub fn decrypt_value(&self, encrypted: &[u8], password: &str) -> Result<String> {
80        if encrypted.len() < 28 {
81            // 16 (salt) + 12 (nonce) minimum
82            return Err(anyhow!("Invalid encrypted data"));
83        }
84
85        // Extract components
86        let salt = &encrypted[0..16];
87        let nonce_bytes = &encrypted[16..28];
88        let ciphertext = &encrypted[28..];
89
90        // Derive key
91        let key_bytes = self.derive_key(password, salt);
92        let cipher = Aes256Gcm::new((&key_bytes).into());
93        let nonce = Nonce::from_slice(nonce_bytes);
94
95        // Decrypt
96        let plaintext = cipher
97            .decrypt(nonce, ciphertext)
98            .map_err(|e| anyhow!("Decryption failed: {}", e))?;
99
100        String::from_utf8(plaintext).map_err(|e| anyhow!("Invalid UTF-8: {}", e))
101    }
102}
103
104impl Default for EncryptAnonymizer {
105    fn default() -> Self {
106        Self::new()
107    }
108}
109
110impl Anonymizer for EncryptAnonymizer {
111    fn name(&self) -> &str {
112        "EncryptAnonymizer"
113    }
114
115    fn anonymize(
116        &self,
117        text: &str,
118        entities: Vec<RecognizerResult>,
119        config: &AnonymizerConfig,
120    ) -> Result<AnonymizedResult> {
121        let password = config
122            .encryption_key
123            .as_ref()
124            .ok_or_else(|| anyhow!("Encryption key not provided"))?;
125
126        // Pre-encrypt all values and build tokens
127        let mut tokens = Vec::new();
128        let entity_map: std::collections::HashMap<(usize, usize), String> = entities
129            .iter()
130            .map(|entity| {
131                let token_id = Uuid::new_v4().to_string();
132                let original = &text[entity.start..entity.end];
133
134                // Encrypt the original value
135                let encrypted = self
136                    .encrypt_value(original, password)
137                    .unwrap_or_else(|_| (base64_encode(original.as_bytes()), vec![]));
138
139                // Create token
140                tokens.push(Token {
141                    token_id: token_id.clone(),
142                    original_value: encrypted.0,
143                    entity_type: entity.entity_type.clone(),
144                    start: entity.start,
145                    end: entity.end,
146                    expires_at: None,
147                });
148
149                ((entity.start, entity.end), format!("<TOKEN_{}>", token_id))
150            })
151            .collect();
152
153        let anonymized_text = apply_anonymization(text, &entities, |entity, _original| {
154            entity_map
155                .get(&(entity.start, entity.end))
156                .cloned()
157                .unwrap_or_else(|| format!("<TOKEN_{}>", Uuid::new_v4()))
158        });
159
160        Ok(AnonymizedResult {
161            text: anonymized_text,
162            entities,
163            tokens: Some(tokens),
164        })
165    }
166}
167
/// Encodes `bytes` as standard-alphabet base64 (`A-Z a-z 0-9 + /`) with `=`
/// padding, producing four output characters for every group of up to three
/// input bytes.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const PAD: char = '=';

    let mut encoded = String::new();

    for group in bytes.chunks(3) {
        // Pack the group into the top of a 24-bit word; absent bytes stay zero.
        let word = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (i, &byte)| acc | (u32::from(byte) << (16 - 8 * i)));

        // Maps the 6-bit field starting at bit `shift` to its alphabet character.
        let sextet = |shift: u32| ALPHABET[((word >> shift) & 0x3F) as usize] as char;

        encoded.push(sextet(18));
        encoded.push(sextet(12));
        // The third and fourth characters exist only when the group had
        // a second and third byte respectively; otherwise emit padding.
        encoded.push(if group.len() >= 2 { sextet(6) } else { PAD });
        encoded.push(if group.len() == 3 { sextet(0) } else { PAD });
    }

    encoded
}
198
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Input shared by the `anonymize` tests; the address occupies bytes 7..23.
    const SAMPLE_TEXT: &str = "Email: john@example.com";

    /// Builds the single e-mail entity covering the address in `SAMPLE_TEXT`.
    fn sample_entities() -> Vec<RecognizerResult> {
        vec![RecognizerResult::new(
            EntityType::EmailAddress,
            7,
            23,
            0.9,
            "test",
        )]
    }

    /// With a key configured, the text gets a token placeholder and exactly
    /// one token is returned.
    #[test]
    fn test_encrypt_anonymizer() {
        let config = AnonymizerConfig {
            encryption_key: Some(String::from("test_password")),
            ..Default::default()
        };

        let outcome = EncryptAnonymizer::new()
            .anonymize(SAMPLE_TEXT, sample_entities(), &config)
            .unwrap();

        assert!(outcome.text.contains("<TOKEN_"));
        assert_eq!(outcome.tokens.map(|tokens| tokens.len()), Some(1));
    }

    /// Encrypting and then decrypting with the same password round-trips.
    #[test]
    fn test_encrypt_decrypt() {
        let anonymizer = EncryptAnonymizer::new();
        let (password, plaintext) = ("test_password", "sensitive_data");

        let (encoded, raw) = anonymizer.encrypt_value(plaintext, password).unwrap();
        assert!(!encoded.is_empty());
        assert_ne!(encoded, plaintext);

        let recovered = anonymizer.decrypt_value(&raw, password).unwrap();
        assert_eq!(recovered, plaintext);
    }

    /// Without an encryption key in the config, `anonymize` must fail.
    #[test]
    fn test_encrypt_without_key() {
        let config = AnonymizerConfig::default(); // No encryption key

        let outcome =
            EncryptAnonymizer::new().anonymize(SAMPLE_TEXT, sample_entities(), &config);

        assert!(outcome.is_err());
    }
}