use super::{apply_anonymization, Anonymizer, AnonymizerConfig};
use crate::types::{AnonymizedResult, RecognizerResult, Token};
use aes_gcm::{
aead::{Aead, KeyInit},
Aes256Gcm, Nonce,
};
use anyhow::{anyhow, Result};
use pbkdf2::pbkdf2_hmac;
use rand::RngExt;
use sha2::Sha256;
use uuid::Uuid;
#[derive(Debug, Clone)]
pub struct EncryptAnonymizer {
key_derivation_iterations: u32,
}
impl EncryptAnonymizer {
pub fn new() -> Self {
Self {
key_derivation_iterations: 100_000,
}
}
pub fn with_iterations(mut self, iterations: u32) -> Self {
self.key_derivation_iterations = iterations;
self
}
fn derive_key(&self, password: &str, salt: &[u8]) -> [u8; 32] {
let mut key = [0u8; 32];
pbkdf2_hmac::<Sha256>(
password.as_bytes(),
salt,
self.key_derivation_iterations,
&mut key,
);
key
}
fn encrypt_value(&self, value: &str, password: &str) -> Result<(String, Vec<u8>)> {
let mut rng = rand::rng();
let salt: [u8; 16] = rng.random();
let key_bytes = self.derive_key(password, &salt);
let cipher = Aes256Gcm::new((&key_bytes).into());
let nonce_bytes: [u8; 12] = rng.random();
let nonce = Nonce::from_slice(&nonce_bytes);
let ciphertext = cipher
.encrypt(nonce, value.as_bytes())
.map_err(|e| anyhow!("Encryption failed: {}", e))?;
let mut encrypted = Vec::new();
encrypted.extend_from_slice(&salt);
encrypted.extend_from_slice(&nonce_bytes);
encrypted.extend_from_slice(&ciphertext);
let encoded = base64_encode(&encrypted);
Ok((encoded, encrypted))
}
pub fn decrypt_value(&self, encrypted: &[u8], password: &str) -> Result<String> {
if encrypted.len() < 28 {
return Err(anyhow!("Invalid encrypted data"));
}
let salt = &encrypted[0..16];
let nonce_bytes = &encrypted[16..28];
let ciphertext = &encrypted[28..];
let key_bytes = self.derive_key(password, salt);
let cipher = Aes256Gcm::new((&key_bytes).into());
let nonce = Nonce::from_slice(nonce_bytes);
let plaintext = cipher
.decrypt(nonce, ciphertext)
.map_err(|e| anyhow!("Decryption failed: {}", e))?;
String::from_utf8(plaintext).map_err(|e| anyhow!("Invalid UTF-8: {}", e))
}
}
impl Default for EncryptAnonymizer {
fn default() -> Self {
Self::new()
}
}
impl Anonymizer for EncryptAnonymizer {
fn name(&self) -> &str {
"EncryptAnonymizer"
}
fn anonymize(
&self,
text: &str,
entities: Vec<RecognizerResult>,
config: &AnonymizerConfig,
) -> Result<AnonymizedResult> {
let password = config
.encryption_key
.as_ref()
.ok_or_else(|| anyhow!("Encryption key not provided"))?;
let mut tokens = Vec::new();
let entity_map: std::collections::HashMap<(usize, usize), String> = entities
.iter()
.map(|entity| {
let token_id = Uuid::new_v4().to_string();
let original = &text[entity.start..entity.end];
let encrypted = self
.encrypt_value(original, password)
.unwrap_or_else(|_| (base64_encode(original.as_bytes()), vec![]));
tokens.push(Token {
token_id: token_id.clone(),
original_value: encrypted.0,
entity_type: entity.entity_type.clone(),
start: entity.start,
end: entity.end,
expires_at: None,
});
((entity.start, entity.end), format!("<TOKEN_{}>", token_id))
})
.collect();
let anonymized_text = apply_anonymization(text, &entities, |entity, _original| {
entity_map
.get(&(entity.start, entity.end))
.cloned()
.unwrap_or_else(|| format!("<TOKEN_{}>", Uuid::new_v4()))
});
Ok(AnonymizedResult {
text: anonymized_text,
entities,
tokens: Some(tokens),
})
}
}
fn base64_encode(bytes: &[u8]) -> String {
const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
let mut result = String::new();
for chunk in bytes.chunks(3) {
let mut buf = [0u8; 3];
buf[..chunk.len()].copy_from_slice(chunk);
let b1 = (buf[0] >> 2) as usize;
let b2 = (((buf[0] & 0x03) << 4) | (buf[1] >> 4)) as usize;
let b3 = (((buf[1] & 0x0F) << 2) | (buf[2] >> 6)) as usize;
let b4 = (buf[2] & 0x3F) as usize;
result.push(CHARSET[b1] as char);
result.push(CHARSET[b2] as char);
result.push(if chunk.len() > 1 {
CHARSET[b3] as char
} else {
'='
});
result.push(if chunk.len() > 2 {
CHARSET[b4] as char
} else {
'='
});
}
result
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::EntityType;
#[test]
fn test_encrypt_anonymizer() {
let anonymizer = EncryptAnonymizer::new();
let text = "Email: john@example.com";
let entities = vec![RecognizerResult::new(
EntityType::EmailAddress,
7,
23,
0.9,
"test",
)];
let config = AnonymizerConfig {
encryption_key: Some("test_password".to_string()),
..Default::default()
};
let result = anonymizer.anonymize(text, entities, &config).unwrap();
assert!(result.text.contains("<TOKEN_"));
assert!(result.tokens.is_some());
assert_eq!(result.tokens.unwrap().len(), 1);
}
#[test]
fn test_encrypt_decrypt() {
let anonymizer = EncryptAnonymizer::new();
let password = "test_password";
let original = "sensitive_data";
let (encrypted, encrypted_bytes) = anonymizer.encrypt_value(original, password).unwrap();
assert!(!encrypted.is_empty());
assert_ne!(encrypted, original);
let decrypted = anonymizer
.decrypt_value(&encrypted_bytes, password)
.unwrap();
assert_eq!(decrypted, original);
}
#[test]
fn test_encrypt_without_key() {
let anonymizer = EncryptAnonymizer::new();
let text = "Email: john@example.com";
let entities = vec![RecognizerResult::new(
EntityType::EmailAddress,
7,
23,
0.9,
"test",
)];
let config = AnonymizerConfig::default();
let result = anonymizer.anonymize(text, entities, &config);
assert!(result.is_err());
}
}