redact_core/anonymizers/
hash.rs1use super::{apply_anonymization, Anonymizer, AnonymizerConfig};
7use crate::types::{AnonymizedResult, RecognizerResult};
8use anyhow::Result;
9use sha2::{Digest, Sha256};
10
11#[derive(Debug, Clone)]
13pub struct HashAnonymizer {
14 algorithm: HashAlgorithm,
15}
16
/// Hash function used by [`HashAnonymizer`] to derive replacement tokens.
///
/// The type is a plain fieldless enum, so it is cheap to copy, compare and
/// use as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// SHA-256.
    Sha256,
    /// BLAKE3.
    Blake3,
}
22
23impl HashAnonymizer {
24 pub fn new() -> Self {
25 Self {
26 algorithm: HashAlgorithm::Sha256,
27 }
28 }
29
30 pub fn with_algorithm(mut self, algorithm: HashAlgorithm) -> Self {
31 self.algorithm = algorithm;
32 self
33 }
34
35 fn hash_value(&self, value: &str, salt: Option<&str>) -> String {
36 let input = if let Some(salt) = salt {
37 format!("{}{}", value, salt)
38 } else {
39 value.to_string()
40 };
41
42 match self.algorithm {
43 HashAlgorithm::Sha256 => {
44 let mut hasher = Sha256::new();
45 hasher.update(input.as_bytes());
46 let result = hasher.finalize();
47 hex::encode(&result[..8]) }
49 HashAlgorithm::Blake3 => {
50 let hash = blake3::hash(input.as_bytes());
51 hex::encode(&hash.as_bytes()[..8]) }
53 }
54 }
55}
56
57impl Default for HashAnonymizer {
58 fn default() -> Self {
59 Self::new()
60 }
61}
62
63impl Anonymizer for HashAnonymizer {
64 fn name(&self) -> &str {
65 "HashAnonymizer"
66 }
67
68 fn anonymize(
69 &self,
70 text: &str,
71 entities: Vec<RecognizerResult>,
72 config: &AnonymizerConfig,
73 ) -> Result<AnonymizedResult> {
74 let salt = config.hash_salt.as_deref();
75
76 let anonymized_text = apply_anonymization(text, &entities, |entity, original| {
77 let hash = self.hash_value(original, salt);
78 format!("[{}_{}]", entity.entity_type.as_str(), hash)
79 });
80
81 Ok(AnonymizedResult {
82 text: anonymized_text,
83 entities,
84 tokens: None,
85 })
86 }
87}
88
/// Local hex-encoding helper.
mod hex {
    use std::fmt::Write;

    /// Encodes `bytes` as lowercase hexadecimal, two digits per byte.
    ///
    /// An empty slice yields an empty string.
    pub fn encode(bytes: &[u8]) -> String {
        // One up-front allocation instead of a temporary `String` per byte.
        let mut out = String::with_capacity(bytes.len() * 2);
        for byte in bytes {
            // Formatting into a `String` cannot fail, so the result is ignored.
            let _ = write!(out, "{:02x}", byte);
        }
        out
    }
}
95
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Sample input shared by the end-to-end tests.
    const SAMPLE_TEXT: &str = "Email: john@example.com";

    /// Builds the single email entity used with `SAMPLE_TEXT`; offsets 7..23
    /// are where the address sits in that string.
    fn email_entities() -> Vec<RecognizerResult> {
        vec![RecognizerResult::new(
            EntityType::EmailAddress,
            7,
            23,
            0.9,
            "test",
        )]
    }

    #[test]
    fn test_hash_anonymizer() {
        let anonymizer = HashAnonymizer::new();
        let config = AnonymizerConfig::default();

        let result = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &config)
            .unwrap();

        assert!(result.text.starts_with("Email: [EMAIL_ADDRESS_"));
        assert!(result.text.ends_with("]"));
        assert_ne!(result.text, SAMPLE_TEXT);
    }

    #[test]
    fn test_hash_consistency() {
        let anonymizer = HashAnonymizer::new();
        let text = "test@example.com";

        let hash1 = anonymizer.hash_value(text, None);
        let hash2 = anonymizer.hash_value(text, None);

        assert_eq!(hash1, hash2, "Hash should be consistent");
    }

    #[test]
    fn test_hash_with_salt() {
        let anonymizer = HashAnonymizer::new();
        let salted_config = AnonymizerConfig {
            hash_salt: Some("my_salt".to_string()),
            ..Default::default()
        };
        let unsalted_config = AnonymizerConfig {
            hash_salt: None,
            ..Default::default()
        };

        let salted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &salted_config)
            .unwrap();
        let unsalted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &unsalted_config)
            .unwrap();

        assert!(salted.text.starts_with("Email: [EMAIL_ADDRESS_"));
        // A prefix check alone would still pass if the salt were ignored.
        assert_ne!(
            salted.text, unsalted.text,
            "Salt should change the anonymized output"
        );
    }

    #[test]
    fn test_salt_changes_hash() {
        let anonymizer = HashAnonymizer::new();
        let value = "test@example.com";

        assert_ne!(
            anonymizer.hash_value(value, Some("my_salt")),
            anonymizer.hash_value(value, None),
            "Salted and unsalted hashes should differ"
        );
        assert_ne!(
            anonymizer.hash_value(value, Some("salt_a")),
            anonymizer.hash_value(value, Some("salt_b")),
            "Different salts should produce different hashes"
        );
    }

    #[test]
    fn test_hash_different_values() {
        let anonymizer = HashAnonymizer::new();

        let hash1 = anonymizer.hash_value("test1@example.com", None);
        let hash2 = anonymizer.hash_value("test2@example.com", None);

        assert_ne!(
            hash1, hash2,
            "Different values should produce different hashes"
        );
    }

    #[test]
    fn test_hash_token_length() {
        // 8 digest bytes are kept, so every token is 16 hex characters.
        let sha = HashAnonymizer::new();
        let blake = HashAnonymizer::new().with_algorithm(HashAlgorithm::Blake3);

        assert_eq!(sha.hash_value("test@example.com", None).len(), 16);
        assert_eq!(blake.hash_value("test@example.com", None).len(), 16);
        assert_eq!(sha.hash_value("", None).len(), 16);
    }

    #[test]
    fn test_blake3_algorithm() {
        let sha = HashAnonymizer::new();
        let blake = HashAnonymizer::new().with_algorithm(HashAlgorithm::Blake3);
        let value = "test@example.com";

        assert_eq!(
            blake.hash_value(value, None),
            blake.hash_value(value, None),
            "Blake3 hash should be consistent"
        );
        assert_ne!(
            sha.hash_value(value, None),
            blake.hash_value(value, None),
            "Different algorithms should produce different hashes"
        );
    }
}