// redact_core/anonymizers/hash.rs
use super::{apply_anonymization, Anonymizer, AnonymizerConfig};
use crate::types::{AnonymizedResult, RecognizerResult};
use anyhow::Result;
use sha2::{Digest, Sha256};
9
10#[derive(Debug, Clone)]
12pub struct HashAnonymizer {
13 algorithm: HashAlgorithm,
14}
15
/// Digest functions a hash-based anonymizer can be configured with.
///
/// The type is a plain, field-less selector, so it is cheap to copy and can
/// be compared or used as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// SHA-256.
    Sha256,
    /// BLAKE3.
    Blake3,
}
21
22impl HashAnonymizer {
23 pub fn new() -> Self {
24 Self {
25 algorithm: HashAlgorithm::Sha256,
26 }
27 }
28
29 pub fn with_algorithm(mut self, algorithm: HashAlgorithm) -> Self {
30 self.algorithm = algorithm;
31 self
32 }
33
34 fn hash_value(&self, value: &str, salt: Option<&str>) -> String {
35 let input = if let Some(salt) = salt {
36 format!("{}{}", value, salt)
37 } else {
38 value.to_string()
39 };
40
41 match self.algorithm {
42 HashAlgorithm::Sha256 => {
43 let mut hasher = Sha256::new();
44 hasher.update(input.as_bytes());
45 let result = hasher.finalize();
46 hex::encode(&result[..8]) }
48 HashAlgorithm::Blake3 => {
49 let hash = blake3::hash(input.as_bytes());
50 hex::encode(&hash.as_bytes()[..8]) }
52 }
53 }
54}
55
56impl Default for HashAnonymizer {
57 fn default() -> Self {
58 Self::new()
59 }
60}
61
62impl Anonymizer for HashAnonymizer {
63 fn name(&self) -> &str {
64 "HashAnonymizer"
65 }
66
67 fn anonymize(
68 &self,
69 text: &str,
70 entities: Vec<RecognizerResult>,
71 config: &AnonymizerConfig,
72 ) -> Result<AnonymizedResult> {
73 let salt = config.hash_salt.as_deref();
74
75 let anonymized_text = apply_anonymization(text, &entities, |entity, original| {
76 let hash = self.hash_value(original, salt);
77 format!("[{}_{}]", entity.entity_type.as_str(), hash)
78 });
79
80 Ok(AnonymizedResult {
81 text: anonymized_text,
82 entities,
83 tokens: None,
84 })
85 }
86}
87
/// Minimal lowercase hexadecimal encoder, local to this file.
mod hex {
    /// Lowercase hex digits indexed by nibble value.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    /// Encodes `bytes` as lowercase hexadecimal, two characters per byte.
    ///
    /// An empty slice yields an empty string.
    pub fn encode(bytes: &[u8]) -> String {
        // One up-front allocation instead of a temporary `String` per byte.
        let mut out = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            out.push(char::from(DIGITS[usize::from(byte >> 4)]));
            out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
        }
        out
    }
}
94
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Sample input; bytes 7..23 hold the address `john@example.com`.
    const SAMPLE_TEXT: &str = "Email: john@example.com";

    /// Builds the single email entity covering bytes 7..23 of `SAMPLE_TEXT`.
    fn email_entities() -> Vec<RecognizerResult> {
        vec![RecognizerResult::new(
            EntityType::EmailAddress,
            7,
            23,
            0.9,
            "test",
        )]
    }

    #[test]
    fn test_hash_anonymizer() {
        let anonymizer = HashAnonymizer::new();
        let config = AnonymizerConfig::default();

        let result = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &config)
            .unwrap();

        assert!(result.text.starts_with("Email: [EMAIL_ADDRESS_"));
        assert!(result.text.ends_with("]"));
        assert_ne!(result.text, SAMPLE_TEXT);
    }

    #[test]
    fn test_hash_consistency() {
        let anonymizer = HashAnonymizer::new();
        let text = "test@example.com";

        let hash1 = anonymizer.hash_value(text, None);
        let hash2 = anonymizer.hash_value(text, None);

        assert_eq!(hash1, hash2, "Hash should be consistent");
    }

    #[test]
    fn test_hash_with_salt() {
        let anonymizer = HashAnonymizer::new();
        let salted_config = AnonymizerConfig {
            hash_salt: Some("my_salt".to_string()),
            ..Default::default()
        };

        let salted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &salted_config)
            .unwrap();
        let unsalted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &AnonymizerConfig::default())
            .unwrap();

        assert!(salted.text.starts_with("Email: [EMAIL_ADDRESS_"));
        // The salt must actually influence the emitted hash.
        assert_ne!(
            salted.text, unsalted.text,
            "Salted and unsalted output should differ"
        );
        assert_ne!(
            anonymizer.hash_value("john@example.com", Some("my_salt")),
            anonymizer.hash_value("john@example.com", None),
            "Salt should change the hash"
        );
    }

    #[test]
    fn test_hash_different_values() {
        let anonymizer = HashAnonymizer::new();

        let hash1 = anonymizer.hash_value("test1@example.com", None);
        let hash2 = anonymizer.hash_value("test2@example.com", None);

        assert_ne!(
            hash1, hash2,
            "Different values should produce different hashes"
        );
    }

    #[test]
    fn test_hash_output_shape() {
        let hash = HashAnonymizer::new().hash_value("test@example.com", None);

        // Eight digest bytes rendered as two lowercase hex characters each.
        assert_eq!(hash.len(), 16);
        assert!(hash
            .chars()
            .all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)));
    }

    #[test]
    fn test_blake3_algorithm() {
        let sha = HashAnonymizer::new();
        let blake = HashAnonymizer::new().with_algorithm(HashAlgorithm::Blake3);
        let text = "test@example.com";

        let blake_hash = blake.hash_value(text, None);

        assert_eq!(blake_hash.len(), 16);
        assert_eq!(
            blake_hash,
            blake.hash_value(text, None),
            "Hash should be consistent"
        );
        assert_ne!(
            blake_hash,
            sha.hash_value(text, None),
            "Different algorithms should produce different hashes"
        );
    }
}