Skip to main content

redact_core/anonymizers/
hash.rs

1// Copyright (c) 2026 Censgate LLC.
2// Licensed under the Business Source License 1.1 (BUSL-1.1).
3// See the LICENSE file in the project root for license details,
4// including the Additional Use Grant, Change Date, and Change License.
5
6use super::{apply_anonymization, Anonymizer, AnonymizerConfig};
7use crate::types::{AnonymizedResult, RecognizerResult};
8use anyhow::Result;
9use sha2::{Digest, Sha256};
10
/// Hash anonymizer for irreversible anonymization.
///
/// Each detected entity is replaced by a short hexadecimal digest of its
/// original text, computed with the configured [`HashAlgorithm`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashAnonymizer {
    /// Digest function used to hash entity values.
    algorithm: HashAlgorithm,
}

/// Digest functions supported by [`HashAnonymizer`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// SHA-256; the algorithm selected by `HashAnonymizer::new`.
    Sha256,
    /// BLAKE3.
    Blake3,
}
22
23impl HashAnonymizer {
24    pub fn new() -> Self {
25        Self {
26            algorithm: HashAlgorithm::Sha256,
27        }
28    }
29
30    pub fn with_algorithm(mut self, algorithm: HashAlgorithm) -> Self {
31        self.algorithm = algorithm;
32        self
33    }
34
35    fn hash_value(&self, value: &str, salt: Option<&str>) -> String {
36        let input = if let Some(salt) = salt {
37            format!("{}{}", value, salt)
38        } else {
39            value.to_string()
40        };
41
42        match self.algorithm {
43            HashAlgorithm::Sha256 => {
44                let mut hasher = Sha256::new();
45                hasher.update(input.as_bytes());
46                let result = hasher.finalize();
47                hex::encode(&result[..8]) // Use first 8 bytes for readability
48            }
49            HashAlgorithm::Blake3 => {
50                let hash = blake3::hash(input.as_bytes());
51                hex::encode(&hash.as_bytes()[..8]) // Use first 8 bytes for readability
52            }
53        }
54    }
55}
56
57impl Default for HashAnonymizer {
58    fn default() -> Self {
59        Self::new()
60    }
61}
62
63impl Anonymizer for HashAnonymizer {
64    fn name(&self) -> &str {
65        "HashAnonymizer"
66    }
67
68    fn anonymize(
69        &self,
70        text: &str,
71        entities: Vec<RecognizerResult>,
72        config: &AnonymizerConfig,
73    ) -> Result<AnonymizedResult> {
74        let salt = config.hash_salt.as_deref();
75
76        let anonymized_text = apply_anonymization(text, &entities, |entity, original| {
77            let hash = self.hash_value(original, salt);
78            format!("[{}_{}]", entity.entity_type.as_str(), hash)
79        });
80
81        Ok(AnonymizedResult {
82            text: anonymized_text,
83            entities,
84            tokens: None,
85        })
86    }
87}
88
// Local stand-in for the `hex` crate; swap it for the real dependency once
// `hex` has been added to Cargo.toml.
mod hex {
    /// Lowercase hexadecimal digits, indexed by nibble value.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    /// Encodes `bytes` as a lowercase hexadecimal string, two characters per
    /// byte. An empty slice yields an empty string.
    pub fn encode(bytes: &[u8]) -> String {
        // Write into one preallocated buffer rather than formatting a
        // temporary `String` for every byte.
        let mut out = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            out.push(char::from(DIGITS[usize::from(byte >> 4)]));
            out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
        }
        out
    }
}
95
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Sample input; the e-mail address occupies byte offsets 7..23.
    const SAMPLE_TEXT: &str = "Email: john@example.com";

    /// Builds the single e-mail entity matching `SAMPLE_TEXT`.
    fn email_entities() -> Vec<RecognizerResult> {
        vec![RecognizerResult::new(
            EntityType::EmailAddress,
            7,
            23,
            0.9,
            "test",
        )]
    }

    #[test]
    fn test_hash_anonymizer() {
        let anonymizer = HashAnonymizer::new();
        let config = AnonymizerConfig::default();

        let result = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &config)
            .unwrap();

        assert!(result.text.starts_with("Email: [EMAIL_ADDRESS_"));
        assert!(result.text.ends_with("]"));
        assert_ne!(result.text, SAMPLE_TEXT);
    }

    #[test]
    fn test_hash_consistency() {
        let anonymizer = HashAnonymizer::new();
        let text = "test@example.com";

        let hash1 = anonymizer.hash_value(text, None);
        let hash2 = anonymizer.hash_value(text, None);

        assert_eq!(hash1, hash2, "Hash should be consistent");
    }

    #[test]
    fn test_hash_with_salt() {
        let anonymizer = HashAnonymizer::new();
        let salted_config = AnonymizerConfig {
            hash_salt: Some("my_salt".to_string()),
            ..Default::default()
        };
        let unsalted_config = AnonymizerConfig {
            hash_salt: None,
            ..Default::default()
        };

        let salted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &salted_config)
            .unwrap();
        let unsalted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &unsalted_config)
            .unwrap();

        assert!(salted.text.starts_with("Email: [EMAIL_ADDRESS_"));
        assert!(salted.text.ends_with("]"));
        // The salt must actually influence the digest, not merely be accepted.
        assert_ne!(
            salted.text, unsalted.text,
            "Salted and unsalted output should differ"
        );
        assert_ne!(
            anonymizer.hash_value("john@example.com", Some("my_salt")),
            anonymizer.hash_value("john@example.com", None),
            "Salt should change the hash"
        );
    }

    #[test]
    fn test_hash_different_values() {
        let anonymizer = HashAnonymizer::new();

        let hash1 = anonymizer.hash_value("test1@example.com", None);
        let hash2 = anonymizer.hash_value("test2@example.com", None);

        assert_ne!(
            hash1, hash2,
            "Different values should produce different hashes"
        );
    }

    #[test]
    fn test_hash_is_truncated_lowercase_hex() {
        let sha = HashAnonymizer::new();
        let blake = HashAnonymizer::new().with_algorithm(HashAlgorithm::Blake3);

        for anonymizer in [&sha, &blake] {
            let hash = anonymizer.hash_value("test@example.com", None);
            // Eight digest bytes are kept, i.e. sixteen hex characters.
            assert_eq!(hash.len(), 16);
            assert!(hash
                .chars()
                .all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)));
        }
    }

    #[test]
    fn test_algorithms_produce_different_hashes() {
        let sha = HashAnonymizer::new();
        let blake = HashAnonymizer::new().with_algorithm(HashAlgorithm::Blake3);

        assert_ne!(
            sha.hash_value("test@example.com", None),
            blake.hash_value("test@example.com", None),
            "SHA-256 and BLAKE3 should not agree"
        );
    }

    #[test]
    fn test_hex_encode() {
        assert_eq!(super::hex::encode(&[]), "");
        assert_eq!(super::hex::encode(&[0x00, 0x0f, 0xa5, 0xff]), "000fa5ff");
    }
}