Skip to main content

redact_core/anonymizers/
hash.rs

1// Copyright (c) 2026 Censgate LLC.
2// Licensed under the Business Source License 1.1 (BUSL-1.1).
3// See the LICENSE file in the project root for license details,
4// including the Additional Use Grant, Change Date, and Change License.
5
6use super::{apply_anonymization, Anonymizer, AnonymizerConfig};
7use crate::types::{AnonymizedResult, RecognizerResult};
8use anyhow::Result;
9use sha2::{Digest, Sha256};
10
/// Anonymizer that substitutes each detected entity with a tag built from
/// the entity type and a truncated hex digest of the entity's original text.
///
/// The digest is one-way: the tag does not contain the original value.
/// The algorithm is chosen through [`HashAlgorithm`]; an instance created
/// with [`HashAnonymizer::new`] uses SHA-256.
#[derive(Debug, Clone)]
pub struct HashAnonymizer {
    /// Digest algorithm applied to every entity value.
    algorithm: HashAlgorithm,
}
16
/// Digest algorithms that [`HashAnonymizer`] can use to hash entity values.
///
/// The type is a plain, field-less enum, so it is cheap to copy and can be
/// compared or used as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// SHA-256, computed with the `sha2` crate.
    Sha256,
    /// BLAKE3, computed with the `blake3` crate.
    Blake3,
}
22
23impl HashAnonymizer {
24    pub fn new() -> Self {
25        Self {
26            algorithm: HashAlgorithm::Sha256,
27        }
28    }
29
30    pub fn with_algorithm(mut self, algorithm: HashAlgorithm) -> Self {
31        self.algorithm = algorithm;
32        self
33    }
34
35    fn hash_value(&self, value: &str, salt: Option<&str>) -> String {
36        let input = if let Some(salt) = salt {
37            format!("{}{}", value, salt)
38        } else {
39            value.to_string()
40        };
41
42        match self.algorithm {
43            HashAlgorithm::Sha256 => {
44                let mut hasher = Sha256::new();
45                hasher.update(input.as_bytes());
46                let result = hasher.finalize();
47                hex::encode(&result[..8]) // Use first 8 bytes for readability
48            }
49            HashAlgorithm::Blake3 => {
50                let hash = blake3::hash(input.as_bytes());
51                hex::encode(&hash.as_bytes()[..8]) // Use first 8 bytes for readability
52            }
53        }
54    }
55}
56
57impl Default for HashAnonymizer {
58    fn default() -> Self {
59        Self::new()
60    }
61}
62
63impl Anonymizer for HashAnonymizer {
64    fn name(&self) -> &str {
65        "HashAnonymizer"
66    }
67
68    fn anonymize(
69        &self,
70        text: &str,
71        entities: Vec<RecognizerResult>,
72        config: &AnonymizerConfig,
73    ) -> Result<AnonymizedResult> {
74        let salt = config.hash_salt.as_deref();
75
76        let anonymized_text = apply_anonymization(text, &entities, |entity, original| {
77            let hash = self.hash_value(original, salt);
78            format!("[{}_{}]", entity.entity_type.as_str(), hash)
79        });
80
81        Ok(AnonymizedResult {
82            text: anonymized_text,
83            entities,
84            tokens: None,
85        })
86    }
87}
88
// Minimal stand-in for the `hex` crate, kept local until that dependency is
// added to Cargo.toml.
mod hex {
    /// Lowercase hexadecimal digits, indexed by nibble value.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    /// Encodes `bytes` as a lowercase hexadecimal string, two characters per
    /// input byte. An empty slice yields an empty string.
    pub fn encode(bytes: &[u8]) -> String {
        // Sized up front so the buffer never reallocates, and filled directly
        // so no temporary `String` is created per byte.
        let mut encoded = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            encoded.push(char::from(DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
        }
        encoded
    }
}
95
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Input shared by the end-to-end tests.
    const SAMPLE_TEXT: &str = "Email: john@example.com";

    /// Builds the single e-mail entity used by the end-to-end tests; the
    /// positions 7 and 23 bracket the address inside `SAMPLE_TEXT`.
    fn email_entities() -> Vec<RecognizerResult> {
        vec![RecognizerResult::new(
            EntityType::EmailAddress,
            7,
            23,
            0.9,
            "test",
        )]
    }

    /// The entity is replaced by a bracketed `EMAIL_ADDRESS_<hash>` tag.
    #[test]
    fn test_hash_anonymizer() {
        let anonymizer = HashAnonymizer::new();
        let config = AnonymizerConfig::default();

        let result = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &config)
            .unwrap();

        assert!(result.text.starts_with("Email: [EMAIL_ADDRESS_"));
        assert!(result.text.ends_with("]"));
        assert_ne!(result.text, SAMPLE_TEXT);
    }

    /// Hashing the same value twice yields the same digest.
    #[test]
    fn test_hash_consistency() {
        let anonymizer = HashAnonymizer::new();
        let text = "test@example.com";

        let hash1 = anonymizer.hash_value(text, None);
        let hash2 = anonymizer.hash_value(text, None);

        assert_eq!(hash1, hash2, "Hash should be consistent");
    }

    /// A configured salt is honoured: the tag keeps its shape but the
    /// emitted hash differs from the unsalted one.
    #[test]
    fn test_hash_with_salt() {
        let anonymizer = HashAnonymizer::new();
        let salted_config = AnonymizerConfig {
            hash_salt: Some("my_salt".to_string()),
            ..Default::default()
        };
        // Spelled out explicitly so the comparison does not depend on what
        // `AnonymizerConfig::default()` uses for the salt.
        let unsalted_config = AnonymizerConfig {
            hash_salt: None,
            ..Default::default()
        };

        let salted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &salted_config)
            .unwrap();
        let unsalted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &unsalted_config)
            .unwrap();

        assert!(salted.text.starts_with("Email: [EMAIL_ADDRESS_"));
        assert!(salted.text.ends_with("]"));
        assert_ne!(
            salted.text, unsalted.text,
            "Salt should change the emitted hash"
        );
        assert_ne!(
            anonymizer.hash_value("john@example.com", Some("my_salt")),
            anonymizer.hash_value("john@example.com", None),
            "Salted and unsalted hashes should differ"
        );
    }

    /// Distinct inputs map to distinct digests.
    #[test]
    fn test_hash_different_values() {
        let anonymizer = HashAnonymizer::new();

        let hash1 = anonymizer.hash_value("test1@example.com", None);
        let hash2 = anonymizer.hash_value("test2@example.com", None);

        assert_ne!(
            hash1, hash2,
            "Different values should produce different hashes"
        );
    }

    /// Every algorithm renders eight digest bytes as 16 lowercase hex digits.
    #[test]
    fn test_hash_is_truncated_hex_for_all_algorithms() {
        for algorithm in [HashAlgorithm::Sha256, HashAlgorithm::Blake3] {
            let anonymizer = HashAnonymizer::new().with_algorithm(algorithm);
            let hash = anonymizer.hash_value("test@example.com", None);

            assert_eq!(hash.len(), 16, "unexpected length for {:?}", algorithm);
            assert!(
                hash.chars()
                    .all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)),
                "non-hex output for {:?}: {}",
                algorithm,
                hash
            );
        }
    }

    /// Pins the SHA-256 output to the published digest of "abc" and shows
    /// that the salt is appended directly to the value before hashing.
    #[test]
    fn test_sha256_known_vector() {
        let anonymizer = HashAnonymizer::new();

        assert_eq!(anonymizer.hash_value("abc", None), "ba7816bf8f01cfea");
        assert_eq!(anonymizer.hash_value("a", Some("bc")), "ba7816bf8f01cfea");
    }
}