Skip to main content

redact_core/anonymizers/
hash.rs

1// Copyright 2026 Censgate LLC.
2// Licensed under the Apache License, Version 2.0. See the LICENSE file
3// in the project root for license information.
4
5use super::{apply_anonymization, Anonymizer, AnonymizerConfig};
6use crate::types::{AnonymizedResult, RecognizerResult};
7use anyhow::Result;
8use sha2::{Digest, Sha256};
9
10/// Hash anonymizer for irreversible anonymization
11#[derive(Debug, Clone)]
12pub struct HashAnonymizer {
13    algorithm: HashAlgorithm,
14}
15
/// Digest functions that [`HashAnonymizer`] can use.
///
/// The type is a plain field-less enum, so it derives the common comparison
/// and hashing traits in addition to `Copy`; this lets callers compare
/// algorithms or use them as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlgorithm {
    /// SHA-256; the algorithm selected by `HashAnonymizer::new`.
    Sha256,
    /// BLAKE3.
    Blake3,
}
21
22impl HashAnonymizer {
23    pub fn new() -> Self {
24        Self {
25            algorithm: HashAlgorithm::Sha256,
26        }
27    }
28
29    pub fn with_algorithm(mut self, algorithm: HashAlgorithm) -> Self {
30        self.algorithm = algorithm;
31        self
32    }
33
34    fn hash_value(&self, value: &str, salt: Option<&str>) -> String {
35        let input = if let Some(salt) = salt {
36            format!("{}{}", value, salt)
37        } else {
38            value.to_string()
39        };
40
41        match self.algorithm {
42            HashAlgorithm::Sha256 => {
43                let mut hasher = Sha256::new();
44                hasher.update(input.as_bytes());
45                let result = hasher.finalize();
46                hex::encode(&result[..8]) // Use first 8 bytes for readability
47            }
48            HashAlgorithm::Blake3 => {
49                let hash = blake3::hash(input.as_bytes());
50                hex::encode(&hash.as_bytes()[..8]) // Use first 8 bytes for readability
51            }
52        }
53    }
54}
55
56impl Default for HashAnonymizer {
57    fn default() -> Self {
58        Self::new()
59    }
60}
61
62impl Anonymizer for HashAnonymizer {
63    fn name(&self) -> &str {
64        "HashAnonymizer"
65    }
66
67    fn anonymize(
68        &self,
69        text: &str,
70        entities: Vec<RecognizerResult>,
71        config: &AnonymizerConfig,
72    ) -> Result<AnonymizedResult> {
73        let salt = config.hash_salt.as_deref();
74
75        let anonymized_text = apply_anonymization(text, &entities, |entity, original| {
76            let hash = self.hash_value(original, salt);
77            format!("[{}_{}]", entity.entity_type.as_str(), hash)
78        });
79
80        Ok(AnonymizedResult {
81            text: anonymized_text,
82            entities,
83            tokens: None,
84        })
85    }
86}
87
// Minimal in-file stand-in for the `hex` crate; only `encode` is provided.
// It can be dropped once the real `hex` dependency is added to Cargo.toml.
mod hex {
    /// Lowercase hexadecimal digits, indexed by nibble value.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    /// Encodes `bytes` as a lowercase hexadecimal string, two characters per
    /// input byte. An empty slice yields an empty string.
    pub fn encode(bytes: &[u8]) -> String {
        // One pre-sized buffer instead of a temporary `String` per byte.
        let mut encoded = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            // Both indices are nibbles (0..=15), so they are always in bounds.
            encoded.push(char::from(DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
        }
        encoded
    }
}
94
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Sample input whose email address occupies the span 7..23.
    const SAMPLE_TEXT: &str = "Email: john@example.com";

    /// Builds the single email entity used by the `anonymize` tests.
    ///
    /// A fresh vector is built per call because `anonymize` takes the
    /// entities by value.
    fn email_entities() -> Vec<RecognizerResult> {
        vec![RecognizerResult::new(
            EntityType::EmailAddress,
            7,
            23,
            0.9,
            "test",
        )]
    }

    #[test]
    fn test_hash_anonymizer() {
        let anonymizer = HashAnonymizer::new();
        let config = AnonymizerConfig::default();

        let result = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &config)
            .unwrap();

        assert!(result.text.starts_with("Email: [EMAIL_ADDRESS_"));
        assert!(result.text.ends_with("]"));
        assert_ne!(result.text, SAMPLE_TEXT);
    }

    #[test]
    fn test_hash_consistency() {
        let anonymizer = HashAnonymizer::new();
        let text = "test@example.com";

        let hash1 = anonymizer.hash_value(text, None);
        let hash2 = anonymizer.hash_value(text, None);

        assert_eq!(hash1, hash2, "Hash should be consistent");
    }

    #[test]
    fn test_hash_with_salt() {
        let anonymizer = HashAnonymizer::new();
        let salted_config = AnonymizerConfig {
            hash_salt: Some("my_salt".to_string()),
            ..Default::default()
        };
        let unsalted_config = AnonymizerConfig {
            hash_salt: None,
            ..Default::default()
        };

        let salted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &salted_config)
            .unwrap();
        let unsalted = anonymizer
            .anonymize(SAMPLE_TEXT, email_entities(), &unsalted_config)
            .unwrap();

        assert!(salted.text.starts_with("Email: [EMAIL_ADDRESS_"));
        // Guards against the configured salt being silently ignored.
        assert_ne!(
            salted.text, unsalted.text,
            "Salt should change the anonymized output"
        );
    }

    #[test]
    fn test_hash_value_salt_changes_digest() {
        let anonymizer = HashAnonymizer::new();

        let unsalted = anonymizer.hash_value("test@example.com", None);
        let salted = anonymizer.hash_value("test@example.com", Some("my_salt"));

        assert_ne!(unsalted, salted, "Salt should change the hash");
    }

    #[test]
    fn test_hash_different_values() {
        let anonymizer = HashAnonymizer::new();

        let hash1 = anonymizer.hash_value("test1@example.com", None);
        let hash2 = anonymizer.hash_value("test2@example.com", None);

        assert_ne!(
            hash1, hash2,
            "Different values should produce different hashes"
        );
    }

    #[test]
    fn test_hash_is_sixteen_hex_chars() {
        for algorithm in [HashAlgorithm::Sha256, HashAlgorithm::Blake3] {
            let anonymizer = HashAnonymizer::new().with_algorithm(algorithm);

            let hash = anonymizer.hash_value("test@example.com", None);

            // Eight digest bytes are kept, two hex characters each.
            assert_eq!(hash.len(), 16);
            assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
        }
    }

    #[test]
    fn test_algorithms_produce_different_hashes() {
        let sha256 = HashAnonymizer::new().with_algorithm(HashAlgorithm::Sha256);
        let blake3 = HashAnonymizer::new().with_algorithm(HashAlgorithm::Blake3);

        assert_ne!(
            sha256.hash_value("test@example.com", None),
            blake3.hash_value("test@example.com", None),
            "Different algorithms should produce different hashes"
        );
    }
}