Skip to main content

redact_core/anonymizers/
mod.rs

// Copyright (c) 2026 Censgate LLC.
// Licensed under the Business Source License 1.1 (BUSL-1.1).
// See the LICENSE file in the project root for license details,
// including the Additional Use Grant, Change Date, and Change License.
5
6pub mod encrypt;
7pub mod hash;
8pub mod mask;
9pub mod registry;
10pub mod replace;
11
12pub use registry::AnonymizerRegistry;
13
14use crate::types::{AnonymizedResult, RecognizerResult};
15use anyhow::Result;
16use serde::{Deserialize, Serialize};
17use std::fmt::Debug;
18
19/// Strategy for anonymization
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
21#[serde(rename_all = "lowercase")]
22pub enum AnonymizationStrategy {
23    /// Simple text replacement
24    #[default]
25    Replace,
26    /// Partial masking (e.g., ***@***.com)
27    Mask,
28    /// Irreversible hashing
29    Hash,
30    /// Reversible encryption
31    Encrypt,
32    /// Remove entirely
33    Redact,
34}
35
36/// Configuration for anonymization
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct AnonymizerConfig {
39    /// Default strategy to use
40    pub strategy: AnonymizationStrategy,
41
42    /// Masking character (for mask strategy)
43    #[serde(default = "default_mask_char")]
44    pub mask_char: char,
45
46    /// Number of characters to show at start (for mask strategy)
47    #[serde(default)]
48    pub mask_start_chars: usize,
49
50    /// Number of characters to show at end (for mask strategy)
51    #[serde(default)]
52    pub mask_end_chars: usize,
53
54    /// Encryption key (for encrypt strategy)
55    #[serde(skip_serializing_if = "Option::is_none")]
56    pub encryption_key: Option<String>,
57
58    /// Salt for hashing (for hash strategy)
59    #[serde(skip_serializing_if = "Option::is_none")]
60    pub hash_salt: Option<String>,
61
62    /// Whether to preserve format (e.g., XXX-XX-XXXX for SSN)
63    #[serde(default)]
64    pub preserve_format: bool,
65}
66
/// Serde fallback for [`AnonymizerConfig::mask_char`]: an asterisk.
fn default_mask_char() -> char {
    const ASTERISK: char = '*';
    ASTERISK
}
70
71impl Default for AnonymizerConfig {
72    fn default() -> Self {
73        Self {
74            strategy: AnonymizationStrategy::Replace,
75            mask_char: '*',
76            mask_start_chars: 0,
77            mask_end_chars: 0,
78            encryption_key: None,
79            hash_salt: None,
80            preserve_format: false,
81        }
82    }
83}
84
85/// Trait for all anonymizers
86pub trait Anonymizer: Send + Sync + Debug {
87    /// Get the name of this anonymizer
88    fn name(&self) -> &str;
89
90    /// Anonymize text based on recognized entities
91    fn anonymize(
92        &self,
93        text: &str,
94        entities: Vec<RecognizerResult>,
95        config: &AnonymizerConfig,
96    ) -> Result<AnonymizedResult>;
97}
98
99/// Helper to apply anonymization to text
100pub fn apply_anonymization(
101    text: &str,
102    entities: &[RecognizerResult],
103    replacement_fn: impl Fn(&RecognizerResult, &str) -> String,
104) -> String {
105    if entities.is_empty() {
106        return text.to_string();
107    }
108
109    let mut result = String::with_capacity(text.len());
110    let mut last_end = 0;
111
112    // Sort entities by start position
113    let mut sorted_entities = entities.to_vec();
114    sorted_entities.sort_by_key(|e| e.start);
115
116    for entity in sorted_entities {
117        // Add text before this entity
118        if entity.start > last_end {
119            result.push_str(&text[last_end..entity.start]);
120        }
121
122        // Get original text
123        let original = if entity.end <= text.len() {
124            &text[entity.start..entity.end]
125        } else {
126            ""
127        };
128
129        // Add replacement
130        result.push_str(&replacement_fn(&entity, original));
131
132        last_end = entity.end;
133    }
134
135    // Add remaining text
136    if last_end < text.len() {
137        result.push_str(&text[last_end..]);
138    }
139
140    result
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Replacement shared by the labelled tests: the entity type name in square brackets.
    fn bracketed_label(entity: &RecognizerResult, _original: &str) -> String {
        format!("[{}]", entity.entity_type.as_str())
    }

    #[test]
    fn test_apply_anonymization() {
        // Byte ranges: 7..23 covers the email address, 32..40 the phone number.
        let email = RecognizerResult::new(EntityType::EmailAddress, 7, 23, 0.9, "test");
        let phone = RecognizerResult::new(EntityType::PhoneNumber, 32, 40, 0.8, "test");

        let anonymized = apply_anonymization(
            "Email: john@example.com, Phone: 555-1234",
            &[email, phone],
            bracketed_label,
        );

        assert_eq!(anonymized, "Email: [EMAIL_ADDRESS], Phone: [PHONE_NUMBER]");
    }

    #[test]
    fn test_apply_anonymization_empty() {
        // With no entities the input must come back unchanged.
        let input = "No PII here";

        let anonymized = apply_anonymization(input, &[], bracketed_label);

        assert_eq!(anonymized, input);
    }

    #[test]
    fn test_apply_anonymization_adjacent() {
        // Two spans that touch at byte 1, with no text between them.
        let spans = [
            RecognizerResult::new(EntityType::Person, 0, 1, 0.9, "test"),
            RecognizerResult::new(EntityType::Person, 1, 2, 0.9, "test"),
        ];

        let anonymized = apply_anonymization("AB", &spans, |_, _| String::from("X"));

        assert_eq!(anonymized, "XX");
    }
}