Skip to main content

redact_core/anonymizers/
mod.rs

1// Copyright 2026 Censgate LLC.
2// Licensed under the Apache License, Version 2.0. See the LICENSE file
3// in the project root for license information.
4
5pub mod encrypt;
6pub mod hash;
7pub mod mask;
8pub mod registry;
9pub mod replace;
10
11pub use registry::AnonymizerRegistry;
12
13use crate::types::{AnonymizedResult, RecognizerResult};
14use anyhow::Result;
15use serde::{Deserialize, Serialize};
16use std::fmt::Debug;
17
/// Strategy for anonymization.
///
/// With `#[serde(rename_all = "lowercase")]` each variant is (de)serialized
/// under its lowercase name (`"replace"`, `"mask"`, `"hash"`, `"encrypt"`,
/// `"redact"`). `Replace` is the variant returned by `Default::default()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum AnonymizationStrategy {
    /// Simple text replacement (the default strategy)
    #[default]
    Replace,
    /// Partial masking (e.g., ***@***.com)
    Mask,
    /// Irreversible hashing
    Hash,
    /// Reversible encryption
    Encrypt,
    /// Remove entirely
    Redact,
}
34
/// Configuration for anonymization.
///
/// One struct carries the options for every strategy; each field notes which
/// strategy it is meant for.
///
/// NOTE(review): the derived `Debug` prints `encryption_key` and `hash_salt`
/// verbatim, so formatting this struct with `{:?}` exposes those secrets —
/// consider a redacting `Debug` impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnonymizerConfig {
    /// Default strategy to use.
    ///
    /// Carries no `#[serde(default)]`, so it presumably has to be present in
    /// deserialized input — confirm against the config loaders.
    pub strategy: AnonymizationStrategy,

    /// Masking character (for mask strategy).
    ///
    /// Falls back to `default_mask_char()` when missing from deserialized input.
    #[serde(default = "default_mask_char")]
    pub mask_char: char,

    /// Number of characters to show at start (for mask strategy).
    ///
    /// Falls back to `usize::default()` (0) when missing from deserialized input.
    #[serde(default)]
    pub mask_start_chars: usize,

    /// Number of characters to show at end (for mask strategy).
    ///
    /// Falls back to `usize::default()` (0) when missing from deserialized input.
    #[serde(default)]
    pub mask_end_chars: usize,

    /// Encryption key (for encrypt strategy).
    ///
    /// Left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub encryption_key: Option<String>,

    /// Salt for hashing (for hash strategy).
    ///
    /// Left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hash_salt: Option<String>,

    /// Whether to preserve format (e.g., XXX-XX-XXXX for SSN).
    ///
    /// Falls back to `false` when missing from deserialized input.
    #[serde(default)]
    pub preserve_format: bool,
}
65
/// Mask character used when a configuration does not specify one.
///
/// Referenced by name from the `#[serde(default = "default_mask_char")]`
/// attribute on `AnonymizerConfig::mask_char`.
fn default_mask_char() -> char {
    const DEFAULT_MASK_CHAR: char = '*';
    DEFAULT_MASK_CHAR
}
69
70impl Default for AnonymizerConfig {
71    fn default() -> Self {
72        Self {
73            strategy: AnonymizationStrategy::Replace,
74            mask_char: '*',
75            mask_start_chars: 0,
76            mask_end_chars: 0,
77            encryption_key: None,
78            hash_salt: None,
79            preserve_format: false,
80        }
81    }
82}
83
/// Trait for all anonymizers.
///
/// The supertrait bounds require every implementor to be `Send + Sync` and to
/// implement `Debug`.
pub trait Anonymizer: Send + Sync + Debug {
    /// Get the name of this anonymizer
    fn name(&self) -> &str;

    /// Anonymize text based on recognized entities.
    ///
    /// Receives the input `text`, the recognized `entities` (taken by value)
    /// and the `config` to apply. Returns an `AnonymizedResult` on success or
    /// an `anyhow` error on failure.
    fn anonymize(
        &self,
        text: &str,
        entities: Vec<RecognizerResult>,
        config: &AnonymizerConfig,
    ) -> Result<AnonymizedResult>;
}
97
98/// Helper to apply anonymization to text
99pub fn apply_anonymization(
100    text: &str,
101    entities: &[RecognizerResult],
102    replacement_fn: impl Fn(&RecognizerResult, &str) -> String,
103) -> String {
104    if entities.is_empty() {
105        return text.to_string();
106    }
107
108    let mut result = String::with_capacity(text.len());
109    let mut last_end = 0;
110
111    // Sort entities by start position
112    let mut sorted_entities = entities.to_vec();
113    sorted_entities.sort_by_key(|e| e.start);
114
115    for entity in sorted_entities {
116        // Add text before this entity
117        if entity.start > last_end {
118            result.push_str(&text[last_end..entity.start]);
119        }
120
121        // Get original text
122        let original = if entity.end <= text.len() {
123            &text[entity.start..entity.end]
124        } else {
125            ""
126        };
127
128        // Add replacement
129        result.push_str(&replacement_fn(&entity, original));
130
131        last_end = entity.end;
132    }
133
134    // Add remaining text
135    if last_end < text.len() {
136        result.push_str(&text[last_end..]);
137    }
138
139    result
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// Replacement shared by the tests: the entity type label in square brackets.
    fn bracketed_label(entity: &RecognizerResult, _original: &str) -> String {
        format!("[{}]", entity.entity_type.as_str())
    }

    /// Two separated entities are each swapped for their bracketed label.
    #[test]
    fn test_apply_anonymization() {
        let input = "Email: john@example.com, Phone: 555-1234";
        // Byte spans: `john@example.com` is 7..23, `555-1234` is 32..40.
        let email = RecognizerResult::new(EntityType::EmailAddress, 7, 23, 0.9, "test");
        let phone = RecognizerResult::new(EntityType::PhoneNumber, 32, 40, 0.8, "test");
        let detected = vec![email, phone];

        let anonymized = apply_anonymization(input, &detected, bracketed_label);

        assert_eq!(anonymized, "Email: [EMAIL_ADDRESS], Phone: [PHONE_NUMBER]");
    }

    /// With no entities the text comes back unchanged.
    #[test]
    fn test_apply_anonymization_empty() {
        let input = "No PII here";
        let detected: Vec<RecognizerResult> = Vec::new();

        let anonymized = apply_anonymization(input, &detected, bracketed_label);

        assert_eq!(anonymized, input);
    }

    /// Entities that touch (0..1 and 1..2) are both replaced.
    #[test]
    fn test_apply_anonymization_adjacent() {
        let input = "AB";
        let detected = vec![
            RecognizerResult::new(EntityType::Person, 0, 1, 0.9, "test"),
            RecognizerResult::new(EntityType::Person, 1, 2, 0.9, "test"),
        ];

        let anonymized = apply_anonymization(input, &detected, |_, _| String::from("X"));

        assert_eq!(anonymized, "XX");
    }
}