Skip to main content

sql_splitter/redactor/strategy/
mod.rs

//! Redaction strategies for anonymizing data.
//!
//! Each strategy implements a different approach to redacting values:
//! - `null`: Replace with NULL
//! - `constant`: Replace with a fixed value
//! - `hash`: One-way hash (deterministic)
//! - `mask`: Partial masking with pattern
//! - `shuffle`: Redistribute values within column
//! - `fake`: Generate realistic fake data
//! - `skip`: No redaction (passthrough)
11
12mod constant;
13mod fake;
14mod hash;
15mod mask;
16mod null;
17mod shuffle;
18mod skip;
19
20pub use constant::ConstantStrategy;
21pub use fake::FakeStrategy;
22pub use hash::HashStrategy;
23pub use mask::MaskStrategy;
24pub use null::NullStrategy;
25#[allow(unused_imports)] // Public API re-export
26pub use shuffle::ShuffleStrategy;
27#[allow(unused_imports)] // Public API re-export
28pub use skip::SkipStrategy;
29
30use serde::{Deserialize, Serialize};
31
32/// Redaction strategy kind with associated configuration
33#[derive(Debug, Clone, Default, Serialize, Deserialize)]
34#[serde(tag = "strategy", rename_all = "snake_case")]
35pub enum StrategyKind {
36    /// Replace value with NULL
37    Null,
38
39    /// Replace with a constant value
40    Constant {
41        /// The constant value to use
42        value: String,
43    },
44
45    /// One-way hash (SHA256)
46    Hash {
47        /// Preserve email domain (user@domain.com → hash@domain.com)
48        #[serde(default)]
49        preserve_domain: bool,
50    },
51
52    /// Partial masking with pattern
53    Mask {
54        /// Pattern: * = asterisk, X = keep, # = random digit
55        pattern: String,
56    },
57
58    /// Shuffle values within the column
59    Shuffle,
60
61    /// Generate fake data
62    Fake {
63        /// Generator name: email, name, phone, address, etc.
64        generator: String,
65    },
66
67    /// No redaction (passthrough)
68    #[default]
69    Skip,
70}
71
72impl StrategyKind {
73    /// Validate the strategy configuration
74    pub fn validate(&self) -> anyhow::Result<()> {
75        match self {
76            StrategyKind::Null => Ok(()),
77            StrategyKind::Constant { value } => {
78                if value.is_empty() {
79                    anyhow::bail!("Constant strategy requires a non-empty value");
80                }
81                Ok(())
82            }
83            StrategyKind::Hash { .. } => Ok(()),
84            StrategyKind::Mask { pattern } => {
85                if pattern.is_empty() {
86                    anyhow::bail!("Mask strategy requires a non-empty pattern");
87                }
88                // Validate pattern characters
89                for c in pattern.chars() {
90                    if !matches!(c, '*' | 'X' | '#' | '-' | ' ' | '.' | '@' | '(' | ')') {
91                        // Allow common separator chars
92                    }
93                }
94                Ok(())
95            }
96            StrategyKind::Shuffle => Ok(()),
97            StrategyKind::Fake { generator } => {
98                if !is_valid_generator(generator) {
99                    anyhow::bail!("Unknown fake generator: {}. Use: email, name, first_name, last_name, phone, address, city, zip, company, ip, uuid, date, etc.", generator);
100                }
101                Ok(())
102            }
103            StrategyKind::Skip => Ok(()),
104        }
105    }
106}
107
/// Check if a fake generator name is valid.
///
/// The comparison ignores ASCII case, so `"Email"` and `"EMAIL"` are both
/// accepted. Surrounding whitespace is not trimmed.
fn is_valid_generator(name: &str) -> bool {
    // Accepted names, all lowercase ASCII. Aliases (e.g. `zip` / `zip_code` /
    // `postal_code`) are listed individually.
    const KNOWN_GENERATORS: &[&str] = &[
        "email",
        "safe_email",
        "name",
        "first_name",
        "last_name",
        "full_name",
        "phone",
        "phone_number",
        "address",
        "street_address",
        "city",
        "state",
        "zip",
        "zip_code",
        "postal_code",
        "country",
        "company",
        "company_name",
        "job_title",
        "username",
        "user_name",
        "url",
        "ip",
        "ip_address",
        "ipv4",
        "ipv6",
        "uuid",
        "date",
        "date_time",
        "datetime",
        "time",
        "credit_card",
        "iban",
        "lorem",
        "paragraph",
        "sentence",
        "word",
        "ssn",
    ];

    // Compare in place rather than allocating a lowercased copy of `name`.
    KNOWN_GENERATORS
        .iter()
        .any(|known| known.eq_ignore_ascii_case(name))
}
152
/// Value representation for redaction
#[derive(Debug, Clone)]
pub enum RedactValue {
    /// NULL value
    Null,
    /// String value (may contain SQL escaping)
    String(String),
    /// Integer value
    Integer(i64),
    /// Raw bytes (for binary data)
    Bytes(Vec<u8>),
}

impl RedactValue {
    /// Check if this is a NULL value.
    ///
    /// Returns `true` only for [`RedactValue::Null`].
    pub fn is_null(&self) -> bool {
        matches!(self, RedactValue::Null)
    }

    /// Borrow the contents of a [`RedactValue::String`].
    ///
    /// Returns `None` for every other variant — not just `Null`, but also
    /// `Integer` and `Bytes`; no conversion to text is attempted.
    pub fn as_str(&self) -> Option<&str> {
        match self {
            RedactValue::String(text) => Some(text.as_str()),
            RedactValue::Null | RedactValue::Integer(_) | RedactValue::Bytes(_) => None,
        }
    }
}
180
181/// Trait for redaction strategies
182pub trait Strategy: Send + Sync {
183    /// Apply the strategy to redact a value
184    fn apply(&self, value: &RedactValue, rng: &mut dyn rand::RngCore) -> RedactValue;
185
186    /// Get the strategy kind
187    fn kind(&self) -> StrategyKind;
188}