Skip to main content

sql_splitter/redactor/strategy/
mod.rs

//! Redaction strategies for anonymizing data.
//!
//! Each strategy implements a different approach to redacting values:
//! - `null`: Replace the value with NULL
//! - `constant`: Replace the value with a fixed value
//! - `hash`: Replace the value with a deterministic one-way hash
//! - `mask`: Partially mask the value according to a pattern
//! - `shuffle`: Redistribute values within the column
//! - `fake`: Generate realistic fake data
//! - `skip`: No redaction (passthrough)
11
12mod constant;
13mod fake;
14mod hash;
15mod mask;
16mod null;
17mod shuffle;
18mod skip;
19
20// Strategy structs - will be used in Phase 3 when INSERT/COPY rewriting is implemented
21#[allow(unused_imports)]
22pub use constant::ConstantStrategy;
23#[allow(unused_imports)]
24pub use fake::FakeStrategy;
25#[allow(unused_imports)]
26pub use hash::HashStrategy;
27#[allow(unused_imports)]
28pub use mask::MaskStrategy;
29#[allow(unused_imports)]
30pub use null::NullStrategy;
31#[allow(unused_imports)]
32pub use shuffle::ShuffleStrategy;
33#[allow(unused_imports)]
34pub use skip::SkipStrategy;
35
36use serde::{Deserialize, Serialize};
37
38/// Redaction strategy kind with associated configuration
39#[derive(Debug, Clone, Default, Serialize, Deserialize)]
40#[serde(tag = "strategy", rename_all = "snake_case")]
41pub enum StrategyKind {
42    /// Replace value with NULL
43    Null,
44
45    /// Replace with a constant value
46    Constant {
47        /// The constant value to use
48        value: String,
49    },
50
51    /// One-way hash (SHA256)
52    Hash {
53        /// Preserve email domain (user@domain.com → hash@domain.com)
54        #[serde(default)]
55        preserve_domain: bool,
56    },
57
58    /// Partial masking with pattern
59    Mask {
60        /// Pattern: * = asterisk, X = keep, # = random digit
61        pattern: String,
62    },
63
64    /// Shuffle values within the column
65    Shuffle,
66
67    /// Generate fake data
68    Fake {
69        /// Generator name: email, name, phone, address, etc.
70        generator: String,
71    },
72
73    /// No redaction (passthrough)
74    #[default]
75    Skip,
76}
77
78impl StrategyKind {
79    /// Validate the strategy configuration
80    pub fn validate(&self) -> anyhow::Result<()> {
81        match self {
82            StrategyKind::Null => Ok(()),
83            StrategyKind::Constant { value } => {
84                if value.is_empty() {
85                    anyhow::bail!("Constant strategy requires a non-empty value");
86                }
87                Ok(())
88            }
89            StrategyKind::Hash { .. } => Ok(()),
90            StrategyKind::Mask { pattern } => {
91                if pattern.is_empty() {
92                    anyhow::bail!("Mask strategy requires a non-empty pattern");
93                }
94                // Validate pattern characters
95                for c in pattern.chars() {
96                    if !matches!(c, '*' | 'X' | '#' | '-' | ' ' | '.' | '@' | '(' | ')') {
97                        // Allow common separator chars
98                    }
99                }
100                Ok(())
101            }
102            StrategyKind::Shuffle => Ok(()),
103            StrategyKind::Fake { generator } => {
104                if !is_valid_generator(generator) {
105                    anyhow::bail!("Unknown fake generator: {}. Use: email, name, first_name, last_name, phone, address, city, zip, company, ip, uuid, date, etc.", generator);
106                }
107                Ok(())
108            }
109            StrategyKind::Skip => Ok(()),
110        }
111    }
112
113    /// Get the YAML string representation of this strategy
114    pub fn to_yaml_str(&self) -> &'static str {
115        match self {
116            StrategyKind::Null => "null",
117            StrategyKind::Constant { .. } => "constant",
118            StrategyKind::Hash { .. } => "hash",
119            StrategyKind::Mask { .. } => "mask",
120            StrategyKind::Shuffle => "shuffle",
121            StrategyKind::Fake { .. } => "fake",
122            StrategyKind::Skip => "skip",
123        }
124    }
125}
126
/// Report whether `name` is one of the recognised fake-data generator names.
///
/// The comparison is case-insensitive: `name` is lowercased before it is
/// looked up. No trimming is performed, so surrounding whitespace makes a
/// name unrecognised.
fn is_valid_generator(name: &str) -> bool {
    // Every accepted generator name, in lowercase.
    const KNOWN_GENERATORS: &[&str] = &[
        "email",
        "safe_email",
        "name",
        "first_name",
        "last_name",
        "full_name",
        "phone",
        "phone_number",
        "address",
        "street_address",
        "city",
        "state",
        "zip",
        "zip_code",
        "postal_code",
        "country",
        "company",
        "company_name",
        "job_title",
        "username",
        "user_name",
        "url",
        "ip",
        "ip_address",
        "ipv4",
        "ipv6",
        "uuid",
        "date",
        "date_time",
        "datetime",
        "time",
        "credit_card",
        "iban",
        "lorem",
        "paragraph",
        "sentence",
        "word",
        "ssn",
    ];

    let lowered = name.to_lowercase();
    KNOWN_GENERATORS.contains(&lowered.as_str())
}
171
/// A single value as seen by the redaction strategies.
#[derive(Debug, Clone)]
pub enum RedactValue {
    /// NULL value
    Null,
    /// String value (may contain SQL escaping)
    String(String),
    /// Integer value
    Integer(i64),
    /// Raw bytes (for binary data)
    Bytes(Vec<u8>),
}

impl RedactValue {
    /// Returns `true` only for the `Null` variant.
    pub fn is_null(&self) -> bool {
        match self {
            Self::Null => true,
            Self::String(_) | Self::Integer(_) | Self::Bytes(_) => false,
        }
    }

    /// Borrow the text of a `String` value.
    ///
    /// Returns `None` for every other variant (`Null`, `Integer` and `Bytes`).
    pub fn as_str(&self) -> Option<&str> {
        if let Self::String(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }

    /// Render the value as an owned string.
    ///
    /// `Null` becomes the literal text `NULL`, strings are copied as-is,
    /// integers are formatted in decimal, and bytes are decoded as UTF-8 with
    /// invalid sequences replaced by U+FFFD.
    pub fn to_string_value(&self) -> String {
        match self {
            Self::Null => String::from("NULL"),
            Self::String(text) => text.to_owned(),
            Self::Integer(number) => number.to_string(),
            Self::Bytes(raw) => String::from_utf8_lossy(raw).into_owned(),
        }
    }
}
209
210/// Trait for redaction strategies
211pub trait Strategy: Send + Sync {
212    /// Apply the strategy to redact a value
213    fn apply(&self, value: &RedactValue, rng: &mut dyn rand::RngCore) -> RedactValue;
214
215    /// Get the strategy kind
216    fn kind(&self) -> StrategyKind;
217}