sql_splitter/redactor/
config.rs

1//! Configuration types for the redactor.
2
3use crate::parser::SqlDialect;
4use crate::redactor::StrategyKind;
5use serde::{Deserialize, Serialize};
6use std::path::PathBuf;
7
8/// Runtime configuration for redaction
9#[derive(Debug)]
10pub struct RedactConfig {
11    /// Input SQL file
12    pub input: PathBuf,
13    /// Output SQL file (None for stdout)
14    pub output: Option<PathBuf>,
15    /// SQL dialect
16    pub dialect: SqlDialect,
17    /// Redaction rules
18    pub rules: Vec<Rule>,
19    /// Default strategy for unmatched columns
20    pub default_strategy: StrategyKind,
21    /// Random seed for reproducibility
22    pub seed: Option<u64>,
23    /// Locale for fake data
24    pub locale: String,
25    /// Tables to include (None = all)
26    pub tables_filter: Option<Vec<String>>,
27    /// Tables to exclude
28    pub exclude: Vec<String>,
29    /// Fail on warnings
30    pub strict: bool,
31    /// Show progress
32    pub progress: bool,
33    /// Dry run mode
34    pub dry_run: bool,
35}
36
37impl RedactConfig {
38    /// Create a new builder
39    pub fn builder() -> RedactConfigBuilder {
40        RedactConfigBuilder::default()
41    }
42
43    /// Validate the configuration
44    pub fn validate(&self) -> anyhow::Result<()> {
45        // Check that input file exists
46        if !self.input.exists() {
47            anyhow::bail!("Input file not found: {:?}", self.input);
48        }
49
50        // Validate locale
51        if !is_valid_locale(&self.locale) && self.strict {
52            anyhow::bail!("Unsupported locale: {}. Use --locale with a supported value.", self.locale);
53        }
54
55        // Validate rules
56        for rule in &self.rules {
57            rule.validate()?;
58        }
59
60        Ok(())
61    }
62}
63
/// Reports whether `locale` is one of the supported locale identifiers.
///
/// The input is lowercased before comparison, so letter case is ignored.
/// Only the exact spellings in the table below are accepted.
fn is_valid_locale(locale: &str) -> bool {
    const SUPPORTED: [&str; 9] = [
        "en", "en_us", "de_de", "fr_fr", "zh_cn", "zh_tw", "ja_jp", "pt_br", "ar_sa",
    ];

    let normalized = locale.to_lowercase();
    SUPPORTED.contains(&normalized.as_str())
}
71
72/// Builder for RedactConfig
73#[derive(Default)]
74pub struct RedactConfigBuilder {
75    input: Option<PathBuf>,
76    output: Option<PathBuf>,
77    dialect: Option<SqlDialect>,
78    config_file: Option<PathBuf>,
79    null_patterns: Vec<String>,
80    hash_patterns: Vec<String>,
81    fake_patterns: Vec<String>,
82    mask_patterns: Vec<String>,
83    constant_patterns: Vec<String>,
84    seed: Option<u64>,
85    locale: String,
86    tables_filter: Option<Vec<String>>,
87    exclude: Vec<String>,
88    strict: bool,
89    progress: bool,
90    dry_run: bool,
91}
92
93impl RedactConfigBuilder {
94    pub fn input(mut self, path: PathBuf) -> Self {
95        self.input = Some(path);
96        self
97    }
98
99    pub fn output(mut self, path: Option<PathBuf>) -> Self {
100        self.output = path;
101        self
102    }
103
104    pub fn dialect(mut self, dialect: SqlDialect) -> Self {
105        self.dialect = Some(dialect);
106        self
107    }
108
109    pub fn config_file(mut self, path: Option<PathBuf>) -> Self {
110        self.config_file = path;
111        self
112    }
113
114    pub fn null_patterns(mut self, patterns: Vec<String>) -> Self {
115        self.null_patterns = patterns;
116        self
117    }
118
119    pub fn hash_patterns(mut self, patterns: Vec<String>) -> Self {
120        self.hash_patterns = patterns;
121        self
122    }
123
124    pub fn fake_patterns(mut self, patterns: Vec<String>) -> Self {
125        self.fake_patterns = patterns;
126        self
127    }
128
129    pub fn mask_patterns(mut self, patterns: Vec<String>) -> Self {
130        self.mask_patterns = patterns;
131        self
132    }
133
134    pub fn constant_patterns(mut self, patterns: Vec<String>) -> Self {
135        self.constant_patterns = patterns;
136        self
137    }
138
139    pub fn seed(mut self, seed: Option<u64>) -> Self {
140        self.seed = seed;
141        self
142    }
143
144    pub fn locale(mut self, locale: String) -> Self {
145        self.locale = locale;
146        self
147    }
148
149    pub fn tables_filter(mut self, tables: Option<Vec<String>>) -> Self {
150        self.tables_filter = tables;
151        self
152    }
153
154    pub fn exclude(mut self, exclude: Vec<String>) -> Self {
155        self.exclude = exclude;
156        self
157    }
158
159    pub fn strict(mut self, strict: bool) -> Self {
160        self.strict = strict;
161        self
162    }
163
164    pub fn progress(mut self, progress: bool) -> Self {
165        self.progress = progress;
166        self
167    }
168
169    pub fn dry_run(mut self, dry_run: bool) -> Self {
170        self.dry_run = dry_run;
171        self
172    }
173
174    /// Build the RedactConfig
175    pub fn build(self) -> anyhow::Result<RedactConfig> {
176        let input = self.input.ok_or_else(|| anyhow::anyhow!("Input file is required"))?;
177        let dialect = self.dialect.unwrap_or(SqlDialect::MySql);
178        let locale = if self.locale.is_empty() { "en".to_string() } else { self.locale };
179
180        // Load YAML config if specified
181        let yaml_config = if let Some(ref path) = self.config_file {
182            Some(RedactYamlConfig::load(path)?)
183        } else {
184            None
185        };
186
187        // Build rules from YAML + CLI patterns
188        let mut rules = Vec::new();
189
190        // Add rules from YAML config
191        if let Some(ref yaml) = yaml_config {
192            rules.extend(yaml.rules.clone());
193        }
194
195        // Add CLI patterns as rules (CLI takes precedence)
196        for pattern in &self.null_patterns {
197            rules.push(Rule {
198                column: pattern.clone(),
199                strategy: StrategyKind::Null,
200            });
201        }
202
203        for pattern in &self.hash_patterns {
204            rules.push(Rule {
205                column: pattern.clone(),
206                strategy: StrategyKind::Hash {
207                    preserve_domain: false,
208                },
209            });
210        }
211
212        for pattern in &self.fake_patterns {
213            rules.push(Rule {
214                column: pattern.clone(),
215                strategy: StrategyKind::Fake {
216                    generator: "name".to_string(),
217                },
218            });
219        }
220
221        for pattern in &self.mask_patterns {
222            // Parse "pattern=column" format
223            if let Some((mask_pattern, column)) = pattern.split_once('=') {
224                rules.push(Rule {
225                    column: column.to_string(),
226                    strategy: StrategyKind::Mask {
227                        pattern: mask_pattern.to_string(),
228                    },
229                });
230            }
231        }
232
233        for pattern in &self.constant_patterns {
234            // Parse "column=value" format
235            if let Some((column, value)) = pattern.split_once('=') {
236                rules.push(Rule {
237                    column: column.to_string(),
238                    strategy: StrategyKind::Constant {
239                        value: value.to_string(),
240                    },
241                });
242            }
243        }
244
245        // Determine default strategy
246        let default_strategy = yaml_config
247            .as_ref()
248            .and_then(|y| y.defaults.as_ref())
249            .map(|d| d.strategy.clone())
250            .unwrap_or(StrategyKind::Skip);
251
252        // Merge seed (CLI overrides YAML)
253        let seed = self.seed.or_else(|| yaml_config.as_ref().and_then(|y| y.seed));
254
255        // Merge locale (CLI overrides YAML)
256        let locale = if locale != "en" {
257            locale
258        } else {
259            yaml_config
260                .as_ref()
261                .and_then(|y| y.locale.clone())
262                .unwrap_or(locale)
263        };
264
265        // Merge skip_tables
266        let mut exclude = self.exclude;
267        if let Some(ref yaml) = yaml_config {
268            if let Some(ref skip) = yaml.skip_tables {
269                exclude.extend(skip.iter().cloned());
270            }
271        }
272
273        Ok(RedactConfig {
274            input,
275            output: self.output,
276            dialect,
277            rules,
278            default_strategy,
279            seed,
280            locale,
281            tables_filter: self.tables_filter,
282            exclude,
283            strict: self.strict,
284            progress: self.progress,
285            dry_run: self.dry_run,
286        })
287    }
288}
289
290/// A redaction rule
291#[derive(Debug, Clone, Serialize, Deserialize)]
292pub struct Rule {
293    /// Column pattern (glob pattern like "*.email" or "users.ssn")
294    pub column: String,
295    /// Redaction strategy
296    #[serde(flatten)]
297    pub strategy: StrategyKind,
298}
299
300impl Rule {
301    /// Validate the rule
302    pub fn validate(&self) -> anyhow::Result<()> {
303        if self.column.is_empty() {
304            anyhow::bail!("Rule column pattern cannot be empty");
305        }
306        self.strategy.validate()
307    }
308}
309
310/// YAML configuration file structure
311#[derive(Debug, Clone, Serialize, Deserialize)]
312pub struct RedactYamlConfig {
313    /// Random seed for reproducibility
314    #[serde(skip_serializing_if = "Option::is_none")]
315    pub seed: Option<u64>,
316
317    /// Locale for fake data
318    #[serde(skip_serializing_if = "Option::is_none")]
319    pub locale: Option<String>,
320
321    /// Default settings
322    #[serde(skip_serializing_if = "Option::is_none")]
323    pub defaults: Option<Defaults>,
324
325    /// Redaction rules
326    #[serde(default)]
327    pub rules: Vec<Rule>,
328
329    /// Tables to skip entirely
330    #[serde(skip_serializing_if = "Option::is_none")]
331    pub skip_tables: Option<Vec<String>>,
332}
333
334impl RedactYamlConfig {
335    /// Load configuration from a YAML file
336    pub fn load(path: &PathBuf) -> anyhow::Result<Self> {
337        let content = std::fs::read_to_string(path)?;
338        let config: Self = serde_yaml::from_str(&content)?;
339        Ok(config)
340    }
341
342    /// Save configuration to a YAML file
343    pub fn save(&self, path: &PathBuf) -> anyhow::Result<()> {
344        let content = serde_yaml::to_string(self)?;
345        std::fs::write(path, content)?;
346        Ok(())
347    }
348}
349
350/// Default settings in YAML config
351#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct Defaults {
353    /// Default strategy for columns not matching any rule
354    pub strategy: StrategyKind,
355}