Skip to main content

sql_splitter/redactor/
config.rs

1//! Configuration types for the redactor.
2
3use crate::parser::SqlDialect;
4use crate::redactor::StrategyKind;
5use serde::{Deserialize, Serialize};
6use std::path::PathBuf;
7
/// Runtime configuration for redaction.
///
/// Can be assembled with [`RedactConfig::builder`], which merges CLI-supplied
/// values with an optional YAML config file, and checked afterwards with
/// [`RedactConfig::validate`].
#[derive(Debug)]
pub struct RedactConfig {
    /// Input SQL file; `validate` fails if this path does not exist.
    pub input: PathBuf,
    /// Output SQL file (None for stdout)
    pub output: Option<PathBuf>,
    /// SQL dialect
    pub dialect: SqlDialect,
    /// Redaction rules; each one is checked by `validate`.
    pub rules: Vec<Rule>,
    /// Default strategy for unmatched columns
    pub default_strategy: StrategyKind,
    /// Random seed for reproducibility
    pub seed: Option<u64>,
    /// Locale for fake data; only rejected by `validate` when `strict` is set.
    pub locale: String,
    /// Tables to include (None = all)
    pub tables_filter: Option<Vec<String>>,
    /// Tables to exclude
    pub exclude: Vec<String>,
    /// Fail on warnings (in `validate`, this turns an unsupported locale into an error)
    pub strict: bool,
    /// Show progress
    pub progress: bool,
    /// Dry run mode
    pub dry_run: bool,
}
36
37impl RedactConfig {
38    /// Create a new builder
39    pub fn builder() -> RedactConfigBuilder {
40        RedactConfigBuilder::default()
41    }
42
43    /// Validate the configuration
44    pub fn validate(&self) -> anyhow::Result<()> {
45        // Check that input file exists
46        if !self.input.exists() {
47            anyhow::bail!("Input file not found: {:?}", self.input);
48        }
49
50        // Validate locale
51        if !is_valid_locale(&self.locale) && self.strict {
52            anyhow::bail!(
53                "Unsupported locale: {}. Use --locale with a supported value.",
54                self.locale
55            );
56        }
57
58        // Validate rules
59        for rule in &self.rules {
60            rule.validate()?;
61        }
62
63        Ok(())
64    }
65}
66
/// Report whether `locale` is one of the supported locale codes.
///
/// The comparison is case-insensitive: the input is lower-cased before it is
/// looked up in the table below.
fn is_valid_locale(locale: &str) -> bool {
    const SUPPORTED: [&str; 9] = [
        "en", "en_us", "de_de", "fr_fr", "zh_cn", "zh_tw", "ja_jp", "pt_br", "ar_sa",
    ];

    let normalized = locale.to_lowercase();
    SUPPORTED
        .iter()
        .any(|code| *code == normalized.as_str())
}
74
/// Builder for RedactConfig
///
/// Every field starts at its `Default` value; `build` merges these values
/// with the optional YAML config file to produce a `RedactConfig`.
#[derive(Default)]
pub struct RedactConfigBuilder {
    // Input SQL file; `build` fails when this is still `None`.
    input: Option<PathBuf>,
    // Output SQL file, passed through to `RedactConfig::output` unchanged.
    output: Option<PathBuf>,
    // SQL dialect; `build` falls back to `SqlDialect::MySql` when unset.
    dialect: Option<SqlDialect>,
    // Optional YAML config file loaded by `build`.
    config_file: Option<PathBuf>,
    // Column patterns that become `StrategyKind::Null` rules.
    null_patterns: Vec<String>,
    // Column patterns that become `StrategyKind::Hash` rules.
    hash_patterns: Vec<String>,
    // Column patterns that become `StrategyKind::Fake` rules.
    fake_patterns: Vec<String>,
    // Entries in "pattern=column" form that become `StrategyKind::Mask` rules.
    mask_patterns: Vec<String>,
    // Entries in "column=value" form that become `StrategyKind::Constant` rules.
    constant_patterns: Vec<String>,
    // Random seed; when `None`, `build` uses the YAML seed if there is one.
    seed: Option<u64>,
    // Locale; an empty string is treated as "en" by `build`.
    locale: String,
    // Tables to include, passed through unchanged.
    tables_filter: Option<Vec<String>>,
    // Tables to exclude; `build` appends the YAML `skip_tables` to this list.
    exclude: Vec<String>,
    // Passed through to `RedactConfig::strict`.
    strict: bool,
    // Passed through to `RedactConfig::progress`.
    progress: bool,
    // Passed through to `RedactConfig::dry_run`.
    dry_run: bool,
}
95
96impl RedactConfigBuilder {
97    pub fn input(mut self, path: PathBuf) -> Self {
98        self.input = Some(path);
99        self
100    }
101
102    pub fn output(mut self, path: Option<PathBuf>) -> Self {
103        self.output = path;
104        self
105    }
106
107    pub fn dialect(mut self, dialect: SqlDialect) -> Self {
108        self.dialect = Some(dialect);
109        self
110    }
111
112    pub fn config_file(mut self, path: Option<PathBuf>) -> Self {
113        self.config_file = path;
114        self
115    }
116
117    pub fn null_patterns(mut self, patterns: Vec<String>) -> Self {
118        self.null_patterns = patterns;
119        self
120    }
121
122    pub fn hash_patterns(mut self, patterns: Vec<String>) -> Self {
123        self.hash_patterns = patterns;
124        self
125    }
126
127    pub fn fake_patterns(mut self, patterns: Vec<String>) -> Self {
128        self.fake_patterns = patterns;
129        self
130    }
131
132    pub fn mask_patterns(mut self, patterns: Vec<String>) -> Self {
133        self.mask_patterns = patterns;
134        self
135    }
136
137    pub fn constant_patterns(mut self, patterns: Vec<String>) -> Self {
138        self.constant_patterns = patterns;
139        self
140    }
141
142    pub fn seed(mut self, seed: Option<u64>) -> Self {
143        self.seed = seed;
144        self
145    }
146
147    pub fn locale(mut self, locale: String) -> Self {
148        self.locale = locale;
149        self
150    }
151
152    pub fn tables_filter(mut self, tables: Option<Vec<String>>) -> Self {
153        self.tables_filter = tables;
154        self
155    }
156
157    pub fn exclude(mut self, exclude: Vec<String>) -> Self {
158        self.exclude = exclude;
159        self
160    }
161
162    pub fn strict(mut self, strict: bool) -> Self {
163        self.strict = strict;
164        self
165    }
166
167    pub fn progress(mut self, progress: bool) -> Self {
168        self.progress = progress;
169        self
170    }
171
172    pub fn dry_run(mut self, dry_run: bool) -> Self {
173        self.dry_run = dry_run;
174        self
175    }
176
177    /// Build the RedactConfig
178    pub fn build(self) -> anyhow::Result<RedactConfig> {
179        let input = self
180            .input
181            .ok_or_else(|| anyhow::anyhow!("Input file is required"))?;
182        let dialect = self.dialect.unwrap_or(SqlDialect::MySql);
183        let locale = if self.locale.is_empty() {
184            "en".to_string()
185        } else {
186            self.locale
187        };
188
189        // Load YAML config if specified
190        let yaml_config = if let Some(ref path) = self.config_file {
191            Some(RedactYamlConfig::load(path)?)
192        } else {
193            None
194        };
195
196        // Build rules from YAML + CLI patterns
197        let mut rules = Vec::new();
198
199        // Add rules from YAML config
200        if let Some(ref yaml) = yaml_config {
201            rules.extend(yaml.rules.clone());
202        }
203
204        // Add CLI patterns as rules (CLI takes precedence)
205        for pattern in &self.null_patterns {
206            rules.push(Rule {
207                column: pattern.clone(),
208                strategy: StrategyKind::Null,
209            });
210        }
211
212        for pattern in &self.hash_patterns {
213            rules.push(Rule {
214                column: pattern.clone(),
215                strategy: StrategyKind::Hash {
216                    preserve_domain: false,
217                },
218            });
219        }
220
221        for pattern in &self.fake_patterns {
222            rules.push(Rule {
223                column: pattern.clone(),
224                strategy: StrategyKind::Fake {
225                    generator: "name".to_string(),
226                },
227            });
228        }
229
230        for pattern in &self.mask_patterns {
231            // Parse "pattern=column" format
232            if let Some((mask_pattern, column)) = pattern.split_once('=') {
233                rules.push(Rule {
234                    column: column.to_string(),
235                    strategy: StrategyKind::Mask {
236                        pattern: mask_pattern.to_string(),
237                    },
238                });
239            }
240        }
241
242        for pattern in &self.constant_patterns {
243            // Parse "column=value" format
244            if let Some((column, value)) = pattern.split_once('=') {
245                rules.push(Rule {
246                    column: column.to_string(),
247                    strategy: StrategyKind::Constant {
248                        value: value.to_string(),
249                    },
250                });
251            }
252        }
253
254        // Determine default strategy
255        let default_strategy = yaml_config
256            .as_ref()
257            .and_then(|y| y.defaults.as_ref())
258            .map(|d| d.strategy.clone())
259            .unwrap_or(StrategyKind::Skip);
260
261        // Merge seed (CLI overrides YAML)
262        let seed = self
263            .seed
264            .or_else(|| yaml_config.as_ref().and_then(|y| y.seed));
265
266        // Merge locale (CLI overrides YAML)
267        let locale = if locale != "en" {
268            locale
269        } else {
270            yaml_config
271                .as_ref()
272                .and_then(|y| y.locale.clone())
273                .unwrap_or(locale)
274        };
275
276        // Merge skip_tables
277        let mut exclude = self.exclude;
278        if let Some(ref yaml) = yaml_config {
279            if let Some(ref skip) = yaml.skip_tables {
280                exclude.extend(skip.iter().cloned());
281            }
282        }
283
284        Ok(RedactConfig {
285            input,
286            output: self.output,
287            dialect,
288            rules,
289            default_strategy,
290            seed,
291            locale,
292            tables_filter: self.tables_filter,
293            exclude,
294            strict: self.strict,
295            progress: self.progress,
296            dry_run: self.dry_run,
297        })
298    }
299}
300
/// A redaction rule: a column pattern paired with the strategy to apply.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Rule {
    /// Column pattern (glob pattern like "*.email" or "users.ssn")
    pub column: String,
    /// Redaction strategy.
    ///
    /// Flattened by serde, so the strategy's own fields are (de)serialized at
    /// the same level as `column` rather than under a nested `strategy` key.
    #[serde(flatten)]
    pub strategy: StrategyKind,
}
310
311impl Rule {
312    /// Validate the rule
313    pub fn validate(&self) -> anyhow::Result<()> {
314        if self.column.is_empty() {
315            anyhow::bail!("Rule column pattern cannot be empty");
316        }
317        self.strategy.validate()
318    }
319}
320
/// YAML configuration file structure
///
/// Optional fields that are `None` are left out when the config is
/// serialized (`skip_serializing_if`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RedactYamlConfig {
    /// Random seed for reproducibility
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<u64>,

    /// Locale for fake data
    #[serde(skip_serializing_if = "Option::is_none")]
    pub locale: Option<String>,

    /// Default settings
    #[serde(skip_serializing_if = "Option::is_none")]
    pub defaults: Option<Defaults>,

    /// Redaction rules (an absent `rules` key deserializes to an empty list)
    #[serde(default)]
    pub rules: Vec<Rule>,

    /// Tables to skip entirely
    #[serde(skip_serializing_if = "Option::is_none")]
    pub skip_tables: Option<Vec<String>>,
}
344
345impl RedactYamlConfig {
346    /// Load configuration from a YAML file
347    pub fn load(path: &PathBuf) -> anyhow::Result<Self> {
348        let content = std::fs::read_to_string(path)?;
349        let config: Self = serde_yaml::from_str(&content)?;
350        Ok(config)
351    }
352
353    /// Save configuration to a YAML file
354    pub fn save(&self, path: &PathBuf) -> anyhow::Result<()> {
355        let content = serde_yaml::to_string(self)?;
356        std::fs::write(path, content)?;
357        Ok(())
358    }
359}
360
/// Default settings in YAML config (the `defaults` section of `RedactYamlConfig`)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Defaults {
    /// Default strategy for columns not matching any rule
    pub strategy: StrategyKind,
}
366}