Skip to main content

sanitize_engine/processor/
toml_proc.rs

1//! TOML structured processor.
2//!
3//! Parses TOML input, walks the value tree, replaces matched field
4//! values, and serializes back to TOML preserving structure.
5//!
6//! # Key Paths
7//!
8//! Nested keys use the same dot-separated convention as the JSON processor:
9//! `database.password`, `server.credentials.token`.
10//!
11//! Array elements are traversed transparently — a rule for `servers.host`
12//! matches the `host` field inside every table in the `servers` array.
13//!
14//! # Non-String Scalars
15//!
16//! When a FieldRule matches an integer, float, boolean, or datetime value,
17//! that value is converted to a string replacement. This changes the TOML
18//! type for that key but keeps the file syntactically valid. Use specific
19//! field rules (e.g. `"database.password"`) rather than `"*"` if you want
20//! to avoid replacing non-sensitive numeric values.
21
22use crate::error::{Result, SanitizeError};
23use crate::processor::{build_path, find_matching_rule, replace_value, FileTypeProfile, Processor};
24use crate::store::MappingStore;
25use toml::Value;
26
/// Maximum recursion depth for walking TOML value trees.
///
/// `walk_toml` returns `SanitizeError::RecursionDepthExceeded` as soon as its
/// `depth` argument is greater than this value.
const MAX_TOML_DEPTH: usize = 128;
29
/// Maximum allowed input size (bytes) for TOML processing.
///
/// `TomlProcessor::process` rejects longer inputs with
/// `SanitizeError::InputTooLarge` before decoding or parsing them.
const MAX_TOML_INPUT_SIZE: usize = 256 * 1024 * 1024; // 256 MiB
32
/// Structured processor for TOML configuration files.
///
/// The type is a stateless unit struct, so it derives the cheap standard
/// traits: values can be printed in diagnostics, copied freely, and created
/// through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct TomlProcessor;
35
36impl Processor for TomlProcessor {
37    fn name(&self) -> &'static str {
38        "toml"
39    }
40
41    fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
42        profile.processor == "toml"
43    }
44
45    fn process(
46        &self,
47        content: &[u8],
48        profile: &FileTypeProfile,
49        store: &MappingStore,
50    ) -> Result<Vec<u8>> {
51        if content.len() > MAX_TOML_INPUT_SIZE {
52            return Err(SanitizeError::InputTooLarge {
53                size: content.len(),
54                limit: MAX_TOML_INPUT_SIZE,
55            });
56        }
57
58        let text = std::str::from_utf8(content).map_err(|e| SanitizeError::ParseError {
59            format: "TOML".into(),
60            message: format!("invalid UTF-8: {}", e),
61        })?;
62
63        let mut value: Value = toml::from_str(text).map_err(|e| SanitizeError::ParseError {
64            format: "TOML".into(),
65            message: format!("TOML parse error: {}", e),
66        })?;
67
68        walk_toml(&mut value, "", profile, store, 0)?;
69
70        let output = toml::to_string_pretty(&value)
71            .map_err(|e| SanitizeError::IoError(format!("TOML serialize error: {}", e)))?;
72
73        Ok(output.into_bytes())
74    }
75}
76
77/// Recursively walk a TOML value tree, replacing matched field values.
78fn walk_toml(
79    value: &mut Value,
80    prefix: &str,
81    profile: &FileTypeProfile,
82    store: &MappingStore,
83    depth: usize,
84) -> Result<()> {
85    if depth > MAX_TOML_DEPTH {
86        return Err(SanitizeError::RecursionDepthExceeded(format!(
87            "TOML recursion depth exceeds limit of {MAX_TOML_DEPTH}"
88        )));
89    }
90    match value {
91        Value::Table(map) => {
92            let keys: Vec<String> = map.keys().cloned().collect();
93            for key in keys {
94                let path = build_path(prefix, &key);
95                if let Some(v) = map.get_mut(&key) {
96                    match v {
97                        Value::String(s) => {
98                            if let Some(rule) = find_matching_rule(&path, profile) {
99                                *s = replace_value(s, rule, store)?;
100                            }
101                        }
102                        // Non-string scalars: convert to string replacement when matched.
103                        // This preserves TOML syntax validity while sanitizing the value.
104                        Value::Integer(_)
105                        | Value::Float(_)
106                        | Value::Boolean(_)
107                        | Value::Datetime(_) => {
108                            if let Some(rule) = find_matching_rule(&path, profile) {
109                                let repr = v.to_string();
110                                let replaced = replace_value(&repr, rule, store)?;
111                                *v = Value::String(replaced);
112                            }
113                        }
114                        Value::Table(_) | Value::Array(_) => {
115                            walk_toml(v, &path, profile, store, depth + 1)?;
116                        }
117                    }
118                }
119            }
120        }
121        Value::Array(arr) => {
122            for item in arr.iter_mut() {
123                walk_toml(item, prefix, profile, store, depth + 1)?;
124            }
125        }
126        _ => {}
127    }
128    Ok(())
129}
130
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Build a `MappingStore` backed by an `HmacGenerator` with a fixed key.
    fn make_store() -> MappingStore {
        let gen = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(gen, None)
    }

    /// Run the processor on `content` and return the sanitized TOML text.
    fn sanitize(content: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let output = TomlProcessor.process(content, profile, &store).unwrap();
        String::from_utf8(output).unwrap()
    }

    #[test]
    fn basic_toml_replacement() {
        let content = br#"[database]
host = "db.corp.com"
password = "s3cret"
port = 5432

[smtp]
user = "admin@corp.com"
"#;
        let profile = FileTypeProfile::new(
            "toml",
            vec![
                FieldRule::new("database.password"),
                FieldRule::new("smtp.user").with_category(Category::Email),
            ],
        );
        let text = sanitize(content, &profile);
        // Password replaced, host and port preserved.
        assert!(!text.contains("s3cret"));
        assert!(text.contains("db.corp.com"));
        assert!(text.contains("5432"));
        // Email replaced.
        assert!(!text.contains("admin@corp.com"));
    }

    #[test]
    fn wildcard_replaces_all_strings() {
        let content = b"api_key = \"secret\"\ndb_url = \"postgres://user:pass@host/db\"\n";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("*")]);
        let text = sanitize(content, &profile);
        assert!(!text.contains("secret"));
        assert!(!text.contains("postgres://user:pass@host/db"));
    }

    #[test]
    fn invalid_toml_returns_parse_error() {
        let store = make_store();
        let content = b"this is not valid toml [[[";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("*")]);
        let result = TomlProcessor.process(content, &profile, &store);
        // Check the variant, not just that some error occurred.
        assert!(matches!(result, Err(SanitizeError::ParseError { .. })));
    }

    #[test]
    fn invalid_utf8_returns_parse_error() {
        let store = make_store();
        let content: &[u8] = &[0xff, 0xfe, 0xfd];
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("*")]);
        let result = TomlProcessor.process(content, &profile, &store);
        assert!(matches!(result, Err(SanitizeError::ParseError { .. })));
    }

    #[test]
    fn deeply_nested_toml() {
        let content = b"[a.b.c]\nkey = \"value\"\n";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("a.b.c.key")]);
        let text = sanitize(content, &profile);
        assert!(!text.contains("value"));
    }

    #[test]
    fn array_of_tables_is_traversed() {
        let content = br#"[[servers]]
host = "a.corp.com"

[[servers]]
host = "b.corp.com"
"#;
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("servers.host")]);
        let text = sanitize(content, &profile);
        assert!(!text.contains("a.corp.com"));
        assert!(!text.contains("b.corp.com"));
    }

    #[test]
    fn matched_non_string_scalar_becomes_string() {
        let content = b"[database]\nport = 5432\n";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("database.port")]);
        let text = sanitize(content, &profile);
        // The output must still be valid TOML, with the matched key now a string.
        let parsed: Value = toml::from_str(&text).unwrap();
        let port = parsed.get("database").and_then(|db| db.get("port"));
        assert!(port.map_or(false, Value::is_str));
    }
}