Skip to main content

sanitize_engine/processor/
toml_proc.rs

1//! TOML structured processor.
2//!
3//! Parses TOML input, walks the value tree, replaces matched field
4//! values, and serializes back to TOML preserving structure.
5//!
6//! # Key Paths
7//!
8//! Nested keys use the same dot-separated convention as the JSON processor:
9//! `database.password`, `server.credentials.token`.
10//!
11//! Array elements are traversed transparently — a rule for `servers.host`
12//! matches the `host` field inside every table in the `servers` array.
13//!
14//! # Non-String Scalars
15//!
16//! When a FieldRule matches an integer, float, boolean, or datetime value,
17//! that value is converted to a string replacement. This changes the TOML
18//! type for that key but keeps the file syntactically valid. Use specific
19//! field rules (e.g. `"database.password"`) rather than `"*"` if you want
20//! to avoid replacing non-sensitive numeric values.
21
22use crate::error::{Result, SanitizeError};
23use crate::processor::limits::DEFAULT_INPUT_SIZE;
24use crate::processor::{walk_tree, FileTypeProfile, Processor, TreeNode};
25use crate::store::MappingStore;
26use toml::Value;
27
/// Structured processor for TOML configuration files.
///
/// This is a stateless unit struct: it carries no configuration of its own.
/// Everything it needs (the file-type profile and the mapping store) is
/// passed in per call through its [`Processor`] implementation.
pub struct TomlProcessor;
30
31impl Processor for TomlProcessor {
32    fn name(&self) -> &'static str {
33        "toml"
34    }
35
36    fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
37        profile.processor == "toml"
38    }
39
40    fn process(
41        &self,
42        content: &[u8],
43        profile: &FileTypeProfile,
44        store: &MappingStore,
45    ) -> Result<Vec<u8>> {
46        if content.len() > DEFAULT_INPUT_SIZE {
47            return Err(SanitizeError::InputTooLarge {
48                size: content.len(),
49                limit: DEFAULT_INPUT_SIZE,
50            });
51        }
52
53        let text = std::str::from_utf8(content).map_err(|e| SanitizeError::ParseError {
54            format: "TOML".into(),
55            message: format!("invalid UTF-8: {}", e),
56        })?;
57
58        let mut value: Value = toml::from_str(text).map_err(|e| SanitizeError::ParseError {
59            format: "TOML".into(),
60            message: format!("TOML parse error: {}", e),
61        })?;
62
63        walk_toml(&mut value, "", profile, store, 0)?;
64
65        let output = toml::to_string_pretty(&value).map_err(|e| {
66            SanitizeError::IoError(std::io::Error::other(format!("TOML serialize error: {e}")))
67        })?;
68
69        Ok(output.into_bytes())
70    }
71}
72
73impl TreeNode for Value {
74    fn for_each_map_entry<F>(&mut self, mut f: F) -> Result<()>
75    where
76        F: FnMut(&str, &mut Self) -> Result<()>,
77    {
78        if let Self::Table(map) = self {
79            let keys: Vec<String> = map.keys().cloned().collect();
80            for key in keys {
81                if let Some(v) = map.get_mut(&key) {
82                    f(&key, v)?;
83                }
84            }
85        }
86        Ok(())
87    }
88
89    fn for_each_seq_item<F>(&mut self, mut f: F) -> Result<()>
90    where
91        F: FnMut(&mut Self) -> Result<()>,
92    {
93        if let Self::Array(arr) = self {
94            for item in arr.iter_mut() {
95                f(item)?;
96            }
97        }
98        Ok(())
99    }
100
101    fn as_str_mut(&mut self) -> Option<&mut String> {
102        if let Self::String(s) = self {
103            Some(s)
104        } else {
105            None
106        }
107    }
108
109    fn is_scalar(&self) -> bool {
110        // Non-string scalars are converted to string replacements. This changes
111        // the TOML type for matched keys but keeps the file syntactically valid.
112        matches!(
113            self,
114            Self::Integer(_) | Self::Float(_) | Self::Boolean(_) | Self::Datetime(_)
115        )
116    }
117
118    fn scalar_to_string(&self) -> String {
119        self.to_string()
120    }
121
122    fn set_string(&mut self, s: String) {
123        *self = Self::String(s);
124    }
125}
126
127/// Recursively walk a TOML value tree, replacing matched field values.
128fn walk_toml(
129    value: &mut Value,
130    prefix: &str,
131    profile: &FileTypeProfile,
132    store: &MappingStore,
133    depth: usize,
134) -> Result<()> {
135    walk_tree(value, prefix, profile, store, depth, "TOML")
136}
137
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key,
    /// so every test starts from the same generator configuration.
    fn make_store() -> MappingStore {
        // Named `generator` rather than `gen`: `gen` is a reserved keyword in
        // the Rust 2024 edition.
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    #[test]
    fn basic_toml_replacement() {
        let store = make_store();
        let proc = TomlProcessor;
        let content = br#"[database]
host = "db.corp.com"
password = "s3cret"
port = 5432

[smtp]
user = "admin@corp.com"
"#;
        let profile = FileTypeProfile::new(
            "toml",
            vec![
                FieldRule::new("database.password"),
                FieldRule::new("smtp.user").with_category(Category::Email),
            ],
        );
        let output = proc.process(content, &profile, &store).unwrap();
        let text = String::from_utf8(output).unwrap();
        // Password replaced, host and port preserved.
        assert!(!text.contains("s3cret"));
        assert!(text.contains("db.corp.com"));
        assert!(text.contains("5432"));
        // Email replaced.
        assert!(!text.contains("admin@corp.com"));
    }

    #[test]
    fn wildcard_replaces_all_strings() {
        let store = make_store();
        let proc = TomlProcessor;
        let content = b"api_key = \"secret\"\ndb_url = \"postgres://user:pass@host/db\"\n";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("*")]);
        let output = proc.process(content, &profile, &store).unwrap();
        let text = String::from_utf8(output).unwrap();
        assert!(!text.contains("secret"));
        assert!(!text.contains("postgres://user:pass@host/db"));
    }

    #[test]
    fn invalid_toml_returns_parse_error() {
        let store = make_store();
        let proc = TomlProcessor;
        let content = b"this is not valid toml [[[";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("*")]);
        let result = proc.process(content, &profile, &store);
        // Check the variant, not just that some error occurred.
        assert!(
            matches!(result, Err(SanitizeError::ParseError { .. })),
            "malformed TOML should be reported as a ParseError"
        );
    }

    #[test]
    fn invalid_utf8_returns_parse_error() {
        let store = make_store();
        let proc = TomlProcessor;
        // 0xFF can never appear in well-formed UTF-8.
        let content: &[u8] = &[0xff, 0xfe, 0xfd];
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("*")]);
        let result = proc.process(content, &profile, &store);
        assert!(
            matches!(result, Err(SanitizeError::ParseError { .. })),
            "non-UTF-8 input should be reported as a ParseError"
        );
    }

    #[test]
    fn deeply_nested_toml() {
        let store = make_store();
        let proc = TomlProcessor;
        let content = b"[a.b.c]\nkey = \"value\"\n";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("a.b.c.key")]);
        let output = proc.process(content, &profile, &store).unwrap();
        let text = String::from_utf8(output).unwrap();
        assert!(!text.contains("value"));
    }
}