Skip to main content

sanitize_engine/processor/
toml_proc.rs

1//! TOML structured processor.
2//!
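//! Parses TOML input, walks the value tree, replaces matched field
//! values, and serializes back to TOML. The table/array structure of the
//! data is preserved; comments and the original formatting are not, because
//! the document is re-emitted from the parsed value tree.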
5//!
6//! # Key Paths
7//!
8//! Nested keys use the same dot-separated convention as the JSON processor:
9//! `database.password`, `server.credentials.token`.
10//!
11//! Array elements are traversed transparently — a rule for `servers.host`
12//! matches the `host` field inside every table in the `servers` array.
13//!
14//! # Non-String Scalars
15//!
16//! When a FieldRule matches an integer, float, boolean, or datetime value,
17//! that value is converted to a string replacement. This changes the TOML
18//! type for that key but keeps the file syntactically valid. Use specific
19//! field rules (e.g. `"database.password"`) rather than `"*"` if you want
20//! to avoid replacing non-sensitive numeric values.
21
22use crate::error::{Result, SanitizeError};
23use crate::processor::limits::DEFAULT_INPUT_SIZE;
24use crate::processor::{walk_tree, FileTypeProfile, Processor, TreeNode};
25use crate::store::MappingStore;
26use toml::Value;
27
/// Structured processor for TOML configuration files.
///
/// This is a field-less unit struct, so it is trivially copyable and can be
/// created either as `TomlProcessor` or via `TomlProcessor::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct TomlProcessor;
30
31impl Processor for TomlProcessor {
32    fn name(&self) -> &'static str {
33        "toml"
34    }
35
36    fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
37        profile.processor == "toml"
38    }
39
40    fn process(
41        &self,
42        content: &[u8],
43        profile: &FileTypeProfile,
44        store: &MappingStore,
45    ) -> Result<Vec<u8>> {
46        if content.len() > DEFAULT_INPUT_SIZE {
47            return Err(SanitizeError::InputTooLarge {
48                size: content.len(),
49                limit: DEFAULT_INPUT_SIZE,
50            });
51        }
52
53        let text = std::str::from_utf8(content).map_err(|e| SanitizeError::ParseError {
54            format: "TOML".into(),
55            message: format!("invalid UTF-8: {}", e),
56        })?;
57
58        let mut value: Value = toml::from_str(text).map_err(|e| SanitizeError::ParseError {
59            format: "TOML".into(),
60            message: format!("TOML parse error: {}", e),
61        })?;
62
63        walk_toml(&mut value, "", profile, store, 0)?;
64
65        let output = toml::to_string_pretty(&value)
66            .map_err(|e| SanitizeError::IoError(format!("TOML serialize error: {}", e)))?;
67
68        Ok(output.into_bytes())
69    }
70}
71
72impl TreeNode for Value {
73    fn for_each_map_entry<F>(&mut self, mut f: F) -> Result<()>
74    where
75        F: FnMut(&str, &mut Self) -> Result<()>,
76    {
77        if let Self::Table(map) = self {
78            let keys: Vec<String> = map.keys().cloned().collect();
79            for key in keys {
80                if let Some(v) = map.get_mut(&key) {
81                    f(&key, v)?;
82                }
83            }
84        }
85        Ok(())
86    }
87
88    fn for_each_seq_item<F>(&mut self, mut f: F) -> Result<()>
89    where
90        F: FnMut(&mut Self) -> Result<()>,
91    {
92        if let Self::Array(arr) = self {
93            for item in arr.iter_mut() {
94                f(item)?;
95            }
96        }
97        Ok(())
98    }
99
100    fn as_str_mut(&mut self) -> Option<&mut String> {
101        if let Self::String(s) = self {
102            Some(s)
103        } else {
104            None
105        }
106    }
107
108    fn is_scalar(&self) -> bool {
109        // Non-string scalars are converted to string replacements. This changes
110        // the TOML type for matched keys but keeps the file syntactically valid.
111        matches!(
112            self,
113            Self::Integer(_) | Self::Float(_) | Self::Boolean(_) | Self::Datetime(_)
114        )
115    }
116
117    fn scalar_to_string(&self) -> String {
118        self.to_string()
119    }
120
121    fn set_string(&mut self, s: String) {
122        *self = Self::String(s);
123    }
124}
125
126/// Recursively walk a TOML value tree, replacing matched field values.
127fn walk_toml(
128    value: &mut Value,
129    prefix: &str,
130    profile: &FileTypeProfile,
131    store: &MappingStore,
132    depth: usize,
133) -> Result<()> {
134    walk_tree(value, prefix, profile, store, depth, "TOML")
135}
136
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let gen = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(gen, None)
    }

    /// Only the fields named by a rule are replaced; siblings are untouched.
    #[test]
    fn basic_toml_replacement() {
        let store = make_store();
        let proc = TomlProcessor;
        let content = br#"[database]
host = "db.corp.com"
password = "s3cret"
port = 5432

[smtp]
user = "admin@corp.com"
"#;
        let profile = FileTypeProfile::new(
            "toml",
            vec![
                FieldRule::new("database.password"),
                FieldRule::new("smtp.user").with_category(Category::Email),
            ],
        );
        let output = proc.process(content, &profile, &store).unwrap();
        let text = String::from_utf8(output).unwrap();
        // Password replaced, host and port preserved.
        assert!(!text.contains("s3cret"));
        assert!(text.contains("db.corp.com"));
        assert!(text.contains("5432"));
        // Email replaced.
        assert!(!text.contains("admin@corp.com"));
    }

    /// A `"*"` rule replaces every string value in the document.
    #[test]
    fn wildcard_replaces_all_strings() {
        let store = make_store();
        let proc = TomlProcessor;
        let content = b"api_key = \"secret\"\ndb_url = \"postgres://user:pass@host/db\"\n";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("*")]);
        let output = proc.process(content, &profile, &store).unwrap();
        let text = String::from_utf8(output).unwrap();
        assert!(!text.contains("secret"));
        assert!(!text.contains("postgres://user:pass@host/db"));
    }

    /// Malformed TOML must be reported as a parse error, not as any other
    /// failure kind.
    #[test]
    fn invalid_toml_returns_parse_error() {
        let store = make_store();
        let proc = TomlProcessor;
        let content = b"this is not valid toml [[[";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("*")]);
        let result = proc.process(content, &profile, &store);
        // Check the variant so the test lives up to its name; a bare
        // `is_err()` would also pass for unrelated failures.
        assert!(matches!(result, Err(SanitizeError::ParseError { .. })));
    }

    /// Dotted table headers produce nested key paths that rules can target.
    #[test]
    fn deeply_nested_toml() {
        let store = make_store();
        let proc = TomlProcessor;
        let content = b"[a.b.c]\nkey = \"value\"\n";
        let profile = FileTypeProfile::new("toml", vec![FieldRule::new("a.b.c.key")]);
        let output = proc.process(content, &profile, &store).unwrap();
        let text = String::from_utf8(output).unwrap();
        assert!(!text.contains("value"));
    }
}