Skip to main content

sanitize_engine/processor/
ini_proc.rs

1//! INI / CFG file processor with `[section]` awareness.
2//!
3//! Handles Windows/Unix INI-style configuration files:
4//!
5//! ```ini
6//! [section]
7//! key = value
8//! key: value
9//! ; semicolon comment
10//! # hash comment
11//! ```
12//!
13//! # Key Paths
14//!
15//! Field rules use dot notation combining section and key:
16//! - `"database.host"` — matches key `host` in section `[database]`
17//! - `"*"` — matches all key=value pairs in all sections
18//! - `"global_key"` — matches a key before any section header (global scope)
19//!
20//! # Formatting Preservation
21//!
22//! - Section headers `[section]` are preserved verbatim.
23//! - `#` and `;` comment lines are preserved verbatim.
24//! - Blank lines are preserved.
25//! - Leading whitespace in value is stripped; quoting is not applied.
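//! - Inline comments (`key = value ; comment`) on lines whose value is replaced
//!   are stripped and NOT written back, to avoid leaking sensitive context in
//!   comments. Lines that match no field rule are preserved verbatim,
//!   including any inline comment.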
28//! - Both `key = value` and `key: value` assignment operators are handled.
29
30use crate::error::{Result, SanitizeError};
31use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
32use crate::store::MappingStore;
33
/// Maximum allowed input size (bytes) for INI processing.
///
/// `IniProcessor::process` compares `content.len()` against this limit before
/// doing any other work and returns `SanitizeError::InputTooLarge` when the
/// input is strictly larger.
const MAX_INI_INPUT_SIZE: usize = 256 * 1024 * 1024; // 256 MiB
36
/// Structured processor for INI / CFG files.
///
/// The type carries no state; all behaviour lives in its `Processor`
/// implementation. The derives make the value printable in diagnostics and
/// trivially constructible and copyable.
#[derive(Debug, Default, Clone, Copy)]
pub struct IniProcessor;
39
40impl Processor for IniProcessor {
41    fn name(&self) -> &'static str {
42        "ini"
43    }
44
45    fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
46        profile.processor == "ini"
47    }
48
49    fn process(
50        &self,
51        content: &[u8],
52        profile: &FileTypeProfile,
53        store: &MappingStore,
54    ) -> Result<Vec<u8>> {
55        if content.len() > MAX_INI_INPUT_SIZE {
56            return Err(SanitizeError::InputTooLarge {
57                size: content.len(),
58                limit: MAX_INI_INPUT_SIZE,
59            });
60        }
61
62        let text = String::from_utf8_lossy(content);
63        let mut output = String::with_capacity(text.len());
64        let mut current_section: Option<String> = None;
65
66        for line in text.split('\n') {
67            let trimmed = line.trim();
68
69            // Blank line.
70            if trimmed.is_empty() {
71                output.push_str(line);
72                output.push('\n');
73                continue;
74            }
75
76            // Comment line.
77            if trimmed.starts_with('#') || trimmed.starts_with(';') {
78                output.push_str(line);
79                output.push('\n');
80                continue;
81            }
82
83            // Section header: `[section_name]`
84            if trimmed.starts_with('[') {
85                if let Some(close) = trimmed.find(']') {
86                    current_section = Some(trimmed[1..close].trim().to_string());
87                }
88                output.push_str(line);
89                output.push('\n');
90                continue;
91            }
92
93            // Key=value or key:value line.
94            let Some((raw_key, raw_value)) = split_kv(trimmed) else {
95                // Unrecognised line — preserve as-is.
96                output.push_str(line);
97                output.push('\n');
98                continue;
99            };
100
101            let key = raw_key.trim();
102
103            // Capture leading whitespace for output reconstruction.
104            let indent_len = line.len() - line.trim_start().len();
105            let indent = &line[..indent_len];
106
107            // Capture the original delimiter (` = ` or ` : ` etc.).
108            let delimiter = extract_delimiter(line, key, raw_value);
109
110            // Strip inline comments from the value.
111            let value = strip_inline_comment(raw_value.trim_start());
112
113            // Build section-qualified key path.
114            let path = match &current_section {
115                Some(section) => format!("{}.{}", section, key),
116                None => key.to_string(),
117            };
118
119            if let Some(rule) = find_matching_rule(&path, profile) {
120                let replaced = replace_value(value, rule, store)?;
121                output.push_str(indent);
122                output.push_str(key);
123                output.push_str(&delimiter);
124                output.push_str(&replaced);
125                output.push('\n');
126            } else {
127                output.push_str(line);
128                output.push('\n');
129            }
130        }
131
132        // Remove the trailing newline we added if the original didn't end with one.
133        if !text.ends_with('\n') && output.ends_with('\n') {
134            output.pop();
135        }
136
137        Ok(output.into_bytes())
138    }
139}
140
/// Split a `key = value` or `key: value` line at its assignment delimiter.
///
/// The delimiter is whichever of `=` or `:` occurs **first** in `s`, so a
/// value that itself contains the other character (`url: http://host/?a=b`,
/// `host = db:5432`) stays intact and the key is never extended into the
/// value.
///
/// Returns the text before and after the delimiter, both untrimmed, or `None`
/// if `s` contains neither character.
fn split_kv(s: &str) -> Option<(&str, &str)> {
    s.split_once(['=', ':'])
}
153
/// Reproduce the original delimiter string from the source line.
///
/// The result is everything between the end of `key` and the first
/// non-whitespace character of the value: the `=` / `:` character together
/// with the whitespace on either side of it (`"="`, `" = "`, `":\t"`, ...).
/// It never contains any part of the value, whatever the spacing, trailing
/// whitespace or character width of the value.
///
/// * `line` - the source line; leading and trailing whitespace are ignored.
/// * `key` - the key as it appears in `line`; surrounding whitespace is ignored.
/// * `after_delim` - everything that followed the delimiter character,
///   with leading whitespace intact.
///
/// For an empty value the delimiter is returned without trailing whitespace
/// (`"token ="` gives `" ="`). Falls back to `" = "` when `key` or
/// `after_delim` cannot be located in `line`.
fn extract_delimiter(line: &str, key: &str, after_delim: &str) -> String {
    const FALLBACK: &str = " = ";

    let key = key.trim();
    let body = line.trim_start();

    // The key normally opens the line; search for it only if it does not.
    let after_key = match body.strip_prefix(key) {
        Some(rest) => rest,
        None => match body.find(key) {
            Some(start) => &body[start + key.len()..],
            None => return FALLBACK.to_string(),
        },
    };

    // Trailing whitespace (including a CR from CRLF input) may be present on
    // one side only, so compare both sides right-trimmed. What precedes the
    // value text is the key-side padding plus the delimiter character.
    let Some(head) = after_key.trim_end().strip_suffix(after_delim.trim_end()) else {
        return FALLBACK.to_string();
    };

    // Whitespace between the delimiter character and the value itself.
    let padding = &after_delim[..after_delim.len() - after_delim.trim_start().len()];

    let mut delimiter = String::with_capacity(head.len() + padding.len());
    delimiter.push_str(head);
    delimiter.push_str(padding);
    delimiter
}
171
/// Strip a trailing inline comment from a value string.
///
/// Recognises ` # ` and ` ; ` (marker character with a space on both sides)
/// as inline comment markers. The comment starts at whichever marker occurs
/// **earliest** in `value`, so a comment that itself contains the other
/// marker is removed in full. The returned slice has trailing whitespace
/// removed; a value without a marker is only right-trimmed.
fn strip_inline_comment(value: &str) -> &str {
    let comment_start = [" # ", " ; "]
        .iter()
        .filter_map(|marker| value.find(*marker))
        .min();

    match comment_start {
        Some(pos) => value[..pos].trim_end(),
        None => value.trim_end(),
    }
}
182
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a `MappingStore` around an `HmacGenerator` created from a fixed
    /// 32-byte array, with `None` as the second constructor argument.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Profile for the `ini` processor with a single `*` field rule.
    fn wildcard_profile() -> FileTypeProfile {
        FileTypeProfile::new("ini", vec![FieldRule::new("*")])
    }

    /// Profile for the `ini` processor with a single rule for `path`.
    fn single_rule_profile(path: &str) -> FileTypeProfile {
        FileTypeProfile::new("ini", vec![FieldRule::new(path)])
    }

    /// Runs `IniProcessor` over `input` with a fresh store and returns the
    /// output decoded as UTF-8.
    fn sanitize(input: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let bytes = IniProcessor.process(input, profile, &store).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    #[test]
    fn basic_ini_replacement() {
        let sanitized = sanitize(
            b"[database]\nhost = db.corp.com\npassword = s3cret\n\n[smtp]\nuser = admin\n",
            &wildcard_profile(),
        );

        // Every original value is gone.
        for secret in ["db.corp.com", "s3cret", "admin"] {
            assert!(!sanitized.contains(secret));
        }
        // Section headers survive.
        for header in ["[database]", "[smtp]"] {
            assert!(sanitized.contains(header));
        }
        // The key is still followed by its delimiter.
        assert!(sanitized.contains("host =") || sanitized.contains("host="));
    }

    #[test]
    fn section_qualified_rule() {
        let sanitized = sanitize(
            b"[database]\npassword = secret\n[app]\nname = myapp\n",
            &single_rule_profile("database.password"),
        );

        // Only `database.password` is covered by the rule.
        assert!(!sanitized.contains("secret"));
        assert!(sanitized.contains("myapp"));
    }

    #[test]
    fn comments_and_blanks_preserved() {
        let sanitized = sanitize(
            b"# Global config\n\n[section]\n; this is a semicolon comment\nkey = val\n",
            &wildcard_profile(),
        );

        assert!(sanitized.contains("# Global config"));
        assert!(sanitized.contains("; this is a semicolon comment"));
        // The blank line shows up as two consecutive newlines.
        assert!(sanitized.contains("\n\n"));
    }

    #[test]
    fn colon_delimiter_handled() {
        let sanitized = sanitize(
            b"[section]\napi_key: abc123\n",
            &single_rule_profile("section.api_key"),
        );

        assert!(!sanitized.contains("abc123"));
    }
}