Skip to main content

sanitize_engine/processor/
key_value.rs

1//! Key-value processor for `gitlab.rb`-style configuration files.
2//!
3//! Handles files with lines of the form:
4//!
5//! ```text
6//! key = "value"
7//! key = 'value'
8//! key = value
9//! # comment lines are preserved
10//! ```
11//!
12//! The delimiter and comment prefix are configurable via the profile's
13//! `options` map; the quoting style is detected per line and preserved.
14//!
15//! # Profile Options
16//!
17//! | Key              | Default | Description                                  |
18//! |------------------|---------|----------------------------------------------|
19//! | `delimiter`      | `"="`   | The key-value separator.                     |
20//! | `comment_prefix` | `"#"`   | Lines starting with this (after whitespace)  |
21//! |                  |         | are treated as comments and preserved as-is. |
22//!
23//! # Formatting Preservation
24//!
25//! - Blank lines, comment lines, and indentation are preserved verbatim.
26//! - The original quoting style (single, double, or unquoted) is kept.
27//! - Whitespace around the delimiter is preserved where possible.
28
29use crate::error::{Result, SanitizeError};
30use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
31use crate::store::MappingStore;
32
/// Upper bound, in bytes, on the input accepted by the key-value processor
/// (256 MiB). Larger inputs are rejected before any parsing happens.
const MAX_KV_INPUT_SIZE: usize = 256 << 20;
35
/// Structured processor for `key = value` configuration files.
///
/// The type is a stateless unit struct: all behaviour is driven by the
/// profile and mapping store passed to each call, so values are free to
/// copy and can be created with `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct KeyValueProcessor;
38
39impl Processor for KeyValueProcessor {
40    fn name(&self) -> &'static str {
41        "key_value"
42    }
43
44    fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
45        profile.processor == "key_value"
46    }
47
48    fn process(
49        &self,
50        content: &[u8],
51        profile: &FileTypeProfile,
52        store: &MappingStore,
53    ) -> Result<Vec<u8>> {
54        if content.len() > MAX_KV_INPUT_SIZE {
55            return Err(SanitizeError::InputTooLarge {
56                size: content.len(),
57                limit: MAX_KV_INPUT_SIZE,
58            });
59        }
60
61        let text = String::from_utf8_lossy(content);
62        let delimiter = profile.options.get("delimiter").map_or("=", |s| s.as_str());
63        let comment_prefix = profile
64            .options
65            .get("comment_prefix")
66            .map_or("#", |s| s.as_str());
67
68        let mut output = String::with_capacity(text.len());
69
70        for line in text.split('\n') {
71            let trimmed = line.trim();
72
73            // Preserve blank lines.
74            if trimmed.is_empty() {
75                output.push_str(line);
76                output.push('\n');
77                continue;
78            }
79
80            // Preserve comment lines.
81            if trimmed.starts_with(comment_prefix) {
82                output.push_str(line);
83                output.push('\n');
84                continue;
85            }
86
87            // Try to split on delimiter.
88            if let Some(delim_pos) = line.find(delimiter) {
89                let raw_key = &line[..delim_pos];
90                let after_delim = &line[delim_pos + delimiter.len()..];
91
92                let key = raw_key.trim();
93
94                // Check if this key matches any field rule.
95                if let Some(rule) = find_matching_rule(key, profile) {
96                    // Determine leading whitespace on the value side.
97                    let value_leading_ws: &str = {
98                        let trimmed_start = after_delim.trim_start();
99                        &after_delim[..after_delim.len() - trimmed_start.len()]
100                    };
101                    let raw_value = after_delim.trim();
102
103                    // Detect quoting.
104                    let (quote_char, inner_value) = detect_quotes(raw_value);
105
106                    // Replace the inner value.
107                    let replaced = replace_value(inner_value, rule, store)?;
108
109                    // Reconstruct the line preserving formatting.
110                    output.push_str(raw_key);
111                    output.push_str(delimiter);
112                    output.push_str(value_leading_ws);
113                    if let Some(q) = quote_char {
114                        output.push(q);
115                        output.push_str(&replaced);
116                        output.push(q);
117                    } else {
118                        output.push_str(&replaced);
119                    }
120                    output.push('\n');
121                } else {
122                    // Key not matched; preserve line as-is.
123                    output.push_str(line);
124                    output.push('\n');
125                }
126            } else {
127                // No delimiter found; preserve line as-is.
128                output.push_str(line);
129                output.push('\n');
130            }
131        }
132
133        // Remove the trailing newline we added if the original didn't end with one.
134        if !text.ends_with('\n') && output.ends_with('\n') {
135            output.pop();
136        }
137
138        Ok(output.into_bytes())
139    }
140}
141
/// Splits a value into its surrounding quote character and inner text.
///
/// When `value` begins and ends with the same quote character (`"` or `'`)
/// and those are two distinct characters, returns `(Some(quote), inner)` with
/// both quotes removed. In every other case returns `(None, value)` untouched.
fn detect_quotes(value: &str) -> (Option<char>, &str) {
    let mut chars = value.chars();
    // Taking from both ends of the same iterator guarantees the opening and
    // closing quote are different characters: a lone `"` yields `None` for
    // the back element.
    match (chars.next(), chars.next_back()) {
        (Some(open), Some(close)) if open == close && (open == '"' || open == '\'') => {
            (Some(open), chars.as_str())
        }
        _ => (None, value),
    }
}
153
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store around an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Runs the processor over `input` with a fresh store and returns the
    /// output as a `String`, panicking if processing fails or the output is
    /// not valid UTF-8.
    fn sanitize(input: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let bytes = KeyValueProcessor.process(input, profile, &store).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    /// Shorthand for a rule on `pattern` with a custom category label.
    fn custom_rule(pattern: &str, label: &str) -> FieldRule {
        FieldRule::new(pattern).with_category(Category::Custom(label.into()))
    }

    #[test]
    fn basic_key_value_replacement() {
        let input = br#"# GitLab configuration file
gitlab_rails['smtp_password'] = "super_secret_123"
gitlab_rails['smtp_address'] = "smtp.corp.com"
gitlab_rails['db_pool'] = 10
"#;

        let rules = vec![
            custom_rule("gitlab_rails['smtp_password']", "password"),
            FieldRule::new("gitlab_rails['smtp_address']").with_category(Category::Hostname),
        ];
        let profile = FileTypeProfile::new("key_value", rules);

        let out = sanitize(input, &profile);

        // The comment line survives untouched.
        assert!(out.contains("# GitLab configuration file"));
        // Neither secret appears in the output.
        assert!(!out.contains("super_secret_123"));
        assert!(!out.contains("smtp.corp.com"));
        // A key without a rule is left alone.
        assert!(out.contains("gitlab_rails['db_pool'] = 10"));
        // Double quotes are still present.
        assert!(out.contains('"'));
    }

    #[test]
    fn preserves_blank_lines_and_comments() {
        let profile = FileTypeProfile::new("key_value", vec![custom_rule("key", "test")]);

        let out = sanitize(b"# Header comment\n\nkey = value\n\n# Footer\n", &profile);

        assert!(out.starts_with("# Header comment\n\n"));
        assert!(out.contains("\n\n# Footer\n"));
        assert!(!out.contains("= value"));
    }

    #[test]
    fn glob_pattern_matching() {
        let profile = FileTypeProfile::new("key_value", vec![custom_rule("db.*", "db")]);

        let out = sanitize(
            b"db.password = secret1\ndb.host = myhost\napp.name = test\n",
            &profile,
        );

        assert!(!out.contains("secret1"));
        assert!(!out.contains("myhost"));
        assert!(out.contains("app.name = test"));
    }

    #[test]
    fn deterministic_replacement() {
        let rules = vec![custom_rule("key1", "test"), custom_rule("key2", "test")];
        let profile = FileTypeProfile::new("key_value", rules);

        let out = sanitize(b"key1 = secret\nkey2 = secret\n", &profile);

        // Identical original value and category must map to the same
        // replacement. Only the first two lines are examined.
        let mut values = out.lines().map(|line| line.split(" = ").nth(1).unwrap());
        let first = values.next().unwrap();
        let second = values.next().unwrap();
        assert_eq!(first, second);
    }

    #[test]
    fn custom_delimiter() {
        let profile = FileTypeProfile::new("key_value", vec![custom_rule("key", "test")])
            .with_option("delimiter", ":");

        let out = sanitize(b"key: value\n", &profile);

        assert!(!out.contains("value"));
        assert!(out.contains("key:"));
    }
}