Skip to main content

sanitize_engine/processor/
env_proc.rs

1//! `.env` file processor.
2//!
3//! Handles shell-style environment variable files with lines of the form:
4//!
5//! ```text
6//! KEY=value
7//! KEY="quoted value"
8//! KEY='single quoted'
9//! export KEY=value
10//! # comment lines are preserved
11//! ```
12//!
13//! The `export` keyword is stripped before key matching so that a
14//! FieldRule for `"SECRET_KEY"` correctly matches both `SECRET_KEY=val`
15//! and `export SECRET_KEY=val`.
16//!
17//! # Inline Comments
18//!
//! Unquoted values may have inline comments (`KEY=value # comment`).
//! When a value is replaced, everything from the first ` #` (space-hash)
//! onward, plus any trailing whitespace, is stripped before replacement,
//! and the comment is NOT written back (it may contain sensitive context).
//! Lines whose value is not replaced are emitted verbatim, inline comment
//! included. Quoted values are treated as opaque — everything between the
//! quotes is the value.
24//!
25//! # Formatting Preservation
26//!
27//! - Leading whitespace, blank lines, and `#` comment lines are preserved.
28//! - The original quoting style (single, double, or unquoted) is retained.
29//! - The `export` prefix, if present, is retained in the output.
30
31use crate::error::{Result, SanitizeError};
32use crate::processor::limits::DEFAULT_INPUT_SIZE;
33use crate::processor::{
34    find_field_signal, find_matching_rule, replace_by_signal, replace_value, FileTypeProfile,
35    Processor,
36};
37use crate::store::MappingStore;
38
/// Structured processor for `.env` / shell environment files.
///
/// This is a stateless unit struct: it holds no configuration of its own.
/// The standard derives make it printable in diagnostics, freely copyable,
/// and constructible through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct EnvProcessor;
41
42impl Processor for EnvProcessor {
43    fn name(&self) -> &'static str {
44        "env"
45    }
46
47    fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
48        profile.processor == "env"
49    }
50
51    fn process(
52        &self,
53        content: &[u8],
54        profile: &FileTypeProfile,
55        store: &MappingStore,
56    ) -> Result<Vec<u8>> {
57        if content.len() > DEFAULT_INPUT_SIZE {
58            return Err(SanitizeError::InputTooLarge {
59                size: content.len(),
60                limit: DEFAULT_INPUT_SIZE,
61            });
62        }
63
64        let text = String::from_utf8_lossy(content);
65        let mut output = String::with_capacity(text.len());
66
67        for line in text.split('\n') {
68            let trimmed = line.trim();
69
70            // Preserve blank lines.
71            if trimmed.is_empty() {
72                output.push_str(line);
73                output.push('\n');
74                continue;
75            }
76
77            // Preserve comment-only lines.
78            if trimmed.starts_with('#') {
79                output.push_str(line);
80                output.push('\n');
81                continue;
82            }
83
84            // Capture leading whitespace (indentation) for output reconstruction.
85            let indent_len = line.len() - line.trim_start().len();
86            let indent = &line[..indent_len];
87
88            // Detect and preserve `export ` prefix.
89            let (has_export, after_export) = if let Some(rest) = trimmed.strip_prefix("export ") {
90                (true, rest.trim_start())
91            } else {
92                (false, trimmed)
93            };
94
95            // Split on the first `=`.
96            let Some((raw_key, after_eq)) = after_export.split_once('=') else {
97                // No `=` — not a key=value line; preserve as-is.
98                output.push_str(line);
99                output.push('\n');
100                continue;
101            };
102
103            let key = raw_key.trim();
104
105            // Detect quoting and extract the inner value.
106            let (quote_char, inner_value) = detect_env_quotes(after_eq);
107
108            // Strip inline comments from unquoted values.
109            let inner_value = if quote_char.is_none() {
110                // Everything before a ` #` (space-hash) is the value.
111                inner_value
112                    .find(" #")
113                    .map_or(inner_value, |pos| &inner_value[..pos])
114                    .trim_end()
115            } else {
116                inner_value
117            };
118
119            if let Some(rule) = find_matching_rule(key, profile) {
120                let replaced = replace_value(inner_value, rule, store)?;
121
122                // Reconstruct: indent + [export ] + KEY=["']value["']
123                output.push_str(indent);
124                if has_export {
125                    output.push_str("export ");
126                }
127                output.push_str(key);
128                output.push('=');
129                if let Some(q) = quote_char {
130                    output.push(q);
131                    output.push_str(&replaced);
132                    output.push(q);
133                } else {
134                    output.push_str(&replaced);
135                }
136                output.push('\n');
137            } else if let Some(sig) = find_field_signal(key, &profile.field_name_signals) {
138                if let Some(replaced) = replace_by_signal(inner_value, sig, store)? {
139                    output.push_str(indent);
140                    if has_export {
141                        output.push_str("export ");
142                    }
143                    output.push_str(key);
144                    output.push('=');
145                    if let Some(q) = quote_char {
146                        output.push(q);
147                        output.push_str(&replaced);
148                        output.push(q);
149                    } else {
150                        output.push_str(&replaced);
151                    }
152                    output.push('\n');
153                } else {
154                    output.push_str(line);
155                    output.push('\n');
156                }
157            } else {
158                output.push_str(line);
159                output.push('\n');
160            }
161        }
162
163        // Remove the trailing newline we added if the original didn't end with one.
164        if !text.ends_with('\n') && output.ends_with('\n') {
165            output.pop();
166        }
167
168        Ok(output.into_bytes())
169    }
170}
171
/// Splits a raw value into its quoting style and unquoted content.
///
/// When `value` both starts and ends with the same quote character (`"` or
/// `'`) and is at least two bytes long, returns `(Some(quote), inner)` where
/// `inner` is the text between the quotes. In every other case — unquoted,
/// mismatched quotes, or a lone quote character — returns `(None, value)`
/// with the input unchanged.
fn detect_env_quotes(value: &str) -> (Option<char>, &str) {
    for quote in ['"', '\''] {
        // Stripping the prefix first means a single quote character leaves an
        // empty remainder, which then has no suffix to strip.
        let inner = value
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return (Some(quote), inner);
        }
    }
    (None, value)
}
184
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Profile for the `env` processor with a single `*` field rule.
    fn wildcard_profile() -> FileTypeProfile {
        FileTypeProfile::new("env", vec![FieldRule::new("*")])
    }

    /// Runs the processor over `content` with a fresh store and returns the
    /// output as a `String`.
    fn sanitize(content: &[u8], profile: &FileTypeProfile) -> String {
        let store = make_store();
        let bytes = EnvProcessor.process(content, profile, &store).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    #[test]
    fn basic_key_value() {
        let text = sanitize(
            b"SECRET_KEY=abc123\nDB_HOST=localhost\n",
            &wildcard_profile(),
        );
        // Original values must be gone.
        for secret in ["abc123", "localhost"] {
            assert!(!text.contains(secret));
        }
        // Keys are preserved.
        for key in ["SECRET_KEY=", "DB_HOST="] {
            assert!(text.contains(key));
        }
    }

    #[test]
    fn export_prefix_preserved() {
        let text = sanitize(
            b"export SECRET=hunter2\nDBPASS=s3cret\n",
            &wildcard_profile(),
        );
        for secret in ["hunter2", "s3cret"] {
            assert!(!text.contains(secret));
        }
        // `export` keyword is kept.
        assert!(text.contains("export SECRET="));
        // Non-export line works too.
        assert!(text.contains("DBPASS="));
    }

    #[test]
    fn quoted_values() {
        let text = sanitize(
            b"PW=\"my secret\"\nKEY='another secret'\n",
            &wildcard_profile(),
        );
        for secret in ["my secret", "another secret"] {
            assert!(!text.contains(secret));
        }
        // Quote chars are preserved.
        for opening in ["PW=\"", "KEY='"] {
            assert!(text.contains(opening));
        }
    }

    #[test]
    fn comments_and_blanks_preserved() {
        let text = sanitize(b"# This is a comment\n\nKEY=value\n", &wildcard_profile());
        assert!(text.contains("# This is a comment"));
        assert!(text.contains("\n\n"));
    }

    #[test]
    fn field_rule_targets_specific_key() {
        let profile = FileTypeProfile::new("env", vec![FieldRule::new("SECRET")]);
        let text = sanitize(
            b"SECRET=abc123\nPUBLIC_URL=https://example.com\n",
            &profile,
        );
        // SECRET replaced, PUBLIC_URL unchanged.
        assert!(!text.contains("abc123"));
        assert!(text.contains("https://example.com"));
    }
}