Skip to main content

safe_chains/
parse.rs

/// Splits a command line into segments at unquoted shell control operators.
///
/// A split happens at `|`, `&`, `&&`, `;` and newline whenever the character is
/// not inside single or double quotes and is not preceded by a backslash
/// (a backslash is literal inside single quotes). An `&` directly following
/// `>` is kept in the segment so that forms like `2>&1` stay intact.
///
/// Quotes and backslashes are copied into the output unchanged. Every segment
/// is trimmed, and segments that end up empty are dropped.
pub fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;
    let mut stream = cmd.chars().peekable();

    while let Some(ch) = stream.next() {
        // The character right after a backslash is always taken literally.
        if after_backslash {
            after_backslash = false;
            buf.push(ch);
            continue;
        }

        let quoted = single_quoted || double_quoted;
        match ch {
            '\\' if !single_quoted => {
                after_backslash = true;
                buf.push(ch);
            }
            '\'' if !double_quoted => {
                single_quoted = !single_quoted;
                buf.push(ch);
            }
            '"' if !single_quoted => {
                double_quoted = !double_quoted;
                buf.push(ch);
            }
            '|' | ';' | '\n' if !quoted => pieces.push(std::mem::take(&mut buf)),
            // `>&` is part of a redirection, not a control operator.
            '&' if !quoted && !buf.ends_with('>') => {
                pieces.push(std::mem::take(&mut buf));
                // Swallow the second `&` of `&&`.
                if stream.peek() == Some(&'&') {
                    stream.next();
                }
            }
            _ => buf.push(ch),
        }
    }
    pieces.push(buf);

    pieces
        .iter()
        .map(|piece| piece.trim())
        .filter(|piece| !piece.is_empty())
        .map(String::from)
        .collect()
}
59
60pub fn tokenize(segment: &str) -> Option<Vec<String>> {
61    shell_words::split(segment).ok()
62}
63
/// Reports whether `segment` contains shell syntax that can run another
/// command or touch a file.
///
/// Returns `true` for:
/// * a backtick or `$(` anywhere except inside single quotes or after a
///   backslash — the shell still performs command substitution inside double
///   quotes, so double quotes do not make these safe;
/// * an unquoted `>` or `<`, unless it is a pure file-descriptor duplication
///   or close (`2>&1`, `2>&-`) or its target is exactly `/dev/null`.
///
/// Returns `false` otherwise.
pub fn has_unsafe_shell_syntax(segment: &str) -> bool {
    let chars: Vec<char> = segment.chars().collect();
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (i, &c) in chars.iter().enumerate() {
        if escaped {
            escaped = false;
            continue;
        }
        if in_single {
            // Inside single quotes only the closing quote is special.
            in_single = c != '\'';
            continue;
        }
        match c {
            '\\' => escaped = true,
            '"' => in_double = !in_double,
            '\'' if !in_double => in_single = true,
            // Command substitution is active both unquoted and in double quotes.
            '`' => return true,
            '$' if chars.get(i + 1) == Some(&'(') => return true,
            '>' | '<' if !in_double => {
                let harmless = is_fd_duplication(&chars, i + 1)
                    || is_dev_null_target(&chars, i + 1, c);
                if !harmless {
                    return true;
                }
            }
            _ => {}
        }
    }
    false
}

/// Returns `true` when `chars[start..]` begins with `&` followed by a word
/// that is a file-descriptor number (optionally with a trailing `-`) or a
/// lone `-`.
///
/// The whole word up to the next whitespace or shell metacharacter is
/// inspected: a word such as `1file` is a file name, not a descriptor, so it
/// is rejected.
fn is_fd_duplication(chars: &[char], start: usize) -> bool {
    if chars.get(start) != Some(&'&') {
        return false;
    }
    let target = &chars[start + 1..];
    let word_len = target
        .iter()
        .position(|ch| ch.is_whitespace() || ";|&()<>".contains(*ch))
        .unwrap_or(target.len());
    let word = &target[..word_len];
    if matches!(word, ['-']) {
        return true;
    }
    // Accept `N` and `N-`.
    let digits = match word.split_last() {
        Some((&'-', head)) => head,
        _ => word,
    };
    !digits.is_empty() && digits.iter().all(|ch| ch.is_ascii_digit())
}

const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Returns `true` when the redirection target starting at `chars[start]` is
/// exactly `/dev/null`.
///
/// For `>` a second `>` (append) is skipped first, then any run of spaces.
/// The path must be followed by end of input, whitespace, or one of `;|&)`,
/// so longer paths such as `/dev/nullicious` or `/dev/null/foo` do not match.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut j = start;
    if redirect_char == '>' && chars.get(j) == Some(&'>') {
        j += 1;
    }
    while chars.get(j) == Some(&' ') {
        j += 1;
    }
    let rest = chars.get(j..).unwrap_or(&[]);
    if !rest.starts_with(&DEV_NULL) {
        return false;
    }
    match rest.get(DEV_NULL.len()) {
        None => true,
        Some(&ch) => ch.is_whitespace() || ";|&)".contains(ch),
    }
}
132
/// Reports whether the arguments in `tokens` contain the given flag.
///
/// `tokens[0]` is skipped (the command name); an empty slice yields `false`.
/// Scanning stops at the first `--` token, and nothing after it is considered.
///
/// * `long`, when given, matches a token equal to it or starting with
///   `{long}=`.
/// * `short` is matched with its leading dashes removed: any token that
///   starts with a single `-` (not `--`) and contains that text after the
///   dash counts, which covers combined flags such as `-ni`. Note that an
///   empty `short` therefore matches every single-dash token.
pub fn has_flag(tokens: &[String], short: &str, long: Option<&str>) -> bool {
    let short_char = short.trim_start_matches('-');
    // `skip(1)` instead of `tokens[1..]` so an empty slice cannot panic.
    for token in tokens.iter().skip(1) {
        if token == "--" {
            return false;
        }
        // Exact long flag, or long flag followed by `=value`.
        let long_hit = long.is_some_and(|flag| {
            token
                .strip_prefix(flag)
                .is_some_and(|rest| rest.is_empty() || rest.starts_with('='))
        });
        if long_hit {
            return true;
        }
        let is_short_cluster = token.starts_with('-') && !token.starts_with("--");
        if is_short_cluster && token[1..].contains(short_char) {
            return true;
        }
    }
    false
}
150
/// Removes leading `KEY=value` environment assignments from `segment` and
/// returns the remainder with leading whitespace trimmed.
///
/// An assignment is recognised only when the key starts with an ASCII
/// uppercase letter or `_` and consists solely of ASCII uppercase letters,
/// digits and `_`. The value ends at the first space or tab that is not
/// inside single or double quotes and not preceded by a backslash, so
/// `FOO="a b" cmd` yields `cmd`.
///
/// If an assignment is not followed by anything (no unquoted blank after the
/// value), it is returned as is rather than stripped.
pub fn strip_env_prefix(segment: &str) -> &str {
    let mut rest = segment;
    loop {
        let trimmed = rest.trim_start();
        let starts_like_key = trimmed
            .bytes()
            .next()
            .is_some_and(|b| b.is_ascii_uppercase() || b == b'_');
        if !starts_like_key {
            return trimmed;
        }
        let Some((key, value)) = trimmed.split_once('=') else {
            return trimmed;
        };
        let valid_key = key
            .bytes()
            .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
        if !valid_key {
            return trimmed;
        }
        match unquoted_blank_offset(value) {
            // Continue after the value; the next iteration trims the blank.
            Some(end) => rest = &value[end..],
            None => return trimmed,
        }
    }
}

/// Byte offset of the first space or tab in `value` that lies outside quotes
/// and is not backslash-escaped, or `None` when there is no such character.
fn unquoted_blank_offset(value: &str) -> Option<usize> {
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    for (idx, c) in value.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            ' ' | '\t' if !in_single && !in_double => return Some(idx),
            _ => {}
        }
    }
    None
}
179
// Unit tests for the parsing helpers in this file.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned token vector from string literals.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|s| s.to_string()).collect()
    }

    #[test]
    fn split_pipe() {
        assert_eq!(
            split_outside_quotes("grep foo | head -5"),
            vec!["grep foo", "head -5"]
        );
    }

    #[test]
    fn split_and() {
        assert_eq!(
            split_outside_quotes("ls && echo done"),
            vec!["ls", "echo done"]
        );
    }

    #[test]
    fn split_semicolon() {
        assert_eq!(
            split_outside_quotes("ls; echo done"),
            vec!["ls", "echo done"]
        );
    }

    #[test]
    fn split_preserves_quoted_pipes() {
        assert_eq!(
            split_outside_quotes("echo 'a | b' foo"),
            vec!["echo 'a | b' foo"]
        );
    }

    #[test]
    fn split_background_operator() {
        assert_eq!(
            split_outside_quotes("cat file & rm -rf /"),
            vec!["cat file", "rm -rf /"]
        );
    }

    #[test]
    fn split_newline() {
        assert_eq!(
            split_outside_quotes("echo foo\necho bar"),
            vec!["echo foo", "echo bar"]
        );
    }

    #[test]
    fn split_keeps_fd_duplication() {
        assert_eq!(
            split_outside_quotes("cargo clippy 2>&1 | head"),
            vec!["cargo clippy 2>&1", "head"]
        );
    }

    #[test]
    fn unsafe_redirect() {
        assert!(has_unsafe_shell_syntax("echo hello > file.txt"));
    }

    #[test]
    fn safe_fd_redirect_stderr_to_stdout() {
        assert!(!has_unsafe_shell_syntax("cargo clippy 2>&1"));
    }

    #[test]
    fn safe_fd_redirect_close() {
        assert!(!has_unsafe_shell_syntax("cmd 2>&-"));
    }

    #[test]
    fn unsafe_redirect_ampersand_no_digit() {
        assert!(has_unsafe_shell_syntax("echo hello >& file.txt"));
    }

    #[test]
    fn unsafe_backtick() {
        assert!(has_unsafe_shell_syntax("echo `rm -rf /`"));
    }

    #[test]
    fn unsafe_command_substitution() {
        assert!(has_unsafe_shell_syntax("echo $(rm -rf /)"));
    }

    #[test]
    fn safe_quoted_dollar_paren() {
        assert!(!has_unsafe_shell_syntax("echo '$(safe)' arg"));
    }

    #[test]
    fn safe_quoted_redirect() {
        assert!(!has_unsafe_shell_syntax("echo 'greater > than' test"));
    }

    #[test]
    fn safe_no_special_chars() {
        assert!(!has_unsafe_shell_syntax("grep pattern file"));
    }

    #[test]
    fn safe_redirect_to_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd >/dev/null"));
    }

    #[test]
    fn safe_redirect_stderr_to_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd 2>/dev/null"));
    }

    #[test]
    fn safe_redirect_append_to_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd >>/dev/null"));
    }

    #[test]
    fn safe_redirect_space_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd > /dev/null"));
    }

    #[test]
    fn safe_redirect_input_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd < /dev/null"));
    }

    #[test]
    fn safe_redirect_both_dev_null() {
        // Both stdout and stderr are discarded; previously this repeated the
        // stderr-only case.
        assert!(!has_unsafe_shell_syntax("cmd >/dev/null 2>/dev/null"));
        assert!(!has_unsafe_shell_syntax("cmd >/dev/null 2>&1"));
    }

    #[test]
    fn unsafe_redirect_dev_null_prefix() {
        assert!(has_unsafe_shell_syntax("cmd > /dev/nullicious"));
    }

    #[test]
    fn unsafe_redirect_dev_null_path_traversal() {
        assert!(has_unsafe_shell_syntax("cmd > /dev/null/../etc/passwd"));
    }

    #[test]
    fn unsafe_redirect_dev_null_subpath() {
        assert!(has_unsafe_shell_syntax("cmd > /dev/null/foo"));
    }

    #[test]
    fn unsafe_redirect_to_file() {
        assert!(has_unsafe_shell_syntax("cmd > output.txt"));
    }

    #[test]
    fn has_flag_short() {
        let tokens = owned(&["sed", "-i", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_long_with_eq() {
        let tokens = owned(&["sed", "--in-place=.bak", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_combined_short() {
        let tokens = owned(&["sed", "-ni", "s/foo/bar/p"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_stops_at_double_dash() {
        let tokens = owned(&["cmd", "--", "-i"]);
        assert!(!has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_absent() {
        let tokens = owned(&["sed", "-n", "s/foo/bar/p"]);
        assert!(!has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn strip_single_env_var() {
        assert_eq!(
            strip_env_prefix("RACK_ENV=test bundle exec rspec"),
            "bundle exec rspec"
        );
    }

    #[test]
    fn strip_multiple_env_vars() {
        assert_eq!(
            strip_env_prefix("RACK_ENV=test RAILS_ENV=test bundle exec rspec"),
            "bundle exec rspec"
        );
    }

    #[test]
    fn strip_no_env_var() {
        assert_eq!(strip_env_prefix("bundle exec rspec"), "bundle exec rspec");
    }

    #[test]
    fn tokenize_simple() {
        assert_eq!(tokenize("grep foo file.txt"), Some(owned(&["grep", "foo", "file.txt"])));
    }

    #[test]
    fn tokenize_quoted() {
        assert_eq!(tokenize("echo 'hello world'"), Some(owned(&["echo", "hello world"])));
    }
}