Skip to main content

safe_chains/
parse.rs

/// Splits a shell command line into its individual command segments.
///
/// The input is cut at every `|`, `&`, `&&`, `;` and newline that appears
/// outside single/double quotes and is not backslash-escaped. (`||` is handled
/// as two consecutive `|` cuts; the empty piece in between is discarded.)
/// Quotes and backslashes are kept verbatim in the returned segments.
///
/// An `&` that directly follows a live redirection `>` (as in `2>&1` or `>&2`)
/// belongs to the redirection and does not split. A quoted or escaped `>` is
/// ordinary text, so `echo \>& rm -rf /` still splits at the `&`.
///
/// Returns the segments trimmed of surrounding whitespace, with empty segments
/// removed.
pub fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut segments: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    // True only while the previously consumed character was an unquoted,
    // unescaped `>`. Looking at the text of `current` instead would also match
    // an escaped `\>` and let the following `&` slip through unsplit.
    let mut after_redirect = false;
    let mut chars = cmd.chars().peekable();

    while let Some(c) = chars.next() {
        let prev_was_redirect = std::mem::replace(&mut after_redirect, false);

        if escaped {
            // The character after a backslash is always literal.
            current.push(c);
            escaped = false;
            continue;
        }

        match c {
            '\\' if !in_single => {
                escaped = true;
                current.push(c);
            }
            '\'' if !in_double => {
                in_single = !in_single;
                current.push(c);
            }
            '"' if !in_single => {
                in_double = !in_double;
                current.push(c);
            }
            // Everything else inside quotes is plain text.
            _ if in_single || in_double => current.push(c),
            '|' | ';' | '\n' => segments.push(std::mem::take(&mut current)),
            '&' if !prev_was_redirect => {
                segments.push(std::mem::take(&mut current));
                // Treat `&&` as a single separator.
                if chars.peek() == Some(&'&') {
                    chars.next();
                }
            }
            _ => {
                after_redirect = c == '>';
                current.push(c);
            }
        }
    }
    segments.push(current);

    segments
        .iter()
        .map(|segment| segment.trim())
        .filter(|segment| !segment.is_empty())
        .map(str::to_string)
        .collect()
}
59
60pub fn tokenize(segment: &str) -> Option<Vec<String>> {
61    shell_words::split(segment).ok()
62}
63
/// Reports whether `segment` contains shell syntax that can write files or
/// run further commands.
///
/// Returns `true` when the segment contains any of:
/// * a backtick or `$(` outside single quotes. Double quotes do NOT protect
///   these: the shell still performs command substitution inside `"..."`.
/// * a `<` or `>` outside all quotes, unless it is a pure file-descriptor
///   duplication/close (`2>&1`, `>&2`, `2>&-`) or targets exactly `/dev/null`.
///
/// A backslash outside single quotes makes the following character literal.
pub fn has_unsafe_shell_syntax(segment: &str) -> bool {
    let chars: Vec<char> = segment.chars().collect();
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (i, &c) in chars.iter().enumerate() {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            // Nothing is special inside single quotes.
            _ if in_single => {}
            // Command substitution is live both unquoted and in double quotes.
            '`' => return true,
            '$' if chars.get(i + 1) == Some(&'(') => return true,
            // Redirection operators are only live outside double quotes.
            '<' | '>' if !in_double => {
                let harmless = is_fd_duplication(&chars, i + 1)
                    || is_dev_null_target(&chars, i + 1, c);
                if !harmless {
                    return true;
                }
            }
            _ => {}
        }
    }
    false
}

/// The only redirection target path that is considered harmless.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Checks whether the text at `start` (the position right after a `<`/`>`) is
/// `&` followed by a complete descriptor word: one or more digits optionally
/// followed by `-`, or a lone `-`.
///
/// The word must end at the end of input, at whitespace, or at one of
/// `; | & ) < >`. Checking only the first character would accept `>&2file`,
/// which bash treats as a redirection to the file `2file`.
fn is_fd_duplication(chars: &[char], start: usize) -> bool {
    if chars.get(start) != Some(&'&') {
        return false;
    }
    let word_start = start + 1;
    let mut end = word_start;
    while chars.get(end).is_some_and(|ch| ch.is_ascii_digit()) {
        end += 1;
    }
    if chars.get(end) == Some(&'-') {
        end += 1;
    }
    if end == word_start {
        return false;
    }
    match chars.get(end) {
        None => true,
        Some(&next) => next.is_whitespace() || ";|&)<>".contains(next),
    }
}

/// Checks whether the redirection whose operator character sits just before
/// `start` points at exactly `/dev/null`.
///
/// For `>` a second `>` (append form) is skipped first, then any run of
/// spaces. The path must be followed by the end of input, whitespace, or one
/// of `; | & )`, so `/dev/nullicious` and `/dev/null/foo` are rejected.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut pos = start;
    if redirect_char == '>' && chars.get(pos) == Some(&'>') {
        pos += 1;
    }
    while chars.get(pos) == Some(&' ') {
        pos += 1;
    }
    let rest = match chars.get(pos..) {
        Some(rest) => rest,
        None => return false,
    };
    if !rest.starts_with(&DEV_NULL) {
        return false;
    }
    match rest.get(DEV_NULL.len()) {
        None => true,
        Some(&next) => next.is_whitespace() || ";|&)".contains(next),
    }
}
132
/// Reports whether an option is present among a command's arguments.
///
/// `tokens[0]` is taken to be the command name and is never inspected; an
/// empty `tokens` slice simply yields `false`. Scanning stops at the first
/// `--` token, and anything after it is ignored.
///
/// * `short` - the short option, with or without leading dashes (`"-i"`). It
///   matches any single-dash token that contains it, so bundled options such
///   as `-ni` are found. An empty short name therefore matches every
///   single-dash token.
/// * `long` - the optional long spelling (`"--in-place"`). It matches the
///   exact token or the `--in-place=value` form.
pub fn has_flag(tokens: &[String], short: &str, long: Option<&str>) -> bool {
    let short_char = short.trim_start_matches('-');

    // `skip(1)` instead of `&tokens[1..]`: the slice form panics when
    // `tokens` is empty.
    for token in tokens.iter().skip(1) {
        if token == "--" {
            return false;
        }

        let long_hit = long.is_some_and(|flag| {
            token
                .strip_prefix(flag)
                .is_some_and(|rest| rest.is_empty() || rest.starts_with('='))
        });
        if long_hit {
            return true;
        }

        // Text after exactly one leading dash; `None` for `--long` tokens and
        // for tokens that are not options at all.
        let bundled = token
            .strip_prefix('-')
            .filter(|rest| !rest.starts_with('-'));
        if bundled.is_some_and(|rest| rest.contains(short_char)) {
            return true;
        }
    }
    false
}
150
/// Reports whether `token` is a pure file-descriptor duplication or close,
/// such as `2>&1`, `>&2` or `2>&-`.
///
/// Accepted shape: an optional single leading digit, then `>&`, then a
/// non-empty target that is either one or more digits optionally followed by
/// `-`, or a lone `-`. A token with an empty target (`2>&`) or a mixed target
/// (`>&1-2`, `>&--`) is rejected.
pub fn is_fd_redirect(token: &str) -> bool {
    // At most one leading digit is treated as the source descriptor.
    let rest = token
        .strip_prefix(|c: char| c.is_ascii_digit())
        .unwrap_or(token);
    let target = match rest.strip_prefix(">&") {
        Some(target) => target,
        None => return false,
    };
    // Peel one optional trailing `-`; what is left must be all digits.
    let digits = target.strip_suffix('-').unwrap_or(target);
    !target.is_empty() && digits.bytes().all(|b| b.is_ascii_digit())
}
161
162pub fn strip_fd_redirects(s: &str) -> String {
163    match tokenize(s) {
164        Some(tokens) => {
165            let filtered: Vec<_> = tokens
166                .into_iter()
167                .filter(|t| !is_fd_redirect(t))
168                .collect();
169            shell_words::join(&filtered)
170        }
171        None => s.to_string(),
172    }
173}
174
/// Skips leading `NAME=value` environment assignments and returns the rest of
/// the segment (a sub-slice of the input, with leading whitespace removed).
///
/// A prefix counts as an assignment only when `NAME` starts with an ASCII
/// uppercase letter or `_` and consists solely of ASCII uppercase letters,
/// digits and `_`. The value runs up to the first space or tab that is not
/// inside quotes and not backslash-escaped, so `FOO="a b" cmd` yields `cmd`.
///
/// Stripping stops, and the remaining text is returned as is, when the next
/// word is not such an assignment or when an assignment is not followed by a
/// separator (for example `FOO=bar` alone, or a value with an unterminated
/// quote).
pub fn strip_env_prefix(segment: &str) -> &str {
    let mut rest = segment.trim_start();
    while let Some(end) = env_assignment_end(rest) {
        rest = rest[end..].trim_start();
    }
    rest
}

/// Returns the byte index of the blank that terminates a leading `NAME=value`
/// assignment in `s`, or `None` when `s` does not start with a terminated
/// assignment.
fn env_assignment_end(s: &str) -> Option<usize> {
    let eq_pos = s.find('=')?;
    let key = &s[..eq_pos];
    let starts_ok = key
        .bytes()
        .next()
        .is_some_and(|b| b.is_ascii_uppercase() || b == b'_');
    let body_ok = key
        .bytes()
        .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
    if !starts_ok || !body_ok {
        return None;
    }

    let value_start = eq_pos + 1;
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    for (offset, c) in s[value_start..].char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            // Only a blank outside quotes ends the value.
            ' ' | '\t' if !in_single && !in_double => return Some(value_start + offset),
            _ => {}
        }
    }
    None
}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned token list from string literals.
    fn args(words: &[&str]) -> Vec<String> {
        words.iter().map(|w| w.to_string()).collect()
    }

    // ---- split_outside_quotes ----

    #[test]
    fn split_pipe() {
        assert_eq!(
            split_outside_quotes("grep foo | head -5"),
            vec!["grep foo", "head -5"]
        );
    }

    #[test]
    fn split_and() {
        assert_eq!(
            split_outside_quotes("ls && echo done"),
            vec!["ls", "echo done"]
        );
    }

    #[test]
    fn split_semicolon() {
        assert_eq!(
            split_outside_quotes("ls; echo done"),
            vec!["ls", "echo done"]
        );
    }

    #[test]
    fn split_preserves_quoted_pipes() {
        assert_eq!(
            split_outside_quotes("echo 'a | b' foo"),
            vec!["echo 'a | b' foo"]
        );
    }

    #[test]
    fn split_background_operator() {
        assert_eq!(
            split_outside_quotes("cat file & rm -rf /"),
            vec!["cat file", "rm -rf /"]
        );
    }

    #[test]
    fn split_newline() {
        assert_eq!(
            split_outside_quotes("echo foo\necho bar"),
            vec!["echo foo", "echo bar"]
        );
    }

    // ---- has_unsafe_shell_syntax ----

    #[test]
    fn unsafe_redirect() {
        assert!(has_unsafe_shell_syntax("echo hello > file.txt"));
    }

    #[test]
    fn safe_fd_redirect_stderr_to_stdout() {
        assert!(!has_unsafe_shell_syntax("cargo clippy 2>&1"));
    }

    #[test]
    fn safe_fd_redirect_close() {
        assert!(!has_unsafe_shell_syntax("cmd 2>&-"));
    }

    #[test]
    fn unsafe_redirect_ampersand_no_digit() {
        assert!(has_unsafe_shell_syntax("echo hello >& file.txt"));
    }

    #[test]
    fn unsafe_backtick() {
        assert!(has_unsafe_shell_syntax("echo `rm -rf /`"));
    }

    #[test]
    fn unsafe_command_substitution() {
        assert!(has_unsafe_shell_syntax("echo $(rm -rf /)"));
    }

    #[test]
    fn safe_quoted_dollar_paren() {
        assert!(!has_unsafe_shell_syntax("echo '$(safe)' arg"));
    }

    #[test]
    fn safe_quoted_redirect() {
        assert!(!has_unsafe_shell_syntax("echo 'greater > than' test"));
    }

    #[test]
    fn safe_no_special_chars() {
        assert!(!has_unsafe_shell_syntax("grep pattern file"));
    }

    #[test]
    fn safe_redirect_to_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd >/dev/null"));
    }

    #[test]
    fn safe_redirect_stderr_to_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd 2>/dev/null"));
    }

    #[test]
    fn safe_redirect_append_to_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd >>/dev/null"));
    }

    #[test]
    fn safe_redirect_space_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd > /dev/null"));
    }

    #[test]
    fn safe_redirect_input_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd < /dev/null"));
    }

    // Previously this repeated the stderr-only case; it now covers stdout and
    // stderr being silenced together, which is what the name describes.
    #[test]
    fn safe_redirect_both_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd >/dev/null 2>&1"));
    }

    #[test]
    fn unsafe_redirect_dev_null_prefix() {
        assert!(has_unsafe_shell_syntax("cmd > /dev/nullicious"));
    }

    #[test]
    fn unsafe_redirect_dev_null_path_traversal() {
        assert!(has_unsafe_shell_syntax("cmd > /dev/null/../etc/passwd"));
    }

    #[test]
    fn unsafe_redirect_dev_null_subpath() {
        assert!(has_unsafe_shell_syntax("cmd > /dev/null/foo"));
    }

    #[test]
    fn unsafe_redirect_to_file() {
        assert!(has_unsafe_shell_syntax("cmd > output.txt"));
    }

    // ---- has_flag ----

    #[test]
    fn has_flag_short() {
        let tokens = args(&["sed", "-i", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_long_with_eq() {
        let tokens = args(&["sed", "--in-place=.bak", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_combined_short() {
        let tokens = args(&["sed", "-ni", "s/foo/bar/p"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_stops_at_double_dash() {
        let tokens = args(&["cmd", "--", "-i"]);
        assert!(!has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_absent() {
        let tokens = args(&["sed", "--quiet", "s/foo/bar/"]);
        assert!(!has_flag(&tokens, "-i", Some("--in-place")));
    }

    // ---- is_fd_redirect / strip_fd_redirects ----

    #[test]
    fn fd_redirect_tokens_are_recognised() {
        assert!(is_fd_redirect("2>&1"));
        assert!(is_fd_redirect(">&2"));
        assert!(is_fd_redirect("2>&-"));
    }

    #[test]
    fn non_fd_redirect_tokens_are_rejected() {
        assert!(!is_fd_redirect(""));
        assert!(!is_fd_redirect(">file"));
        assert!(!is_fd_redirect("2>file"));
        assert!(!is_fd_redirect("--flag"));
    }

    #[test]
    fn strip_fd_redirects_removes_duplication() {
        assert_eq!(strip_fd_redirects("cargo clippy 2>&1"), "cargo clippy");
    }

    #[test]
    fn strip_fd_redirects_keeps_plain_command() {
        assert_eq!(strip_fd_redirects("grep foo file.txt"), "grep foo file.txt");
    }

    // ---- strip_env_prefix ----

    #[test]
    fn strip_single_env_var() {
        assert_eq!(
            strip_env_prefix("RACK_ENV=test bundle exec rspec"),
            "bundle exec rspec"
        );
    }

    #[test]
    fn strip_multiple_env_vars() {
        assert_eq!(
            strip_env_prefix("RACK_ENV=test RAILS_ENV=test bundle exec rspec"),
            "bundle exec rspec"
        );
    }

    #[test]
    fn strip_no_env_var() {
        assert_eq!(strip_env_prefix("bundle exec rspec"), "bundle exec rspec");
    }

    // ---- tokenize ----

    #[test]
    fn tokenize_simple() {
        assert_eq!(
            tokenize("grep foo file.txt"),
            Some(args(&["grep", "foo", "file.txt"]))
        );
    }

    #[test]
    fn tokenize_quoted() {
        assert_eq!(
            tokenize("echo 'hello world'"),
            Some(args(&["echo", "hello world"]))
        );
    }
}