Skip to main content

safe_chains/
parse.rs

/// A complete command line, stored as the raw text it was built from.
///
/// Use [`CommandLine::segments`] to break it into individual [`Segment`]s.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CommandLine(String);
3
/// One piece of a command line, stored as raw (still shell-quoted) text.
///
/// Values are produced by [`CommandLine::segments`] or [`Segment::from_words`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Segment(String);
6
7#[derive(Debug, Clone, Eq)]
8pub struct Token(String);
9
10impl CommandLine {
11    pub fn new(s: impl Into<String>) -> Self {
12        Self(s.into())
13    }
14
15    pub fn as_str(&self) -> &str {
16        &self.0
17    }
18
19    pub fn segments(&self) -> Vec<Segment> {
20        split_outside_quotes(&self.0)
21            .into_iter()
22            .map(Segment)
23            .collect()
24    }
25}
26
27impl Segment {
28    pub fn as_str(&self) -> &str {
29        &self.0
30    }
31
32    pub fn is_empty(&self) -> bool {
33        self.0.is_empty()
34    }
35
36    pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
37        Segment(shell_words::join(words))
38    }
39
40    pub fn tokenize(&self) -> Option<Vec<Token>> {
41        shell_words::split(&self.0)
42            .ok()
43            .map(|v| v.into_iter().map(Token).collect())
44    }
45
46    pub fn has_unsafe_shell_syntax(&self) -> bool {
47        check_unsafe_shell_syntax(&self.0)
48    }
49
50    pub fn strip_env_prefix(&self) -> Segment {
51        Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
52    }
53
54    pub fn strip_fd_redirects(&self) -> Segment {
55        match self.tokenize() {
56            Some(tokens) => {
57                let filtered: Vec<_> = tokens
58                    .into_iter()
59                    .filter(|t| !t.is_fd_redirect())
60                    .collect();
61                Token::join(&filtered)
62            }
63            None => Segment(self.0.clone()),
64        }
65    }
66}
67
68impl Token {
69    pub fn as_str(&self) -> &str {
70        &self.0
71    }
72
73    pub fn join(tokens: &[Token]) -> Segment {
74        Segment::from_words(tokens)
75    }
76
77    pub fn as_command_line(&self) -> CommandLine {
78        CommandLine(self.0.clone())
79    }
80
81    pub fn command_name(&self) -> &str {
82        self.rsplit('/').next().unwrap_or(self.as_str())
83    }
84
85    pub fn is_fd_redirect(&self) -> bool {
86        let s = self.as_str();
87        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
88        if rest.len() < 2 || !rest.starts_with(">&") {
89            return false;
90        }
91        let after = &rest[2..];
92        !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
93    }
94
95    pub fn is_dev_null_redirect(&self) -> bool {
96        let s = self.as_str();
97        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
98        rest.strip_prefix(">>")
99            .or_else(|| rest.strip_prefix('>'))
100            .or_else(|| rest.strip_prefix('<'))
101            .is_some_and(|after| after == "/dev/null")
102    }
103
104    pub fn is_redirect_operator(&self) -> bool {
105        let s = self.as_str();
106        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
107        matches!(rest, ">" | ">>" | "<")
108    }
109}
110
111impl std::ops::Deref for Token {
112    type Target = str;
113    fn deref(&self) -> &str {
114        &self.0
115    }
116}
117
118impl AsRef<str> for Token {
119    fn as_ref(&self) -> &str {
120        &self.0
121    }
122}
123
124impl PartialEq for Token {
125    fn eq(&self, other: &Self) -> bool {
126        self.0 == other.0
127    }
128}
129
130impl PartialEq<str> for Token {
131    fn eq(&self, other: &str) -> bool {
132        self.0 == other
133    }
134}
135
136impl PartialEq<&str> for Token {
137    fn eq(&self, other: &&str) -> bool {
138        self.0 == *other
139    }
140}
141
142impl PartialEq<Token> for str {
143    fn eq(&self, other: &Token) -> bool {
144        self == other.as_str()
145    }
146}
147
148impl PartialEq<Token> for &str {
149    fn eq(&self, other: &Token) -> bool {
150        *self == other.as_str()
151    }
152}
153
154impl std::fmt::Display for Token {
155    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156        f.write_str(&self.0)
157    }
158}
159
160pub fn has_flag(tokens: &[Token], short: &str, long: Option<&str>) -> bool {
161    let short_char = short.trim_start_matches('-');
162    for token in &tokens[1..] {
163        if token == "--" {
164            return false;
165        }
166        if let Some(long_flag) = long
167            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
168        {
169            return true;
170        }
171        if token.starts_with('-') && !token.starts_with("--") && token[1..].contains(short_char) {
172            return true;
173        }
174    }
175    false
176}
177
/// Splits `cmd` into pieces at the unquoted separators `|`, `&`, `;` and
/// newline.
///
/// * Separators inside single or double quotes, or right after a backslash,
///   are kept as ordinary text. A backslash has no special meaning inside
///   single quotes.
/// * An `&` directly after `>` (as in `2>&1`) is not a separator.
/// * `&&` counts as a single separator.
///
/// Quotes and backslashes are kept in the output. Every piece is trimmed and
/// empty pieces are dropped.
fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;
    let mut input = cmd.chars().peekable();

    while let Some(ch) = input.next() {
        if after_backslash {
            // The escaped character is always literal.
            after_backslash = false;
            buf.push(ch);
            continue;
        }

        let quoted = single_quoted || double_quoted;
        let is_separator = match ch {
            '\\' if !single_quoted => {
                after_backslash = true;
                false
            }
            '\'' if !double_quoted => {
                single_quoted = !single_quoted;
                false
            }
            '"' if !single_quoted => {
                double_quoted = !double_quoted;
                false
            }
            '|' | ';' | '\n' => !quoted,
            // `>&` belongs to a redirect, not to a command separator.
            '&' => !quoted && !buf.ends_with('>'),
            _ => false,
        };

        if !is_separator {
            buf.push(ch);
            continue;
        }

        // Swallow the second half of `&&`.
        if ch == '&' && input.peek() == Some(&'&') {
            input.next();
        }
        pieces.push(std::mem::take(&mut buf));
    }
    pieces.push(buf);

    pieces
        .iter()
        .map(|piece| piece.trim())
        .filter(|piece| !piece.is_empty())
        .map(str::to_string)
        .collect()
}
233
/// Reports whether `segment` contains shell syntax this checker treats as
/// unsafe.
///
/// Flagged constructs:
/// * Command substitution — a backtick or `$(` — anywhere except inside
///   single quotes. Double quotes do not protect it: the shell still expands
///   command substitution inside `"..."`.
/// * An unquoted `>` or `<`, unless it is either
///   - a file-descriptor duplication/close (`>&1`, `2>&-`, `<&3`), or
///   - a redirect whose target is exactly `/dev/null`.
///
/// A backslash outside single quotes makes the next character literal.
fn check_unsafe_shell_syntax(segment: &str) -> bool {
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    let chars: Vec<char> = segment.chars().collect();

    for (i, &c) in chars.iter().enumerate() {
        if escaped {
            escaped = false;
            continue;
        }
        if c == '\\' && !in_single {
            escaped = true;
            continue;
        }
        if c == '\'' && !in_double {
            in_single = !in_single;
            continue;
        }
        if c == '"' && !in_single {
            in_double = !in_double;
            continue;
        }
        if in_single {
            continue;
        }

        // Checked before the double-quote guard on purpose: `"$(cmd)"` and
        // "`cmd`" still run `cmd`.
        if c == '`' || (c == '$' && chars.get(i + 1) == Some(&'(')) {
            return true;
        }
        if in_double {
            // Redirect characters are literal text inside double quotes.
            continue;
        }

        if c == '>' || c == '<' {
            if is_fd_duplication(&chars, i + 1) || is_dev_null_target(&chars, i + 1, c) {
                continue;
            }
            return true;
        }
    }
    false
}

/// Returns `true` when `chars[start..]` is the `&N`, `&N-` or `&-` tail of a
/// file-descriptor duplication and that word ends right there.
///
/// The whole word is validated, not just its first character: `>&2file` or
/// `>&--` name a file to write to and must not be mistaken for `>&2` / `>&-`.
/// A following `<` or `>` counts as the end of the word; the caller inspects
/// that operator on a later iteration.
fn is_fd_duplication(chars: &[char], start: usize) -> bool {
    if chars.get(start) != Some(&'&') {
        return false;
    }
    let digits_start = start + 1;
    let mut j = digits_start;
    while j < chars.len() && chars[j].is_ascii_digit() {
        j += 1;
    }
    let has_digits = j > digits_start;
    let has_dash = chars.get(j) == Some(&'-');
    if has_dash {
        j += 1;
    }
    if !has_digits && !has_dash {
        return false;
    }
    match chars.get(j) {
        None => true,
        Some(&ch) => ch.is_whitespace() || ";|&)<>".contains(ch),
    }
}

/// The only redirect target that is considered harmless.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Returns `true` when the redirect whose operator character sits just before
/// `start` targets exactly `/dev/null`.
///
/// `start` is the index right after the `>` or `<` (`redirect_char`). For `>`
/// one further `>` is skipped so `>>` is handled, then any spaces. The path
/// must be followed by the end of input, whitespace, or one of `;|&)`, so
/// `/dev/nullicious` and `/dev/null/foo` are rejected.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut j = start;
    if redirect_char == '>' && chars.get(j) == Some(&'>') {
        j += 1;
    }
    while chars.get(j) == Some(&' ') {
        j += 1;
    }
    let end = j + DEV_NULL.len();
    if chars.get(j..end) != Some(&DEV_NULL[..]) {
        return false;
    }
    match chars.get(end) {
        None => true,
        Some(&ch) => ch.is_whitespace() || ";|&)".contains(ch),
    }
}
302
/// Returns the byte index of the first space in `s` that is neither inside
/// single or double quotes nor preceded by a backslash, or `None` when there
/// is no such space.
///
/// A backslash has no special meaning inside single quotes.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut skip_next = false;

    s.bytes().position(|byte| {
        if skip_next {
            // This byte was escaped by the previous backslash.
            skip_next = false;
            return false;
        }
        match byte {
            b'\\' if !single_quoted => skip_next = true,
            b'\'' if !double_quoted => single_quoted = !single_quoted,
            b'"' if !single_quoted => double_quoted = !double_quoted,
            b' ' => return !single_quoted && !double_quoted,
            _ => {}
        }
        false
    })
}
330
331fn strip_env_prefix_str(segment: &str) -> &str {
332    let mut rest = segment;
333    loop {
334        let trimmed = rest.trim_start();
335        if trimmed.is_empty() {
336            return trimmed;
337        }
338        let bytes = trimmed.as_bytes();
339        if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
340            return trimmed;
341        }
342        if let Some(eq_pos) = trimmed.find('=') {
343            let key = &trimmed[..eq_pos];
344            let valid_key = key
345                .bytes()
346                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
347            if !valid_key {
348                return trimmed;
349            }
350            if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
351                rest = &trimmed[eq_pos + space_pos..];
352                continue;
353            }
354            return trimmed;
355        }
356        return trimmed;
357    }
358}
359
/// Unit tests for command-line splitting, unsafe-syntax detection, flag
/// lookup, env-prefix stripping, tokenization and the `Token` predicates.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Segment` straight from raw text.
    fn seg(s: &str) -> Segment {
        Segment(s.to_string())
    }

    /// Builds a `Token` straight from raw text.
    fn tok(s: &str) -> Token {
        Token(s.to_string())
    }

    /// Builds a token list from string literals.
    fn toks(words: &[&str]) -> Vec<Token> {
        words.iter().map(|s| tok(s)).collect()
    }

    #[test]
    fn split_pipe() {
        let segs = CommandLine::new("grep foo | head -5").segments();
        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
    }

    #[test]
    fn split_and() {
        let segs = CommandLine::new("ls && echo done").segments();
        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
    }

    #[test]
    fn split_semicolon() {
        let segs = CommandLine::new("ls; echo done").segments();
        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
    }

    #[test]
    fn split_preserves_quoted_pipes() {
        let segs = CommandLine::new("echo 'a | b' foo").segments();
        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
    }

    #[test]
    fn split_background_operator() {
        let segs = CommandLine::new("cat file & rm -rf /").segments();
        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
    }

    #[test]
    fn split_newline() {
        let segs = CommandLine::new("echo foo\necho bar").segments();
        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
    }

    #[test]
    fn unsafe_redirect() {
        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_fd_redirect_stderr_to_stdout() {
        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_fd_redirect_close() {
        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_ampersand_no_digit() {
        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_backtick() {
        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_command_substitution() {
        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_quoted_dollar_paren() {
        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_quoted_redirect() {
        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_no_special_chars() {
        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_to_dev_null() {
        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_stderr_to_dev_null() {
        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_append_to_dev_null() {
        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_space_dev_null() {
        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_input_dev_null() {
        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
    }

    // Previously this repeated the stderr-only case above; it now covers
    // stdout and stderr being discarded together, in both common spellings.
    #[test]
    fn safe_redirect_both_dev_null() {
        assert!(!seg("cmd >/dev/null 2>/dev/null").has_unsafe_shell_syntax());
        assert!(!seg("cmd > /dev/null 2>&1").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_dev_null_prefix() {
        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_dev_null_path_traversal() {
        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_dev_null_subpath() {
        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_to_file() {
        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
    }

    #[test]
    fn has_flag_short() {
        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_long_with_eq() {
        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_combined_short() {
        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_stops_at_double_dash() {
        let tokens = toks(&["cmd", "--", "-i"]);
        assert!(!has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn strip_single_env_var() {
        assert_eq!(
            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
            seg("bundle exec rspec")
        );
    }

    #[test]
    fn strip_multiple_env_vars() {
        assert_eq!(
            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
            seg("bundle exec rspec")
        );
    }

    #[test]
    fn strip_no_env_var() {
        assert_eq!(
            seg("bundle exec rspec").strip_env_prefix(),
            seg("bundle exec rspec")
        );
    }

    #[test]
    fn tokenize_simple() {
        assert_eq!(
            seg("grep foo file.txt").tokenize(),
            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
        );
    }

    #[test]
    fn tokenize_quoted() {
        assert_eq!(
            seg("echo 'hello world'").tokenize(),
            Some(vec![tok("echo"), tok("hello world")])
        );
    }

    #[test]
    fn strip_env_quoted_single() {
        assert_eq!(
            seg("FOO='bar baz' ls").strip_env_prefix(),
            seg("ls")
        );
    }

    #[test]
    fn strip_env_quoted_double() {
        assert_eq!(
            seg("FOO=\"bar baz\" ls").strip_env_prefix(),
            seg("ls")
        );
    }

    #[test]
    fn strip_env_quoted_with_equals() {
        assert_eq!(
            seg("FOO='a=b' ls").strip_env_prefix(),
            seg("ls")
        );
    }

    #[test]
    fn strip_env_quoted_multiple() {
        assert_eq!(
            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
            seg("cmd")
        );
    }

    #[test]
    fn command_name_simple() {
        assert_eq!(tok("ls").command_name(), "ls");
    }

    #[test]
    fn command_name_with_path() {
        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
    }

    #[test]
    fn command_name_relative_path() {
        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
    }

    #[test]
    fn fd_redirect_detection() {
        assert!(tok("2>&1").is_fd_redirect());
        assert!(tok(">&2").is_fd_redirect());
        assert!(tok("10>&1").is_fd_redirect());
        assert!(tok("255>&2").is_fd_redirect());
        assert!(tok("2>&-").is_fd_redirect());
        assert!(tok("2>&10").is_fd_redirect());
        assert!(!tok(">").is_fd_redirect());
        assert!(!tok("/dev/null").is_fd_redirect());
        assert!(!tok(">&").is_fd_redirect());
        assert!(!tok("").is_fd_redirect());
        assert!(!tok("42").is_fd_redirect());
        assert!(!tok("123abc").is_fd_redirect());
    }

    #[test]
    fn dev_null_redirect_single_token() {
        assert!(tok(">/dev/null").is_dev_null_redirect());
        assert!(tok(">>/dev/null").is_dev_null_redirect());
        assert!(tok("2>/dev/null").is_dev_null_redirect());
        assert!(tok("2>>/dev/null").is_dev_null_redirect());
        assert!(tok("</dev/null").is_dev_null_redirect());
        assert!(tok("10>/dev/null").is_dev_null_redirect());
        assert!(tok("255>/dev/null").is_dev_null_redirect());
        assert!(!tok(">/tmp/file").is_dev_null_redirect());
        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
        assert!(!tok("ls").is_dev_null_redirect());
        assert!(!tok("").is_dev_null_redirect());
        assert!(!tok("42").is_dev_null_redirect());
        assert!(!tok("<</dev/null").is_dev_null_redirect());
    }

    #[test]
    fn redirect_operator_detection() {
        assert!(tok(">").is_redirect_operator());
        assert!(tok(">>").is_redirect_operator());
        assert!(tok("<").is_redirect_operator());
        assert!(tok("2>").is_redirect_operator());
        assert!(tok("2>>").is_redirect_operator());
        assert!(tok("10>").is_redirect_operator());
        assert!(tok("255>>").is_redirect_operator());
        assert!(!tok("ls").is_redirect_operator());
        assert!(!tok(">&1").is_redirect_operator());
        assert!(!tok("/dev/null").is_redirect_operator());
        assert!(!tok("").is_redirect_operator());
        assert!(!tok("42").is_redirect_operator());
        assert!(!tok("<<").is_redirect_operator());
    }

    #[test]
    fn reverse_partial_eq() {
        let t = tok("hello");
        assert!("hello" == t);
        assert!("world" != t);
        let s: &str = "hello";
        assert!(s == t);
    }
}