// safe_chains/parse.rs

1use std::ops::Deref;
2
/// A whole command-line string, which [`CommandLine::segments`] splits into
/// individual [`Segment`]s.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CommandLine(String);
5
/// One command taken from a [`CommandLine`], i.e. the text between unquoted
/// `|`, `&`, `;` or newline separators.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Segment(String);
8
/// A single shell word, as produced by [`Segment::tokenize`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Token(String);
11
12impl Deref for Token {
13    type Target = str;
14    fn deref(&self) -> &str {
15        &self.0
16    }
17}
18
19#[derive(Copy, Clone)]
20pub struct WordSet(&'static [&'static str]);
21
22impl WordSet {
23    pub const fn new(words: &'static [&'static str]) -> Self {
24        let mut i = 1;
25        while i < words.len() {
26            assert!(
27                const_less(words[i - 1].as_bytes(), words[i].as_bytes()),
28                "WordSet: entries must be sorted, no duplicates"
29            );
30            i += 1;
31        }
32        Self(words)
33    }
34
35    pub fn contains(&self, s: &str) -> bool {
36        self.0.binary_search(&s).is_ok()
37    }
38
39    pub fn iter(&self) -> impl Iterator<Item = &'static str> + '_ {
40        self.0.iter().copied()
41    }
42}
43
/// Const-evaluable lexicographic "strictly less than" on byte slices.
///
/// The first differing byte decides the result; if one slice is a prefix of
/// the other, the shorter one is the smaller. Equal slices are not less.
const fn const_less(a: &[u8], b: &[u8]) -> bool {
    let mut idx = 0;
    while idx < a.len() && idx < b.len() {
        if a[idx] != b[idx] {
            return a[idx] < b[idx];
        }
        idx += 1;
    }
    // Every shared position matched, so only the lengths can break the tie.
    a.len() < b.len()
}
58
59pub struct FlagCheck {
60    required: WordSet,
61    denied: WordSet,
62}
63
64impl FlagCheck {
65    pub const fn new(required: &'static [&'static str], denied: &'static [&'static str]) -> Self {
66        Self {
67            required: WordSet::new(required),
68            denied: WordSet::new(denied),
69        }
70    }
71
72    pub fn required(&self) -> &WordSet {
73        &self.required
74    }
75
76    pub fn denied(&self) -> &WordSet {
77        &self.denied
78    }
79
80    pub fn is_safe(&self, tokens: &[Token]) -> bool {
81        tokens.iter().any(|t| self.required.contains(t))
82            && !tokens.iter().any(|t| self.denied.contains(t))
83    }
84}
85
86impl CommandLine {
87    pub fn new(s: impl Into<String>) -> Self {
88        Self(s.into())
89    }
90
91    pub fn as_str(&self) -> &str {
92        &self.0
93    }
94
95    pub fn segments(&self) -> Vec<Segment> {
96        split_outside_quotes(&self.0)
97            .into_iter()
98            .map(Segment)
99            .collect()
100    }
101}
102
103impl Segment {
104    pub fn as_str(&self) -> &str {
105        &self.0
106    }
107
108    pub fn is_empty(&self) -> bool {
109        self.0.is_empty()
110    }
111
112    pub fn from_raw(s: String) -> Self {
113        Segment(s)
114    }
115
116    pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
117        Segment(shell_words::join(words))
118    }
119
120    pub fn tokenize(&self) -> Option<Vec<Token>> {
121        shell_words::split(&self.0)
122            .ok()
123            .map(|v| v.into_iter().map(Token).collect())
124    }
125
126    pub fn has_unsafe_shell_syntax(&self) -> bool {
127        check_unsafe_shell_syntax(&self.0)
128    }
129
130    pub fn has_unsafe_redirects(&self) -> bool {
131        check_unsafe_redirects(&self.0)
132    }
133
134    pub(crate) fn extract_substitutions(&self) -> Result<(Vec<String>, String), ()> {
135        extract_substitutions(&self.0)
136    }
137
138    pub fn strip_env_prefix(&self) -> Segment {
139        Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
140    }
141
142    pub fn from_tokens_replacing(tokens: &[Token], find: &str, replace: &str) -> Self {
143        let words: Vec<&str> = tokens
144            .iter()
145            .map(|t| if t.as_str() == find { replace } else { t.as_str() })
146            .collect();
147        Self::from_words(&words)
148    }
149
150    pub fn strip_fd_redirects(&self) -> Segment {
151        match self.tokenize() {
152            Some(tokens) => {
153                let filtered: Vec<_> = tokens
154                    .into_iter()
155                    .filter(|t| !t.is_fd_redirect())
156                    .collect();
157                Token::join(&filtered)
158            }
159            None => Segment(self.0.clone()),
160        }
161    }
162}
163
164impl Token {
165    #[cfg(test)]
166    pub(crate) fn from_test(s: &str) -> Self {
167        Self(s.to_string())
168    }
169
170    pub fn as_str(&self) -> &str {
171        &self.0
172    }
173
174    pub fn join(tokens: &[Token]) -> Segment {
175        Segment(shell_words::join(tokens.iter().map(|t| t.as_str())))
176    }
177
178    pub fn as_command_line(&self) -> CommandLine {
179        CommandLine(self.0.clone())
180    }
181
182    pub fn command_name(&self) -> &str {
183        self.as_str().rsplit('/').next().unwrap_or(self.as_str())
184    }
185
186    pub fn is_one_of(&self, options: &[&str]) -> bool {
187        options.contains(&self.as_str())
188    }
189
190    pub fn split_value(&self, sep: &str) -> Option<&str> {
191        self.as_str().split_once(sep).map(|(_, v)| v)
192    }
193
194    pub fn content_outside_double_quotes(&self) -> String {
195        let bytes = self.as_str().as_bytes();
196        let mut result = Vec::with_capacity(bytes.len());
197        let mut i = 0;
198        while i < bytes.len() {
199            if bytes[i] == b'"' {
200                result.push(b' ');
201                i += 1;
202                while i < bytes.len() {
203                    if bytes[i] == b'\\' && i + 1 < bytes.len() {
204                        i += 2;
205                        continue;
206                    }
207                    if bytes[i] == b'"' {
208                        i += 1;
209                        break;
210                    }
211                    i += 1;
212                }
213            } else {
214                result.push(bytes[i]);
215                i += 1;
216            }
217        }
218        String::from_utf8(result).unwrap_or_default()
219    }
220
221    pub fn is_fd_redirect(&self) -> bool {
222        let s = self.as_str();
223        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
224        if rest.len() < 2 || !rest.starts_with(">&") {
225            return false;
226        }
227        let after = &rest[2..];
228        !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
229    }
230
231    pub fn is_dev_null_redirect(&self) -> bool {
232        let s = self.as_str();
233        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
234        rest.strip_prefix(">>")
235            .or_else(|| rest.strip_prefix('>'))
236            .or_else(|| rest.strip_prefix('<'))
237            .is_some_and(|after| after == "/dev/null")
238    }
239
240    pub fn is_redirect_operator(&self) -> bool {
241        let s = self.as_str();
242        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
243        matches!(rest, ">" | ">>" | "<")
244    }
245}
246
247impl PartialEq<str> for Token {
248    fn eq(&self, other: &str) -> bool {
249        self.0 == other
250    }
251}
252
253impl PartialEq<&str> for Token {
254    fn eq(&self, other: &&str) -> bool {
255        self.0 == *other
256    }
257}
258
259impl PartialEq<Token> for str {
260    fn eq(&self, other: &Token) -> bool {
261        self == other.as_str()
262    }
263}
264
265impl PartialEq<Token> for &str {
266    fn eq(&self, other: &Token) -> bool {
267        *self == other.as_str()
268    }
269}
270
271impl std::fmt::Display for Token {
272    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273        f.write_str(&self.0)
274    }
275}
276
277pub fn has_flag(tokens: &[Token], short: Option<&str>, long: Option<&str>) -> bool {
278    for token in &tokens[1..] {
279        if token == "--" {
280            return false;
281        }
282        if let Some(long_flag) = long
283            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
284        {
285            return true;
286        }
287        if let Some(short_flag) = short {
288            let short_char = short_flag.trim_start_matches('-');
289            if token.starts_with('-')
290                && !token.starts_with("--")
291                && token[1..].contains(short_char)
292            {
293                return true;
294            }
295        }
296    }
297    false
298}
299
/// Splits a command line into its individual commands.
///
/// The line is cut at every `|`, `&`, `&&`, `;` and newline that is neither
/// quoted nor backslash-escaped. Each piece is trimmed and empty pieces are
/// dropped, so `a || b` yields `["a", "b"]`.
///
/// An `&` that immediately follows an active `>` is kept, so fd duplications
/// such as `2>&1` stay inside their command. Only an unquoted, unescaped `>`
/// counts: in `echo \>& rm -rf /` the `>` is a literal character, the `&` is
/// a real background operator, and the line is split there.
fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut segments = Vec::new();
    let mut current = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    // Set when the character just pushed was an active (unquoted, unescaped) `>`.
    let mut after_redirect = false;
    let mut chars = cmd.chars().peekable();

    while let Some(c) = chars.next() {
        // The flag only describes the immediately preceding character, so it
        // is consumed (and cleared) on every iteration.
        let follows_redirect = std::mem::take(&mut after_redirect);

        if escaped {
            current.push(c);
            escaped = false;
            continue;
        }
        if c == '\\' && !in_single {
            escaped = true;
            current.push(c);
            continue;
        }
        if c == '\'' && !in_double {
            in_single = !in_single;
            current.push(c);
            continue;
        }
        if c == '"' && !in_single {
            in_double = !in_double;
            current.push(c);
            continue;
        }
        if !in_single && !in_double {
            match c {
                '|' | ';' | '\n' => {
                    segments.push(std::mem::take(&mut current));
                    continue;
                }
                '&' if !follows_redirect => {
                    segments.push(std::mem::take(&mut current));
                    // Treat `&&` as a single separator.
                    if chars.peek() == Some(&'&') {
                        chars.next();
                    }
                    continue;
                }
                '>' => after_redirect = true,
                _ => {}
            }
        }
        current.push(c);
    }
    segments.push(current);

    segments
        .into_iter()
        .map(|piece| piece.trim().to_string())
        .filter(|piece| !piece.is_empty())
        .collect()
}
355
356fn check_unsafe_shell_syntax(segment: &str) -> bool {
357    let mut in_single = false;
358    let mut in_double = false;
359    let mut escaped = false;
360    let chars: Vec<char> = segment.chars().collect();
361
362    for (i, &c) in chars.iter().enumerate() {
363        if escaped {
364            escaped = false;
365            continue;
366        }
367        if c == '\\' && !in_single {
368            escaped = true;
369            continue;
370        }
371        if c == '\'' && !in_double {
372            in_single = !in_single;
373            continue;
374        }
375        if c == '"' && !in_single {
376            in_double = !in_double;
377            continue;
378        }
379        if !in_single && !in_double {
380            if c == '>' || c == '<' {
381                let next = chars.get(i + 1);
382                if next == Some(&'&')
383                    && chars
384                        .get(i + 2)
385                        .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
386                {
387                    continue;
388                }
389                if is_dev_null_target(&chars, i + 1, c) {
390                    continue;
391                }
392                return true;
393            }
394            if c == '`' {
395                return true;
396            }
397            if c == '$' && chars.get(i + 1) == Some(&'(') {
398                return true;
399            }
400        }
401    }
402    false
403}
404
405fn check_unsafe_redirects(segment: &str) -> bool {
406    let mut in_single = false;
407    let mut in_double = false;
408    let mut escaped = false;
409    let chars: Vec<char> = segment.chars().collect();
410
411    for (i, &c) in chars.iter().enumerate() {
412        if escaped {
413            escaped = false;
414            continue;
415        }
416        if c == '\\' && !in_single {
417            escaped = true;
418            continue;
419        }
420        if c == '\'' && !in_double {
421            in_single = !in_single;
422            continue;
423        }
424        if c == '"' && !in_single {
425            in_double = !in_double;
426            continue;
427        }
428        if !in_single && !in_double && (c == '>' || c == '<') {
429            let next = chars.get(i + 1);
430            if next == Some(&'&')
431                && chars
432                    .get(i + 2)
433                    .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
434            {
435                continue;
436            }
437            if is_dev_null_target(&chars, i + 1, c) {
438                continue;
439            }
440            return true;
441        }
442    }
443    false
444}
445
446fn extract_substitutions(segment: &str) -> Result<(Vec<String>, String), ()> {
447    let mut subs = Vec::new();
448    let mut cleaned = String::with_capacity(segment.len());
449    let mut in_single = false;
450    let mut in_double = false;
451    let mut escaped = false;
452    let chars: Vec<char> = segment.chars().collect();
453    let mut i = 0;
454
455    while i < chars.len() {
456        if escaped {
457            escaped = false;
458            cleaned.push(chars[i]);
459            i += 1;
460            continue;
461        }
462        if chars[i] == '\\' && !in_single {
463            escaped = true;
464            cleaned.push(chars[i]);
465            i += 1;
466            continue;
467        }
468        if chars[i] == '\'' && !in_double {
469            in_single = !in_single;
470            cleaned.push(chars[i]);
471            i += 1;
472            continue;
473        }
474        if chars[i] == '"' && !in_single {
475            in_double = !in_double;
476            cleaned.push(chars[i]);
477            i += 1;
478            continue;
479        }
480        if !in_single {
481            if chars[i] == '`' {
482                let start = i + 1;
483                let end = find_matching_backtick(&chars, start).ok_or(())?;
484                let inner: String = chars[start..end].iter().collect();
485                subs.push(inner);
486                cleaned.push('_');
487                i = end + 1;
488                continue;
489            }
490            if chars[i] == '$' && chars.get(i + 1) == Some(&'(') {
491                let start = i + 2;
492                let end = find_matching_paren(&chars, start).ok_or(())?;
493                let inner: String = chars[start..end].iter().collect();
494                subs.push(inner);
495                cleaned.push('_');
496                i = end + 1;
497                continue;
498            }
499        }
500        cleaned.push(chars[i]);
501        i += 1;
502    }
503    Ok((subs, cleaned))
504}
505
/// Finds the index of the first backtick at or after `start` that is neither
/// quoted nor backslash-escaped.
///
/// Quote state starts fresh at `start`. Returns `None` when there is no such
/// backtick.
fn find_matching_backtick(chars: &[char], start: usize) -> Option<usize> {
    let (mut in_single, mut in_double, mut escaped) = (false, false, false);
    for (idx, &c) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '`' if !in_single && !in_double => return Some(idx),
            _ => {}
        }
    }
    None
}
539
/// Finds the index of the `)` that closes a parenthesis already opened just
/// before `start`.
///
/// Nested unquoted `(`/`)` pairs are balanced; parentheses that are quoted or
/// backslash-escaped are ignored. Returns `None` when the closing `)` is
/// missing.
fn find_matching_paren(chars: &[char], start: usize) -> Option<usize> {
    // One parenthesis is already open when scanning begins.
    let mut open_parens = 1u32;
    let (mut in_single, mut in_double, mut escaped) = (false, false, false);
    for (idx, &c) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '(' if !in_single && !in_double => open_parens += 1,
            ')' if !in_single && !in_double => {
                open_parens -= 1;
                if open_parens == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }
    None
}
581
/// The characters of the path `/dev/null`.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Decides whether the redirect whose operator character sits just before
/// `start` targets exactly `/dev/null`.
///
/// For `redirect_char == '>'` a second `>` (append) is skipped first. Any run
/// of space characters is then skipped, after which `/dev/null` must follow
/// and be terminated by end of input, whitespace, or one of `; | & )`.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut rest = chars.get(start..).unwrap_or(&[]);

    if redirect_char == '>' {
        if let ['>', tail @ ..] = rest {
            rest = tail;
        }
    }
    while let [' ', tail @ ..] = rest {
        rest = tail;
    }

    if !rest.starts_with(&DEV_NULL) {
        return false;
    }
    // Reject longer paths such as `/dev/nullicious` or `/dev/null/foo`.
    match rest.get(DEV_NULL.len()) {
        None => true,
        Some(&next) => next.is_whitespace() || ";|&)".contains(next),
    }
}
601
/// Returns the byte offset of the first space character in `s` that is
/// neither quoted nor backslash-escaped, or `None` when there is none.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let (mut in_single, mut in_double, mut escaped) = (false, false, false);
    s.bytes().position(|byte| {
        if escaped {
            // This byte was escaped by the preceding backslash.
            escaped = false;
            return false;
        }
        match byte {
            b'\\' if !in_single => {
                escaped = true;
                false
            }
            b'\'' if !in_double => {
                in_single = !in_single;
                false
            }
            b'"' if !in_single => {
                in_double = !in_double;
                false
            }
            b' ' => !in_single && !in_double,
            _ => false,
        }
    })
}
629
630fn strip_env_prefix_str(segment: &str) -> &str {
631    let mut rest = segment;
632    loop {
633        let trimmed = rest.trim_start();
634        if trimmed.is_empty() {
635            return trimmed;
636        }
637        let bytes = trimmed.as_bytes();
638        if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
639            return trimmed;
640        }
641        if let Some(eq_pos) = trimmed.find('=') {
642            let key = &trimmed[..eq_pos];
643            let valid_key = key
644                .bytes()
645                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
646            if !valid_key {
647                return trimmed;
648            }
649            if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
650                rest = &trimmed[eq_pos + space_pos..];
651                continue;
652            }
653            return trimmed;
654        }
655        return trimmed;
656    }
657}
658
659#[cfg(test)]
660mod tests {
661    use super::*;
662
663    fn seg(s: &str) -> Segment {
664        Segment(s.to_string())
665    }
666
667    fn tok(s: &str) -> Token {
668        Token(s.to_string())
669    }
670
671    fn toks(words: &[&str]) -> Vec<Token> {
672        words.iter().map(|s| tok(s)).collect()
673    }
674
675    #[test]
676    fn split_pipe() {
677        let segs = CommandLine::new("grep foo | head -5").segments();
678        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
679    }
680
681    #[test]
682    fn split_and() {
683        let segs = CommandLine::new("ls && echo done").segments();
684        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
685    }
686
687    #[test]
688    fn split_semicolon() {
689        let segs = CommandLine::new("ls; echo done").segments();
690        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
691    }
692
693    #[test]
694    fn split_preserves_quoted_pipes() {
695        let segs = CommandLine::new("echo 'a | b' foo").segments();
696        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
697    }
698
699    #[test]
700    fn split_background_operator() {
701        let segs = CommandLine::new("cat file & rm -rf /").segments();
702        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
703    }
704
705    #[test]
706    fn split_newline() {
707        let segs = CommandLine::new("echo foo\necho bar").segments();
708        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
709    }
710
711    #[test]
712    fn unsafe_redirect() {
713        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
714    }
715
716    #[test]
717    fn safe_fd_redirect_stderr_to_stdout() {
718        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
719    }
720
721    #[test]
722    fn safe_fd_redirect_close() {
723        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
724    }
725
726    #[test]
727    fn unsafe_redirect_ampersand_no_digit() {
728        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
729    }
730
731    #[test]
732    fn unsafe_backtick() {
733        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
734    }
735
736    #[test]
737    fn unsafe_command_substitution() {
738        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
739    }
740
741    #[test]
742    fn safe_quoted_dollar_paren() {
743        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
744    }
745
746    #[test]
747    fn safe_quoted_redirect() {
748        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
749    }
750
751    #[test]
752    fn safe_no_special_chars() {
753        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
754    }
755
756    #[test]
757    fn safe_redirect_to_dev_null() {
758        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
759    }
760
761    #[test]
762    fn safe_redirect_stderr_to_dev_null() {
763        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
764    }
765
766    #[test]
767    fn safe_redirect_append_to_dev_null() {
768        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
769    }
770
771    #[test]
772    fn safe_redirect_space_dev_null() {
773        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
774    }
775
776    #[test]
777    fn safe_redirect_input_dev_null() {
778        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
779    }
780
781    #[test]
782    fn safe_redirect_both_dev_null() {
783        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
784    }
785
786    #[test]
787    fn unsafe_redirect_dev_null_prefix() {
788        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
789    }
790
791    #[test]
792    fn unsafe_redirect_dev_null_path_traversal() {
793        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
794    }
795
796    #[test]
797    fn unsafe_redirect_dev_null_subpath() {
798        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
799    }
800
801    #[test]
802    fn unsafe_redirect_to_file() {
803        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
804    }
805
806    #[test]
807    fn has_flag_short() {
808        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
809        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
810    }
811
812    #[test]
813    fn has_flag_long_with_eq() {
814        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
815        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
816    }
817
818    #[test]
819    fn has_flag_combined_short() {
820        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
821        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
822    }
823
824    #[test]
825    fn has_flag_stops_at_double_dash() {
826        let tokens = toks(&["cmd", "--", "-i"]);
827        assert!(!has_flag(&tokens, Some("-i"), Some("--in-place")));
828    }
829
830    #[test]
831    fn has_flag_long_only() {
832        let tokens = toks(&["sort", "--compress-program", "gzip", "file.txt"]);
833        assert!(has_flag(&tokens, None, Some("--compress-program")));
834    }
835
836    #[test]
837    fn has_flag_long_only_eq() {
838        let tokens = toks(&["sort", "--compress-program=gzip", "file.txt"]);
839        assert!(has_flag(&tokens, None, Some("--compress-program")));
840    }
841
842    #[test]
843    fn has_flag_long_only_absent() {
844        let tokens = toks(&["sort", "-r", "file.txt"]);
845        assert!(!has_flag(&tokens, None, Some("--compress-program")));
846    }
847
848    #[test]
849    fn strip_single_env_var() {
850        assert_eq!(
851            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
852            seg("bundle exec rspec")
853        );
854    }
855
856    #[test]
857    fn strip_multiple_env_vars() {
858        assert_eq!(
859            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
860            seg("bundle exec rspec")
861        );
862    }
863
864    #[test]
865    fn strip_no_env_var() {
866        assert_eq!(
867            seg("bundle exec rspec").strip_env_prefix(),
868            seg("bundle exec rspec")
869        );
870    }
871
872    #[test]
873    fn tokenize_simple() {
874        assert_eq!(
875            seg("grep foo file.txt").tokenize(),
876            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
877        );
878    }
879
880    #[test]
881    fn tokenize_quoted() {
882        assert_eq!(
883            seg("echo 'hello world'").tokenize(),
884            Some(vec![tok("echo"), tok("hello world")])
885        );
886    }
887
888    #[test]
889    fn strip_env_quoted_single() {
890        assert_eq!(
891            seg("FOO='bar baz' ls").strip_env_prefix(),
892            seg("ls")
893        );
894    }
895
896    #[test]
897    fn strip_env_quoted_double() {
898        assert_eq!(
899            seg("FOO=\"bar baz\" ls").strip_env_prefix(),
900            seg("ls")
901        );
902    }
903
904    #[test]
905    fn strip_env_quoted_with_equals() {
906        assert_eq!(
907            seg("FOO='a=b' ls").strip_env_prefix(),
908            seg("ls")
909        );
910    }
911
912    #[test]
913    fn strip_env_quoted_multiple() {
914        assert_eq!(
915            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
916            seg("cmd")
917        );
918    }
919
920    #[test]
921    fn command_name_simple() {
922        assert_eq!(tok("ls").command_name(), "ls");
923    }
924
925    #[test]
926    fn command_name_with_path() {
927        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
928    }
929
930    #[test]
931    fn command_name_relative_path() {
932        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
933    }
934
935    #[test]
936    fn fd_redirect_detection() {
937        assert!(tok("2>&1").is_fd_redirect());
938        assert!(tok(">&2").is_fd_redirect());
939        assert!(tok("10>&1").is_fd_redirect());
940        assert!(tok("255>&2").is_fd_redirect());
941        assert!(tok("2>&-").is_fd_redirect());
942        assert!(tok("2>&10").is_fd_redirect());
943        assert!(!tok(">").is_fd_redirect());
944        assert!(!tok("/dev/null").is_fd_redirect());
945        assert!(!tok(">&").is_fd_redirect());
946        assert!(!tok("").is_fd_redirect());
947        assert!(!tok("42").is_fd_redirect());
948        assert!(!tok("123abc").is_fd_redirect());
949    }
950
951    #[test]
952    fn dev_null_redirect_single_token() {
953        assert!(tok(">/dev/null").is_dev_null_redirect());
954        assert!(tok(">>/dev/null").is_dev_null_redirect());
955        assert!(tok("2>/dev/null").is_dev_null_redirect());
956        assert!(tok("2>>/dev/null").is_dev_null_redirect());
957        assert!(tok("</dev/null").is_dev_null_redirect());
958        assert!(tok("10>/dev/null").is_dev_null_redirect());
959        assert!(tok("255>/dev/null").is_dev_null_redirect());
960        assert!(!tok(">/tmp/file").is_dev_null_redirect());
961        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
962        assert!(!tok("ls").is_dev_null_redirect());
963        assert!(!tok("").is_dev_null_redirect());
964        assert!(!tok("42").is_dev_null_redirect());
965        assert!(!tok("<</dev/null").is_dev_null_redirect());
966    }
967
968    #[test]
969    fn redirect_operator_detection() {
970        assert!(tok(">").is_redirect_operator());
971        assert!(tok(">>").is_redirect_operator());
972        assert!(tok("<").is_redirect_operator());
973        assert!(tok("2>").is_redirect_operator());
974        assert!(tok("2>>").is_redirect_operator());
975        assert!(tok("10>").is_redirect_operator());
976        assert!(tok("255>>").is_redirect_operator());
977        assert!(!tok("ls").is_redirect_operator());
978        assert!(!tok(">&1").is_redirect_operator());
979        assert!(!tok("/dev/null").is_redirect_operator());
980        assert!(!tok("").is_redirect_operator());
981        assert!(!tok("42").is_redirect_operator());
982        assert!(!tok("<<").is_redirect_operator());
983    }
984
985    #[test]
986    fn reverse_partial_eq() {
987        let t = tok("hello");
988        assert!("hello" == t);
989        assert!("world" != t);
990        let s: &str = "hello";
991        assert!(s == t);
992    }
993
994    #[test]
995    fn token_deref() {
996        let t = tok("--flag");
997        assert!(t.starts_with("--"));
998        assert!(t.contains("fl"));
999        assert_eq!(t.len(), 6);
1000        assert!(!t.is_empty());
1001        assert_eq!(t.as_bytes()[0], b'-');
1002        assert!(t.eq_ignore_ascii_case("--FLAG"));
1003        assert_eq!(t.get(2..), Some("flag"));
1004    }
1005
1006    #[test]
1007    fn token_is_one_of() {
1008        assert!(tok("-v").is_one_of(&["-v", "--verbose"]));
1009        assert!(!tok("-q").is_one_of(&["-v", "--verbose"]));
1010    }
1011
1012    #[test]
1013    fn token_split_value() {
1014        assert_eq!(tok("--method=GET").split_value("="), Some("GET"));
1015        assert_eq!(tok("--flag").split_value("="), None);
1016    }
1017
1018    #[test]
1019    fn word_set_contains() {
1020        let set = WordSet::new(&["list", "show", "view"]);
1021        assert!(set.contains(&tok("list")));
1022        assert!(set.contains(&tok("view")));
1023        assert!(!set.contains(&tok("delete")));
1024        assert!(set.contains("list"));
1025        assert!(!set.contains("delete"));
1026    }
1027
1028    #[test]
1029    fn word_set_iter() {
1030        let set = WordSet::new(&["a", "b", "c"]);
1031        let items: Vec<&str> = set.iter().collect();
1032        assert_eq!(items, vec!["a", "b", "c"]);
1033    }
1034
1035    #[test]
1036    fn token_as_command_line() {
1037        let cl = tok("ls -la | grep foo").as_command_line();
1038        let segs = cl.segments();
1039        assert_eq!(segs, vec![seg("ls -la"), seg("grep foo")]);
1040    }
1041
1042    #[test]
1043    fn segment_from_tokens_replacing() {
1044        let tokens = toks(&["find", ".", "-name", "{}", "-print"]);
1045        let result = Segment::from_tokens_replacing(&tokens, "{}", "file");
1046        assert_eq!(result.tokenize().unwrap(), toks(&["find", ".", "-name", "file", "-print"]));
1047    }
1048
/// `strip_fd_redirects` is expected to drop `2>&1` / `>&2` style redirects
/// and leave a segment without any redirect unchanged.
#[test]
fn segment_strip_fd_redirects() {
    // (input segment, expected segment after stripping)
    let cases = [
        ("cargo test 2>&1", "cargo test"),
        ("cmd 2>&1 >&2", "cmd"),
        ("ls -la", "ls -la"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(seg(input).strip_fd_redirects(), seg(expected));
    }
}
1064
/// A required flag with no denied flag present is expected to be safe.
#[test]
fn flag_check_required_present_no_denied() {
    let policy = FlagCheck::new(&["--show"], &["--set"]);
    let args = toks(&["--show"]);
    assert!(policy.is_safe(&args));
}
1070
/// Tokens that contain none of the required flags are expected to be
/// rejected.
#[test]
fn flag_check_required_absent() {
    let policy = FlagCheck::new(&["--show"], &["--set"]);
    let args = toks(&["--verbose"]);
    assert!(!policy.is_safe(&args));
}
1076
/// A denied flag is expected to make the tokens unsafe even though a
/// required flag is also present.
#[test]
fn flag_check_denied_present() {
    let policy = FlagCheck::new(&["--show"], &["--set"]);
    let args = toks(&["--show", "--set", "key", "val"]);
    assert!(!policy.is_safe(&args));
}
1082
/// With an empty denied list, a required flag plus an unrelated extra flag
/// is expected to be safe.
#[test]
fn flag_check_empty_denied() {
    let policy = FlagCheck::new(&["--check"], &[]);
    let args = toks(&["--check", "--all"]);
    assert!(policy.is_safe(&args));
}
1088
/// An empty token list cannot contain a required flag, so it is expected
/// to be rejected.
#[test]
fn flag_check_empty_tokens() {
    let policy = FlagCheck::new(&["--show"], &[]);
    let verdict = policy.is_safe(&[]);
    assert!(!verdict);
}
1094
/// A token that is entirely one double-quoted string is expected to reduce
/// to a single space.
#[test]
fn content_outside_double_quotes_strips_string() {
    let quoted = tok(r#""system""#);
    let outside = quoted.content_outside_double_quotes();
    assert_eq!(outside, " ");
}
1099
/// Text outside the double-quoted string is expected to survive verbatim,
/// with the quoted part replaced by a single space.
#[test]
fn content_outside_double_quotes_preserves_code() {
    let program = tok(r#"{print "hello"} END{print NR}"#);
    let outside = program.content_outside_double_quotes();
    assert_eq!(outside, r#"{print  } END{print NR}"#);
}
1105
/// Backslash-escaped quotes inside a string are expected not to end it: the
/// whole quoted span collapses to one space.
#[test]
fn content_outside_double_quotes_escaped() {
    let program = tok(r#"{print "he said \"hi\""}"#);
    let outside = program.content_outside_double_quotes();
    assert_eq!(outside, "{print  }");
}
1111
/// A token without any double quotes is expected to come back unchanged.
#[test]
fn content_outside_double_quotes_no_quotes() {
    let program = tok("{print $1}");
    let outside = program.content_outside_double_quotes();
    assert_eq!(outside, "{print $1}");
}
1116
/// An empty token is expected to produce empty output.
#[test]
fn content_outside_double_quotes_empty() {
    let nothing = tok("");
    let outside = nothing.content_outside_double_quotes();
    assert_eq!(outside, "");
}
1121
/// A segment without substitutions is expected to yield no extracted
/// commands and an unchanged cleaned text.
#[test]
fn extract_subs_none() {
    let extraction = seg("echo hello").extract_substitutions();
    let (inner, remainder) = extraction.unwrap();

    assert!(inner.is_empty());
    assert_eq!(remainder, "echo hello");
}
1128
/// A `$(...)` substitution is expected to be extracted as its inner command
/// and replaced by `_` in the cleaned text.
#[test]
fn extract_subs_dollar_paren() {
    let extraction = seg("echo $(ls)").extract_substitutions();
    let (inner, remainder) = extraction.unwrap();

    assert_eq!(inner, vec!["ls"]);
    assert_eq!(remainder, "echo _");
}
1135
/// A backtick substitution is expected to be extracted as its inner command
/// and replaced by `_` in the cleaned text.
#[test]
fn extract_subs_backtick() {
    let extraction = seg("ls `pwd`").extract_substitutions();
    let (inner, remainder) = extraction.unwrap();

    assert_eq!(inner, vec!["pwd"]);
    assert_eq!(remainder, "ls _");
}
1142
/// Two sibling substitutions are expected to be extracted in order, each
/// leaving its own `_` placeholder.
#[test]
fn extract_subs_multiple() {
    let extraction = seg("echo $(cmd1) $(cmd2)").extract_substitutions();
    let (inner, remainder) = extraction.unwrap();

    assert_eq!(inner, vec!["cmd1", "cmd2"]);
    assert_eq!(remainder, "echo _ _");
}
1149
/// For nested substitutions only the outermost one is expected to be
/// extracted; the inner `$(ls)` stays inside the extracted text.
#[test]
fn extract_subs_nested() {
    let extraction = seg("echo $(echo $(ls))").extract_substitutions();
    let (inner, remainder) = extraction.unwrap();

    assert_eq!(inner, vec!["echo $(ls)"]);
    assert_eq!(remainder, "echo _");
}
1156
/// A `$(...)` inside single quotes is expected to be left alone: nothing is
/// extracted and the text is returned as written.
#[test]
fn extract_subs_quoted_skipped() {
    let extraction = seg("echo '$(safe)' arg").extract_substitutions();
    let (inner, remainder) = extraction.unwrap();

    assert!(inner.is_empty());
    assert_eq!(remainder, "echo '$(safe)' arg");
}
1163
/// An opening backtick without a closing one is expected to be an error.
#[test]
fn extract_subs_unmatched_backtick() {
    let outcome = seg("echo `unclosed").extract_substitutions();
    assert!(outcome.is_err());
}
1168
/// A `$(` without its closing parenthesis is expected to be an error.
#[test]
fn extract_subs_unmatched_paren() {
    let outcome = seg("echo $(unclosed").extract_substitutions();
    assert!(outcome.is_err());
}
1173
/// Redirecting output to a regular file is expected to be flagged as unsafe.
#[test]
fn unsafe_redirects_to_file() {
    let writes_file = seg("echo hello > file.txt");
    assert!(writes_file.has_unsafe_redirects());
}
1178
/// Redirecting output to `/dev/null` is expected not to be flagged.
#[test]
fn unsafe_redirects_dev_null_ok() {
    let discards_output = seg("cmd > /dev/null");
    assert!(!discards_output.has_unsafe_redirects());
}
1183
/// A descriptor-to-descriptor redirect (`2>&1`) is expected not to be
/// flagged.
#[test]
fn unsafe_redirects_fd_ok() {
    let merges_streams = seg("cmd 2>&1");
    assert!(!merges_streams.has_unsafe_redirects());
}
1188
/// A backtick substitution on its own is expected not to be reported by the
/// redirect check.
#[test]
fn unsafe_redirects_no_backtick_check() {
    let with_backticks = seg("echo `ls`");
    assert!(!with_backticks.has_unsafe_redirects());
}
1193
/// A `$(...)` substitution on its own is expected not to be reported by the
/// redirect check.
#[test]
fn unsafe_redirects_no_dollar_paren_check() {
    let with_dollar_paren = seg("echo $(ls)");
    assert!(!with_dollar_paren.has_unsafe_redirects());
}
1198}