Skip to main content

safe_chains/
parse.rs

1use std::ops::Deref;
2
/// A full command line, stored as an owned string.
///
/// Use `segments` to break it into its individual [`Segment`]s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandLine(String);
5
/// One piece of a command line, stored as an owned string.
///
/// `CommandLine::segments` produces these; a segment can in turn be
/// tokenized into [`Token`]s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Segment(String);
8
/// A single word of a segment, stored as an owned string.
///
/// `Segment::tokenize` produces these.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token(String);
11
12impl Deref for Token {
13    type Target = str;
14    fn deref(&self) -> &str {
15        &self.0
16    }
17}
18
/// A fixed set of words backed by a static slice.
///
/// `WordSet::new` asserts that the slice is strictly ascending, which is
/// what allows `contains` to use a binary search.
#[derive(Copy, Clone)]
pub struct WordSet(&'static [&'static str]);
21
22impl WordSet {
23    pub const fn new(words: &'static [&'static str]) -> Self {
24        let mut i = 1;
25        while i < words.len() {
26            assert!(
27                const_less(words[i - 1].as_bytes(), words[i].as_bytes()),
28                "WordSet: entries must be sorted, no duplicates"
29            );
30            i += 1;
31        }
32        Self(words)
33    }
34
35    pub fn contains(&self, s: &str) -> bool {
36        self.0.binary_search(&s).is_ok()
37    }
38
39    pub fn iter(&self) -> impl Iterator<Item = &'static str> + '_ {
40        self.0.iter().copied()
41    }
42}
43
/// Strict lexicographic "less than" on byte slices, usable in const contexts.
///
/// Compares byte by byte; when one slice is a prefix of the other, the
/// shorter one is the smaller. Equal slices are not less than each other.
const fn const_less(a: &[u8], b: &[u8]) -> bool {
    let mut idx = 0;
    while idx < a.len() && idx < b.len() {
        // The first differing byte decides the ordering.
        if a[idx] != b[idx] {
            return a[idx] < b[idx];
        }
        idx += 1;
    }
    // One slice is a prefix of the other (or they are equal).
    a.len() < b.len()
}
58
59
60impl CommandLine {
61    pub fn new(s: impl Into<String>) -> Self {
62        Self(s.into())
63    }
64
65    pub fn as_str(&self) -> &str {
66        &self.0
67    }
68
69    pub fn segments(&self) -> Vec<Segment> {
70        split_outside_quotes(&self.0)
71            .into_iter()
72            .map(Segment)
73            .collect()
74    }
75}
76
77impl Segment {
78    pub fn as_str(&self) -> &str {
79        &self.0
80    }
81
82    pub fn is_empty(&self) -> bool {
83        self.0.is_empty()
84    }
85
86    pub fn from_raw(s: String) -> Self {
87        Segment(s)
88    }
89
90    pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
91        Segment(shell_words::join(words))
92    }
93
94    pub fn tokenize(&self) -> Option<Vec<Token>> {
95        shell_words::split(&self.0)
96            .ok()
97            .map(|v| v.into_iter().map(Token).collect())
98    }
99
100    pub fn has_unsafe_shell_syntax(&self) -> bool {
101        check_unsafe_shell_syntax(&self.0)
102    }
103
104    pub fn has_unsafe_redirects(&self) -> bool {
105        check_unsafe_redirects(&self.0)
106    }
107
108    pub(crate) fn extract_substitutions(&self) -> Result<(Vec<String>, String), ()> {
109        extract_substitutions(&self.0)
110    }
111
112    pub fn strip_env_prefix(&self) -> Segment {
113        Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
114    }
115
116    pub fn from_tokens_replacing(tokens: &[Token], find: &str, replace: &str) -> Self {
117        let words: Vec<&str> = tokens
118            .iter()
119            .map(|t| if t.as_str() == find { replace } else { t.as_str() })
120            .collect();
121        Self::from_words(&words)
122    }
123
124    pub fn strip_fd_redirects(&self) -> Segment {
125        match self.tokenize() {
126            Some(tokens) => {
127                let filtered: Vec<_> = tokens
128                    .into_iter()
129                    .filter(|t| !t.is_fd_redirect())
130                    .collect();
131                Token::join(&filtered)
132            }
133            None => Segment(self.0.clone()),
134        }
135    }
136}
137
138impl Token {
139    #[cfg(test)]
140    pub(crate) fn from_test(s: &str) -> Self {
141        Self(s.to_string())
142    }
143
144    pub fn as_str(&self) -> &str {
145        &self.0
146    }
147
148    pub fn join(tokens: &[Token]) -> Segment {
149        Segment(shell_words::join(tokens.iter().map(|t| t.as_str())))
150    }
151
152    pub fn as_command_line(&self) -> CommandLine {
153        CommandLine(self.0.clone())
154    }
155
156    pub fn command_name(&self) -> &str {
157        self.as_str().rsplit('/').next().unwrap_or(self.as_str())
158    }
159
160    pub fn is_one_of(&self, options: &[&str]) -> bool {
161        options.contains(&self.as_str())
162    }
163
164    pub fn split_value(&self, sep: &str) -> Option<&str> {
165        self.as_str().split_once(sep).map(|(_, v)| v)
166    }
167
168    pub fn content_outside_double_quotes(&self) -> String {
169        let bytes = self.as_str().as_bytes();
170        let mut result = Vec::with_capacity(bytes.len());
171        let mut i = 0;
172        while i < bytes.len() {
173            if bytes[i] == b'"' {
174                result.push(b' ');
175                i += 1;
176                while i < bytes.len() {
177                    if bytes[i] == b'\\' && i + 1 < bytes.len() {
178                        i += 2;
179                        continue;
180                    }
181                    if bytes[i] == b'"' {
182                        i += 1;
183                        break;
184                    }
185                    i += 1;
186                }
187            } else {
188                result.push(bytes[i]);
189                i += 1;
190            }
191        }
192        String::from_utf8(result).unwrap_or_default()
193    }
194
195    pub fn is_fd_redirect(&self) -> bool {
196        let s = self.as_str();
197        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
198        if rest.len() < 2 || !rest.starts_with(">&") {
199            return false;
200        }
201        let after = &rest[2..];
202        !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
203    }
204
205    pub fn is_dev_null_redirect(&self) -> bool {
206        let s = self.as_str();
207        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
208        rest.strip_prefix(">>")
209            .or_else(|| rest.strip_prefix('>'))
210            .or_else(|| rest.strip_prefix('<'))
211            .is_some_and(|after| after == "/dev/null")
212    }
213
214    pub fn is_redirect_operator(&self) -> bool {
215        let s = self.as_str();
216        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
217        matches!(rest, ">" | ">>" | "<")
218    }
219}
220
221impl PartialEq<str> for Token {
222    fn eq(&self, other: &str) -> bool {
223        self.0 == other
224    }
225}
226
227impl PartialEq<&str> for Token {
228    fn eq(&self, other: &&str) -> bool {
229        self.0 == *other
230    }
231}
232
233impl PartialEq<Token> for str {
234    fn eq(&self, other: &Token) -> bool {
235        self == other.as_str()
236    }
237}
238
239impl PartialEq<Token> for &str {
240    fn eq(&self, other: &Token) -> bool {
241        *self == other.as_str()
242    }
243}
244
245impl std::fmt::Display for Token {
246    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
247        f.write_str(&self.0)
248    }
249}
250
251pub fn has_flag(tokens: &[Token], short: Option<&str>, long: Option<&str>) -> bool {
252    for token in &tokens[1..] {
253        if token == "--" {
254            return false;
255        }
256        if let Some(long_flag) = long
257            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
258        {
259            return true;
260        }
261        if let Some(short_flag) = short {
262            let short_char = short_flag.trim_start_matches('-');
263            if token.starts_with('-')
264                && !token.starts_with("--")
265                && token[1..].contains(short_char)
266            {
267                return true;
268            }
269        }
270    }
271    false
272}
273
/// Splits a command line at unquoted, unescaped separators.
///
/// Separators are `|`, `;`, newline, and `&` (a directly following second
/// `&` is consumed as part of the same separator). An `&` that immediately
/// follows `>` is kept as ordinary text so that `2>&1` stays intact.
/// Single quotes, double quotes and backslash escapes are tracked; a
/// backslash is not an escape inside single quotes. Quote and escape
/// characters are kept in the output.
///
/// Each piece is trimmed, and pieces that are empty after trimming are dropped.
fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;
    let mut stream = cmd.chars().peekable();

    while let Some(ch) = stream.next() {
        // Evaluated before any toggle below; only the separator arms read it.
        let quoted = single_quoted || double_quoted;
        match ch {
            _ if after_backslash => {
                after_backslash = false;
                buf.push(ch);
            }
            '\\' if !single_quoted => {
                after_backslash = true;
                buf.push(ch);
            }
            '\'' if !double_quoted => {
                single_quoted = !single_quoted;
                buf.push(ch);
            }
            '"' if !single_quoted => {
                double_quoted = !double_quoted;
                buf.push(ch);
            }
            '|' | ';' | '\n' if !quoted => {
                pieces.push(std::mem::take(&mut buf));
            }
            '&' if !quoted && !buf.ends_with('>') => {
                pieces.push(std::mem::take(&mut buf));
                // Treat `&&` as a single separator.
                if stream.peek() == Some(&'&') {
                    stream.next();
                }
            }
            _ => buf.push(ch),
        }
    }
    pieces.push(buf);

    pieces
        .iter()
        .map(|piece| piece.trim())
        .filter(|piece| !piece.is_empty())
        .map(str::to_string)
        .collect()
}
329
330fn check_unsafe_shell_syntax(segment: &str) -> bool {
331    let mut in_single = false;
332    let mut in_double = false;
333    let mut escaped = false;
334    let chars: Vec<char> = segment.chars().collect();
335    let mut skip_until = 0;
336
337    for (i, &c) in chars.iter().enumerate() {
338        if i < skip_until {
339            continue;
340        }
341        if escaped {
342            escaped = false;
343            continue;
344        }
345        if c == '\\' && !in_single {
346            escaped = true;
347            continue;
348        }
349        if c == '\'' && !in_double {
350            in_single = !in_single;
351            continue;
352        }
353        if c == '"' && !in_single {
354            in_double = !in_double;
355            continue;
356        }
357        if !in_single && !in_double {
358            if c == '>' || c == '<' {
359                if c == '<' && chars.get(i + 1) == Some(&'<') && chars.get(i + 2) == Some(&'<') {
360                    skip_until = i + 3;
361                    continue;
362                }
363                let next = chars.get(i + 1);
364                if next == Some(&'&')
365                    && chars
366                        .get(i + 2)
367                        .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
368                {
369                    continue;
370                }
371                if is_dev_null_target(&chars, i + 1, c) {
372                    continue;
373                }
374                return true;
375            }
376            if c == '`' {
377                return true;
378            }
379            if c == '$' && chars.get(i + 1) == Some(&'(') {
380                return true;
381            }
382        }
383    }
384    false
385}
386
387fn check_unsafe_redirects(segment: &str) -> bool {
388    let mut in_single = false;
389    let mut in_double = false;
390    let mut escaped = false;
391    let chars: Vec<char> = segment.chars().collect();
392    let mut skip_until = 0;
393
394    for (i, &c) in chars.iter().enumerate() {
395        if i < skip_until {
396            continue;
397        }
398        if escaped {
399            escaped = false;
400            continue;
401        }
402        if c == '\\' && !in_single {
403            escaped = true;
404            continue;
405        }
406        if c == '\'' && !in_double {
407            in_single = !in_single;
408            continue;
409        }
410        if c == '"' && !in_single {
411            in_double = !in_double;
412            continue;
413        }
414        if !in_single && !in_double && (c == '>' || c == '<') {
415            if c == '<' && chars.get(i + 1) == Some(&'<') && chars.get(i + 2) == Some(&'<') {
416                skip_until = i + 3;
417                continue;
418            }
419            let next = chars.get(i + 1);
420            if next == Some(&'&')
421                && chars
422                    .get(i + 2)
423                    .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
424            {
425                continue;
426            }
427            if is_dev_null_target(&chars, i + 1, c) {
428                continue;
429            }
430            return true;
431        }
432    }
433    false
434}
435
436fn extract_substitutions(segment: &str) -> Result<(Vec<String>, String), ()> {
437    let mut subs = Vec::new();
438    let mut cleaned = String::with_capacity(segment.len());
439    let mut in_single = false;
440    let mut in_double = false;
441    let mut escaped = false;
442    let chars: Vec<char> = segment.chars().collect();
443    let mut i = 0;
444
445    while i < chars.len() {
446        if escaped {
447            escaped = false;
448            cleaned.push(chars[i]);
449            i += 1;
450            continue;
451        }
452        if chars[i] == '\\' && !in_single {
453            escaped = true;
454            cleaned.push(chars[i]);
455            i += 1;
456            continue;
457        }
458        if chars[i] == '\'' && !in_double {
459            in_single = !in_single;
460            cleaned.push(chars[i]);
461            i += 1;
462            continue;
463        }
464        if chars[i] == '"' && !in_single {
465            in_double = !in_double;
466            cleaned.push(chars[i]);
467            i += 1;
468            continue;
469        }
470        if !in_single {
471            if chars[i] == '`' {
472                let start = i + 1;
473                let end = find_matching_backtick(&chars, start).ok_or(())?;
474                let inner: String = chars[start..end].iter().collect();
475                subs.push(inner);
476                cleaned.push('_');
477                i = end + 1;
478                continue;
479            }
480            if chars[i] == '$' && chars.get(i + 1) == Some(&'(') {
481                let start = i + 2;
482                let end = find_matching_paren(&chars, start).ok_or(())?;
483                let inner: String = chars[start..end].iter().collect();
484                subs.push(inner);
485                cleaned.push('_');
486                i = end + 1;
487                continue;
488            }
489        }
490        cleaned.push(chars[i]);
491        i += 1;
492    }
493    Ok((subs, cleaned))
494}
495
496fn find_matching_backtick(chars: &[char], start: usize) -> Option<usize> {
497    let mut in_single = false;
498    let mut in_double = false;
499    let mut escaped = false;
500    let mut i = start;
501    while i < chars.len() {
502        if escaped {
503            escaped = false;
504            i += 1;
505            continue;
506        }
507        if chars[i] == '\\' && !in_single {
508            escaped = true;
509            i += 1;
510            continue;
511        }
512        if chars[i] == '\'' && !in_double {
513            in_single = !in_single;
514            i += 1;
515            continue;
516        }
517        if chars[i] == '"' && !in_single {
518            in_double = !in_double;
519            i += 1;
520            continue;
521        }
522        if !in_single && !in_double && chars[i] == '`' {
523            return Some(i);
524        }
525        i += 1;
526    }
527    None
528}
529
/// Finds the `)` that closes a `$(` substitution whose body begins at `start`.
///
/// Scans `chars` from `start` with a nesting depth of one. Parentheses
/// outside quotes and not preceded by an escaping backslash raise or lower
/// the depth; the index of the `)` that brings it to zero is returned.
/// Returns `None` when the input ends first.
fn find_matching_paren(chars: &[char], start: usize) -> Option<usize> {
    let mut open_count = 1u32;
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;

    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if after_backslash {
            after_backslash = false;
            continue;
        }
        let quoted = single_quoted || double_quoted;
        match ch {
            '\\' if !single_quoted => after_backslash = true,
            '\'' if !double_quoted => single_quoted = !single_quoted,
            '"' if !single_quoted => double_quoted = !double_quoted,
            '(' if !quoted => open_count += 1,
            ')' if !quoted => {
                open_count -= 1;
                if open_count == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }
    None
}
571
/// The characters of the path `/dev/null`.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Reports whether the redirect target starting at `start` is exactly `/dev/null`.
///
/// `start` is the index just after the redirect character `redirect_char`.
/// When `redirect_char` is `>`, one further `>` is skipped so that `>>` is
/// handled. Any run of space characters (not tabs) is then skipped, and the
/// text that follows must be `/dev/null` terminated by the end of input,
/// whitespace, or one of `;`, `|`, `&`, `)`.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut tail = match chars.get(start..) {
        Some(rest) => rest,
        None => return false,
    };
    // Second character of an append redirect (`>>`).
    if redirect_char == '>' && tail.first() == Some(&'>') {
        tail = &tail[1..];
    }
    let leading_spaces = tail.iter().take_while(|&&c| c == ' ').count();
    let tail = &tail[leading_spaces..];

    if !tail.starts_with(&DEV_NULL) {
        return false;
    }
    // The path must end here, otherwise `/dev/nullx` or `/dev/null/x` would pass.
    match tail.get(DEV_NULL.len()) {
        None => true,
        Some(&after) => after.is_whitespace() || ";|&)".contains(after),
    }
}
591
/// Returns the byte index of the first space character in `s` that is
/// outside single and double quotes and not preceded by an escaping
/// backslash, or `None` when there is none.
///
/// A backslash does not act as an escape inside single quotes.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;

    s.bytes().position(|byte| {
        if after_backslash {
            after_backslash = false;
            return false;
        }
        match byte {
            b'\\' if !single_quoted => after_backslash = true,
            b'\'' if !double_quoted => single_quoted = !single_quoted,
            b'"' if !single_quoted => double_quoted = !double_quoted,
            b' ' => return !single_quoted && !double_quoted,
            _ => {}
        }
        false
    })
}
619
620fn strip_env_prefix_str(segment: &str) -> &str {
621    let mut rest = segment;
622    loop {
623        let trimmed = rest.trim_start();
624        if trimmed.is_empty() {
625            return trimmed;
626        }
627        let bytes = trimmed.as_bytes();
628        if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
629            return trimmed;
630        }
631        if let Some(eq_pos) = trimmed.find('=') {
632            let key = &trimmed[..eq_pos];
633            let valid_key = key
634                .bytes()
635                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
636            if !valid_key {
637                return trimmed;
638            }
639            if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
640                rest = &trimmed[eq_pos + space_pos..];
641                continue;
642            }
643            return trimmed;
644        }
645        return trimmed;
646    }
647}
648
649#[cfg(test)]
650mod tests {
651    use super::*;
652
653    fn seg(s: &str) -> Segment {
654        Segment(s.to_string())
655    }
656
657    fn tok(s: &str) -> Token {
658        Token(s.to_string())
659    }
660
661    fn toks(words: &[&str]) -> Vec<Token> {
662        words.iter().map(|s| tok(s)).collect()
663    }
664
665    #[test]
666    fn split_pipe() {
667        let segs = CommandLine::new("grep foo | head -5").segments();
668        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
669    }
670
671    #[test]
672    fn split_and() {
673        let segs = CommandLine::new("ls && echo done").segments();
674        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
675    }
676
677    #[test]
678    fn split_semicolon() {
679        let segs = CommandLine::new("ls; echo done").segments();
680        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
681    }
682
683    #[test]
684    fn split_preserves_quoted_pipes() {
685        let segs = CommandLine::new("echo 'a | b' foo").segments();
686        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
687    }
688
689    #[test]
690    fn split_background_operator() {
691        let segs = CommandLine::new("cat file & rm -rf /").segments();
692        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
693    }
694
695    #[test]
696    fn split_newline() {
697        let segs = CommandLine::new("echo foo\necho bar").segments();
698        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
699    }
700
701    #[test]
702    fn unsafe_redirect() {
703        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
704    }
705
706    #[test]
707    fn safe_fd_redirect_stderr_to_stdout() {
708        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
709    }
710
711    #[test]
712    fn safe_fd_redirect_close() {
713        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
714    }
715
716    #[test]
717    fn unsafe_redirect_ampersand_no_digit() {
718        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
719    }
720
721    #[test]
722    fn unsafe_backtick() {
723        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
724    }
725
726    #[test]
727    fn unsafe_command_substitution() {
728        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
729    }
730
731    #[test]
732    fn safe_quoted_dollar_paren() {
733        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
734    }
735
736    #[test]
737    fn safe_quoted_redirect() {
738        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
739    }
740
741    #[test]
742    fn safe_no_special_chars() {
743        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
744    }
745
746    #[test]
747    fn safe_redirect_to_dev_null() {
748        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
749    }
750
751    #[test]
752    fn safe_redirect_stderr_to_dev_null() {
753        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
754    }
755
756    #[test]
757    fn safe_redirect_append_to_dev_null() {
758        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
759    }
760
761    #[test]
762    fn safe_redirect_space_dev_null() {
763        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
764    }
765
766    #[test]
767    fn safe_redirect_input_dev_null() {
768        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
769    }
770
771    #[test]
772    fn safe_redirect_both_dev_null() {
773        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
774    }
775
776    #[test]
777    fn unsafe_redirect_dev_null_prefix() {
778        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
779    }
780
781    #[test]
782    fn unsafe_redirect_dev_null_path_traversal() {
783        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
784    }
785
786    #[test]
787    fn unsafe_redirect_dev_null_subpath() {
788        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
789    }
790
791    #[test]
792    fn unsafe_redirect_to_file() {
793        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
794    }
795
796    #[test]
797    fn safe_here_string() {
798        assert!(!seg("grep -c , <<< 'hello world'").has_unsafe_shell_syntax());
799    }
800
801    #[test]
802    fn safe_here_string_double_quoted() {
803        assert!(!seg("cat <<< \"some text\"").has_unsafe_shell_syntax());
804    }
805
806    #[test]
807    fn unsafe_heredoc_still_blocked() {
808        assert!(seg("cat << EOF").has_unsafe_shell_syntax());
809    }
810
811    #[test]
812    fn unsafe_input_redirect_still_blocked() {
813        assert!(seg("cmd < file.txt").has_unsafe_shell_syntax());
814    }
815
816    #[test]
817    fn has_flag_short() {
818        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
819        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
820    }
821
822    #[test]
823    fn has_flag_long_with_eq() {
824        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
825        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
826    }
827
828    #[test]
829    fn has_flag_combined_short() {
830        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
831        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
832    }
833
834    #[test]
835    fn has_flag_stops_at_double_dash() {
836        let tokens = toks(&["cmd", "--", "-i"]);
837        assert!(!has_flag(&tokens, Some("-i"), Some("--in-place")));
838    }
839
840    #[test]
841    fn has_flag_long_only() {
842        let tokens = toks(&["sort", "--compress-program", "gzip", "file.txt"]);
843        assert!(has_flag(&tokens, None, Some("--compress-program")));
844    }
845
846    #[test]
847    fn has_flag_long_only_eq() {
848        let tokens = toks(&["sort", "--compress-program=gzip", "file.txt"]);
849        assert!(has_flag(&tokens, None, Some("--compress-program")));
850    }
851
852    #[test]
853    fn has_flag_long_only_absent() {
854        let tokens = toks(&["sort", "-r", "file.txt"]);
855        assert!(!has_flag(&tokens, None, Some("--compress-program")));
856    }
857
858    #[test]
859    fn strip_single_env_var() {
860        assert_eq!(
861            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
862            seg("bundle exec rspec")
863        );
864    }
865
866    #[test]
867    fn strip_multiple_env_vars() {
868        assert_eq!(
869            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
870            seg("bundle exec rspec")
871        );
872    }
873
874    #[test]
875    fn strip_no_env_var() {
876        assert_eq!(
877            seg("bundle exec rspec").strip_env_prefix(),
878            seg("bundle exec rspec")
879        );
880    }
881
882    #[test]
883    fn tokenize_simple() {
884        assert_eq!(
885            seg("grep foo file.txt").tokenize(),
886            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
887        );
888    }
889
890    #[test]
891    fn tokenize_quoted() {
892        assert_eq!(
893            seg("echo 'hello world'").tokenize(),
894            Some(vec![tok("echo"), tok("hello world")])
895        );
896    }
897
898    #[test]
899    fn strip_env_quoted_single() {
900        assert_eq!(
901            seg("FOO='bar baz' ls").strip_env_prefix(),
902            seg("ls")
903        );
904    }
905
906    #[test]
907    fn strip_env_quoted_double() {
908        assert_eq!(
909            seg("FOO=\"bar baz\" ls").strip_env_prefix(),
910            seg("ls")
911        );
912    }
913
914    #[test]
915    fn strip_env_quoted_with_equals() {
916        assert_eq!(
917            seg("FOO='a=b' ls").strip_env_prefix(),
918            seg("ls")
919        );
920    }
921
922    #[test]
923    fn strip_env_quoted_multiple() {
924        assert_eq!(
925            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
926            seg("cmd")
927        );
928    }
929
930    #[test]
931    fn command_name_simple() {
932        assert_eq!(tok("ls").command_name(), "ls");
933    }
934
935    #[test]
936    fn command_name_with_path() {
937        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
938    }
939
940    #[test]
941    fn command_name_relative_path() {
942        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
943    }
944
945    #[test]
946    fn fd_redirect_detection() {
947        assert!(tok("2>&1").is_fd_redirect());
948        assert!(tok(">&2").is_fd_redirect());
949        assert!(tok("10>&1").is_fd_redirect());
950        assert!(tok("255>&2").is_fd_redirect());
951        assert!(tok("2>&-").is_fd_redirect());
952        assert!(tok("2>&10").is_fd_redirect());
953        assert!(!tok(">").is_fd_redirect());
954        assert!(!tok("/dev/null").is_fd_redirect());
955        assert!(!tok(">&").is_fd_redirect());
956        assert!(!tok("").is_fd_redirect());
957        assert!(!tok("42").is_fd_redirect());
958        assert!(!tok("123abc").is_fd_redirect());
959    }
960
961    #[test]
962    fn dev_null_redirect_single_token() {
963        assert!(tok(">/dev/null").is_dev_null_redirect());
964        assert!(tok(">>/dev/null").is_dev_null_redirect());
965        assert!(tok("2>/dev/null").is_dev_null_redirect());
966        assert!(tok("2>>/dev/null").is_dev_null_redirect());
967        assert!(tok("</dev/null").is_dev_null_redirect());
968        assert!(tok("10>/dev/null").is_dev_null_redirect());
969        assert!(tok("255>/dev/null").is_dev_null_redirect());
970        assert!(!tok(">/tmp/file").is_dev_null_redirect());
971        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
972        assert!(!tok("ls").is_dev_null_redirect());
973        assert!(!tok("").is_dev_null_redirect());
974        assert!(!tok("42").is_dev_null_redirect());
975        assert!(!tok("<</dev/null").is_dev_null_redirect());
976    }
977
978    #[test]
979    fn redirect_operator_detection() {
980        assert!(tok(">").is_redirect_operator());
981        assert!(tok(">>").is_redirect_operator());
982        assert!(tok("<").is_redirect_operator());
983        assert!(tok("2>").is_redirect_operator());
984        assert!(tok("2>>").is_redirect_operator());
985        assert!(tok("10>").is_redirect_operator());
986        assert!(tok("255>>").is_redirect_operator());
987        assert!(!tok("ls").is_redirect_operator());
988        assert!(!tok(">&1").is_redirect_operator());
989        assert!(!tok("/dev/null").is_redirect_operator());
990        assert!(!tok("").is_redirect_operator());
991        assert!(!tok("42").is_redirect_operator());
992        assert!(!tok("<<").is_redirect_operator());
993    }
994
995    #[test]
996    fn reverse_partial_eq() {
997        let t = tok("hello");
998        assert!("hello" == t);
999        assert!("world" != t);
1000        let s: &str = "hello";
1001        assert!(s == t);
1002    }
1003
1004    #[test]
1005    fn token_deref() {
1006        let t = tok("--flag");
1007        assert!(t.starts_with("--"));
1008        assert!(t.contains("fl"));
1009        assert_eq!(t.len(), 6);
1010        assert!(!t.is_empty());
1011        assert_eq!(t.as_bytes()[0], b'-');
1012        assert!(t.eq_ignore_ascii_case("--FLAG"));
1013        assert_eq!(t.get(2..), Some("flag"));
1014    }
1015
1016    #[test]
1017    fn token_is_one_of() {
1018        assert!(tok("-v").is_one_of(&["-v", "--verbose"]));
1019        assert!(!tok("-q").is_one_of(&["-v", "--verbose"]));
1020    }
1021
1022    #[test]
1023    fn token_split_value() {
1024        assert_eq!(tok("--method=GET").split_value("="), Some("GET"));
1025        assert_eq!(tok("--flag").split_value("="), None);
1026    }
1027
1028    #[test]
1029    fn word_set_contains() {
1030        let set = WordSet::new(&["list", "show", "view"]);
1031        assert!(set.contains(&tok("list")));
1032        assert!(set.contains(&tok("view")));
1033        assert!(!set.contains(&tok("delete")));
1034        assert!(set.contains("list"));
1035        assert!(!set.contains("delete"));
1036    }
1037
1038    #[test]
1039    fn word_set_iter() {
1040        let set = WordSet::new(&["a", "b", "c"]);
1041        let items: Vec<&str> = set.iter().collect();
1042        assert_eq!(items, vec!["a", "b", "c"]);
1043    }
1044
1045    #[test]
1046    fn token_as_command_line() {
1047        let cl = tok("ls -la | grep foo").as_command_line();
1048        let segs = cl.segments();
1049        assert_eq!(segs, vec![seg("ls -la"), seg("grep foo")]);
1050    }
1051
// Building a segment from tokens while replacing `{}` with `file` must
// tokenize back to the same list with the placeholder substituted.
#[test]
fn segment_from_tokens_replacing() {
    let template = toks(&["find", ".", "-name", "{}", "-print"]);
    let expected = toks(&["find", ".", "-name", "file", "-print"]);

    let substituted = Segment::from_tokens_replacing(&template, "{}", "file");

    assert_eq!(substituted.tokenize().unwrap(), expected);
}
1058
// `strip_fd_redirects` removes fd-to-fd redirections such as `2>&1` and
// `>&2`, and leaves a segment without any untouched.
#[test]
fn segment_strip_fd_redirects() {
    // (input segment, expected segment after stripping)
    let cases = [
        ("cargo test 2>&1", "cargo test"),
        ("cmd 2>&1 >&2", "cmd"),
        ("ls -la", "ls -la"),
    ];
    for (input, expected) in cases {
        assert_eq!(seg(input).strip_fd_redirects(), seg(expected));
    }
}
1074
// A token that is nothing but a double-quoted string is reduced to a
// single space.
#[test]
fn content_outside_double_quotes_strips_string() {
    let quoted_only = tok(r#""system""#);
    let outside = quoted_only.content_outside_double_quotes();
    assert_eq!(outside, " ");
}
1079
// Text outside the double-quoted literal is kept verbatim; the literal
// itself is replaced by a single space.
#[test]
fn content_outside_double_quotes_preserves_code() {
    let program = tok(r#"{print "hello"} END{print NR}"#);
    assert_eq!(
        program.content_outside_double_quotes(),
        "{print  } END{print NR}"
    );
}
1085
// Backslash-escaped quotes inside a double-quoted literal do not end the
// literal early.
#[test]
fn content_outside_double_quotes_escaped() {
    let program = tok(r#"{print "he said \"hi\""}"#);
    let outside = program.content_outside_double_quotes();
    assert_eq!(outside, "{print  }");
}
1091
// Input without any double quotes comes back unchanged.
#[test]
fn content_outside_double_quotes_no_quotes() {
    let source = "{print $1}";
    assert_eq!(tok(source).content_outside_double_quotes(), source);
}
1096
// An empty token yields empty output.
#[test]
fn content_outside_double_quotes_empty() {
    let outside = tok("").content_outside_double_quotes();
    assert_eq!(outside, "");
}
1101
// With no substitution present, nothing is extracted and the cleaned
// text equals the input.
#[test]
fn extract_subs_none() {
    let segment = seg("echo hello");
    let (found, remainder) = segment.extract_substitutions().unwrap();
    assert_eq!(remainder, "echo hello");
    assert!(found.is_empty());
}
1108
// A `$(...)` substitution is extracted and replaced by `_` in the
// cleaned text.
#[test]
fn extract_subs_dollar_paren() {
    let segment = seg("echo $(ls)");
    let (found, remainder) = segment.extract_substitutions().unwrap();
    assert_eq!(remainder, "echo _");
    assert_eq!(found, vec!["ls"]);
}
1115
// A backtick substitution is extracted and replaced by `_` in the
// cleaned text.
#[test]
fn extract_subs_backtick() {
    let segment = seg("ls `pwd`");
    let (found, remainder) = segment.extract_substitutions().unwrap();
    assert_eq!(remainder, "ls _");
    assert_eq!(found, vec!["pwd"]);
}
1122
// Several substitutions are extracted in order of appearance, each one
// replaced by its own `_`.
#[test]
fn extract_subs_multiple() {
    let segment = seg("echo $(cmd1) $(cmd2)");
    let (found, remainder) = segment.extract_substitutions().unwrap();
    assert_eq!(remainder, "echo _ _");
    assert_eq!(found, vec!["cmd1", "cmd2"]);
}
1129
// Only the outermost substitution is extracted; the nested `$(ls)` stays
// inside the extracted text.
#[test]
fn extract_subs_nested() {
    let segment = seg("echo $(echo $(ls))");
    let (found, remainder) = segment.extract_substitutions().unwrap();
    assert_eq!(remainder, "echo _");
    assert_eq!(found, vec!["echo $(ls)"]);
}
1136
// A `$(...)` inside single quotes is not extracted, and the text is
// returned unchanged.
#[test]
fn extract_subs_quoted_skipped() {
    let input = "echo '$(safe)' arg";
    let (found, remainder) = seg(input).extract_substitutions().unwrap();
    assert_eq!(remainder, input);
    assert!(found.is_empty());
}
1143
// An opening backtick with no closing partner is reported as an error.
#[test]
fn extract_subs_unmatched_backtick() {
    let outcome = seg("echo `unclosed").extract_substitutions();
    assert!(outcome.is_err());
}
1148
// A `$(` with no closing parenthesis is reported as an error.
#[test]
fn extract_subs_unmatched_paren() {
    let outcome = seg("echo $(unclosed").extract_substitutions();
    assert!(outcome.is_err());
}
1153
// Redirecting output into a regular file is flagged as unsafe.
#[test]
fn unsafe_redirects_to_file() {
    let segment = seg("echo hello > file.txt");
    assert!(segment.has_unsafe_redirects());
}
1158
// Redirecting output to `/dev/null` is not flagged.
#[test]
fn unsafe_redirects_dev_null_ok() {
    let segment = seg("cmd > /dev/null");
    assert!(!segment.has_unsafe_redirects());
}
1163
// An fd-to-fd redirect (`2>&1`) is not flagged.
#[test]
fn unsafe_redirects_fd_ok() {
    let segment = seg("cmd 2>&1");
    assert!(!segment.has_unsafe_redirects());
}
1168
// The redirect check does not flag a backtick substitution on its own.
#[test]
fn unsafe_redirects_no_backtick_check() {
    let segment = seg("echo `ls`");
    assert!(!segment.has_unsafe_redirects());
}
1173
// The redirect check does not flag a `$(...)` substitution on its own.
#[test]
fn unsafe_redirects_no_dollar_paren_check() {
    let segment = seg("echo $(ls)");
    assert!(!segment.has_unsafe_redirects());
}
1178
// A here-string (`<<<`) is not flagged.
#[test]
fn unsafe_redirects_here_string_ok() {
    let segment = seg("grep -c , <<< 'hello'");
    assert!(!segment.has_unsafe_redirects());
}
1183
// A heredoc (`<<`) is still flagged as unsafe.
#[test]
fn unsafe_redirects_heredoc_still_blocked() {
    let segment = seg("cat << EOF");
    assert!(segment.has_unsafe_redirects());
}
1188}