// safe_chains/parse.rs

use std::ops::Deref;
2
/// A complete shell command line, stored verbatim as the string it was built from.
///
/// This is a plain newtype over `String`; it carries no parsing state of its own.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandLine(String);
5
/// One command taken out of a command line, stored as its raw (still quoted) text.
///
/// This is a plain newtype over `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Segment(String);
8
/// A single word of a command, stored as an owned string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token(String);

/// Exposes the wrapped text as a `str`, so `str` methods (`starts_with`,
/// `len`, slicing, ...) can be called directly on a `Token`.
impl Deref for Token {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.0.as_str()
    }
}
18
19pub struct WordSet(&'static [&'static str]);
20
21impl WordSet {
22    pub const fn new(words: &'static [&'static str]) -> Self {
23        let mut i = 1;
24        while i < words.len() {
25            assert!(
26                const_less(words[i - 1].as_bytes(), words[i].as_bytes()),
27                "WordSet: entries must be sorted, no duplicates"
28            );
29            i += 1;
30        }
31        Self(words)
32    }
33
34    pub fn contains(&self, s: &str) -> bool {
35        self.0.binary_search(&s).is_ok()
36    }
37
38    pub fn iter(&self) -> impl Iterator<Item = &'static str> + '_ {
39        self.0.iter().copied()
40    }
41}
42
/// Returns `true` when `a` sorts strictly before `b` in byte-wise
/// lexicographic order.
///
/// Written with a manual loop so it can be evaluated in `const` contexts.
/// Equal inputs return `false`; a proper prefix sorts before the longer slice.
const fn const_less(a: &[u8], b: &[u8]) -> bool {
    let shared = if a.len() < b.len() { a.len() } else { b.len() };
    let mut pos = 0;
    while pos < shared {
        if a[pos] != b[pos] {
            // The first differing byte decides the ordering.
            return a[pos] < b[pos];
        }
        pos += 1;
    }
    // One slice is a prefix of the other: the shorter one is the smaller.
    a.len() < b.len()
}
57
58pub struct FlagCheck {
59    required: WordSet,
60    denied: WordSet,
61}
62
63impl FlagCheck {
64    pub const fn new(required: &'static [&'static str], denied: &'static [&'static str]) -> Self {
65        Self {
66            required: WordSet::new(required),
67            denied: WordSet::new(denied),
68        }
69    }
70
71    pub fn required(&self) -> &WordSet {
72        &self.required
73    }
74
75    pub fn denied(&self) -> &WordSet {
76        &self.denied
77    }
78
79    pub fn is_safe(&self, tokens: &[Token]) -> bool {
80        tokens.iter().any(|t| self.required.contains(t))
81            && !tokens.iter().any(|t| self.denied.contains(t))
82    }
83}
84
85impl CommandLine {
86    pub fn new(s: impl Into<String>) -> Self {
87        Self(s.into())
88    }
89
90    pub fn as_str(&self) -> &str {
91        &self.0
92    }
93
94    pub fn segments(&self) -> Vec<Segment> {
95        split_outside_quotes(&self.0)
96            .into_iter()
97            .map(Segment)
98            .collect()
99    }
100}
101
102impl Segment {
103    pub fn as_str(&self) -> &str {
104        &self.0
105    }
106
107    pub fn is_empty(&self) -> bool {
108        self.0.is_empty()
109    }
110
111    pub fn from_raw(s: String) -> Self {
112        Segment(s)
113    }
114
115    pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
116        Segment(shell_words::join(words))
117    }
118
119    pub fn tokenize(&self) -> Option<Vec<Token>> {
120        shell_words::split(&self.0)
121            .ok()
122            .map(|v| v.into_iter().map(Token).collect())
123    }
124
125    pub fn has_unsafe_shell_syntax(&self) -> bool {
126        check_unsafe_shell_syntax(&self.0)
127    }
128
129    pub fn has_unsafe_redirects(&self) -> bool {
130        check_unsafe_redirects(&self.0)
131    }
132
133    pub(crate) fn extract_substitutions(&self) -> Result<(Vec<String>, String), ()> {
134        extract_substitutions(&self.0)
135    }
136
137    pub fn strip_env_prefix(&self) -> Segment {
138        Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
139    }
140
141    pub fn from_tokens_replacing(tokens: &[Token], find: &str, replace: &str) -> Self {
142        let words: Vec<&str> = tokens
143            .iter()
144            .map(|t| if t.as_str() == find { replace } else { t.as_str() })
145            .collect();
146        Self::from_words(&words)
147    }
148
149    pub fn strip_fd_redirects(&self) -> Segment {
150        match self.tokenize() {
151            Some(tokens) => {
152                let filtered: Vec<_> = tokens
153                    .into_iter()
154                    .filter(|t| !t.is_fd_redirect())
155                    .collect();
156                Token::join(&filtered)
157            }
158            None => Segment(self.0.clone()),
159        }
160    }
161}
162
163impl Token {
164    pub fn as_str(&self) -> &str {
165        &self.0
166    }
167
168    pub fn join(tokens: &[Token]) -> Segment {
169        Segment(shell_words::join(tokens.iter().map(|t| t.as_str())))
170    }
171
172    pub fn as_command_line(&self) -> CommandLine {
173        CommandLine(self.0.clone())
174    }
175
176    pub fn command_name(&self) -> &str {
177        self.as_str().rsplit('/').next().unwrap_or(self.as_str())
178    }
179
180    pub fn is_one_of(&self, options: &[&str]) -> bool {
181        options.contains(&self.as_str())
182    }
183
184    pub fn split_value(&self, sep: &str) -> Option<&str> {
185        self.as_str().split_once(sep).map(|(_, v)| v)
186    }
187
188    pub fn content_outside_double_quotes(&self) -> String {
189        let bytes = self.as_str().as_bytes();
190        let mut result = Vec::with_capacity(bytes.len());
191        let mut i = 0;
192        while i < bytes.len() {
193            if bytes[i] == b'"' {
194                result.push(b' ');
195                i += 1;
196                while i < bytes.len() {
197                    if bytes[i] == b'\\' && i + 1 < bytes.len() {
198                        i += 2;
199                        continue;
200                    }
201                    if bytes[i] == b'"' {
202                        i += 1;
203                        break;
204                    }
205                    i += 1;
206                }
207            } else {
208                result.push(bytes[i]);
209                i += 1;
210            }
211        }
212        String::from_utf8(result).unwrap_or_default()
213    }
214
215    pub fn is_fd_redirect(&self) -> bool {
216        let s = self.as_str();
217        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
218        if rest.len() < 2 || !rest.starts_with(">&") {
219            return false;
220        }
221        let after = &rest[2..];
222        !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
223    }
224
225    pub fn is_dev_null_redirect(&self) -> bool {
226        let s = self.as_str();
227        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
228        rest.strip_prefix(">>")
229            .or_else(|| rest.strip_prefix('>'))
230            .or_else(|| rest.strip_prefix('<'))
231            .is_some_and(|after| after == "/dev/null")
232    }
233
234    pub fn is_redirect_operator(&self) -> bool {
235        let s = self.as_str();
236        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
237        matches!(rest, ">" | ">>" | "<")
238    }
239}
240
241impl PartialEq<str> for Token {
242    fn eq(&self, other: &str) -> bool {
243        self.0 == other
244    }
245}
246
247impl PartialEq<&str> for Token {
248    fn eq(&self, other: &&str) -> bool {
249        self.0 == *other
250    }
251}
252
253impl PartialEq<Token> for str {
254    fn eq(&self, other: &Token) -> bool {
255        self == other.as_str()
256    }
257}
258
259impl PartialEq<Token> for &str {
260    fn eq(&self, other: &Token) -> bool {
261        *self == other.as_str()
262    }
263}
264
265impl std::fmt::Display for Token {
266    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
267        f.write_str(&self.0)
268    }
269}
270
271pub fn has_flag(tokens: &[Token], short: Option<&str>, long: Option<&str>) -> bool {
272    for token in &tokens[1..] {
273        if token == "--" {
274            return false;
275        }
276        if let Some(long_flag) = long
277            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
278        {
279            return true;
280        }
281        if let Some(short_flag) = short {
282            let short_char = short_flag.trim_start_matches('-');
283            if token.starts_with('-')
284                && !token.starts_with("--")
285                && token[1..].contains(short_char)
286            {
287                return true;
288            }
289        }
290    }
291    false
292}
293
/// Splits a command line at unquoted, unescaped command separators.
///
/// Separators are `|`, `;`, a newline, and `&` (a following `&` is consumed
/// with it, so `&&` is one separator). An `&` that directly follows `>` is not
/// a separator, which keeps redirects such as `2>&1` intact. Because each `|`
/// splits on its own, `||` also separates commands.
///
/// Quote characters and backslashes are kept in the output. Every piece is
/// trimmed and empty pieces are dropped.
fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    let mut chars = cmd.chars().peekable();

    while let Some(c) = chars.next() {
        if escaped {
            // The character after a backslash is always literal.
            buf.push(c);
            escaped = false;
            continue;
        }
        match c {
            // Backslash is an escape everywhere except inside single quotes.
            '\\' if !in_single => {
                escaped = true;
                buf.push(c);
            }
            '\'' if !in_double => {
                in_single = !in_single;
                buf.push(c);
            }
            '"' if !in_single => {
                in_double = !in_double;
                buf.push(c);
            }
            // Anything else inside quotes is plain text.
            _ if in_single || in_double => buf.push(c),
            '|' | ';' | '\n' => pieces.push(std::mem::take(&mut buf)),
            '&' if !buf.ends_with('>') => {
                pieces.push(std::mem::take(&mut buf));
                if chars.peek() == Some(&'&') {
                    chars.next();
                }
            }
            _ => buf.push(c),
        }
    }
    pieces.push(buf);

    pieces
        .iter()
        .map(|piece| piece.trim())
        .filter(|piece| !piece.is_empty())
        .map(str::to_string)
        .collect()
}
349
350fn check_unsafe_shell_syntax(segment: &str) -> bool {
351    let mut in_single = false;
352    let mut in_double = false;
353    let mut escaped = false;
354    let chars: Vec<char> = segment.chars().collect();
355
356    for (i, &c) in chars.iter().enumerate() {
357        if escaped {
358            escaped = false;
359            continue;
360        }
361        if c == '\\' && !in_single {
362            escaped = true;
363            continue;
364        }
365        if c == '\'' && !in_double {
366            in_single = !in_single;
367            continue;
368        }
369        if c == '"' && !in_single {
370            in_double = !in_double;
371            continue;
372        }
373        if !in_single && !in_double {
374            if c == '>' || c == '<' {
375                let next = chars.get(i + 1);
376                if next == Some(&'&')
377                    && chars
378                        .get(i + 2)
379                        .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
380                {
381                    continue;
382                }
383                if is_dev_null_target(&chars, i + 1, c) {
384                    continue;
385                }
386                return true;
387            }
388            if c == '`' {
389                return true;
390            }
391            if c == '$' && chars.get(i + 1) == Some(&'(') {
392                return true;
393            }
394        }
395    }
396    false
397}
398
399fn check_unsafe_redirects(segment: &str) -> bool {
400    let mut in_single = false;
401    let mut in_double = false;
402    let mut escaped = false;
403    let chars: Vec<char> = segment.chars().collect();
404
405    for (i, &c) in chars.iter().enumerate() {
406        if escaped {
407            escaped = false;
408            continue;
409        }
410        if c == '\\' && !in_single {
411            escaped = true;
412            continue;
413        }
414        if c == '\'' && !in_double {
415            in_single = !in_single;
416            continue;
417        }
418        if c == '"' && !in_single {
419            in_double = !in_double;
420            continue;
421        }
422        if !in_single && !in_double && (c == '>' || c == '<') {
423            let next = chars.get(i + 1);
424            if next == Some(&'&')
425                && chars
426                    .get(i + 2)
427                    .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
428            {
429                continue;
430            }
431            if is_dev_null_target(&chars, i + 1, c) {
432                continue;
433            }
434            return true;
435        }
436    }
437    false
438}
439
440fn extract_substitutions(segment: &str) -> Result<(Vec<String>, String), ()> {
441    let mut subs = Vec::new();
442    let mut cleaned = String::with_capacity(segment.len());
443    let mut in_single = false;
444    let mut in_double = false;
445    let mut escaped = false;
446    let chars: Vec<char> = segment.chars().collect();
447    let mut i = 0;
448
449    while i < chars.len() {
450        if escaped {
451            escaped = false;
452            cleaned.push(chars[i]);
453            i += 1;
454            continue;
455        }
456        if chars[i] == '\\' && !in_single {
457            escaped = true;
458            cleaned.push(chars[i]);
459            i += 1;
460            continue;
461        }
462        if chars[i] == '\'' && !in_double {
463            in_single = !in_single;
464            cleaned.push(chars[i]);
465            i += 1;
466            continue;
467        }
468        if chars[i] == '"' && !in_single {
469            in_double = !in_double;
470            cleaned.push(chars[i]);
471            i += 1;
472            continue;
473        }
474        if !in_single {
475            if chars[i] == '`' {
476                let start = i + 1;
477                let end = find_matching_backtick(&chars, start).ok_or(())?;
478                let inner: String = chars[start..end].iter().collect();
479                subs.push(inner);
480                cleaned.push('_');
481                i = end + 1;
482                continue;
483            }
484            if chars[i] == '$' && chars.get(i + 1) == Some(&'(') {
485                let start = i + 2;
486                let end = find_matching_paren(&chars, start).ok_or(())?;
487                let inner: String = chars[start..end].iter().collect();
488                subs.push(inner);
489                cleaned.push('_');
490                i = end + 1;
491                continue;
492            }
493        }
494        cleaned.push(chars[i]);
495        i += 1;
496    }
497    Ok((subs, cleaned))
498}
499
/// Finds the index of the first backtick at or after `start` that is outside
/// quotes and not escaped.
///
/// Quote and escape tracking begins fresh at `start`. Returns `None` when no
/// such backtick exists, including when `start` is past the end of `chars`.
fn find_matching_backtick(chars: &[char], start: usize) -> Option<usize> {
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (i, &c) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            // Backslash is an escape everywhere except inside single quotes.
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '`' if !in_single && !in_double => return Some(i),
            _ => {}
        }
    }
    None
}
533
/// Finds the `)` that closes a parenthesis already opened before `start`.
///
/// Scanning begins at `start` with a nesting depth of one. Unquoted, unescaped
/// `(` and `)` raise and lower the depth, and the index of the `)` that brings
/// it to zero is returned. Parentheses inside quotes or after a backslash are
/// ignored. Returns `None` when the input ends first.
fn find_matching_paren(chars: &[char], start: usize) -> Option<usize> {
    let mut depth = 1u32;
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (i, &c) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            // Backslash is an escape everywhere except inside single quotes.
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            _ if in_single || in_double => {}
            '(' => depth += 1,
            ')' => {
                // Cannot underflow: the function returns as soon as depth hits 0.
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
575
/// The characters of the path `/dev/null`.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Checks whether the redirect whose operator ends just before `start` points
/// at exactly `/dev/null`.
///
/// `start` is the index right after the first redirect character and
/// `redirect_char` is that character. For `>` a second `>` (append) is
/// skipped; then any number of spaces; then `/dev/null` must follow and be
/// terminated by the end of input, whitespace, or one of `; | & )`. That
/// rejects look-alikes such as `/dev/nullx` and `/dev/null/foo`.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut pos = start;
    if redirect_char == '>' && chars.get(pos) == Some(&'>') {
        pos += 1;
    }
    while chars.get(pos) == Some(&' ') {
        pos += 1;
    }

    let end = pos + DEV_NULL.len();
    // `get` yields `None` when fewer than nine characters remain.
    if chars.get(pos..end) != Some(&DEV_NULL[..]) {
        return false;
    }
    match chars.get(end) {
        None => true,
        Some(&next) => next.is_whitespace() || ";|&)".contains(next),
    }
}
595
/// Returns the byte index of the first space (`' '`) in `s` that is outside
/// quotes and not escaped, or `None` when there is none.
///
/// Only the space character counts; tabs and other whitespace are not matched.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (idx, byte) in s.bytes().enumerate() {
        if escaped {
            escaped = false;
            continue;
        }
        match byte {
            // Backslash is an escape everywhere except inside single quotes.
            b'\\' if !in_single => escaped = true,
            b'\'' if !in_double => in_single = !in_single,
            b'"' if !in_single => in_double = !in_double,
            b' ' if !in_single && !in_double => return Some(idx),
            _ => {}
        }
    }
    None
}
623
624fn strip_env_prefix_str(segment: &str) -> &str {
625    let mut rest = segment;
626    loop {
627        let trimmed = rest.trim_start();
628        if trimmed.is_empty() {
629            return trimmed;
630        }
631        let bytes = trimmed.as_bytes();
632        if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
633            return trimmed;
634        }
635        if let Some(eq_pos) = trimmed.find('=') {
636            let key = &trimmed[..eq_pos];
637            let valid_key = key
638                .bytes()
639                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
640            if !valid_key {
641                return trimmed;
642            }
643            if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
644                rest = &trimmed[eq_pos + space_pos..];
645                continue;
646            }
647            return trimmed;
648        }
649        return trimmed;
650    }
651}
652
653#[cfg(test)]
654mod tests {
655    use super::*;
656
657    fn seg(s: &str) -> Segment {
658        Segment(s.to_string())
659    }
660
661    fn tok(s: &str) -> Token {
662        Token(s.to_string())
663    }
664
665    fn toks(words: &[&str]) -> Vec<Token> {
666        words.iter().map(|s| tok(s)).collect()
667    }
668
669    #[test]
670    fn split_pipe() {
671        let segs = CommandLine::new("grep foo | head -5").segments();
672        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
673    }
674
675    #[test]
676    fn split_and() {
677        let segs = CommandLine::new("ls && echo done").segments();
678        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
679    }
680
681    #[test]
682    fn split_semicolon() {
683        let segs = CommandLine::new("ls; echo done").segments();
684        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
685    }
686
687    #[test]
688    fn split_preserves_quoted_pipes() {
689        let segs = CommandLine::new("echo 'a | b' foo").segments();
690        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
691    }
692
693    #[test]
694    fn split_background_operator() {
695        let segs = CommandLine::new("cat file & rm -rf /").segments();
696        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
697    }
698
699    #[test]
700    fn split_newline() {
701        let segs = CommandLine::new("echo foo\necho bar").segments();
702        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
703    }
704
705    #[test]
706    fn unsafe_redirect() {
707        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
708    }
709
710    #[test]
711    fn safe_fd_redirect_stderr_to_stdout() {
712        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
713    }
714
715    #[test]
716    fn safe_fd_redirect_close() {
717        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
718    }
719
720    #[test]
721    fn unsafe_redirect_ampersand_no_digit() {
722        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
723    }
724
725    #[test]
726    fn unsafe_backtick() {
727        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
728    }
729
730    #[test]
731    fn unsafe_command_substitution() {
732        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
733    }
734
735    #[test]
736    fn safe_quoted_dollar_paren() {
737        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
738    }
739
740    #[test]
741    fn safe_quoted_redirect() {
742        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
743    }
744
745    #[test]
746    fn safe_no_special_chars() {
747        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
748    }
749
750    #[test]
751    fn safe_redirect_to_dev_null() {
752        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
753    }
754
755    #[test]
756    fn safe_redirect_stderr_to_dev_null() {
757        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
758    }
759
760    #[test]
761    fn safe_redirect_append_to_dev_null() {
762        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
763    }
764
765    #[test]
766    fn safe_redirect_space_dev_null() {
767        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
768    }
769
770    #[test]
771    fn safe_redirect_input_dev_null() {
772        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
773    }
774
775    #[test]
776    fn safe_redirect_both_dev_null() {
777        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
778    }
779
780    #[test]
781    fn unsafe_redirect_dev_null_prefix() {
782        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
783    }
784
785    #[test]
786    fn unsafe_redirect_dev_null_path_traversal() {
787        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
788    }
789
790    #[test]
791    fn unsafe_redirect_dev_null_subpath() {
792        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
793    }
794
795    #[test]
796    fn unsafe_redirect_to_file() {
797        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
798    }
799
800    #[test]
801    fn has_flag_short() {
802        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
803        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
804    }
805
806    #[test]
807    fn has_flag_long_with_eq() {
808        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
809        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
810    }
811
812    #[test]
813    fn has_flag_combined_short() {
814        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
815        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
816    }
817
818    #[test]
819    fn has_flag_stops_at_double_dash() {
820        let tokens = toks(&["cmd", "--", "-i"]);
821        assert!(!has_flag(&tokens, Some("-i"), Some("--in-place")));
822    }
823
824    #[test]
825    fn has_flag_long_only() {
826        let tokens = toks(&["sort", "--compress-program", "gzip", "file.txt"]);
827        assert!(has_flag(&tokens, None, Some("--compress-program")));
828    }
829
830    #[test]
831    fn has_flag_long_only_eq() {
832        let tokens = toks(&["sort", "--compress-program=gzip", "file.txt"]);
833        assert!(has_flag(&tokens, None, Some("--compress-program")));
834    }
835
836    #[test]
837    fn has_flag_long_only_absent() {
838        let tokens = toks(&["sort", "-r", "file.txt"]);
839        assert!(!has_flag(&tokens, None, Some("--compress-program")));
840    }
841
842    #[test]
843    fn strip_single_env_var() {
844        assert_eq!(
845            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
846            seg("bundle exec rspec")
847        );
848    }
849
850    #[test]
851    fn strip_multiple_env_vars() {
852        assert_eq!(
853            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
854            seg("bundle exec rspec")
855        );
856    }
857
858    #[test]
859    fn strip_no_env_var() {
860        assert_eq!(
861            seg("bundle exec rspec").strip_env_prefix(),
862            seg("bundle exec rspec")
863        );
864    }
865
866    #[test]
867    fn tokenize_simple() {
868        assert_eq!(
869            seg("grep foo file.txt").tokenize(),
870            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
871        );
872    }
873
874    #[test]
875    fn tokenize_quoted() {
876        assert_eq!(
877            seg("echo 'hello world'").tokenize(),
878            Some(vec![tok("echo"), tok("hello world")])
879        );
880    }
881
882    #[test]
883    fn strip_env_quoted_single() {
884        assert_eq!(
885            seg("FOO='bar baz' ls").strip_env_prefix(),
886            seg("ls")
887        );
888    }
889
890    #[test]
891    fn strip_env_quoted_double() {
892        assert_eq!(
893            seg("FOO=\"bar baz\" ls").strip_env_prefix(),
894            seg("ls")
895        );
896    }
897
898    #[test]
899    fn strip_env_quoted_with_equals() {
900        assert_eq!(
901            seg("FOO='a=b' ls").strip_env_prefix(),
902            seg("ls")
903        );
904    }
905
906    #[test]
907    fn strip_env_quoted_multiple() {
908        assert_eq!(
909            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
910            seg("cmd")
911        );
912    }
913
914    #[test]
915    fn command_name_simple() {
916        assert_eq!(tok("ls").command_name(), "ls");
917    }
918
919    #[test]
920    fn command_name_with_path() {
921        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
922    }
923
924    #[test]
925    fn command_name_relative_path() {
926        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
927    }
928
929    #[test]
930    fn fd_redirect_detection() {
931        assert!(tok("2>&1").is_fd_redirect());
932        assert!(tok(">&2").is_fd_redirect());
933        assert!(tok("10>&1").is_fd_redirect());
934        assert!(tok("255>&2").is_fd_redirect());
935        assert!(tok("2>&-").is_fd_redirect());
936        assert!(tok("2>&10").is_fd_redirect());
937        assert!(!tok(">").is_fd_redirect());
938        assert!(!tok("/dev/null").is_fd_redirect());
939        assert!(!tok(">&").is_fd_redirect());
940        assert!(!tok("").is_fd_redirect());
941        assert!(!tok("42").is_fd_redirect());
942        assert!(!tok("123abc").is_fd_redirect());
943    }
944
945    #[test]
946    fn dev_null_redirect_single_token() {
947        assert!(tok(">/dev/null").is_dev_null_redirect());
948        assert!(tok(">>/dev/null").is_dev_null_redirect());
949        assert!(tok("2>/dev/null").is_dev_null_redirect());
950        assert!(tok("2>>/dev/null").is_dev_null_redirect());
951        assert!(tok("</dev/null").is_dev_null_redirect());
952        assert!(tok("10>/dev/null").is_dev_null_redirect());
953        assert!(tok("255>/dev/null").is_dev_null_redirect());
954        assert!(!tok(">/tmp/file").is_dev_null_redirect());
955        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
956        assert!(!tok("ls").is_dev_null_redirect());
957        assert!(!tok("").is_dev_null_redirect());
958        assert!(!tok("42").is_dev_null_redirect());
959        assert!(!tok("<</dev/null").is_dev_null_redirect());
960    }
961
962    #[test]
963    fn redirect_operator_detection() {
964        assert!(tok(">").is_redirect_operator());
965        assert!(tok(">>").is_redirect_operator());
966        assert!(tok("<").is_redirect_operator());
967        assert!(tok("2>").is_redirect_operator());
968        assert!(tok("2>>").is_redirect_operator());
969        assert!(tok("10>").is_redirect_operator());
970        assert!(tok("255>>").is_redirect_operator());
971        assert!(!tok("ls").is_redirect_operator());
972        assert!(!tok(">&1").is_redirect_operator());
973        assert!(!tok("/dev/null").is_redirect_operator());
974        assert!(!tok("").is_redirect_operator());
975        assert!(!tok("42").is_redirect_operator());
976        assert!(!tok("<<").is_redirect_operator());
977    }
978
979    #[test]
980    fn reverse_partial_eq() {
981        let t = tok("hello");
982        assert!("hello" == t);
983        assert!("world" != t);
984        let s: &str = "hello";
985        assert!(s == t);
986    }
987
988    #[test]
989    fn token_deref() {
990        let t = tok("--flag");
991        assert!(t.starts_with("--"));
992        assert!(t.contains("fl"));
993        assert_eq!(t.len(), 6);
994        assert!(!t.is_empty());
995        assert_eq!(t.as_bytes()[0], b'-');
996        assert!(t.eq_ignore_ascii_case("--FLAG"));
997        assert_eq!(t.get(2..), Some("flag"));
998    }
999
1000    #[test]
1001    fn token_is_one_of() {
1002        assert!(tok("-v").is_one_of(&["-v", "--verbose"]));
1003        assert!(!tok("-q").is_one_of(&["-v", "--verbose"]));
1004    }
1005
1006    #[test]
1007    fn token_split_value() {
1008        assert_eq!(tok("--method=GET").split_value("="), Some("GET"));
1009        assert_eq!(tok("--flag").split_value("="), None);
1010    }
1011
1012    #[test]
1013    fn word_set_contains() {
1014        let set = WordSet::new(&["list", "show", "view"]);
1015        assert!(set.contains(&tok("list")));
1016        assert!(set.contains(&tok("view")));
1017        assert!(!set.contains(&tok("delete")));
1018        assert!(set.contains("list"));
1019        assert!(!set.contains("delete"));
1020    }
1021
1022    #[test]
1023    fn word_set_iter() {
1024        let set = WordSet::new(&["a", "b", "c"]);
1025        let items: Vec<&str> = set.iter().collect();
1026        assert_eq!(items, vec!["a", "b", "c"]);
1027    }
1028
1029    #[test]
1030    fn token_as_command_line() {
1031        let cl = tok("ls -la | grep foo").as_command_line();
1032        let segs = cl.segments();
1033        assert_eq!(segs, vec![seg("ls -la"), seg("grep foo")]);
1034    }
1035
1036    #[test]
1037    fn segment_from_tokens_replacing() {
1038        let tokens = toks(&["find", ".", "-name", "{}", "-print"]);
1039        let result = Segment::from_tokens_replacing(&tokens, "{}", "file");
1040        assert_eq!(result.tokenize().unwrap(), toks(&["find", ".", "-name", "file", "-print"]));
1041    }
1042
1043    #[test]
1044    fn segment_strip_fd_redirects() {
1045        assert_eq!(
1046            seg("cargo test 2>&1").strip_fd_redirects(),
1047            seg("cargo test")
1048        );
1049        assert_eq!(
1050            seg("cmd 2>&1 >&2").strip_fd_redirects(),
1051            seg("cmd")
1052        );
1053        assert_eq!(
1054            seg("ls -la").strip_fd_redirects(),
1055            seg("ls -la")
1056        );
1057    }
1058
1059    #[test]
1060    fn flag_check_required_present_no_denied() {
1061        let fc = FlagCheck::new(&["--show"], &["--set"]);
1062        assert!(fc.is_safe(&toks(&["--show"])));
1063    }
1064
1065    #[test]
1066    fn flag_check_required_absent() {
1067        let fc = FlagCheck::new(&["--show"], &["--set"]);
1068        assert!(!fc.is_safe(&toks(&["--verbose"])));
1069    }
1070
1071    #[test]
1072    fn flag_check_denied_present() {
1073        let fc = FlagCheck::new(&["--show"], &["--set"]);
1074        assert!(!fc.is_safe(&toks(&["--show", "--set", "key", "val"])));
1075    }
1076
1077    #[test]
1078    fn flag_check_empty_denied() {
1079        let fc = FlagCheck::new(&["--check"], &[]);
1080        assert!(fc.is_safe(&toks(&["--check", "--all"])));
1081    }
1082
1083    #[test]
1084    fn flag_check_empty_tokens() {
1085        let fc = FlagCheck::new(&["--show"], &[]);
1086        assert!(!fc.is_safe(&[]));
1087    }
1088
1089    #[test]
1090    fn content_outside_double_quotes_strips_string() {
1091        assert_eq!(tok(r#""system""#).content_outside_double_quotes(), " ");
1092    }
1093
1094    #[test]
1095    fn content_outside_double_quotes_preserves_code() {
1096        let result = tok(r#"{print "hello"} END{print NR}"#).content_outside_double_quotes();
1097        assert_eq!(result, r#"{print  } END{print NR}"#);
1098    }
1099
1100    #[test]
1101    fn content_outside_double_quotes_escaped() {
1102        let result = tok(r#"{print "he said \"hi\""}"#).content_outside_double_quotes();
1103        assert_eq!(result, "{print  }");
1104    }
1105
1106    #[test]
1107    fn content_outside_double_quotes_no_quotes() {
1108        assert_eq!(tok("{print $1}").content_outside_double_quotes(), "{print $1}");
1109    }
1110
1111    #[test]
1112    fn content_outside_double_quotes_empty() {
1113        assert_eq!(tok("").content_outside_double_quotes(), "");
1114    }
1115
1116    #[test]
1117    fn extract_subs_none() {
1118        let (subs, cleaned) = seg("echo hello").extract_substitutions().unwrap();
1119        assert!(subs.is_empty());
1120        assert_eq!(cleaned, "echo hello");
1121    }
1122
1123    #[test]
1124    fn extract_subs_dollar_paren() {
1125        let (subs, cleaned) = seg("echo $(ls)").extract_substitutions().unwrap();
1126        assert_eq!(subs, vec!["ls"]);
1127        assert_eq!(cleaned, "echo _");
1128    }
1129
1130    #[test]
1131    fn extract_subs_backtick() {
1132        let (subs, cleaned) = seg("ls `pwd`").extract_substitutions().unwrap();
1133        assert_eq!(subs, vec!["pwd"]);
1134        assert_eq!(cleaned, "ls _");
1135    }
1136
1137    #[test]
1138    fn extract_subs_multiple() {
1139        let (subs, cleaned) = seg("echo $(cmd1) $(cmd2)").extract_substitutions().unwrap();
1140        assert_eq!(subs, vec!["cmd1", "cmd2"]);
1141        assert_eq!(cleaned, "echo _ _");
1142    }
1143
1144    #[test]
1145    fn extract_subs_nested() {
1146        let (subs, cleaned) = seg("echo $(echo $(ls))").extract_substitutions().unwrap();
1147        assert_eq!(subs, vec!["echo $(ls)"]);
1148        assert_eq!(cleaned, "echo _");
1149    }
1150
1151    #[test]
1152    fn extract_subs_quoted_skipped() {
1153        let (subs, cleaned) = seg("echo '$(safe)' arg").extract_substitutions().unwrap();
1154        assert!(subs.is_empty());
1155        assert_eq!(cleaned, "echo '$(safe)' arg");
1156    }
1157
1158    #[test]
1159    fn extract_subs_unmatched_backtick() {
1160        assert!(seg("echo `unclosed").extract_substitutions().is_err());
1161    }
1162
1163    #[test]
1164    fn extract_subs_unmatched_paren() {
1165        assert!(seg("echo $(unclosed").extract_substitutions().is_err());
1166    }
1167
1168    #[test]
1169    fn unsafe_redirects_to_file() {
1170        assert!(seg("echo hello > file.txt").has_unsafe_redirects());
1171    }
1172
1173    #[test]
1174    fn unsafe_redirects_dev_null_ok() {
1175        assert!(!seg("cmd > /dev/null").has_unsafe_redirects());
1176    }
1177
1178    #[test]
1179    fn unsafe_redirects_fd_ok() {
1180        assert!(!seg("cmd 2>&1").has_unsafe_redirects());
1181    }
1182
1183    #[test]
1184    fn unsafe_redirects_no_backtick_check() {
1185        assert!(!seg("echo `ls`").has_unsafe_redirects());
1186    }
1187
1188    #[test]
1189    fn unsafe_redirects_no_dollar_paren_check() {
1190        assert!(!seg("echo $(ls)").has_unsafe_redirects());
1191    }
1192}