Skip to main content

safe_chains/
parse.rs

1use std::ops::Deref;
2
/// An owned shell command line.
///
/// [`CommandLine::segments`] splits it into individual [`Segment`]s at
/// unquoted `|`, `&`, `&&`, `;` and newline characters.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandLine(String);
5
/// The owned text of a single command, as produced by
/// [`CommandLine::segments`] or built directly through the `from_*`
/// constructors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Segment(String);
8
/// One owned word of a command, e.g. an element of the vector returned by
/// [`Segment::tokenize`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token(String);

/// Exposes the wrapped text as a `str`, so every `&str` method can be called
/// on a `Token` directly.
impl Deref for Token {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.0.as_str()
    }
}
18
/// An immutable set of static words, stored as a strictly ascending slice so
/// that membership can be answered with a binary search.
#[derive(Copy, Clone)]
pub struct WordSet(&'static [&'static str]);

impl WordSet {
    /// Wraps `words` after checking that every entry is strictly greater
    /// (byte-wise) than the one before it.
    ///
    /// # Panics
    ///
    /// Panics if two neighbouring entries are out of order or equal. When the
    /// call is evaluated in a const context this becomes a compile error.
    pub const fn new(words: &'static [&'static str]) -> Self {
        let mut next = 1;
        while next < words.len() {
            let previous = words[next - 1].as_bytes();
            let current = words[next].as_bytes();
            assert!(
                const_less(previous, current),
                "WordSet: entries must be sorted, no duplicates"
            );
            next += 1;
        }
        WordSet(words)
    }

    /// Returns `true` when `s` is one of the words in the set.
    pub fn contains(&self, s: &str) -> bool {
        // Valid because `new` guarantees the slice is sorted.
        matches!(self.0.binary_search(&s), Ok(_))
    }

    /// Iterates over the words in their stored (ascending) order.
    pub fn iter(&self) -> impl Iterator<Item = &'static str> + '_ {
        let words: &'static [&'static str] = self.0;
        words.iter().copied()
    }
}

/// `const`-evaluable "strictly less than" for byte strings: lexicographic
/// comparison where a proper prefix sorts before the longer string.
const fn const_less(a: &[u8], b: &[u8]) -> bool {
    let mut pos = 0;
    while pos < a.len() && pos < b.len() {
        if a[pos] != b[pos] {
            // The first differing byte decides the ordering.
            return a[pos] < b[pos];
        }
        pos += 1;
    }
    // One input is a prefix of the other (or they are equal).
    a.len() < b.len()
}
58
59pub struct FlagCheck {
60    required: WordSet,
61    denied: WordSet,
62}
63
64impl FlagCheck {
65    pub const fn new(required: &'static [&'static str], denied: &'static [&'static str]) -> Self {
66        Self {
67            required: WordSet::new(required),
68            denied: WordSet::new(denied),
69        }
70    }
71
72    pub fn required(&self) -> &WordSet {
73        &self.required
74    }
75
76    pub fn denied(&self) -> &WordSet {
77        &self.denied
78    }
79
80    pub fn is_safe(&self, tokens: &[Token]) -> bool {
81        tokens.iter().any(|t| self.required.contains(t))
82            && !tokens.iter().any(|t| self.denied.contains(t))
83    }
84}
85
86impl CommandLine {
87    pub fn new(s: impl Into<String>) -> Self {
88        Self(s.into())
89    }
90
91    pub fn as_str(&self) -> &str {
92        &self.0
93    }
94
95    pub fn segments(&self) -> Vec<Segment> {
96        split_outside_quotes(&self.0)
97            .into_iter()
98            .map(Segment)
99            .collect()
100    }
101}
102
103impl Segment {
104    pub fn as_str(&self) -> &str {
105        &self.0
106    }
107
108    pub fn is_empty(&self) -> bool {
109        self.0.is_empty()
110    }
111
112    pub fn from_raw(s: String) -> Self {
113        Segment(s)
114    }
115
116    pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
117        Segment(shell_words::join(words))
118    }
119
120    pub fn tokenize(&self) -> Option<Vec<Token>> {
121        shell_words::split(&self.0)
122            .ok()
123            .map(|v| v.into_iter().map(Token).collect())
124    }
125
126    pub fn has_unsafe_shell_syntax(&self) -> bool {
127        check_unsafe_shell_syntax(&self.0)
128    }
129
130    pub fn has_unsafe_redirects(&self) -> bool {
131        check_unsafe_redirects(&self.0)
132    }
133
134    pub(crate) fn extract_substitutions(&self) -> Result<(Vec<String>, String), ()> {
135        extract_substitutions(&self.0)
136    }
137
138    pub fn strip_env_prefix(&self) -> Segment {
139        Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
140    }
141
142    pub fn from_tokens_replacing(tokens: &[Token], find: &str, replace: &str) -> Self {
143        let words: Vec<&str> = tokens
144            .iter()
145            .map(|t| if t.as_str() == find { replace } else { t.as_str() })
146            .collect();
147        Self::from_words(&words)
148    }
149
150    pub fn strip_fd_redirects(&self) -> Segment {
151        match self.tokenize() {
152            Some(tokens) => {
153                let filtered: Vec<_> = tokens
154                    .into_iter()
155                    .filter(|t| !t.is_fd_redirect())
156                    .collect();
157                Token::join(&filtered)
158            }
159            None => Segment(self.0.clone()),
160        }
161    }
162}
163
164impl Token {
165    pub fn as_str(&self) -> &str {
166        &self.0
167    }
168
169    pub fn join(tokens: &[Token]) -> Segment {
170        Segment(shell_words::join(tokens.iter().map(|t| t.as_str())))
171    }
172
173    pub fn as_command_line(&self) -> CommandLine {
174        CommandLine(self.0.clone())
175    }
176
177    pub fn command_name(&self) -> &str {
178        self.as_str().rsplit('/').next().unwrap_or(self.as_str())
179    }
180
181    pub fn is_one_of(&self, options: &[&str]) -> bool {
182        options.contains(&self.as_str())
183    }
184
185    pub fn split_value(&self, sep: &str) -> Option<&str> {
186        self.as_str().split_once(sep).map(|(_, v)| v)
187    }
188
189    pub fn content_outside_double_quotes(&self) -> String {
190        let bytes = self.as_str().as_bytes();
191        let mut result = Vec::with_capacity(bytes.len());
192        let mut i = 0;
193        while i < bytes.len() {
194            if bytes[i] == b'"' {
195                result.push(b' ');
196                i += 1;
197                while i < bytes.len() {
198                    if bytes[i] == b'\\' && i + 1 < bytes.len() {
199                        i += 2;
200                        continue;
201                    }
202                    if bytes[i] == b'"' {
203                        i += 1;
204                        break;
205                    }
206                    i += 1;
207                }
208            } else {
209                result.push(bytes[i]);
210                i += 1;
211            }
212        }
213        String::from_utf8(result).unwrap_or_default()
214    }
215
216    pub fn is_fd_redirect(&self) -> bool {
217        let s = self.as_str();
218        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
219        if rest.len() < 2 || !rest.starts_with(">&") {
220            return false;
221        }
222        let after = &rest[2..];
223        !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
224    }
225
226    pub fn is_dev_null_redirect(&self) -> bool {
227        let s = self.as_str();
228        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
229        rest.strip_prefix(">>")
230            .or_else(|| rest.strip_prefix('>'))
231            .or_else(|| rest.strip_prefix('<'))
232            .is_some_and(|after| after == "/dev/null")
233    }
234
235    pub fn is_redirect_operator(&self) -> bool {
236        let s = self.as_str();
237        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
238        matches!(rest, ">" | ">>" | "<")
239    }
240}
241
242impl PartialEq<str> for Token {
243    fn eq(&self, other: &str) -> bool {
244        self.0 == other
245    }
246}
247
248impl PartialEq<&str> for Token {
249    fn eq(&self, other: &&str) -> bool {
250        self.0 == *other
251    }
252}
253
254impl PartialEq<Token> for str {
255    fn eq(&self, other: &Token) -> bool {
256        self == other.as_str()
257    }
258}
259
260impl PartialEq<Token> for &str {
261    fn eq(&self, other: &Token) -> bool {
262        *self == other.as_str()
263    }
264}
265
266impl std::fmt::Display for Token {
267    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
268        f.write_str(&self.0)
269    }
270}
271
272pub fn has_flag(tokens: &[Token], short: Option<&str>, long: Option<&str>) -> bool {
273    for token in &tokens[1..] {
274        if token == "--" {
275            return false;
276        }
277        if let Some(long_flag) = long
278            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
279        {
280            return true;
281        }
282        if let Some(short_flag) = short {
283            let short_char = short_flag.trim_start_matches('-');
284            if token.starts_with('-')
285                && !token.starts_with("--")
286                && token[1..].contains(short_char)
287            {
288                return true;
289            }
290        }
291    }
292    false
293}
294
/// Splits `cmd` into its individual commands.
///
/// A split happens at every unquoted, unescaped `|`, `;`, newline or `&`
/// (with `&&` consumed as one separator). An `&` that directly follows `>`
/// is kept, so redirections such as `2>&1` stay intact. Quotes and
/// backslashes are preserved in the output; each piece is trimmed and empty
/// pieces are dropped.
fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;
    let mut stream = cmd.chars().peekable();

    while let Some(ch) = stream.next() {
        if after_backslash {
            // The escaped character is copied verbatim and has no meaning.
            after_backslash = false;
            buf.push(ch);
            continue;
        }
        let quoted = single_quoted || double_quoted;
        match ch {
            '\\' if !single_quoted => {
                after_backslash = true;
                buf.push(ch);
            }
            '\'' if !double_quoted => {
                single_quoted = !single_quoted;
                buf.push(ch);
            }
            '"' if !single_quoted => {
                double_quoted = !double_quoted;
                buf.push(ch);
            }
            '|' | ';' | '\n' if !quoted => pieces.push(std::mem::take(&mut buf)),
            '&' if !quoted && !buf.ends_with('>') => {
                pieces.push(std::mem::take(&mut buf));
                // Treat `&&` as a single separator.
                stream.next_if_eq(&'&');
            }
            _ => buf.push(ch),
        }
    }
    pieces.push(buf);

    pieces
        .iter()
        .map(|piece| piece.trim())
        .filter(|piece| !piece.is_empty())
        .map(str::to_string)
        .collect()
}
350
351fn check_unsafe_shell_syntax(segment: &str) -> bool {
352    let mut in_single = false;
353    let mut in_double = false;
354    let mut escaped = false;
355    let chars: Vec<char> = segment.chars().collect();
356
357    for (i, &c) in chars.iter().enumerate() {
358        if escaped {
359            escaped = false;
360            continue;
361        }
362        if c == '\\' && !in_single {
363            escaped = true;
364            continue;
365        }
366        if c == '\'' && !in_double {
367            in_single = !in_single;
368            continue;
369        }
370        if c == '"' && !in_single {
371            in_double = !in_double;
372            continue;
373        }
374        if !in_single && !in_double {
375            if c == '>' || c == '<' {
376                let next = chars.get(i + 1);
377                if next == Some(&'&')
378                    && chars
379                        .get(i + 2)
380                        .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
381                {
382                    continue;
383                }
384                if is_dev_null_target(&chars, i + 1, c) {
385                    continue;
386                }
387                return true;
388            }
389            if c == '`' {
390                return true;
391            }
392            if c == '$' && chars.get(i + 1) == Some(&'(') {
393                return true;
394            }
395        }
396    }
397    false
398}
399
400fn check_unsafe_redirects(segment: &str) -> bool {
401    let mut in_single = false;
402    let mut in_double = false;
403    let mut escaped = false;
404    let chars: Vec<char> = segment.chars().collect();
405
406    for (i, &c) in chars.iter().enumerate() {
407        if escaped {
408            escaped = false;
409            continue;
410        }
411        if c == '\\' && !in_single {
412            escaped = true;
413            continue;
414        }
415        if c == '\'' && !in_double {
416            in_single = !in_single;
417            continue;
418        }
419        if c == '"' && !in_single {
420            in_double = !in_double;
421            continue;
422        }
423        if !in_single && !in_double && (c == '>' || c == '<') {
424            let next = chars.get(i + 1);
425            if next == Some(&'&')
426                && chars
427                    .get(i + 2)
428                    .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
429            {
430                continue;
431            }
432            if is_dev_null_target(&chars, i + 1, c) {
433                continue;
434            }
435            return true;
436        }
437    }
438    false
439}
440
441fn extract_substitutions(segment: &str) -> Result<(Vec<String>, String), ()> {
442    let mut subs = Vec::new();
443    let mut cleaned = String::with_capacity(segment.len());
444    let mut in_single = false;
445    let mut in_double = false;
446    let mut escaped = false;
447    let chars: Vec<char> = segment.chars().collect();
448    let mut i = 0;
449
450    while i < chars.len() {
451        if escaped {
452            escaped = false;
453            cleaned.push(chars[i]);
454            i += 1;
455            continue;
456        }
457        if chars[i] == '\\' && !in_single {
458            escaped = true;
459            cleaned.push(chars[i]);
460            i += 1;
461            continue;
462        }
463        if chars[i] == '\'' && !in_double {
464            in_single = !in_single;
465            cleaned.push(chars[i]);
466            i += 1;
467            continue;
468        }
469        if chars[i] == '"' && !in_single {
470            in_double = !in_double;
471            cleaned.push(chars[i]);
472            i += 1;
473            continue;
474        }
475        if !in_single {
476            if chars[i] == '`' {
477                let start = i + 1;
478                let end = find_matching_backtick(&chars, start).ok_or(())?;
479                let inner: String = chars[start..end].iter().collect();
480                subs.push(inner);
481                cleaned.push('_');
482                i = end + 1;
483                continue;
484            }
485            if chars[i] == '$' && chars.get(i + 1) == Some(&'(') {
486                let start = i + 2;
487                let end = find_matching_paren(&chars, start).ok_or(())?;
488                let inner: String = chars[start..end].iter().collect();
489                subs.push(inner);
490                cleaned.push('_');
491                i = end + 1;
492                continue;
493            }
494        }
495        cleaned.push(chars[i]);
496        i += 1;
497    }
498    Ok((subs, cleaned))
499}
500
/// Finds the index of the first backtick at or after `start` that is neither
/// quoted (single or double) nor preceded by a backslash.
///
/// Quote and escape tracking begins fresh at `start`. Returns `None` when no
/// such backtick exists, including when `start` is past the end of `chars`.
fn find_matching_backtick(chars: &[char], start: usize) -> Option<usize> {
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;

    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if after_backslash {
            after_backslash = false;
            continue;
        }
        match ch {
            '\\' if !single_quoted => after_backslash = true,
            '\'' if !double_quoted => single_quoted = !single_quoted,
            '"' if !single_quoted => double_quoted = !double_quoted,
            '`' if !single_quoted && !double_quoted => return Some(idx),
            _ => {}
        }
    }
    None
}
534
/// Finds the `)` that closes a parenthesis assumed to be already open when
/// scanning begins at `start`.
///
/// Nested unquoted `(`/`)` pairs are balanced; parentheses inside single or
/// double quotes, or preceded by a backslash, are ignored. Returns the index
/// of the closing `)`, or `None` when the text ends first.
fn find_matching_paren(chars: &[char], start: usize) -> Option<usize> {
    // One level is already open at `start`.
    let mut open_levels = 1u32;
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;

    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if after_backslash {
            after_backslash = false;
            continue;
        }
        let quoted = single_quoted || double_quoted;
        match ch {
            '\\' if !single_quoted => after_backslash = true,
            '\'' if !double_quoted => single_quoted = !single_quoted,
            '"' if !single_quoted => double_quoted = !double_quoted,
            '(' if !quoted => open_levels += 1,
            ')' if !quoted => {
                open_levels -= 1;
                if open_levels == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }
    None
}
576
/// The characters of the literal path `/dev/null`.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Reports whether the redirection whose operator character precedes `start`
/// targets exactly `/dev/null`.
///
/// `start` is the index just after the first operator character and
/// `redirect_char` is that character. For `>` a second `>` (append) is
/// skipped; then any run of spaces is skipped. The path must be followed by
/// the end of input, whitespace, or one of `;`, `|`, `&`, `)` — so
/// `/dev/nullicious` and `/dev/null/foo` are rejected.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut pos = start;
    if redirect_char == '>' && chars.get(pos) == Some(&'>') {
        pos += 1;
    }
    // Only plain spaces are skipped, not other whitespace.
    while chars.get(pos) == Some(&' ') {
        pos += 1;
    }

    let tail = match chars.get(pos..) {
        Some(tail) => tail,
        None => return false,
    };
    if !tail.starts_with(&DEV_NULL) {
        return false;
    }
    match tail.get(DEV_NULL.len()) {
        None => true,
        Some(&next) => next.is_whitespace() || ";|&)".contains(next),
    }
}
596
/// Returns the byte offset of the first space in `s` that is neither inside
/// single or double quotes nor preceded by a backslash, or `None` when there
/// is no such space.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;

    s.bytes().position(|byte| {
        if after_backslash {
            // The escaped byte can never be the separator.
            after_backslash = false;
            return false;
        }
        match byte {
            b'\\' if !single_quoted => after_backslash = true,
            b'\'' if !double_quoted => single_quoted = !single_quoted,
            b'"' if !single_quoted => double_quoted = !double_quoted,
            b' ' => return !single_quoted && !double_quoted,
            _ => {}
        }
        false
    })
}
624
625fn strip_env_prefix_str(segment: &str) -> &str {
626    let mut rest = segment;
627    loop {
628        let trimmed = rest.trim_start();
629        if trimmed.is_empty() {
630            return trimmed;
631        }
632        let bytes = trimmed.as_bytes();
633        if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
634            return trimmed;
635        }
636        if let Some(eq_pos) = trimmed.find('=') {
637            let key = &trimmed[..eq_pos];
638            let valid_key = key
639                .bytes()
640                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
641            if !valid_key {
642                return trimmed;
643            }
644            if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
645                rest = &trimmed[eq_pos + space_pos..];
646                continue;
647            }
648            return trimmed;
649        }
650        return trimmed;
651    }
652}
653
654#[cfg(test)]
655mod tests {
656    use super::*;
657
658    fn seg(s: &str) -> Segment {
659        Segment(s.to_string())
660    }
661
662    fn tok(s: &str) -> Token {
663        Token(s.to_string())
664    }
665
666    fn toks(words: &[&str]) -> Vec<Token> {
667        words.iter().map(|s| tok(s)).collect()
668    }
669
670    #[test]
671    fn split_pipe() {
672        let segs = CommandLine::new("grep foo | head -5").segments();
673        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
674    }
675
676    #[test]
677    fn split_and() {
678        let segs = CommandLine::new("ls && echo done").segments();
679        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
680    }
681
682    #[test]
683    fn split_semicolon() {
684        let segs = CommandLine::new("ls; echo done").segments();
685        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
686    }
687
688    #[test]
689    fn split_preserves_quoted_pipes() {
690        let segs = CommandLine::new("echo 'a | b' foo").segments();
691        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
692    }
693
694    #[test]
695    fn split_background_operator() {
696        let segs = CommandLine::new("cat file & rm -rf /").segments();
697        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
698    }
699
700    #[test]
701    fn split_newline() {
702        let segs = CommandLine::new("echo foo\necho bar").segments();
703        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
704    }
705
706    #[test]
707    fn unsafe_redirect() {
708        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
709    }
710
711    #[test]
712    fn safe_fd_redirect_stderr_to_stdout() {
713        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
714    }
715
716    #[test]
717    fn safe_fd_redirect_close() {
718        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
719    }
720
721    #[test]
722    fn unsafe_redirect_ampersand_no_digit() {
723        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
724    }
725
726    #[test]
727    fn unsafe_backtick() {
728        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
729    }
730
731    #[test]
732    fn unsafe_command_substitution() {
733        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
734    }
735
736    #[test]
737    fn safe_quoted_dollar_paren() {
738        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
739    }
740
741    #[test]
742    fn safe_quoted_redirect() {
743        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
744    }
745
746    #[test]
747    fn safe_no_special_chars() {
748        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
749    }
750
751    #[test]
752    fn safe_redirect_to_dev_null() {
753        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
754    }
755
756    #[test]
757    fn safe_redirect_stderr_to_dev_null() {
758        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
759    }
760
761    #[test]
762    fn safe_redirect_append_to_dev_null() {
763        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
764    }
765
766    #[test]
767    fn safe_redirect_space_dev_null() {
768        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
769    }
770
771    #[test]
772    fn safe_redirect_input_dev_null() {
773        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
774    }
775
776    #[test]
777    fn safe_redirect_both_dev_null() {
778        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
779    }
780
781    #[test]
782    fn unsafe_redirect_dev_null_prefix() {
783        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
784    }
785
786    #[test]
787    fn unsafe_redirect_dev_null_path_traversal() {
788        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
789    }
790
791    #[test]
792    fn unsafe_redirect_dev_null_subpath() {
793        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
794    }
795
796    #[test]
797    fn unsafe_redirect_to_file() {
798        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
799    }
800
801    #[test]
802    fn has_flag_short() {
803        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
804        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
805    }
806
807    #[test]
808    fn has_flag_long_with_eq() {
809        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
810        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
811    }
812
813    #[test]
814    fn has_flag_combined_short() {
815        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
816        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
817    }
818
819    #[test]
820    fn has_flag_stops_at_double_dash() {
821        let tokens = toks(&["cmd", "--", "-i"]);
822        assert!(!has_flag(&tokens, Some("-i"), Some("--in-place")));
823    }
824
825    #[test]
826    fn has_flag_long_only() {
827        let tokens = toks(&["sort", "--compress-program", "gzip", "file.txt"]);
828        assert!(has_flag(&tokens, None, Some("--compress-program")));
829    }
830
831    #[test]
832    fn has_flag_long_only_eq() {
833        let tokens = toks(&["sort", "--compress-program=gzip", "file.txt"]);
834        assert!(has_flag(&tokens, None, Some("--compress-program")));
835    }
836
837    #[test]
838    fn has_flag_long_only_absent() {
839        let tokens = toks(&["sort", "-r", "file.txt"]);
840        assert!(!has_flag(&tokens, None, Some("--compress-program")));
841    }
842
843    #[test]
844    fn strip_single_env_var() {
845        assert_eq!(
846            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
847            seg("bundle exec rspec")
848        );
849    }
850
851    #[test]
852    fn strip_multiple_env_vars() {
853        assert_eq!(
854            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
855            seg("bundle exec rspec")
856        );
857    }
858
859    #[test]
860    fn strip_no_env_var() {
861        assert_eq!(
862            seg("bundle exec rspec").strip_env_prefix(),
863            seg("bundle exec rspec")
864        );
865    }
866
867    #[test]
868    fn tokenize_simple() {
869        assert_eq!(
870            seg("grep foo file.txt").tokenize(),
871            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
872        );
873    }
874
875    #[test]
876    fn tokenize_quoted() {
877        assert_eq!(
878            seg("echo 'hello world'").tokenize(),
879            Some(vec![tok("echo"), tok("hello world")])
880        );
881    }
882
883    #[test]
884    fn strip_env_quoted_single() {
885        assert_eq!(
886            seg("FOO='bar baz' ls").strip_env_prefix(),
887            seg("ls")
888        );
889    }
890
891    #[test]
892    fn strip_env_quoted_double() {
893        assert_eq!(
894            seg("FOO=\"bar baz\" ls").strip_env_prefix(),
895            seg("ls")
896        );
897    }
898
899    #[test]
900    fn strip_env_quoted_with_equals() {
901        assert_eq!(
902            seg("FOO='a=b' ls").strip_env_prefix(),
903            seg("ls")
904        );
905    }
906
907    #[test]
908    fn strip_env_quoted_multiple() {
909        assert_eq!(
910            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
911            seg("cmd")
912        );
913    }
914
915    #[test]
916    fn command_name_simple() {
917        assert_eq!(tok("ls").command_name(), "ls");
918    }
919
920    #[test]
921    fn command_name_with_path() {
922        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
923    }
924
925    #[test]
926    fn command_name_relative_path() {
927        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
928    }
929
930    #[test]
931    fn fd_redirect_detection() {
932        assert!(tok("2>&1").is_fd_redirect());
933        assert!(tok(">&2").is_fd_redirect());
934        assert!(tok("10>&1").is_fd_redirect());
935        assert!(tok("255>&2").is_fd_redirect());
936        assert!(tok("2>&-").is_fd_redirect());
937        assert!(tok("2>&10").is_fd_redirect());
938        assert!(!tok(">").is_fd_redirect());
939        assert!(!tok("/dev/null").is_fd_redirect());
940        assert!(!tok(">&").is_fd_redirect());
941        assert!(!tok("").is_fd_redirect());
942        assert!(!tok("42").is_fd_redirect());
943        assert!(!tok("123abc").is_fd_redirect());
944    }
945
946    #[test]
947    fn dev_null_redirect_single_token() {
948        assert!(tok(">/dev/null").is_dev_null_redirect());
949        assert!(tok(">>/dev/null").is_dev_null_redirect());
950        assert!(tok("2>/dev/null").is_dev_null_redirect());
951        assert!(tok("2>>/dev/null").is_dev_null_redirect());
952        assert!(tok("</dev/null").is_dev_null_redirect());
953        assert!(tok("10>/dev/null").is_dev_null_redirect());
954        assert!(tok("255>/dev/null").is_dev_null_redirect());
955        assert!(!tok(">/tmp/file").is_dev_null_redirect());
956        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
957        assert!(!tok("ls").is_dev_null_redirect());
958        assert!(!tok("").is_dev_null_redirect());
959        assert!(!tok("42").is_dev_null_redirect());
960        assert!(!tok("<</dev/null").is_dev_null_redirect());
961    }
962
963    #[test]
964    fn redirect_operator_detection() {
965        assert!(tok(">").is_redirect_operator());
966        assert!(tok(">>").is_redirect_operator());
967        assert!(tok("<").is_redirect_operator());
968        assert!(tok("2>").is_redirect_operator());
969        assert!(tok("2>>").is_redirect_operator());
970        assert!(tok("10>").is_redirect_operator());
971        assert!(tok("255>>").is_redirect_operator());
972        assert!(!tok("ls").is_redirect_operator());
973        assert!(!tok(">&1").is_redirect_operator());
974        assert!(!tok("/dev/null").is_redirect_operator());
975        assert!(!tok("").is_redirect_operator());
976        assert!(!tok("42").is_redirect_operator());
977        assert!(!tok("<<").is_redirect_operator());
978    }
979
980    #[test]
981    fn reverse_partial_eq() {
982        let t = tok("hello");
983        assert!("hello" == t);
984        assert!("world" != t);
985        let s: &str = "hello";
986        assert!(s == t);
987    }
988
989    #[test]
990    fn token_deref() {
991        let t = tok("--flag");
992        assert!(t.starts_with("--"));
993        assert!(t.contains("fl"));
994        assert_eq!(t.len(), 6);
995        assert!(!t.is_empty());
996        assert_eq!(t.as_bytes()[0], b'-');
997        assert!(t.eq_ignore_ascii_case("--FLAG"));
998        assert_eq!(t.get(2..), Some("flag"));
999    }
1000
1001    #[test]
1002    fn token_is_one_of() {
1003        assert!(tok("-v").is_one_of(&["-v", "--verbose"]));
1004        assert!(!tok("-q").is_one_of(&["-v", "--verbose"]));
1005    }
1006
1007    #[test]
1008    fn token_split_value() {
1009        assert_eq!(tok("--method=GET").split_value("="), Some("GET"));
1010        assert_eq!(tok("--flag").split_value("="), None);
1011    }
1012
1013    #[test]
1014    fn word_set_contains() {
1015        let set = WordSet::new(&["list", "show", "view"]);
1016        assert!(set.contains(&tok("list")));
1017        assert!(set.contains(&tok("view")));
1018        assert!(!set.contains(&tok("delete")));
1019        assert!(set.contains("list"));
1020        assert!(!set.contains("delete"));
1021    }
1022
1023    #[test]
1024    fn word_set_iter() {
1025        let set = WordSet::new(&["a", "b", "c"]);
1026        let items: Vec<&str> = set.iter().collect();
1027        assert_eq!(items, vec!["a", "b", "c"]);
1028    }
1029
1030    #[test]
1031    fn token_as_command_line() {
1032        let cl = tok("ls -la | grep foo").as_command_line();
1033        let segs = cl.segments();
1034        assert_eq!(segs, vec![seg("ls -la"), seg("grep foo")]);
1035    }
1036
1037    #[test]
1038    fn segment_from_tokens_replacing() {
1039        let tokens = toks(&["find", ".", "-name", "{}", "-print"]);
1040        let result = Segment::from_tokens_replacing(&tokens, "{}", "file");
1041        assert_eq!(result.tokenize().unwrap(), toks(&["find", ".", "-name", "file", "-print"]));
1042    }
1043
1044    #[test]
1045    fn segment_strip_fd_redirects() {
1046        assert_eq!(
1047            seg("cargo test 2>&1").strip_fd_redirects(),
1048            seg("cargo test")
1049        );
1050        assert_eq!(
1051            seg("cmd 2>&1 >&2").strip_fd_redirects(),
1052            seg("cmd")
1053        );
1054        assert_eq!(
1055            seg("ls -la").strip_fd_redirects(),
1056            seg("ls -la")
1057        );
1058    }
1059
1060    #[test]
1061    fn flag_check_required_present_no_denied() {
1062        let fc = FlagCheck::new(&["--show"], &["--set"]);
1063        assert!(fc.is_safe(&toks(&["--show"])));
1064    }
1065
1066    #[test]
1067    fn flag_check_required_absent() {
1068        let fc = FlagCheck::new(&["--show"], &["--set"]);
1069        assert!(!fc.is_safe(&toks(&["--verbose"])));
1070    }
1071
1072    #[test]
1073    fn flag_check_denied_present() {
1074        let fc = FlagCheck::new(&["--show"], &["--set"]);
1075        assert!(!fc.is_safe(&toks(&["--show", "--set", "key", "val"])));
1076    }
1077
1078    #[test]
1079    fn flag_check_empty_denied() {
1080        let fc = FlagCheck::new(&["--check"], &[]);
1081        assert!(fc.is_safe(&toks(&["--check", "--all"])));
1082    }
1083
1084    #[test]
1085    fn flag_check_empty_tokens() {
1086        let fc = FlagCheck::new(&["--show"], &[]);
1087        assert!(!fc.is_safe(&[]));
1088    }
1089
1090    #[test]
1091    fn content_outside_double_quotes_strips_string() {
1092        assert_eq!(tok(r#""system""#).content_outside_double_quotes(), " ");
1093    }
1094
1095    #[test]
1096    fn content_outside_double_quotes_preserves_code() {
1097        let result = tok(r#"{print "hello"} END{print NR}"#).content_outside_double_quotes();
1098        assert_eq!(result, r#"{print  } END{print NR}"#);
1099    }
1100
1101    #[test]
1102    fn content_outside_double_quotes_escaped() {
1103        let result = tok(r#"{print "he said \"hi\""}"#).content_outside_double_quotes();
1104        assert_eq!(result, "{print  }");
1105    }
1106
1107    #[test]
1108    fn content_outside_double_quotes_no_quotes() {
1109        assert_eq!(tok("{print $1}").content_outside_double_quotes(), "{print $1}");
1110    }
1111
1112    #[test]
1113    fn content_outside_double_quotes_empty() {
1114        assert_eq!(tok("").content_outside_double_quotes(), "");
1115    }
1116
1117    #[test]
1118    fn extract_subs_none() {
1119        let (subs, cleaned) = seg("echo hello").extract_substitutions().unwrap();
1120        assert!(subs.is_empty());
1121        assert_eq!(cleaned, "echo hello");
1122    }
1123
1124    #[test]
1125    fn extract_subs_dollar_paren() {
1126        let (subs, cleaned) = seg("echo $(ls)").extract_substitutions().unwrap();
1127        assert_eq!(subs, vec!["ls"]);
1128        assert_eq!(cleaned, "echo _");
1129    }
1130
1131    #[test]
1132    fn extract_subs_backtick() {
1133        let (subs, cleaned) = seg("ls `pwd`").extract_substitutions().unwrap();
1134        assert_eq!(subs, vec!["pwd"]);
1135        assert_eq!(cleaned, "ls _");
1136    }
1137
1138    #[test]
1139    fn extract_subs_multiple() {
1140        let (subs, cleaned) = seg("echo $(cmd1) $(cmd2)").extract_substitutions().unwrap();
1141        assert_eq!(subs, vec!["cmd1", "cmd2"]);
1142        assert_eq!(cleaned, "echo _ _");
1143    }
1144
1145    #[test]
1146    fn extract_subs_nested() {
1147        let (subs, cleaned) = seg("echo $(echo $(ls))").extract_substitutions().unwrap();
1148        assert_eq!(subs, vec!["echo $(ls)"]);
1149        assert_eq!(cleaned, "echo _");
1150    }
1151
1152    #[test]
1153    fn extract_subs_quoted_skipped() {
1154        let (subs, cleaned) = seg("echo '$(safe)' arg").extract_substitutions().unwrap();
1155        assert!(subs.is_empty());
1156        assert_eq!(cleaned, "echo '$(safe)' arg");
1157    }
1158
1159    #[test]
1160    fn extract_subs_unmatched_backtick() {
1161        assert!(seg("echo `unclosed").extract_substitutions().is_err());
1162    }
1163
1164    #[test]
1165    fn extract_subs_unmatched_paren() {
1166        assert!(seg("echo $(unclosed").extract_substitutions().is_err());
1167    }
1168
1169    #[test]
1170    fn unsafe_redirects_to_file() {
1171        assert!(seg("echo hello > file.txt").has_unsafe_redirects());
1172    }
1173
1174    #[test]
1175    fn unsafe_redirects_dev_null_ok() {
1176        assert!(!seg("cmd > /dev/null").has_unsafe_redirects());
1177    }
1178
1179    #[test]
1180    fn unsafe_redirects_fd_ok() {
1181        assert!(!seg("cmd 2>&1").has_unsafe_redirects());
1182    }
1183
1184    #[test]
1185    fn unsafe_redirects_no_backtick_check() {
1186        assert!(!seg("echo `ls`").has_unsafe_redirects());
1187    }
1188
1189    #[test]
1190    fn unsafe_redirects_no_dollar_paren_check() {
1191        assert!(!seg("echo $(ls)").has_unsafe_redirects());
1192    }
1193}