Skip to main content

safe_chains/
parse.rs

1use std::ops::Deref;
2
/// A raw shell command line, which may chain several commands together.
///
/// Use [`CommandLine::segments`] to break it into individual [`Segment`]s.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CommandLine(String);
5
/// The text of a single command taken from a [`CommandLine`], stored as an
/// owned string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Segment(String);
8
/// One shell word, as produced by [`Segment::tokenize`], stored as an owned
/// string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Token(String);
11
12impl Deref for Token {
13    type Target = str;
14    fn deref(&self) -> &str {
15        &self.0
16    }
17}
18
/// An immutable set of words backed by a sorted static slice.
///
/// The slice must be strictly ascending in byte order; [`WordSet::new`]
/// asserts this so that [`WordSet::contains`] can use binary search.
#[derive(Debug, Copy, Clone)]
pub struct WordSet(&'static [&'static str]);
21
22impl WordSet {
23    pub const fn new(words: &'static [&'static str]) -> Self {
24        let mut i = 1;
25        while i < words.len() {
26            assert!(
27                const_less(words[i - 1].as_bytes(), words[i].as_bytes()),
28                "WordSet: entries must be sorted, no duplicates"
29            );
30            i += 1;
31        }
32        Self(words)
33    }
34
35    pub fn contains(&self, s: &str) -> bool {
36        self.0.binary_search(&s).is_ok()
37    }
38
39    pub fn iter(&self) -> impl Iterator<Item = &'static str> + '_ {
40        self.0.iter().copied()
41    }
42}
43
/// Strict lexicographic "less than" on byte slices, usable in `const`
/// contexts.
///
/// Returns `true` only when `a` sorts strictly before `b`; equal slices
/// yield `false`, and a proper prefix sorts before the longer slice.
const fn const_less(a: &[u8], b: &[u8]) -> bool {
    let shared = if b.len() < a.len() { b.len() } else { a.len() };
    let mut idx = 0;
    while idx < shared {
        // The first differing byte decides the ordering.
        if a[idx] != b[idx] {
            return a[idx] < b[idx];
        }
        idx += 1;
    }
    // Common prefix is identical: the shorter slice comes first.
    a.len() < b.len()
}
58
59
60impl CommandLine {
61    pub fn new(s: impl Into<String>) -> Self {
62        Self(s.into())
63    }
64
65    pub fn as_str(&self) -> &str {
66        &self.0
67    }
68
69    pub fn segments(&self) -> Vec<Segment> {
70        split_outside_quotes(&self.0)
71            .into_iter()
72            .map(Segment)
73            .collect()
74    }
75}
76
77impl Segment {
78    pub fn as_str(&self) -> &str {
79        &self.0
80    }
81
82    pub fn is_empty(&self) -> bool {
83        self.0.is_empty()
84    }
85
86    pub fn from_raw(s: String) -> Self {
87        Segment(s)
88    }
89
90    pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
91        Segment(shell_words::join(words))
92    }
93
94    pub fn tokenize(&self) -> Option<Vec<Token>> {
95        shell_words::split(&self.0)
96            .ok()
97            .map(|v| v.into_iter().map(Token).collect())
98    }
99
100    pub fn has_unsafe_shell_syntax(&self) -> bool {
101        check_unsafe_shell_syntax(&self.0)
102    }
103
104    pub fn has_unsafe_redirects(&self) -> bool {
105        check_unsafe_redirects(&self.0)
106    }
107
108    pub(crate) fn extract_substitutions(&self) -> Result<(Vec<String>, String), ()> {
109        extract_substitutions(&self.0)
110    }
111
112    pub fn strip_env_prefix(&self) -> Segment {
113        Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
114    }
115
116    pub fn from_tokens_replacing(tokens: &[Token], find: &str, replace: &str) -> Self {
117        let words: Vec<&str> = tokens
118            .iter()
119            .map(|t| if t.as_str() == find { replace } else { t.as_str() })
120            .collect();
121        Self::from_words(&words)
122    }
123
124    pub fn strip_fd_redirects(&self) -> Segment {
125        match self.tokenize() {
126            Some(tokens) => {
127                let filtered: Vec<_> = tokens
128                    .into_iter()
129                    .filter(|t| !t.is_fd_redirect())
130                    .collect();
131                Token::join(&filtered)
132            }
133            None => Segment(self.0.clone()),
134        }
135    }
136}
137
138impl Token {
139    #[cfg(test)]
140    pub(crate) fn from_test(s: &str) -> Self {
141        Self(s.to_string())
142    }
143
144    pub fn as_str(&self) -> &str {
145        &self.0
146    }
147
148    pub fn join(tokens: &[Token]) -> Segment {
149        Segment(shell_words::join(tokens.iter().map(|t| t.as_str())))
150    }
151
152    pub fn as_command_line(&self) -> CommandLine {
153        CommandLine(self.0.clone())
154    }
155
156    pub fn command_name(&self) -> &str {
157        self.as_str().rsplit('/').next().unwrap_or(self.as_str())
158    }
159
160    pub fn is_one_of(&self, options: &[&str]) -> bool {
161        options.contains(&self.as_str())
162    }
163
164    pub fn split_value(&self, sep: &str) -> Option<&str> {
165        self.as_str().split_once(sep).map(|(_, v)| v)
166    }
167
168    pub fn content_outside_double_quotes(&self) -> String {
169        let bytes = self.as_str().as_bytes();
170        let mut result = Vec::with_capacity(bytes.len());
171        let mut i = 0;
172        while i < bytes.len() {
173            if bytes[i] == b'"' {
174                result.push(b' ');
175                i += 1;
176                while i < bytes.len() {
177                    if bytes[i] == b'\\' && i + 1 < bytes.len() {
178                        i += 2;
179                        continue;
180                    }
181                    if bytes[i] == b'"' {
182                        i += 1;
183                        break;
184                    }
185                    i += 1;
186                }
187            } else {
188                result.push(bytes[i]);
189                i += 1;
190            }
191        }
192        String::from_utf8(result).unwrap_or_default()
193    }
194
195    pub fn is_fd_redirect(&self) -> bool {
196        let s = self.as_str();
197        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
198        if rest.len() < 2 || !rest.starts_with(">&") {
199            return false;
200        }
201        let after = &rest[2..];
202        !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
203    }
204
205    pub fn is_dev_null_redirect(&self) -> bool {
206        let s = self.as_str();
207        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
208        rest.strip_prefix(">>")
209            .or_else(|| rest.strip_prefix('>'))
210            .or_else(|| rest.strip_prefix('<'))
211            .is_some_and(|after| after == "/dev/null")
212    }
213
214    pub fn is_redirect_operator(&self) -> bool {
215        let s = self.as_str();
216        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
217        matches!(rest, ">" | ">>" | "<")
218    }
219}
220
221impl PartialEq<str> for Token {
222    fn eq(&self, other: &str) -> bool {
223        self.0 == other
224    }
225}
226
227impl PartialEq<&str> for Token {
228    fn eq(&self, other: &&str) -> bool {
229        self.0 == *other
230    }
231}
232
233impl PartialEq<Token> for str {
234    fn eq(&self, other: &Token) -> bool {
235        self == other.as_str()
236    }
237}
238
239impl PartialEq<Token> for &str {
240    fn eq(&self, other: &Token) -> bool {
241        *self == other.as_str()
242    }
243}
244
245impl std::fmt::Display for Token {
246    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
247        f.write_str(&self.0)
248    }
249}
250
251pub fn has_flag(tokens: &[Token], short: Option<&str>, long: Option<&str>) -> bool {
252    for token in &tokens[1..] {
253        if token == "--" {
254            return false;
255        }
256        if let Some(long_flag) = long
257            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
258        {
259            return true;
260        }
261        if let Some(short_flag) = short {
262            let short_char = short_flag.trim_start_matches('-');
263            if token.starts_with('-')
264                && !token.starts_with("--")
265                && token[1..].contains(short_char)
266            {
267                return true;
268            }
269        }
270    }
271    false
272}
273
/// Splits a command line into its individual commands.
///
/// Unquoted `|`, `;`, newline and `&` separate commands. An `&` directly
/// after `>` is kept, so `2>&1` stays intact, and `&&` counts as one
/// separator. Quote and backslash characters are kept in the output. Every
/// piece is trimmed and empty pieces are dropped.
fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let (mut single, mut double, mut after_backslash) = (false, false, false);
    let mut iter = cmd.chars().peekable();

    while let Some(ch) = iter.next() {
        if after_backslash {
            // The escaped character is always taken literally.
            after_backslash = false;
            buf.push(ch);
            continue;
        }
        match ch {
            '\\' if !single => after_backslash = true,
            '\'' if !double => single = !single,
            '"' if !single => double = !double,
            '|' | ';' | '\n' if !single && !double => {
                pieces.push(std::mem::take(&mut buf));
                continue;
            }
            '&' if !single && !double && !buf.ends_with('>') => {
                pieces.push(std::mem::take(&mut buf));
                // Swallow the second half of `&&`.
                iter.next_if_eq(&'&');
                continue;
            }
            _ => {}
        }
        buf.push(ch);
    }
    pieces.push(buf);

    pieces
        .iter()
        .map(|piece| piece.trim())
        .filter(|piece| !piece.is_empty())
        .map(str::to_owned)
        .collect()
}
329
330fn check_unsafe_shell_syntax(segment: &str) -> bool {
331    let mut in_single = false;
332    let mut in_double = false;
333    let mut escaped = false;
334    let chars: Vec<char> = segment.chars().collect();
335
336    for (i, &c) in chars.iter().enumerate() {
337        if escaped {
338            escaped = false;
339            continue;
340        }
341        if c == '\\' && !in_single {
342            escaped = true;
343            continue;
344        }
345        if c == '\'' && !in_double {
346            in_single = !in_single;
347            continue;
348        }
349        if c == '"' && !in_single {
350            in_double = !in_double;
351            continue;
352        }
353        if !in_single && !in_double {
354            if c == '>' || c == '<' {
355                let next = chars.get(i + 1);
356                if next == Some(&'&')
357                    && chars
358                        .get(i + 2)
359                        .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
360                {
361                    continue;
362                }
363                if is_dev_null_target(&chars, i + 1, c) {
364                    continue;
365                }
366                return true;
367            }
368            if c == '`' {
369                return true;
370            }
371            if c == '$' && chars.get(i + 1) == Some(&'(') {
372                return true;
373            }
374        }
375    }
376    false
377}
378
379fn check_unsafe_redirects(segment: &str) -> bool {
380    let mut in_single = false;
381    let mut in_double = false;
382    let mut escaped = false;
383    let chars: Vec<char> = segment.chars().collect();
384
385    for (i, &c) in chars.iter().enumerate() {
386        if escaped {
387            escaped = false;
388            continue;
389        }
390        if c == '\\' && !in_single {
391            escaped = true;
392            continue;
393        }
394        if c == '\'' && !in_double {
395            in_single = !in_single;
396            continue;
397        }
398        if c == '"' && !in_single {
399            in_double = !in_double;
400            continue;
401        }
402        if !in_single && !in_double && (c == '>' || c == '<') {
403            let next = chars.get(i + 1);
404            if next == Some(&'&')
405                && chars
406                    .get(i + 2)
407                    .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
408            {
409                continue;
410            }
411            if is_dev_null_target(&chars, i + 1, c) {
412                continue;
413            }
414            return true;
415        }
416    }
417    false
418}
419
420fn extract_substitutions(segment: &str) -> Result<(Vec<String>, String), ()> {
421    let mut subs = Vec::new();
422    let mut cleaned = String::with_capacity(segment.len());
423    let mut in_single = false;
424    let mut in_double = false;
425    let mut escaped = false;
426    let chars: Vec<char> = segment.chars().collect();
427    let mut i = 0;
428
429    while i < chars.len() {
430        if escaped {
431            escaped = false;
432            cleaned.push(chars[i]);
433            i += 1;
434            continue;
435        }
436        if chars[i] == '\\' && !in_single {
437            escaped = true;
438            cleaned.push(chars[i]);
439            i += 1;
440            continue;
441        }
442        if chars[i] == '\'' && !in_double {
443            in_single = !in_single;
444            cleaned.push(chars[i]);
445            i += 1;
446            continue;
447        }
448        if chars[i] == '"' && !in_single {
449            in_double = !in_double;
450            cleaned.push(chars[i]);
451            i += 1;
452            continue;
453        }
454        if !in_single {
455            if chars[i] == '`' {
456                let start = i + 1;
457                let end = find_matching_backtick(&chars, start).ok_or(())?;
458                let inner: String = chars[start..end].iter().collect();
459                subs.push(inner);
460                cleaned.push('_');
461                i = end + 1;
462                continue;
463            }
464            if chars[i] == '$' && chars.get(i + 1) == Some(&'(') {
465                let start = i + 2;
466                let end = find_matching_paren(&chars, start).ok_or(())?;
467                let inner: String = chars[start..end].iter().collect();
468                subs.push(inner);
469                cleaned.push('_');
470                i = end + 1;
471                continue;
472            }
473        }
474        cleaned.push(chars[i]);
475        i += 1;
476    }
477    Ok((subs, cleaned))
478}
479
/// Finds the index of the backtick that closes a span starting at `start`.
///
/// Backticks inside single or double quotes, and characters following a
/// backslash (outside single quotes), are skipped. Returns `None` when no
/// closing backtick is found.
fn find_matching_backtick(chars: &[char], start: usize) -> Option<usize> {
    let mut single = false;
    let mut double = false;
    let mut skip = false;

    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if skip {
            skip = false;
            continue;
        }
        match ch {
            '\\' if !single => skip = true,
            '\'' if !double => single = !single,
            '"' if !single => double = !double,
            '`' if !single && !double => return Some(idx),
            _ => {}
        }
    }
    None
}
513
/// Finds the index of the `)` that closes a group whose body begins at
/// `start`, that is, just after the opening parenthesis.
///
/// Nested unquoted parentheses are balanced. Parentheses inside single or
/// double quotes, and characters following a backslash (outside single
/// quotes), are skipped. Returns `None` when the group is never closed.
fn find_matching_paren(chars: &[char], start: usize) -> Option<usize> {
    let mut open_groups = 1u32;
    let mut single = false;
    let mut double = false;
    let mut skip = false;

    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if skip {
            skip = false;
            continue;
        }
        match ch {
            '\\' if !single => skip = true,
            '\'' if !double => single = !single,
            '"' if !single => double = !double,
            '(' if !single && !double => open_groups += 1,
            ')' if !single && !double => {
                open_groups -= 1;
                if open_groups == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }
    None
}
555
/// The characters of the path `/dev/null`.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Reports whether the redirect whose operator ends just before `start`
/// targets exactly `/dev/null`.
///
/// For `>` redirects a second `>` (append) is skipped first. Any number of
/// spaces may separate the operator from the path. The path must then be
/// followed by the end of input, whitespace, or one of `;`, `|`, `&`, `)`,
/// so longer paths such as `/dev/nullicious` or `/dev/null/foo` are
/// rejected.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut pos = start;
    if redirect_char == '>' && chars.get(pos) == Some(&'>') {
        pos += 1;
    }
    while chars.get(pos) == Some(&' ') {
        pos += 1;
    }

    let end = pos + DEV_NULL.len();
    match chars.get(pos..end) {
        Some(window) if window == &DEV_NULL[..] => {}
        _ => return false,
    }

    match chars.get(end) {
        None => true,
        Some(&after) => after.is_whitespace() || ";|&)".contains(after),
    }
}
575
/// Returns the byte offset of the first space in `s` that is neither inside
/// quotes nor preceded by a backslash, or `None` if there is none.
///
/// A backslash outside single quotes hides the byte that follows it.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let mut single = false;
    let mut double = false;
    let mut skip = false;

    s.bytes().position(|byte| {
        if skip {
            skip = false;
            return false;
        }
        match byte {
            b'\\' if !single => skip = true,
            b'\'' if !double => single = !single,
            b'"' if !single => double = !double,
            b' ' => return !single && !double,
            _ => {}
        }
        false
    })
}
603
604fn strip_env_prefix_str(segment: &str) -> &str {
605    let mut rest = segment;
606    loop {
607        let trimmed = rest.trim_start();
608        if trimmed.is_empty() {
609            return trimmed;
610        }
611        let bytes = trimmed.as_bytes();
612        if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
613            return trimmed;
614        }
615        if let Some(eq_pos) = trimmed.find('=') {
616            let key = &trimmed[..eq_pos];
617            let valid_key = key
618                .bytes()
619                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
620            if !valid_key {
621                return trimmed;
622            }
623            if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
624                rest = &trimmed[eq_pos + space_pos..];
625                continue;
626            }
627            return trimmed;
628        }
629        return trimmed;
630    }
631}
632
/// Unit tests for command-line splitting, tokenizing, redirect and
/// substitution detection, and the small helper types in this file.
#[cfg(test)]
mod tests {
    use super::*;

    // Shorthand constructors that bypass the public API.

    fn seg(s: &str) -> Segment {
        Segment(s.to_string())
    }

    fn tok(s: &str) -> Token {
        Token(s.to_string())
    }

    fn toks(words: &[&str]) -> Vec<Token> {
        words.iter().map(|s| tok(s)).collect()
    }

    #[test]
    fn split_pipe() {
        let segs = CommandLine::new("grep foo | head -5").segments();
        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
    }

    #[test]
    fn split_and() {
        let segs = CommandLine::new("ls && echo done").segments();
        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
    }

    #[test]
    fn split_semicolon() {
        let segs = CommandLine::new("ls; echo done").segments();
        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
    }

    #[test]
    fn split_preserves_quoted_pipes() {
        let segs = CommandLine::new("echo 'a | b' foo").segments();
        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
    }

    #[test]
    fn split_background_operator() {
        let segs = CommandLine::new("cat file & rm -rf /").segments();
        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
    }

    #[test]
    fn split_newline() {
        let segs = CommandLine::new("echo foo\necho bar").segments();
        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
    }

    #[test]
    fn unsafe_redirect() {
        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_fd_redirect_stderr_to_stdout() {
        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_fd_redirect_close() {
        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_ampersand_no_digit() {
        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_backtick() {
        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_command_substitution() {
        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_quoted_dollar_paren() {
        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_quoted_redirect() {
        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_no_special_chars() {
        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_to_dev_null() {
        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_stderr_to_dev_null() {
        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_append_to_dev_null() {
        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_space_dev_null() {
        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_input_dev_null() {
        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_both_dev_null() {
        // Both streams: stdout goes to /dev/null and stderr follows it.
        assert!(!seg("cmd >/dev/null 2>&1").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_dev_null_prefix() {
        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_dev_null_path_traversal() {
        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_dev_null_subpath() {
        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_to_file() {
        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
    }

    #[test]
    fn has_flag_short() {
        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
    }

    #[test]
    fn has_flag_long_with_eq() {
        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
    }

    #[test]
    fn has_flag_combined_short() {
        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
    }

    #[test]
    fn has_flag_stops_at_double_dash() {
        let tokens = toks(&["cmd", "--", "-i"]);
        assert!(!has_flag(&tokens, Some("-i"), Some("--in-place")));
    }

    #[test]
    fn has_flag_long_only() {
        let tokens = toks(&["sort", "--compress-program", "gzip", "file.txt"]);
        assert!(has_flag(&tokens, None, Some("--compress-program")));
    }

    #[test]
    fn has_flag_long_only_eq() {
        let tokens = toks(&["sort", "--compress-program=gzip", "file.txt"]);
        assert!(has_flag(&tokens, None, Some("--compress-program")));
    }

    #[test]
    fn has_flag_long_only_absent() {
        let tokens = toks(&["sort", "-r", "file.txt"]);
        assert!(!has_flag(&tokens, None, Some("--compress-program")));
    }

    #[test]
    fn strip_single_env_var() {
        assert_eq!(
            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
            seg("bundle exec rspec")
        );
    }

    #[test]
    fn strip_multiple_env_vars() {
        assert_eq!(
            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
            seg("bundle exec rspec")
        );
    }

    #[test]
    fn strip_no_env_var() {
        assert_eq!(
            seg("bundle exec rspec").strip_env_prefix(),
            seg("bundle exec rspec")
        );
    }

    #[test]
    fn tokenize_simple() {
        assert_eq!(
            seg("grep foo file.txt").tokenize(),
            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
        );
    }

    #[test]
    fn tokenize_quoted() {
        assert_eq!(
            seg("echo 'hello world'").tokenize(),
            Some(vec![tok("echo"), tok("hello world")])
        );
    }

    #[test]
    fn strip_env_quoted_single() {
        assert_eq!(
            seg("FOO='bar baz' ls").strip_env_prefix(),
            seg("ls")
        );
    }

    #[test]
    fn strip_env_quoted_double() {
        assert_eq!(
            seg("FOO=\"bar baz\" ls").strip_env_prefix(),
            seg("ls")
        );
    }

    #[test]
    fn strip_env_quoted_with_equals() {
        assert_eq!(
            seg("FOO='a=b' ls").strip_env_prefix(),
            seg("ls")
        );
    }

    #[test]
    fn strip_env_quoted_multiple() {
        assert_eq!(
            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
            seg("cmd")
        );
    }

    #[test]
    fn command_name_simple() {
        assert_eq!(tok("ls").command_name(), "ls");
    }

    #[test]
    fn command_name_with_path() {
        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
    }

    #[test]
    fn command_name_relative_path() {
        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
    }

    #[test]
    fn fd_redirect_detection() {
        assert!(tok("2>&1").is_fd_redirect());
        assert!(tok(">&2").is_fd_redirect());
        assert!(tok("10>&1").is_fd_redirect());
        assert!(tok("255>&2").is_fd_redirect());
        assert!(tok("2>&-").is_fd_redirect());
        assert!(tok("2>&10").is_fd_redirect());
        assert!(!tok(">").is_fd_redirect());
        assert!(!tok("/dev/null").is_fd_redirect());
        assert!(!tok(">&").is_fd_redirect());
        assert!(!tok("").is_fd_redirect());
        assert!(!tok("42").is_fd_redirect());
        assert!(!tok("123abc").is_fd_redirect());
    }

    #[test]
    fn dev_null_redirect_single_token() {
        assert!(tok(">/dev/null").is_dev_null_redirect());
        assert!(tok(">>/dev/null").is_dev_null_redirect());
        assert!(tok("2>/dev/null").is_dev_null_redirect());
        assert!(tok("2>>/dev/null").is_dev_null_redirect());
        assert!(tok("</dev/null").is_dev_null_redirect());
        assert!(tok("10>/dev/null").is_dev_null_redirect());
        assert!(tok("255>/dev/null").is_dev_null_redirect());
        assert!(!tok(">/tmp/file").is_dev_null_redirect());
        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
        assert!(!tok("ls").is_dev_null_redirect());
        assert!(!tok("").is_dev_null_redirect());
        assert!(!tok("42").is_dev_null_redirect());
        assert!(!tok("<</dev/null").is_dev_null_redirect());
    }

    #[test]
    fn redirect_operator_detection() {
        assert!(tok(">").is_redirect_operator());
        assert!(tok(">>").is_redirect_operator());
        assert!(tok("<").is_redirect_operator());
        assert!(tok("2>").is_redirect_operator());
        assert!(tok("2>>").is_redirect_operator());
        assert!(tok("10>").is_redirect_operator());
        assert!(tok("255>>").is_redirect_operator());
        assert!(!tok("ls").is_redirect_operator());
        assert!(!tok(">&1").is_redirect_operator());
        assert!(!tok("/dev/null").is_redirect_operator());
        assert!(!tok("").is_redirect_operator());
        assert!(!tok("42").is_redirect_operator());
        assert!(!tok("<<").is_redirect_operator());
    }

    #[test]
    fn reverse_partial_eq() {
        let t = tok("hello");
        assert!("hello" == t);
        assert!("world" != t);
        let s: &str = "hello";
        assert!(s == t);
    }

    #[test]
    fn token_deref() {
        let t = tok("--flag");
        assert!(t.starts_with("--"));
        assert!(t.contains("fl"));
        assert_eq!(t.len(), 6);
        assert!(!t.is_empty());
        assert_eq!(t.as_bytes()[0], b'-');
        assert!(t.eq_ignore_ascii_case("--FLAG"));
        assert_eq!(t.get(2..), Some("flag"));
    }

    #[test]
    fn token_is_one_of() {
        assert!(tok("-v").is_one_of(&["-v", "--verbose"]));
        assert!(!tok("-q").is_one_of(&["-v", "--verbose"]));
    }

    #[test]
    fn token_split_value() {
        assert_eq!(tok("--method=GET").split_value("="), Some("GET"));
        assert_eq!(tok("--flag").split_value("="), None);
    }

    #[test]
    fn word_set_contains() {
        let set = WordSet::new(&["list", "show", "view"]);
        assert!(set.contains(&tok("list")));
        assert!(set.contains(&tok("view")));
        assert!(!set.contains(&tok("delete")));
        assert!(set.contains("list"));
        assert!(!set.contains("delete"));
    }

    #[test]
    fn word_set_iter() {
        let set = WordSet::new(&["a", "b", "c"]);
        let items: Vec<&str> = set.iter().collect();
        assert_eq!(items, vec!["a", "b", "c"]);
    }

    #[test]
    fn token_as_command_line() {
        let cl = tok("ls -la | grep foo").as_command_line();
        let segs = cl.segments();
        assert_eq!(segs, vec![seg("ls -la"), seg("grep foo")]);
    }

    #[test]
    fn segment_from_tokens_replacing() {
        let tokens = toks(&["find", ".", "-name", "{}", "-print"]);
        let result = Segment::from_tokens_replacing(&tokens, "{}", "file");
        assert_eq!(result.tokenize().unwrap(), toks(&["find", ".", "-name", "file", "-print"]));
    }

    #[test]
    fn segment_strip_fd_redirects() {
        assert_eq!(
            seg("cargo test 2>&1").strip_fd_redirects(),
            seg("cargo test")
        );
        assert_eq!(
            seg("cmd 2>&1 >&2").strip_fd_redirects(),
            seg("cmd")
        );
        assert_eq!(
            seg("ls -la").strip_fd_redirects(),
            seg("ls -la")
        );
    }

    #[test]
    fn content_outside_double_quotes_strips_string() {
        assert_eq!(tok(r#""system""#).content_outside_double_quotes(), " ");
    }

    #[test]
    fn content_outside_double_quotes_preserves_code() {
        let result = tok(r#"{print "hello"} END{print NR}"#).content_outside_double_quotes();
        assert_eq!(result, r#"{print  } END{print NR}"#);
    }

    #[test]
    fn content_outside_double_quotes_escaped() {
        let result = tok(r#"{print "he said \"hi\""}"#).content_outside_double_quotes();
        assert_eq!(result, "{print  }");
    }

    #[test]
    fn content_outside_double_quotes_no_quotes() {
        assert_eq!(tok("{print $1}").content_outside_double_quotes(), "{print $1}");
    }

    #[test]
    fn content_outside_double_quotes_empty() {
        assert_eq!(tok("").content_outside_double_quotes(), "");
    }

    #[test]
    fn extract_subs_none() {
        let (subs, cleaned) = seg("echo hello").extract_substitutions().unwrap();
        assert!(subs.is_empty());
        assert_eq!(cleaned, "echo hello");
    }

    #[test]
    fn extract_subs_dollar_paren() {
        let (subs, cleaned) = seg("echo $(ls)").extract_substitutions().unwrap();
        assert_eq!(subs, vec!["ls"]);
        assert_eq!(cleaned, "echo _");
    }

    #[test]
    fn extract_subs_backtick() {
        let (subs, cleaned) = seg("ls `pwd`").extract_substitutions().unwrap();
        assert_eq!(subs, vec!["pwd"]);
        assert_eq!(cleaned, "ls _");
    }

    #[test]
    fn extract_subs_multiple() {
        let (subs, cleaned) = seg("echo $(cmd1) $(cmd2)").extract_substitutions().unwrap();
        assert_eq!(subs, vec!["cmd1", "cmd2"]);
        assert_eq!(cleaned, "echo _ _");
    }

    #[test]
    fn extract_subs_nested() {
        let (subs, cleaned) = seg("echo $(echo $(ls))").extract_substitutions().unwrap();
        assert_eq!(subs, vec!["echo $(ls)"]);
        assert_eq!(cleaned, "echo _");
    }

    #[test]
    fn extract_subs_quoted_skipped() {
        let (subs, cleaned) = seg("echo '$(safe)' arg").extract_substitutions().unwrap();
        assert!(subs.is_empty());
        assert_eq!(cleaned, "echo '$(safe)' arg");
    }

    #[test]
    fn extract_subs_unmatched_backtick() {
        assert!(seg("echo `unclosed").extract_substitutions().is_err());
    }

    #[test]
    fn extract_subs_unmatched_paren() {
        assert!(seg("echo $(unclosed").extract_substitutions().is_err());
    }

    #[test]
    fn unsafe_redirects_to_file() {
        assert!(seg("echo hello > file.txt").has_unsafe_redirects());
    }

    #[test]
    fn unsafe_redirects_dev_null_ok() {
        assert!(!seg("cmd > /dev/null").has_unsafe_redirects());
    }

    #[test]
    fn unsafe_redirects_fd_ok() {
        assert!(!seg("cmd 2>&1").has_unsafe_redirects());
    }

    #[test]
    fn unsafe_redirects_no_backtick_check() {
        assert!(!seg("echo `ls`").has_unsafe_redirects());
    }

    #[test]
    fn unsafe_redirects_no_dollar_paren_check() {
        assert!(!seg("echo $(ls)").has_unsafe_redirects());
    }
}