Skip to main content

safe_chains/
parse.rs

1use std::ops::Deref;
2
/// Raw text of a full command line.
///
/// [`CommandLine::segments`] splits it into individual [`Segment`]s.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CommandLine(String);
5
/// Text of a single command within a command line.
///
/// Built by [`CommandLine::segments`], [`Segment::from_raw`],
/// [`Segment::from_words`] or [`Token::join`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Segment(String);
8
/// A single word of a command, such as those returned by
/// [`Segment::tokenize`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Token(String);
11
12impl Deref for Token {
13    type Target = str;
14    fn deref(&self) -> &str {
15        &self.0
16    }
17}
18
/// A fixed set of words backed by a static slice.
///
/// [`WordSet::new`] asserts that the slice is sorted with no duplicates,
/// which is what allows lookups to use binary search.
///
/// `Debug` is derived so values can appear in assertion and log output.
#[derive(Clone, Copy, Debug)]
pub struct WordSet(&'static [&'static str]);
21
22impl WordSet {
23    pub const fn new(words: &'static [&'static str]) -> Self {
24        let mut i = 1;
25        while i < words.len() {
26            assert!(
27                const_less(words[i - 1].as_bytes(), words[i].as_bytes()),
28                "WordSet: entries must be sorted, no duplicates"
29            );
30            i += 1;
31        }
32        Self(words)
33    }
34
35    pub const fn flags(words: &'static [&'static str]) -> Self {
36        let mut i = 0;
37        while i < words.len() {
38            let b = words[i].as_bytes();
39            assert!(b.len() >= 2, "WordSet::flags: flag too short (need at least 2 chars)");
40            assert!(b[0] == b'-', "WordSet::flags: flag must start with '-'");
41            if b[1] == b'-' {
42                assert!(b.len() >= 3, "WordSet::flags: long flag needs at least 3 chars (e.g. --x)");
43            }
44            i += 1;
45        }
46        Self::new(words)
47    }
48
49    pub fn contains(&self, s: &str) -> bool {
50        self.0.binary_search(&s).is_ok()
51    }
52
53    pub fn contains_short(&self, b: u8) -> bool {
54        let target = [b'-', b];
55        std::str::from_utf8(&target).is_ok_and(|s| self.0.binary_search(&s).is_ok())
56    }
57
58    pub fn iter(&self) -> impl Iterator<Item = &'static str> + '_ {
59        self.0.iter().copied()
60    }
61}
62
/// Byte-wise "strictly less than" for two byte strings, usable in const
/// context.
///
/// The first differing byte decides; when one input is a prefix of the
/// other, the shorter one is the smaller.
const fn const_less(a: &[u8], b: &[u8]) -> bool {
    let mut pos = 0;
    while pos < a.len() && pos < b.len() {
        if a[pos] != b[pos] {
            return a[pos] < b[pos];
        }
        pos += 1;
    }
    a.len() < b.len()
}
77
78
79impl CommandLine {
80    pub fn new(s: impl Into<String>) -> Self {
81        Self(s.into())
82    }
83
84    pub fn as_str(&self) -> &str {
85        &self.0
86    }
87
88    pub fn segments(&self) -> Vec<Segment> {
89        split_outside_quotes(&self.0)
90            .into_iter()
91            .map(Segment)
92            .collect()
93    }
94}
95
96impl Segment {
97    pub fn as_str(&self) -> &str {
98        &self.0
99    }
100
101    pub fn is_empty(&self) -> bool {
102        self.0.is_empty()
103    }
104
105    pub fn from_raw(s: String) -> Self {
106        Segment(s)
107    }
108
109    pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
110        Segment(shell_words::join(words))
111    }
112
113    pub fn tokenize(&self) -> Option<Vec<Token>> {
114        shell_words::split(&self.0)
115            .ok()
116            .map(|v| v.into_iter().map(Token).collect())
117    }
118
119    pub fn has_unsafe_shell_syntax(&self) -> bool {
120        check_unsafe_shell_syntax(&self.0)
121    }
122
123    pub fn has_unsafe_redirects(&self) -> bool {
124        check_unsafe_redirects(&self.0)
125    }
126
127    pub(crate) fn extract_substitutions(&self) -> Result<(Vec<String>, String), ()> {
128        extract_substitutions(&self.0)
129    }
130
131    pub fn strip_env_prefix(&self) -> Segment {
132        Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
133    }
134
135    pub fn from_tokens_replacing(tokens: &[Token], find: &str, replace: &str) -> Self {
136        let words: Vec<&str> = tokens
137            .iter()
138            .map(|t| if t.as_str() == find { replace } else { t.as_str() })
139            .collect();
140        Self::from_words(&words)
141    }
142
143    pub fn strip_fd_redirects(&self) -> Segment {
144        match self.tokenize() {
145            Some(tokens) => {
146                let filtered: Vec<_> = tokens
147                    .into_iter()
148                    .filter(|t| !t.is_fd_redirect())
149                    .collect();
150                Token::join(&filtered)
151            }
152            None => Segment(self.0.clone()),
153        }
154    }
155}
156
157impl Token {
158    #[cfg(test)]
159    pub(crate) fn from_test(s: &str) -> Self {
160        Self(s.to_string())
161    }
162
163    pub fn as_str(&self) -> &str {
164        &self.0
165    }
166
167    pub fn join(tokens: &[Token]) -> Segment {
168        Segment(shell_words::join(tokens.iter().map(|t| t.as_str())))
169    }
170
171    pub fn as_command_line(&self) -> CommandLine {
172        CommandLine(self.0.clone())
173    }
174
175    pub fn command_name(&self) -> &str {
176        self.as_str().rsplit('/').next().unwrap_or(self.as_str())
177    }
178
179    pub fn is_one_of(&self, options: &[&str]) -> bool {
180        options.contains(&self.as_str())
181    }
182
183    pub fn split_value(&self, sep: &str) -> Option<&str> {
184        self.as_str().split_once(sep).map(|(_, v)| v)
185    }
186
187    pub fn content_outside_double_quotes(&self) -> String {
188        let bytes = self.as_str().as_bytes();
189        let mut result = Vec::with_capacity(bytes.len());
190        let mut i = 0;
191        while i < bytes.len() {
192            if bytes[i] == b'"' {
193                result.push(b' ');
194                i += 1;
195                while i < bytes.len() {
196                    if bytes[i] == b'\\' && i + 1 < bytes.len() {
197                        i += 2;
198                        continue;
199                    }
200                    if bytes[i] == b'"' {
201                        i += 1;
202                        break;
203                    }
204                    i += 1;
205                }
206            } else {
207                result.push(bytes[i]);
208                i += 1;
209            }
210        }
211        String::from_utf8(result).unwrap_or_default()
212    }
213
214    pub fn is_fd_redirect(&self) -> bool {
215        let s = self.as_str();
216        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
217        if rest.len() < 2 || !rest.starts_with(">&") {
218            return false;
219        }
220        let after = &rest[2..];
221        !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
222    }
223
224    pub fn is_dev_null_redirect(&self) -> bool {
225        let s = self.as_str();
226        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
227        rest.strip_prefix(">>")
228            .or_else(|| rest.strip_prefix('>'))
229            .or_else(|| rest.strip_prefix('<'))
230            .is_some_and(|after| after == "/dev/null")
231    }
232
233    pub fn is_redirect_operator(&self) -> bool {
234        let s = self.as_str();
235        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
236        matches!(rest, ">" | ">>" | "<")
237    }
238}
239
240impl PartialEq<str> for Token {
241    fn eq(&self, other: &str) -> bool {
242        self.0 == other
243    }
244}
245
246impl PartialEq<&str> for Token {
247    fn eq(&self, other: &&str) -> bool {
248        self.0 == *other
249    }
250}
251
252impl PartialEq<Token> for str {
253    fn eq(&self, other: &Token) -> bool {
254        self == other.as_str()
255    }
256}
257
258impl PartialEq<Token> for &str {
259    fn eq(&self, other: &Token) -> bool {
260        *self == other.as_str()
261    }
262}
263
264impl std::fmt::Display for Token {
265    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
266        f.write_str(&self.0)
267    }
268}
269
270pub fn has_flag(tokens: &[Token], short: Option<&str>, long: Option<&str>) -> bool {
271    for token in &tokens[1..] {
272        if token == "--" {
273            return false;
274        }
275        if let Some(long_flag) = long
276            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
277        {
278            return true;
279        }
280        if let Some(short_flag) = short {
281            let short_char = short_flag.trim_start_matches('-');
282            if token.starts_with('-')
283                && !token.starts_with("--")
284                && token[1..].contains(short_char)
285            {
286                return true;
287            }
288        }
289    }
290    false
291}
292
/// Splits `cmd` into pieces at unquoted `|`, `&`, `;` and newline
/// characters.
///
/// * A backslash outside single quotes keeps the next character from being
///   treated specially; both characters stay in the piece.
/// * `&&` counts as one separator, and an `&` directly after `>` is kept
///   rather than treated as a separator.
/// * Each piece is trimmed, and pieces that end up empty are dropped.
fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;
    let mut input = cmd.chars().peekable();

    while let Some(ch) = input.next() {
        if after_backslash {
            after_backslash = false;
            buf.push(ch);
            continue;
        }
        let quoted = single_quoted || double_quoted;
        match ch {
            '\\' if !single_quoted => {
                after_backslash = true;
                buf.push(ch);
            }
            '\'' if !double_quoted => {
                single_quoted = !single_quoted;
                buf.push(ch);
            }
            '"' if !single_quoted => {
                double_quoted = !double_quoted;
                buf.push(ch);
            }
            '|' | ';' | '\n' if !quoted => pieces.push(std::mem::take(&mut buf)),
            '&' if !quoted && !buf.ends_with('>') => {
                pieces.push(std::mem::take(&mut buf));
                // Swallow the second `&` of `&&`.
                if input.peek() == Some(&'&') {
                    input.next();
                }
            }
            _ => buf.push(ch),
        }
    }
    pieces.push(buf);

    pieces
        .iter()
        .map(|piece| piece.trim())
        .filter(|piece| !piece.is_empty())
        .map(str::to_string)
        .collect()
}
348
349fn check_unsafe_shell_syntax(segment: &str) -> bool {
350    let mut in_single = false;
351    let mut in_double = false;
352    let mut escaped = false;
353    let chars: Vec<char> = segment.chars().collect();
354    let mut skip_until = 0;
355
356    for (i, &c) in chars.iter().enumerate() {
357        if i < skip_until {
358            continue;
359        }
360        if escaped {
361            escaped = false;
362            continue;
363        }
364        if c == '\\' && !in_single {
365            escaped = true;
366            continue;
367        }
368        if c == '\'' && !in_double {
369            in_single = !in_single;
370            continue;
371        }
372        if c == '"' && !in_single {
373            in_double = !in_double;
374            continue;
375        }
376        if !in_single && !in_double {
377            if c == '>' || c == '<' {
378                if c == '<' && chars.get(i + 1) == Some(&'<') && chars.get(i + 2) == Some(&'<') {
379                    skip_until = i + 3;
380                    continue;
381                }
382                let next = chars.get(i + 1);
383                if next == Some(&'&')
384                    && chars
385                        .get(i + 2)
386                        .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
387                {
388                    continue;
389                }
390                if is_dev_null_target(&chars, i + 1, c) {
391                    continue;
392                }
393                return true;
394            }
395            if c == '`' {
396                return true;
397            }
398            if c == '$' && chars.get(i + 1) == Some(&'(') {
399                return true;
400            }
401        }
402    }
403    false
404}
405
406fn check_unsafe_redirects(segment: &str) -> bool {
407    let mut in_single = false;
408    let mut in_double = false;
409    let mut escaped = false;
410    let chars: Vec<char> = segment.chars().collect();
411    let mut skip_until = 0;
412
413    for (i, &c) in chars.iter().enumerate() {
414        if i < skip_until {
415            continue;
416        }
417        if escaped {
418            escaped = false;
419            continue;
420        }
421        if c == '\\' && !in_single {
422            escaped = true;
423            continue;
424        }
425        if c == '\'' && !in_double {
426            in_single = !in_single;
427            continue;
428        }
429        if c == '"' && !in_single {
430            in_double = !in_double;
431            continue;
432        }
433        if !in_single && !in_double && (c == '>' || c == '<') {
434            if c == '<' && chars.get(i + 1) == Some(&'<') && chars.get(i + 2) == Some(&'<') {
435                skip_until = i + 3;
436                continue;
437            }
438            let next = chars.get(i + 1);
439            if next == Some(&'&')
440                && chars
441                    .get(i + 2)
442                    .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
443            {
444                continue;
445            }
446            if is_dev_null_target(&chars, i + 1, c) {
447                continue;
448            }
449            return true;
450        }
451    }
452    false
453}
454
455fn extract_substitutions(segment: &str) -> Result<(Vec<String>, String), ()> {
456    let mut subs = Vec::new();
457    let mut cleaned = String::with_capacity(segment.len());
458    let mut in_single = false;
459    let mut in_double = false;
460    let mut escaped = false;
461    let chars: Vec<char> = segment.chars().collect();
462    let mut i = 0;
463
464    while i < chars.len() {
465        if escaped {
466            escaped = false;
467            cleaned.push(chars[i]);
468            i += 1;
469            continue;
470        }
471        if chars[i] == '\\' && !in_single {
472            escaped = true;
473            cleaned.push(chars[i]);
474            i += 1;
475            continue;
476        }
477        if chars[i] == '\'' && !in_double {
478            in_single = !in_single;
479            cleaned.push(chars[i]);
480            i += 1;
481            continue;
482        }
483        if chars[i] == '"' && !in_single {
484            in_double = !in_double;
485            cleaned.push(chars[i]);
486            i += 1;
487            continue;
488        }
489        if !in_single {
490            if chars[i] == '`' {
491                let start = i + 1;
492                let end = find_matching_backtick(&chars, start).ok_or(())?;
493                let inner: String = chars[start..end].iter().collect();
494                subs.push(inner);
495                cleaned.push('_');
496                i = end + 1;
497                continue;
498            }
499            if chars[i] == '$' && chars.get(i + 1) == Some(&'(') {
500                let start = i + 2;
501                let end = find_matching_paren(&chars, start).ok_or(())?;
502                let inner: String = chars[start..end].iter().collect();
503                subs.push(inner);
504                cleaned.push('_');
505                i = end + 1;
506                continue;
507            }
508        }
509        cleaned.push(chars[i]);
510        i += 1;
511    }
512    Ok((subs, cleaned))
513}
514
/// Returns the index of the first backtick at or after `start` that is not
/// inside single or double quotes and not preceded by an escaping
/// backslash.
///
/// Quote tracking begins fresh at `start`. Returns `None` when no such
/// backtick exists.
fn find_matching_backtick(chars: &[char], start: usize) -> Option<usize> {
    let (mut in_single, mut in_double, mut escaped) = (false, false, false);
    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '`' if !in_single && !in_double => return Some(idx),
            _ => {}
        }
    }
    None
}
548
/// Returns the index of the `)` that closes a parenthesis assumed to be
/// already open when scanning begins at `start`.
///
/// Nested unquoted `(`/`)` pairs are balanced. Parentheses inside single or
/// double quotes, or directly after an escaping backslash, are ignored.
/// Returns `None` when the closing parenthesis is never found.
fn find_matching_paren(chars: &[char], start: usize) -> Option<usize> {
    let mut open = 1u32;
    let (mut in_single, mut in_double, mut escaped) = (false, false, false);
    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '(' if !in_single && !in_double => open += 1,
            ')' if !in_single && !in_double => {
                open -= 1;
                if open == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }
    None
}
590
/// The characters of the path `/dev/null`.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Reports whether the redirect target beginning at `start` is exactly
/// `/dev/null`.
///
/// `start` is the index just after the redirect character. When
/// `redirect_char` is `>`, one further `>` (the second half of `>>`) is
/// skipped; then any run of spaces. The path must be followed by the end of
/// input, whitespace, or one of `;`, `|`, `&`, `)`.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut pos = start;
    if redirect_char == '>' && chars.get(pos) == Some(&'>') {
        pos += 1;
    }
    while chars.get(pos) == Some(&' ') {
        pos += 1;
    }
    match chars.get(pos..) {
        Some(tail) if tail.starts_with(&DEV_NULL) => match tail.get(DEV_NULL.len()) {
            None => true,
            Some(next) => next.is_whitespace() || ";|&)".contains(*next),
        },
        _ => false,
    }
}
610
/// Returns the byte index of the first space (`' '`) in `s` that is outside
/// single and double quotes and not preceded by an escaping backslash, or
/// `None` when there is none.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let (mut in_single, mut in_double, mut escaped) = (false, false, false);
    s.bytes().position(|byte| {
        if escaped {
            escaped = false;
            return false;
        }
        match byte {
            b'\\' if !in_single => escaped = true,
            b'\'' if !in_double => in_single = !in_single,
            b'"' if !in_single => in_double = !in_double,
            b' ' => return !in_single && !in_double,
            _ => {}
        }
        false
    })
}
638
639fn strip_env_prefix_str(segment: &str) -> &str {
640    let mut rest = segment;
641    loop {
642        let trimmed = rest.trim_start();
643        if trimmed.is_empty() {
644            return trimmed;
645        }
646        let bytes = trimmed.as_bytes();
647        if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
648            return trimmed;
649        }
650        if let Some(eq_pos) = trimmed.find('=') {
651            let key = &trimmed[..eq_pos];
652            let valid_key = key
653                .bytes()
654                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
655            if !valid_key {
656                return trimmed;
657            }
658            if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
659                rest = &trimmed[eq_pos + space_pos..];
660                continue;
661            }
662            return trimmed;
663        }
664        return trimmed;
665    }
666}
667
668#[cfg(test)]
669mod tests {
670    use super::*;
671
672    fn seg(s: &str) -> Segment {
673        Segment(s.to_string())
674    }
675
676    fn tok(s: &str) -> Token {
677        Token(s.to_string())
678    }
679
680    fn toks(words: &[&str]) -> Vec<Token> {
681        words.iter().map(|s| tok(s)).collect()
682    }
683
684    #[test]
685    fn split_pipe() {
686        let segs = CommandLine::new("grep foo | head -5").segments();
687        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
688    }
689
690    #[test]
691    fn split_and() {
692        let segs = CommandLine::new("ls && echo done").segments();
693        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
694    }
695
696    #[test]
697    fn split_semicolon() {
698        let segs = CommandLine::new("ls; echo done").segments();
699        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
700    }
701
702    #[test]
703    fn split_preserves_quoted_pipes() {
704        let segs = CommandLine::new("echo 'a | b' foo").segments();
705        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
706    }
707
708    #[test]
709    fn split_background_operator() {
710        let segs = CommandLine::new("cat file & rm -rf /").segments();
711        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
712    }
713
714    #[test]
715    fn split_newline() {
716        let segs = CommandLine::new("echo foo\necho bar").segments();
717        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
718    }
719
720    #[test]
721    fn unsafe_redirect() {
722        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
723    }
724
725    #[test]
726    fn safe_fd_redirect_stderr_to_stdout() {
727        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
728    }
729
730    #[test]
731    fn safe_fd_redirect_close() {
732        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
733    }
734
735    #[test]
736    fn unsafe_redirect_ampersand_no_digit() {
737        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
738    }
739
740    #[test]
741    fn unsafe_backtick() {
742        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
743    }
744
745    #[test]
746    fn unsafe_command_substitution() {
747        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
748    }
749
750    #[test]
751    fn safe_quoted_dollar_paren() {
752        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
753    }
754
755    #[test]
756    fn safe_quoted_redirect() {
757        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
758    }
759
760    #[test]
761    fn safe_no_special_chars() {
762        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
763    }
764
765    #[test]
766    fn safe_redirect_to_dev_null() {
767        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
768    }
769
770    #[test]
771    fn safe_redirect_stderr_to_dev_null() {
772        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
773    }
774
775    #[test]
776    fn safe_redirect_append_to_dev_null() {
777        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
778    }
779
780    #[test]
781    fn safe_redirect_space_dev_null() {
782        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
783    }
784
785    #[test]
786    fn safe_redirect_input_dev_null() {
787        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
788    }
789
790    #[test]
791    fn safe_redirect_both_dev_null() {
792        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
793    }
794
795    #[test]
796    fn unsafe_redirect_dev_null_prefix() {
797        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
798    }
799
800    #[test]
801    fn unsafe_redirect_dev_null_path_traversal() {
802        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
803    }
804
805    #[test]
806    fn unsafe_redirect_dev_null_subpath() {
807        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
808    }
809
810    #[test]
811    fn unsafe_redirect_to_file() {
812        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
813    }
814
815    #[test]
816    fn safe_here_string() {
817        assert!(!seg("grep -c , <<< 'hello world'").has_unsafe_shell_syntax());
818    }
819
820    #[test]
821    fn safe_here_string_double_quoted() {
822        assert!(!seg("cat <<< \"some text\"").has_unsafe_shell_syntax());
823    }
824
825    #[test]
826    fn unsafe_heredoc_still_blocked() {
827        assert!(seg("cat << EOF").has_unsafe_shell_syntax());
828    }
829
830    #[test]
831    fn unsafe_input_redirect_still_blocked() {
832        assert!(seg("cmd < file.txt").has_unsafe_shell_syntax());
833    }
834
835    #[test]
836    fn has_flag_short() {
837        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
838        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
839    }
840
841    #[test]
842    fn has_flag_long_with_eq() {
843        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
844        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
845    }
846
847    #[test]
848    fn has_flag_combined_short() {
849        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
850        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
851    }
852
853    #[test]
854    fn has_flag_stops_at_double_dash() {
855        let tokens = toks(&["cmd", "--", "-i"]);
856        assert!(!has_flag(&tokens, Some("-i"), Some("--in-place")));
857    }
858
859    #[test]
860    fn has_flag_long_only() {
861        let tokens = toks(&["sort", "--compress-program", "gzip", "file.txt"]);
862        assert!(has_flag(&tokens, None, Some("--compress-program")));
863    }
864
865    #[test]
866    fn has_flag_long_only_eq() {
867        let tokens = toks(&["sort", "--compress-program=gzip", "file.txt"]);
868        assert!(has_flag(&tokens, None, Some("--compress-program")));
869    }
870
871    #[test]
872    fn has_flag_long_only_absent() {
873        let tokens = toks(&["sort", "-r", "file.txt"]);
874        assert!(!has_flag(&tokens, None, Some("--compress-program")));
875    }
876
877    #[test]
878    fn strip_single_env_var() {
879        assert_eq!(
880            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
881            seg("bundle exec rspec")
882        );
883    }
884
885    #[test]
886    fn strip_multiple_env_vars() {
887        assert_eq!(
888            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
889            seg("bundle exec rspec")
890        );
891    }
892
893    #[test]
894    fn strip_no_env_var() {
895        assert_eq!(
896            seg("bundle exec rspec").strip_env_prefix(),
897            seg("bundle exec rspec")
898        );
899    }
900
901    #[test]
902    fn tokenize_simple() {
903        assert_eq!(
904            seg("grep foo file.txt").tokenize(),
905            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
906        );
907    }
908
909    #[test]
910    fn tokenize_quoted() {
911        assert_eq!(
912            seg("echo 'hello world'").tokenize(),
913            Some(vec![tok("echo"), tok("hello world")])
914        );
915    }
916
917    #[test]
918    fn strip_env_quoted_single() {
919        assert_eq!(
920            seg("FOO='bar baz' ls").strip_env_prefix(),
921            seg("ls")
922        );
923    }
924
925    #[test]
926    fn strip_env_quoted_double() {
927        assert_eq!(
928            seg("FOO=\"bar baz\" ls").strip_env_prefix(),
929            seg("ls")
930        );
931    }
932
933    #[test]
934    fn strip_env_quoted_with_equals() {
935        assert_eq!(
936            seg("FOO='a=b' ls").strip_env_prefix(),
937            seg("ls")
938        );
939    }
940
941    #[test]
942    fn strip_env_quoted_multiple() {
943        assert_eq!(
944            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
945            seg("cmd")
946        );
947    }
948
949    #[test]
950    fn command_name_simple() {
951        assert_eq!(tok("ls").command_name(), "ls");
952    }
953
954    #[test]
955    fn command_name_with_path() {
956        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
957    }
958
959    #[test]
960    fn command_name_relative_path() {
961        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
962    }
963
964    #[test]
965    fn fd_redirect_detection() {
966        assert!(tok("2>&1").is_fd_redirect());
967        assert!(tok(">&2").is_fd_redirect());
968        assert!(tok("10>&1").is_fd_redirect());
969        assert!(tok("255>&2").is_fd_redirect());
970        assert!(tok("2>&-").is_fd_redirect());
971        assert!(tok("2>&10").is_fd_redirect());
972        assert!(!tok(">").is_fd_redirect());
973        assert!(!tok("/dev/null").is_fd_redirect());
974        assert!(!tok(">&").is_fd_redirect());
975        assert!(!tok("").is_fd_redirect());
976        assert!(!tok("42").is_fd_redirect());
977        assert!(!tok("123abc").is_fd_redirect());
978    }
979
980    #[test]
981    fn dev_null_redirect_single_token() {
982        assert!(tok(">/dev/null").is_dev_null_redirect());
983        assert!(tok(">>/dev/null").is_dev_null_redirect());
984        assert!(tok("2>/dev/null").is_dev_null_redirect());
985        assert!(tok("2>>/dev/null").is_dev_null_redirect());
986        assert!(tok("</dev/null").is_dev_null_redirect());
987        assert!(tok("10>/dev/null").is_dev_null_redirect());
988        assert!(tok("255>/dev/null").is_dev_null_redirect());
989        assert!(!tok(">/tmp/file").is_dev_null_redirect());
990        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
991        assert!(!tok("ls").is_dev_null_redirect());
992        assert!(!tok("").is_dev_null_redirect());
993        assert!(!tok("42").is_dev_null_redirect());
994        assert!(!tok("<</dev/null").is_dev_null_redirect());
995    }
996
997    #[test]
998    fn redirect_operator_detection() {
999        assert!(tok(">").is_redirect_operator());
1000        assert!(tok(">>").is_redirect_operator());
1001        assert!(tok("<").is_redirect_operator());
1002        assert!(tok("2>").is_redirect_operator());
1003        assert!(tok("2>>").is_redirect_operator());
1004        assert!(tok("10>").is_redirect_operator());
1005        assert!(tok("255>>").is_redirect_operator());
1006        assert!(!tok("ls").is_redirect_operator());
1007        assert!(!tok(">&1").is_redirect_operator());
1008        assert!(!tok("/dev/null").is_redirect_operator());
1009        assert!(!tok("").is_redirect_operator());
1010        assert!(!tok("42").is_redirect_operator());
1011        assert!(!tok("<<").is_redirect_operator());
1012    }
1013
1014    #[test]
1015    fn reverse_partial_eq() {
1016        let t = tok("hello");
1017        assert!("hello" == t);
1018        assert!("world" != t);
1019        let s: &str = "hello";
1020        assert!(s == t);
1021    }
1022
1023    #[test]
1024    fn token_deref() {
1025        let t = tok("--flag");
1026        assert!(t.starts_with("--"));
1027        assert!(t.contains("fl"));
1028        assert_eq!(t.len(), 6);
1029        assert!(!t.is_empty());
1030        assert_eq!(t.as_bytes()[0], b'-');
1031        assert!(t.eq_ignore_ascii_case("--FLAG"));
1032        assert_eq!(t.get(2..), Some("flag"));
1033    }
1034
1035    #[test]
1036    fn token_is_one_of() {
1037        assert!(tok("-v").is_one_of(&["-v", "--verbose"]));
1038        assert!(!tok("-q").is_one_of(&["-v", "--verbose"]));
1039    }
1040
1041    #[test]
1042    fn token_split_value() {
1043        assert_eq!(tok("--method=GET").split_value("="), Some("GET"));
1044        assert_eq!(tok("--flag").split_value("="), None);
1045    }
1046
1047    #[test]
1048    fn word_set_contains() {
1049        let set = WordSet::new(&["list", "show", "view"]);
1050        assert!(set.contains(&tok("list")));
1051        assert!(set.contains(&tok("view")));
1052        assert!(!set.contains(&tok("delete")));
1053        assert!(set.contains("list"));
1054        assert!(!set.contains("delete"));
1055    }
1056
1057    #[test]
1058    fn word_set_iter() {
1059        let set = WordSet::new(&["a", "b", "c"]);
1060        let items: Vec<&str> = set.iter().collect();
1061        assert_eq!(items, vec!["a", "b", "c"]);
1062    }
1063
1064    #[test]
1065    fn token_as_command_line() {
1066        let cl = tok("ls -la | grep foo").as_command_line();
1067        let segs = cl.segments();
1068        assert_eq!(segs, vec![seg("ls -la"), seg("grep foo")]);
1069    }
1070
1071    #[test]
1072    fn segment_from_tokens_replacing() {
1073        let tokens = toks(&["find", ".", "-name", "{}", "-print"]);
1074        let result = Segment::from_tokens_replacing(&tokens, "{}", "file");
1075        assert_eq!(result.tokenize().unwrap(), toks(&["find", ".", "-name", "file", "-print"]));
1076    }
1077
1078    #[test]
1079    fn segment_strip_fd_redirects() {
1080        assert_eq!(
1081            seg("cargo test 2>&1").strip_fd_redirects(),
1082            seg("cargo test")
1083        );
1084        assert_eq!(
1085            seg("cmd 2>&1 >&2").strip_fd_redirects(),
1086            seg("cmd")
1087        );
1088        assert_eq!(
1089            seg("ls -la").strip_fd_redirects(),
1090            seg("ls -la")
1091        );
1092    }
1093
1094    #[test]
1095    fn content_outside_double_quotes_strips_string() {
1096        assert_eq!(tok(r#""system""#).content_outside_double_quotes(), " ");
1097    }
1098
1099    #[test]
1100    fn content_outside_double_quotes_preserves_code() {
1101        let result = tok(r#"{print "hello"} END{print NR}"#).content_outside_double_quotes();
1102        assert_eq!(result, r#"{print  } END{print NR}"#);
1103    }
1104
1105    #[test]
1106    fn content_outside_double_quotes_escaped() {
1107        let result = tok(r#"{print "he said \"hi\""}"#).content_outside_double_quotes();
1108        assert_eq!(result, "{print  }");
1109    }
1110
1111    #[test]
1112    fn content_outside_double_quotes_no_quotes() {
1113        assert_eq!(tok("{print $1}").content_outside_double_quotes(), "{print $1}");
1114    }
1115
1116    #[test]
1117    fn content_outside_double_quotes_empty() {
1118        assert_eq!(tok("").content_outside_double_quotes(), "");
1119    }
1120
1121    #[test]
1122    fn extract_subs_none() {
1123        let (subs, cleaned) = seg("echo hello").extract_substitutions().unwrap();
1124        assert!(subs.is_empty());
1125        assert_eq!(cleaned, "echo hello");
1126    }
1127
1128    #[test]
1129    fn extract_subs_dollar_paren() {
1130        let (subs, cleaned) = seg("echo $(ls)").extract_substitutions().unwrap();
1131        assert_eq!(subs, vec!["ls"]);
1132        assert_eq!(cleaned, "echo _");
1133    }
1134
1135    #[test]
1136    fn extract_subs_backtick() {
1137        let (subs, cleaned) = seg("ls `pwd`").extract_substitutions().unwrap();
1138        assert_eq!(subs, vec!["pwd"]);
1139        assert_eq!(cleaned, "ls _");
1140    }
1141
1142    #[test]
1143    fn extract_subs_multiple() {
1144        let (subs, cleaned) = seg("echo $(cmd1) $(cmd2)").extract_substitutions().unwrap();
1145        assert_eq!(subs, vec!["cmd1", "cmd2"]);
1146        assert_eq!(cleaned, "echo _ _");
1147    }
1148
1149    #[test]
1150    fn extract_subs_nested() {
1151        let (subs, cleaned) = seg("echo $(echo $(ls))").extract_substitutions().unwrap();
1152        assert_eq!(subs, vec!["echo $(ls)"]);
1153        assert_eq!(cleaned, "echo _");
1154    }
1155
1156    #[test]
1157    fn extract_subs_quoted_skipped() {
1158        let (subs, cleaned) = seg("echo '$(safe)' arg").extract_substitutions().unwrap();
1159        assert!(subs.is_empty());
1160        assert_eq!(cleaned, "echo '$(safe)' arg");
1161    }
1162
1163    #[test]
1164    fn extract_subs_unmatched_backtick() {
1165        assert!(seg("echo `unclosed").extract_substitutions().is_err());
1166    }
1167
1168    #[test]
1169    fn extract_subs_unmatched_paren() {
1170        assert!(seg("echo $(unclosed").extract_substitutions().is_err());
1171    }
1172
1173    #[test]
1174    fn unsafe_redirects_to_file() {
1175        assert!(seg("echo hello > file.txt").has_unsafe_redirects());
1176    }
1177
1178    #[test]
1179    fn unsafe_redirects_dev_null_ok() {
1180        assert!(!seg("cmd > /dev/null").has_unsafe_redirects());
1181    }
1182
1183    #[test]
1184    fn unsafe_redirects_fd_ok() {
1185        assert!(!seg("cmd 2>&1").has_unsafe_redirects());
1186    }
1187
1188    #[test]
1189    fn unsafe_redirects_no_backtick_check() {
1190        assert!(!seg("echo `ls`").has_unsafe_redirects());
1191    }
1192
1193    #[test]
1194    fn unsafe_redirects_no_dollar_paren_check() {
1195        assert!(!seg("echo $(ls)").has_unsafe_redirects());
1196    }
1197
1198    #[test]
1199    fn unsafe_redirects_here_string_ok() {
1200        assert!(!seg("grep -c , <<< 'hello'").has_unsafe_redirects());
1201    }
1202
1203    #[test]
1204    fn unsafe_redirects_heredoc_still_blocked() {
1205        assert!(seg("cat << EOF").has_unsafe_redirects());
1206    }
1207}