Skip to main content

safe_chains/
parse.rs

1use std::ops::Deref;
2
/// A complete shell command line, stored as the raw text it was created from.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CommandLine(String);
5
/// One piece of a command line, stored as raw (still shell-quoted) text.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Segment(String);
8
/// A single word of a segment, stored as an owned string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Token(String);
11
12impl Deref for Token {
13    type Target = str;
14    fn deref(&self) -> &str {
15        &self.0
16    }
17}
18
/// A fixed, statically allocated list of words.
///
/// The wrapped slice is looked up with binary search, so it is expected to
/// be sorted in ascending byte order.
#[derive(Clone, Copy)]
pub struct WordSet(&'static [&'static str]);
21
22impl WordSet {
23    pub const fn new(words: &'static [&'static str]) -> Self {
24        let mut i = 1;
25        while i < words.len() {
26            assert!(
27                const_less(words[i - 1].as_bytes(), words[i].as_bytes()),
28                "WordSet: entries must be sorted, no duplicates"
29            );
30            i += 1;
31        }
32        Self(words)
33    }
34
35    pub const fn flags(words: &'static [&'static str]) -> Self {
36        let mut i = 0;
37        while i < words.len() {
38            let b = words[i].as_bytes();
39            assert!(b.len() >= 2, "WordSet::flags: flag too short (need at least 2 chars)");
40            assert!(b[0] == b'-', "WordSet::flags: flag must start with '-'");
41            if b[1] == b'-' {
42                assert!(b.len() >= 3, "WordSet::flags: long flag needs at least 3 chars (e.g. --x)");
43            }
44            i += 1;
45        }
46        Self::new(words)
47    }
48
49    pub fn contains(&self, s: &str) -> bool {
50        self.0.binary_search(&s).is_ok()
51    }
52
53    pub fn contains_short(&self, b: u8) -> bool {
54        let target = [b'-', b];
55        std::str::from_utf8(&target).is_ok_and(|s| self.0.binary_search(&s).is_ok())
56    }
57
58    pub fn iter(&self) -> impl Iterator<Item = &'static str> + '_ {
59        self.0.iter().copied()
60    }
61}
62
/// Const-evaluable "strictly less than" for byte strings, using
/// lexicographic order: the first differing byte decides, and when one input
/// is a prefix of the other the shorter one is the smaller.
const fn const_less(a: &[u8], b: &[u8]) -> bool {
    let mut pos = 0;
    while pos < a.len() && pos < b.len() {
        if a[pos] != b[pos] {
            return a[pos] < b[pos];
        }
        pos += 1;
    }
    // All shared positions are equal, so only the lengths can differ.
    a.len() < b.len()
}
77
78
79impl CommandLine {
80    pub fn new(s: impl Into<String>) -> Self {
81        Self(s.into())
82    }
83
84    pub fn as_str(&self) -> &str {
85        &self.0
86    }
87
88    pub fn segments(&self) -> Vec<Segment> {
89        split_outside_quotes(&self.0)
90            .into_iter()
91            .map(Segment)
92            .collect()
93    }
94}
95
96impl Segment {
97    pub fn as_str(&self) -> &str {
98        &self.0
99    }
100
101    pub fn is_empty(&self) -> bool {
102        self.0.is_empty()
103    }
104
105    pub fn from_raw(s: String) -> Self {
106        Segment(s)
107    }
108
109    pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
110        Segment(shell_words::join(words))
111    }
112
113    pub fn tokenize(&self) -> Option<Vec<Token>> {
114        shell_words::split(&self.0)
115            .ok()
116            .map(|v| v.into_iter().map(Token).collect())
117    }
118
119    pub fn has_unsafe_shell_syntax(&self) -> bool {
120        check_unsafe_shell_syntax(&self.0)
121    }
122
123    pub fn has_unsafe_redirects(&self) -> bool {
124        check_unsafe_redirects(&self.0)
125    }
126
127    pub(crate) fn extract_substitutions(&self) -> Result<(Vec<String>, String), ()> {
128        extract_substitutions(&self.0)
129    }
130
131    pub fn strip_env_prefix(&self) -> Segment {
132        Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
133    }
134
135    pub fn is_bare_assignment(&self) -> bool {
136        is_bare_assignment(&self.0)
137    }
138
139    pub fn from_tokens_replacing(tokens: &[Token], find: &str, replace: &str) -> Self {
140        let words: Vec<&str> = tokens
141            .iter()
142            .map(|t| if t.as_str() == find { replace } else { t.as_str() })
143            .collect();
144        Self::from_words(&words)
145    }
146
147    pub fn strip_fd_redirects(&self) -> Segment {
148        match self.tokenize() {
149            Some(tokens) => {
150                let filtered: Vec<_> = tokens
151                    .into_iter()
152                    .filter(|t| !t.is_fd_redirect())
153                    .collect();
154                Token::join(&filtered)
155            }
156            None => Segment(self.0.clone()),
157        }
158    }
159}
160
161impl Token {
162    #[cfg(test)]
163    pub(crate) fn from_test(s: &str) -> Self {
164        Self(s.to_string())
165    }
166
167    pub fn as_str(&self) -> &str {
168        &self.0
169    }
170
171    pub fn join(tokens: &[Token]) -> Segment {
172        Segment(shell_words::join(tokens.iter().map(|t| t.as_str())))
173    }
174
175    pub fn as_command_line(&self) -> CommandLine {
176        CommandLine(self.0.clone())
177    }
178
179    pub fn command_name(&self) -> &str {
180        self.as_str().rsplit('/').next().unwrap_or(self.as_str())
181    }
182
183    pub fn is_one_of(&self, options: &[&str]) -> bool {
184        options.contains(&self.as_str())
185    }
186
187    pub fn split_value(&self, sep: &str) -> Option<&str> {
188        self.as_str().split_once(sep).map(|(_, v)| v)
189    }
190
191    pub fn content_outside_double_quotes(&self) -> String {
192        let bytes = self.as_str().as_bytes();
193        let mut result = Vec::with_capacity(bytes.len());
194        let mut i = 0;
195        while i < bytes.len() {
196            if bytes[i] == b'"' {
197                result.push(b' ');
198                i += 1;
199                while i < bytes.len() {
200                    if bytes[i] == b'\\' && i + 1 < bytes.len() {
201                        i += 2;
202                        continue;
203                    }
204                    if bytes[i] == b'"' {
205                        i += 1;
206                        break;
207                    }
208                    i += 1;
209                }
210            } else {
211                result.push(bytes[i]);
212                i += 1;
213            }
214        }
215        String::from_utf8(result).unwrap_or_default()
216    }
217
218    pub fn is_fd_redirect(&self) -> bool {
219        let s = self.as_str();
220        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
221        if rest.len() < 2 || !rest.starts_with(">&") {
222            return false;
223        }
224        let after = &rest[2..];
225        !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
226    }
227
228    pub fn is_dev_null_redirect(&self) -> bool {
229        let s = self.as_str();
230        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
231        rest.strip_prefix(">>")
232            .or_else(|| rest.strip_prefix('>'))
233            .or_else(|| rest.strip_prefix('<'))
234            .is_some_and(|after| after == "/dev/null")
235    }
236
237    pub fn is_redirect_operator(&self) -> bool {
238        let s = self.as_str();
239        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
240        matches!(rest, ">" | ">>" | "<")
241    }
242}
243
244impl PartialEq<str> for Token {
245    fn eq(&self, other: &str) -> bool {
246        self.0 == other
247    }
248}
249
250impl PartialEq<&str> for Token {
251    fn eq(&self, other: &&str) -> bool {
252        self.0 == *other
253    }
254}
255
256impl PartialEq<Token> for str {
257    fn eq(&self, other: &Token) -> bool {
258        self == other.as_str()
259    }
260}
261
262impl PartialEq<Token> for &str {
263    fn eq(&self, other: &Token) -> bool {
264        *self == other.as_str()
265    }
266}
267
268impl std::fmt::Display for Token {
269    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
270        f.write_str(&self.0)
271    }
272}
273
274pub fn has_flag(tokens: &[Token], short: Option<&str>, long: Option<&str>) -> bool {
275    for token in &tokens[1..] {
276        if token == "--" {
277            return false;
278        }
279        if let Some(long_flag) = long
280            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
281        {
282            return true;
283        }
284        if let Some(short_flag) = short {
285            let short_char = short_flag.trim_start_matches('-');
286            if token.starts_with('-')
287                && !token.starts_with("--")
288                && token[1..].contains(short_char)
289            {
290                return true;
291            }
292        }
293    }
294    false
295}
296
/// Splits `cmd` into pieces at `|`, `&`, `&&`, `;` and newline characters
/// that appear outside quotes, outside `$(...)` and outside backticks.
///
/// Details the scanner implements:
/// * a backslash (except inside single quotes) keeps the next character
///   from being interpreted;
/// * quote characters seen inside `$(...)` or backticks do not change the
///   quote state;
/// * an `&` directly after a `>` (as in `2>&1`) does not split;
/// * each piece is trimmed and empty pieces are dropped.
fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;
    let mut subst_depth: u32 = 0;
    let mut backticked = false;
    let mut stream = cmd.chars().peekable();

    while let Some(ch) = stream.next() {
        if after_backslash {
            after_backslash = false;
            buf.push(ch);
            continue;
        }

        // Snapshot of the state before `ch` is interpreted.
        let nested = subst_depth > 0 || backticked;
        let unquoted = !single_quoted && !double_quoted;

        match ch {
            '\\' if !single_quoted => {
                after_backslash = true;
                buf.push(ch);
            }
            '\'' if !double_quoted && !nested => {
                single_quoted = !single_quoted;
                buf.push(ch);
            }
            '"' if !single_quoted && !nested => {
                double_quoted = !double_quoted;
                buf.push(ch);
            }
            '`' if unquoted => {
                backticked = !backticked;
                buf.push(ch);
            }
            '$' if unquoted && stream.peek() == Some(&'(') => {
                subst_depth += 1;
                buf.push(ch);
                if let Some(open) = stream.next() {
                    buf.push(open);
                }
            }
            '(' if unquoted && subst_depth > 0 => {
                subst_depth += 1;
                buf.push(ch);
            }
            ')' if unquoted && subst_depth > 0 => {
                subst_depth -= 1;
                buf.push(ch);
            }
            '|' | ';' | '\n' if unquoted && !nested => {
                pieces.push(std::mem::take(&mut buf));
            }
            '&' if unquoted && !nested && !buf.ends_with('>') => {
                pieces.push(std::mem::take(&mut buf));
                // Treat `&&` as a single separator.
                if stream.peek() == Some(&'&') {
                    stream.next();
                }
            }
            _ => buf.push(ch),
        }
    }
    pieces.push(buf);

    pieces
        .iter()
        .map(|piece| piece.trim())
        .filter(|piece| !piece.is_empty())
        .map(str::to_string)
        .collect()
}
379
380fn check_unsafe_shell_syntax(segment: &str) -> bool {
381    let mut in_single = false;
382    let mut in_double = false;
383    let mut escaped = false;
384    let chars: Vec<char> = segment.chars().collect();
385    let mut skip_until = 0;
386
387    for (i, &c) in chars.iter().enumerate() {
388        if i < skip_until {
389            continue;
390        }
391        if escaped {
392            escaped = false;
393            continue;
394        }
395        if c == '\\' && !in_single {
396            escaped = true;
397            continue;
398        }
399        if c == '\'' && !in_double {
400            in_single = !in_single;
401            continue;
402        }
403        if c == '"' && !in_single {
404            in_double = !in_double;
405            continue;
406        }
407        if !in_single && !in_double {
408            if c == '>' || c == '<' {
409                if c == '<' && chars.get(i + 1) == Some(&'<') && chars.get(i + 2) == Some(&'<') {
410                    skip_until = i + 3;
411                    continue;
412                }
413                let next = chars.get(i + 1);
414                if next == Some(&'&')
415                    && chars
416                        .get(i + 2)
417                        .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
418                {
419                    continue;
420                }
421                if is_dev_null_target(&chars, i + 1, c) {
422                    continue;
423                }
424                return true;
425            }
426            if c == '`' {
427                return true;
428            }
429            if c == '$' && chars.get(i + 1) == Some(&'(') {
430                return true;
431            }
432        }
433    }
434    false
435}
436
437fn check_unsafe_redirects(segment: &str) -> bool {
438    let mut in_single = false;
439    let mut in_double = false;
440    let mut escaped = false;
441    let chars: Vec<char> = segment.chars().collect();
442    let mut skip_until = 0;
443
444    for (i, &c) in chars.iter().enumerate() {
445        if i < skip_until {
446            continue;
447        }
448        if escaped {
449            escaped = false;
450            continue;
451        }
452        if c == '\\' && !in_single {
453            escaped = true;
454            continue;
455        }
456        if c == '\'' && !in_double {
457            in_single = !in_single;
458            continue;
459        }
460        if c == '"' && !in_single {
461            in_double = !in_double;
462            continue;
463        }
464        if !in_single && !in_double && (c == '>' || c == '<') {
465            if c == '<' && chars.get(i + 1) == Some(&'<') && chars.get(i + 2) == Some(&'<') {
466                skip_until = i + 3;
467                continue;
468            }
469            let next = chars.get(i + 1);
470            if next == Some(&'&')
471                && chars
472                    .get(i + 2)
473                    .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
474            {
475                continue;
476            }
477            if is_dev_null_target(&chars, i + 1, c) {
478                continue;
479            }
480            return true;
481        }
482    }
483    false
484}
485
486fn extract_substitutions(segment: &str) -> Result<(Vec<String>, String), ()> {
487    let mut subs = Vec::new();
488    let mut cleaned = String::with_capacity(segment.len());
489    let mut in_single = false;
490    let mut in_double = false;
491    let mut escaped = false;
492    let chars: Vec<char> = segment.chars().collect();
493    let mut i = 0;
494
495    while i < chars.len() {
496        if escaped {
497            escaped = false;
498            cleaned.push(chars[i]);
499            i += 1;
500            continue;
501        }
502        if chars[i] == '\\' && !in_single {
503            escaped = true;
504            cleaned.push(chars[i]);
505            i += 1;
506            continue;
507        }
508        if chars[i] == '\'' && !in_double {
509            in_single = !in_single;
510            cleaned.push(chars[i]);
511            i += 1;
512            continue;
513        }
514        if chars[i] == '"' && !in_single {
515            in_double = !in_double;
516            cleaned.push(chars[i]);
517            i += 1;
518            continue;
519        }
520        if !in_single {
521            if chars[i] == '`' {
522                let start = i + 1;
523                let end = find_matching_backtick(&chars, start).ok_or(())?;
524                let inner: String = chars[start..end].iter().collect();
525                subs.push(inner);
526                cleaned.push('_');
527                i = end + 1;
528                continue;
529            }
530            if chars[i] == '$' && chars.get(i + 1) == Some(&'(') {
531                let start = i + 2;
532                let end = find_matching_paren(&chars, start).ok_or(())?;
533                let inner: String = chars[start..end].iter().collect();
534                subs.push(inner);
535                cleaned.push('_');
536                i = end + 1;
537                continue;
538            }
539        }
540        cleaned.push(chars[i]);
541        i += 1;
542    }
543    Ok((subs, cleaned))
544}
545
/// Returns the index of the first backtick at or after `start` that is not
/// backslash-escaped and not inside single or double quotes, or `None` when
/// there is none.
fn find_matching_backtick(chars: &[char], start: usize) -> Option<usize> {
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (idx, &c) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '`' if !in_single && !in_double => return Some(idx),
            _ => {}
        }
    }
    None
}
579
/// Returns the index of the `)` that closes a parenthesis assumed to have
/// been opened just before `start`, or `None` when it is never closed.
///
/// Parentheses that are backslash-escaped or inside single or double quotes
/// are ignored; nested unquoted pairs are balanced.
fn find_matching_paren(chars: &[char], start: usize) -> Option<usize> {
    // One level is already open when scanning begins.
    let mut open_count = 1u32;
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (idx, &c) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '(' if !in_single && !in_double => open_count += 1,
            ')' if !in_single && !in_double => {
                open_count -= 1;
                if open_count == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }
    None
}
621
/// The characters of the path `/dev/null`.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Returns `true` when the redirect whose operator character precedes
/// `start` targets exactly `/dev/null`.
///
/// For a `>` redirect one further `>` (the append form) is skipped, then any
/// spaces. The path must be followed by the end of input, whitespace, or one
/// of `; | & )`, so longer paths such as `/dev/null/foo` do not match.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut pos = start;
    if redirect_char == '>' && chars.get(pos) == Some(&'>') {
        pos += 1;
    }
    while chars.get(pos) == Some(&' ') {
        pos += 1;
    }

    let end = pos + DEV_NULL.len();
    match chars.get(pos..end) {
        Some(path) if path == &DEV_NULL[..] => {}
        _ => return false,
    }

    match chars.get(end) {
        None => true,
        Some(&next) => next.is_whitespace() || ";|&)".contains(next),
    }
}
641
/// Returns the byte offset of the first space in `s` that is not
/// backslash-escaped and not inside single or double quotes, or `None` when
/// there is no such space.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (offset, &byte) in s.as_bytes().iter().enumerate() {
        if escaped {
            escaped = false;
            continue;
        }
        match byte {
            b'\\' if !in_single => escaped = true,
            b'\'' if !in_double => in_single = !in_single,
            b'"' if !in_single => in_double = !in_double,
            b' ' if !in_single && !in_double => return Some(offset),
            _ => {}
        }
    }
    None
}
669
/// Returns `true` when `s` is non-empty, starts with an ASCII letter or `_`,
/// and otherwise contains only ASCII letters, digits and `_`.
fn is_shell_var_name(s: &str) -> bool {
    let mut bytes = s.bytes();
    match bytes.next() {
        Some(first) if first.is_ascii_alphabetic() || first == b'_' => {
            bytes.all(|b| b.is_ascii_alphanumeric() || b == b'_')
        }
        _ => false,
    }
}
678
679fn is_bare_assignment(segment: &str) -> bool {
680    let trimmed = segment.trim();
681    if trimmed.is_empty() {
682        return false;
683    }
684    let mut rest = trimmed;
685    let mut found = false;
686    loop {
687        let trimmed = rest.trim_start();
688        if trimmed.is_empty() {
689            return found;
690        }
691        let Some(eq_pos) = trimmed.find('=') else {
692            return false;
693        };
694        let key = &trimmed[..eq_pos];
695        if !is_shell_var_name(key) {
696            return false;
697        }
698        let after_eq = &trimmed[eq_pos..];
699        match find_unquoted_space(after_eq) {
700            Some(space_pos) => {
701                rest = &after_eq[space_pos..];
702                found = true;
703            }
704            None => return true,
705        }
706    }
707}
708
709fn strip_env_prefix_str(segment: &str) -> &str {
710    let mut rest = segment;
711    loop {
712        let trimmed = rest.trim_start();
713        if trimmed.is_empty() {
714            return trimmed;
715        }
716        let bytes = trimmed.as_bytes();
717        if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
718            return trimmed;
719        }
720        if let Some(eq_pos) = trimmed.find('=') {
721            let key = &trimmed[..eq_pos];
722            let valid_key = key
723                .bytes()
724                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
725            if !valid_key {
726                return trimmed;
727            }
728            if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
729                rest = &trimmed[eq_pos + space_pos..];
730                continue;
731            }
732            return trimmed;
733        }
734        return trimmed;
735    }
736}
737
738#[cfg(test)]
739mod tests {
740    use super::*;
741
742    fn seg(s: &str) -> Segment {
743        Segment(s.to_string())
744    }
745
746    fn tok(s: &str) -> Token {
747        Token(s.to_string())
748    }
749
750    fn toks(words: &[&str]) -> Vec<Token> {
751        words.iter().map(|s| tok(s)).collect()
752    }
753
754    #[test]
755    fn split_pipe() {
756        let segs = CommandLine::new("grep foo | head -5").segments();
757        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
758    }
759
760    #[test]
761    fn split_and() {
762        let segs = CommandLine::new("ls && echo done").segments();
763        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
764    }
765
766    #[test]
767    fn split_semicolon() {
768        let segs = CommandLine::new("ls; echo done").segments();
769        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
770    }
771
772    #[test]
773    fn split_preserves_quoted_pipes() {
774        let segs = CommandLine::new("echo 'a | b' foo").segments();
775        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
776    }
777
778    #[test]
779    fn split_background_operator() {
780        let segs = CommandLine::new("cat file & rm -rf /").segments();
781        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
782    }
783
784    #[test]
785    fn split_newline() {
786        let segs = CommandLine::new("echo foo\necho bar").segments();
787        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
788    }
789
790    #[test]
791    fn unsafe_redirect() {
792        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
793    }
794
795    #[test]
796    fn safe_fd_redirect_stderr_to_stdout() {
797        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
798    }
799
800    #[test]
801    fn safe_fd_redirect_close() {
802        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
803    }
804
805    #[test]
806    fn unsafe_redirect_ampersand_no_digit() {
807        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
808    }
809
810    #[test]
811    fn unsafe_backtick() {
812        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
813    }
814
815    #[test]
816    fn unsafe_command_substitution() {
817        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
818    }
819
820    #[test]
821    fn safe_quoted_dollar_paren() {
822        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
823    }
824
825    #[test]
826    fn safe_quoted_redirect() {
827        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
828    }
829
830    #[test]
831    fn safe_no_special_chars() {
832        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
833    }
834
835    #[test]
836    fn safe_redirect_to_dev_null() {
837        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
838    }
839
840    #[test]
841    fn safe_redirect_stderr_to_dev_null() {
842        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
843    }
844
845    #[test]
846    fn safe_redirect_append_to_dev_null() {
847        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
848    }
849
850    #[test]
851    fn safe_redirect_space_dev_null() {
852        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
853    }
854
855    #[test]
856    fn safe_redirect_input_dev_null() {
857        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
858    }
859
860    #[test]
861    fn safe_redirect_both_dev_null() {
862        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
863    }
864
865    #[test]
866    fn unsafe_redirect_dev_null_prefix() {
867        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
868    }
869
870    #[test]
871    fn unsafe_redirect_dev_null_path_traversal() {
872        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
873    }
874
875    #[test]
876    fn unsafe_redirect_dev_null_subpath() {
877        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
878    }
879
880    #[test]
881    fn unsafe_redirect_to_file() {
882        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
883    }
884
885    #[test]
886    fn safe_here_string() {
887        assert!(!seg("grep -c , <<< 'hello world'").has_unsafe_shell_syntax());
888    }
889
890    #[test]
891    fn safe_here_string_double_quoted() {
892        assert!(!seg("cat <<< \"some text\"").has_unsafe_shell_syntax());
893    }
894
895    #[test]
896    fn unsafe_heredoc_still_blocked() {
897        assert!(seg("cat << EOF").has_unsafe_shell_syntax());
898    }
899
900    #[test]
901    fn unsafe_input_redirect_still_blocked() {
902        assert!(seg("cmd < file.txt").has_unsafe_shell_syntax());
903    }
904
905    #[test]
906    fn has_flag_short() {
907        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
908        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
909    }
910
911    #[test]
912    fn has_flag_long_with_eq() {
913        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
914        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
915    }
916
917    #[test]
918    fn has_flag_combined_short() {
919        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
920        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
921    }
922
923    #[test]
924    fn has_flag_stops_at_double_dash() {
925        let tokens = toks(&["cmd", "--", "-i"]);
926        assert!(!has_flag(&tokens, Some("-i"), Some("--in-place")));
927    }
928
929    #[test]
930    fn has_flag_long_only() {
931        let tokens = toks(&["sort", "--compress-program", "gzip", "file.txt"]);
932        assert!(has_flag(&tokens, None, Some("--compress-program")));
933    }
934
935    #[test]
936    fn has_flag_long_only_eq() {
937        let tokens = toks(&["sort", "--compress-program=gzip", "file.txt"]);
938        assert!(has_flag(&tokens, None, Some("--compress-program")));
939    }
940
941    #[test]
942    fn has_flag_long_only_absent() {
943        let tokens = toks(&["sort", "-r", "file.txt"]);
944        assert!(!has_flag(&tokens, None, Some("--compress-program")));
945    }
946
947    #[test]
948    fn strip_single_env_var() {
949        assert_eq!(
950            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
951            seg("bundle exec rspec")
952        );
953    }
954
955    #[test]
956    fn strip_multiple_env_vars() {
957        assert_eq!(
958            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
959            seg("bundle exec rspec")
960        );
961    }
962
963    #[test]
964    fn strip_no_env_var() {
965        assert_eq!(
966            seg("bundle exec rspec").strip_env_prefix(),
967            seg("bundle exec rspec")
968        );
969    }
970
971    #[test]
972    fn tokenize_simple() {
973        assert_eq!(
974            seg("grep foo file.txt").tokenize(),
975            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
976        );
977    }
978
979    #[test]
980    fn tokenize_quoted() {
981        assert_eq!(
982            seg("echo 'hello world'").tokenize(),
983            Some(vec![tok("echo"), tok("hello world")])
984        );
985    }
986
987    #[test]
988    fn strip_env_quoted_single() {
989        assert_eq!(
990            seg("FOO='bar baz' ls").strip_env_prefix(),
991            seg("ls")
992        );
993    }
994
995    #[test]
996    fn strip_env_quoted_double() {
997        assert_eq!(
998            seg("FOO=\"bar baz\" ls").strip_env_prefix(),
999            seg("ls")
1000        );
1001    }
1002
1003    #[test]
1004    fn strip_env_quoted_with_equals() {
1005        assert_eq!(
1006            seg("FOO='a=b' ls").strip_env_prefix(),
1007            seg("ls")
1008        );
1009    }
1010
1011    #[test]
1012    fn strip_env_quoted_multiple() {
1013        assert_eq!(
1014            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
1015            seg("cmd")
1016        );
1017    }
1018
1019    #[test]
1020    fn bare_assignment_single() {
1021        assert!(seg("out=_").is_bare_assignment());
1022    }
1023
1024    #[test]
1025    fn bare_assignment_multiple() {
1026        assert!(seg("a=_ b=_").is_bare_assignment());
1027    }
1028
1029    #[test]
1030    fn bare_assignment_lowercase() {
1031        assert!(seg("result=hello").is_bare_assignment());
1032    }
1033
1034    #[test]
1035    fn bare_assignment_uppercase() {
1036        assert!(seg("FOO=bar").is_bare_assignment());
1037    }
1038
1039    #[test]
1040    fn bare_assignment_underscore_prefix() {
1041        assert!(seg("_foo=bar").is_bare_assignment());
1042    }
1043
1044    #[test]
1045    fn bare_assignment_quoted_value() {
1046        assert!(seg("out='hello world'").is_bare_assignment());
1047    }
1048
1049    #[test]
1050    fn not_bare_assignment_with_command() {
1051        assert!(!seg("FOO=bar ls").is_bare_assignment());
1052    }
1053
1054    #[test]
1055    fn not_bare_assignment_no_equals() {
1056        assert!(!seg("foobar").is_bare_assignment());
1057    }
1058
1059    #[test]
1060    fn not_bare_assignment_empty() {
1061        assert!(!seg("").is_bare_assignment());
1062    }
1063
1064    #[test]
1065    fn not_bare_assignment_starts_with_digit() {
1066        assert!(!seg("1foo=bar").is_bare_assignment());
1067    }
1068
1069    #[test]
1070    fn command_name_simple() {
1071        assert_eq!(tok("ls").command_name(), "ls");
1072    }
1073
1074    #[test]
1075    fn command_name_with_path() {
1076        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
1077    }
1078
1079    #[test]
1080    fn command_name_relative_path() {
1081        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
1082    }
1083
1084    #[test]
1085    fn fd_redirect_detection() {
1086        assert!(tok("2>&1").is_fd_redirect());
1087        assert!(tok(">&2").is_fd_redirect());
1088        assert!(tok("10>&1").is_fd_redirect());
1089        assert!(tok("255>&2").is_fd_redirect());
1090        assert!(tok("2>&-").is_fd_redirect());
1091        assert!(tok("2>&10").is_fd_redirect());
1092        assert!(!tok(">").is_fd_redirect());
1093        assert!(!tok("/dev/null").is_fd_redirect());
1094        assert!(!tok(">&").is_fd_redirect());
1095        assert!(!tok("").is_fd_redirect());
1096        assert!(!tok("42").is_fd_redirect());
1097        assert!(!tok("123abc").is_fd_redirect());
1098    }
1099
1100    #[test]
1101    fn dev_null_redirect_single_token() {
1102        assert!(tok(">/dev/null").is_dev_null_redirect());
1103        assert!(tok(">>/dev/null").is_dev_null_redirect());
1104        assert!(tok("2>/dev/null").is_dev_null_redirect());
1105        assert!(tok("2>>/dev/null").is_dev_null_redirect());
1106        assert!(tok("</dev/null").is_dev_null_redirect());
1107        assert!(tok("10>/dev/null").is_dev_null_redirect());
1108        assert!(tok("255>/dev/null").is_dev_null_redirect());
1109        assert!(!tok(">/tmp/file").is_dev_null_redirect());
1110        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
1111        assert!(!tok("ls").is_dev_null_redirect());
1112        assert!(!tok("").is_dev_null_redirect());
1113        assert!(!tok("42").is_dev_null_redirect());
1114        assert!(!tok("<</dev/null").is_dev_null_redirect());
1115    }
1116
1117    #[test]
1118    fn redirect_operator_detection() {
1119        assert!(tok(">").is_redirect_operator());
1120        assert!(tok(">>").is_redirect_operator());
1121        assert!(tok("<").is_redirect_operator());
1122        assert!(tok("2>").is_redirect_operator());
1123        assert!(tok("2>>").is_redirect_operator());
1124        assert!(tok("10>").is_redirect_operator());
1125        assert!(tok("255>>").is_redirect_operator());
1126        assert!(!tok("ls").is_redirect_operator());
1127        assert!(!tok(">&1").is_redirect_operator());
1128        assert!(!tok("/dev/null").is_redirect_operator());
1129        assert!(!tok("").is_redirect_operator());
1130        assert!(!tok("42").is_redirect_operator());
1131        assert!(!tok("<<").is_redirect_operator());
1132    }
1133
1134    #[test]
1135    fn reverse_partial_eq() {
1136        let t = tok("hello");
1137        assert!("hello" == t);
1138        assert!("world" != t);
1139        let s: &str = "hello";
1140        assert!(s == t);
1141    }
1142
1143    #[test]
1144    fn token_deref() {
1145        let t = tok("--flag");
1146        assert!(t.starts_with("--"));
1147        assert!(t.contains("fl"));
1148        assert_eq!(t.len(), 6);
1149        assert!(!t.is_empty());
1150        assert_eq!(t.as_bytes()[0], b'-');
1151        assert!(t.eq_ignore_ascii_case("--FLAG"));
1152        assert_eq!(t.get(2..), Some("flag"));
1153    }
1154
1155    #[test]
1156    fn token_is_one_of() {
1157        assert!(tok("-v").is_one_of(&["-v", "--verbose"]));
1158        assert!(!tok("-q").is_one_of(&["-v", "--verbose"]));
1159    }
1160
1161    #[test]
1162    fn token_split_value() {
1163        assert_eq!(tok("--method=GET").split_value("="), Some("GET"));
1164        assert_eq!(tok("--flag").split_value("="), None);
1165    }
1166
1167    #[test]
1168    fn word_set_contains() {
1169        let set = WordSet::new(&["list", "show", "view"]);
1170        assert!(set.contains(&tok("list")));
1171        assert!(set.contains(&tok("view")));
1172        assert!(!set.contains(&tok("delete")));
1173        assert!(set.contains("list"));
1174        assert!(!set.contains("delete"));
1175    }
1176
1177    #[test]
1178    fn word_set_iter() {
1179        let set = WordSet::new(&["a", "b", "c"]);
1180        let items: Vec<&str> = set.iter().collect();
1181        assert_eq!(items, vec!["a", "b", "c"]);
1182    }
1183
1184    #[test]
1185    fn token_as_command_line() {
1186        let cl = tok("ls -la | grep foo").as_command_line();
1187        let segs = cl.segments();
1188        assert_eq!(segs, vec![seg("ls -la"), seg("grep foo")]);
1189    }
1190
1191    #[test]
1192    fn segment_from_tokens_replacing() {
1193        let tokens = toks(&["find", ".", "-name", "{}", "-print"]);
1194        let result = Segment::from_tokens_replacing(&tokens, "{}", "file");
1195        assert_eq!(result.tokenize().unwrap(), toks(&["find", ".", "-name", "file", "-print"]));
1196    }
1197
1198    #[test]
1199    fn segment_strip_fd_redirects() {
1200        assert_eq!(
1201            seg("cargo test 2>&1").strip_fd_redirects(),
1202            seg("cargo test")
1203        );
1204        assert_eq!(
1205            seg("cmd 2>&1 >&2").strip_fd_redirects(),
1206            seg("cmd")
1207        );
1208        assert_eq!(
1209            seg("ls -la").strip_fd_redirects(),
1210            seg("ls -la")
1211        );
1212    }
1213
1214    #[test]
1215    fn content_outside_double_quotes_strips_string() {
1216        assert_eq!(tok(r#""system""#).content_outside_double_quotes(), " ");
1217    }
1218
1219    #[test]
1220    fn content_outside_double_quotes_preserves_code() {
1221        let result = tok(r#"{print "hello"} END{print NR}"#).content_outside_double_quotes();
1222        assert_eq!(result, r#"{print  } END{print NR}"#);
1223    }
1224
1225    #[test]
1226    fn content_outside_double_quotes_escaped() {
1227        let result = tok(r#"{print "he said \"hi\""}"#).content_outside_double_quotes();
1228        assert_eq!(result, "{print  }");
1229    }
1230
1231    #[test]
1232    fn content_outside_double_quotes_no_quotes() {
1233        assert_eq!(tok("{print $1}").content_outside_double_quotes(), "{print $1}");
1234    }
1235
1236    #[test]
1237    fn content_outside_double_quotes_empty() {
1238        assert_eq!(tok("").content_outside_double_quotes(), "");
1239    }
1240
1241    #[test]
1242    fn extract_subs_none() {
1243        let (subs, cleaned) = seg("echo hello").extract_substitutions().unwrap();
1244        assert!(subs.is_empty());
1245        assert_eq!(cleaned, "echo hello");
1246    }
1247
1248    #[test]
1249    fn extract_subs_dollar_paren() {
1250        let (subs, cleaned) = seg("echo $(ls)").extract_substitutions().unwrap();
1251        assert_eq!(subs, vec!["ls"]);
1252        assert_eq!(cleaned, "echo _");
1253    }
1254
1255    #[test]
1256    fn extract_subs_backtick() {
1257        let (subs, cleaned) = seg("ls `pwd`").extract_substitutions().unwrap();
1258        assert_eq!(subs, vec!["pwd"]);
1259        assert_eq!(cleaned, "ls _");
1260    }
1261
1262    #[test]
1263    fn extract_subs_multiple() {
1264        let (subs, cleaned) = seg("echo $(cmd1) $(cmd2)").extract_substitutions().unwrap();
1265        assert_eq!(subs, vec!["cmd1", "cmd2"]);
1266        assert_eq!(cleaned, "echo _ _");
1267    }
1268
1269    #[test]
1270    fn extract_subs_nested() {
1271        let (subs, cleaned) = seg("echo $(echo $(ls))").extract_substitutions().unwrap();
1272        assert_eq!(subs, vec!["echo $(ls)"]);
1273        assert_eq!(cleaned, "echo _");
1274    }
1275
1276    #[test]
1277    fn extract_subs_quoted_skipped() {
1278        let (subs, cleaned) = seg("echo '$(safe)' arg").extract_substitutions().unwrap();
1279        assert!(subs.is_empty());
1280        assert_eq!(cleaned, "echo '$(safe)' arg");
1281    }
1282
1283    #[test]
1284    fn extract_subs_unmatched_backtick() {
1285        assert!(seg("echo `unclosed").extract_substitutions().is_err());
1286    }
1287
1288    #[test]
1289    fn extract_subs_unmatched_paren() {
1290        assert!(seg("echo $(unclosed").extract_substitutions().is_err());
1291    }
1292
1293    #[test]
1294    fn unsafe_redirects_to_file() {
1295        assert!(seg("echo hello > file.txt").has_unsafe_redirects());
1296    }
1297
1298    #[test]
1299    fn unsafe_redirects_dev_null_ok() {
1300        assert!(!seg("cmd > /dev/null").has_unsafe_redirects());
1301    }
1302
1303    #[test]
1304    fn unsafe_redirects_fd_ok() {
1305        assert!(!seg("cmd 2>&1").has_unsafe_redirects());
1306    }
1307
1308    #[test]
1309    fn unsafe_redirects_no_backtick_check() {
1310        assert!(!seg("echo `ls`").has_unsafe_redirects());
1311    }
1312
1313    #[test]
1314    fn unsafe_redirects_no_dollar_paren_check() {
1315        assert!(!seg("echo $(ls)").has_unsafe_redirects());
1316    }
1317
1318    #[test]
1319    fn unsafe_redirects_here_string_ok() {
1320        assert!(!seg("grep -c , <<< 'hello'").has_unsafe_redirects());
1321    }
1322
1323    #[test]
1324    fn unsafe_redirects_heredoc_still_blocked() {
1325        assert!(seg("cat << EOF").has_unsafe_redirects());
1326    }
1327}