Skip to main content

safe_chains/
parse.rs

1use std::ops::Deref;
2
/// A complete command line, held as one owned string.
///
/// Use [`CommandLine::segments`] to break it into [`Segment`]s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandLine(String);
5
/// One piece of a command line, held as its raw text.
///
/// Values of this type are returned by [`CommandLine::segments`] and can be
/// split further into [`Token`]s with [`Segment::tokenize`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Segment(String);
8
/// A single word of a [`Segment`], as returned by [`Segment::tokenize`].
///
/// Dereferences to `str`, so string methods can be called on it directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token(String);
11
12impl Deref for Token {
13    type Target = str;
14    fn deref(&self) -> &str {
15        &self.0
16    }
17}
18
19#[derive(Copy, Clone)]
20pub struct WordSet(&'static [&'static str]);
21
22impl WordSet {
23    pub const fn new(words: &'static [&'static str]) -> Self {
24        let mut i = 1;
25        while i < words.len() {
26            assert!(
27                const_less(words[i - 1].as_bytes(), words[i].as_bytes()),
28                "WordSet: entries must be sorted, no duplicates"
29            );
30            i += 1;
31        }
32        Self(words)
33    }
34
35    pub const fn flags(words: &'static [&'static str]) -> Self {
36        let mut i = 0;
37        while i < words.len() {
38            let b = words[i].as_bytes();
39            assert!(b.len() >= 2, "WordSet::flags: flag too short (need at least 2 chars)");
40            assert!(b[0] == b'-', "WordSet::flags: flag must start with '-'");
41            if b[1] == b'-' {
42                assert!(b.len() >= 3, "WordSet::flags: long flag needs at least 3 chars (e.g. --x)");
43            }
44            i += 1;
45        }
46        Self::new(words)
47    }
48
49    pub fn contains(&self, s: &str) -> bool {
50        self.0.binary_search(&s).is_ok()
51    }
52
53    pub fn contains_short(&self, b: u8) -> bool {
54        let target = [b'-', b];
55        std::str::from_utf8(&target).is_ok_and(|s| self.0.binary_search(&s).is_ok())
56    }
57
58    pub fn iter(&self) -> impl Iterator<Item = &'static str> + '_ {
59        self.0.iter().copied()
60    }
61}
62
/// Strict lexicographic "less than" on byte slices, usable in const contexts.
///
/// Returns `true` only when `a` sorts strictly before `b`; equal slices
/// compare as `false`.
const fn const_less(a: &[u8], b: &[u8]) -> bool {
    let mut pos = 0;
    // Walk the shared prefix; the first differing byte decides the order.
    while pos < a.len() && pos < b.len() {
        if a[pos] != b[pos] {
            return a[pos] < b[pos];
        }
        pos += 1;
    }
    // One slice is a prefix of the other: the shorter one sorts first.
    a.len() < b.len()
}
77
78
79impl CommandLine {
80    pub fn new(s: impl Into<String>) -> Self {
81        Self(s.into())
82    }
83
84    pub fn as_str(&self) -> &str {
85        &self.0
86    }
87
88    pub fn segments(&self) -> Vec<Segment> {
89        split_outside_quotes(&self.0)
90            .into_iter()
91            .map(Segment)
92            .collect()
93    }
94}
95
96impl Segment {
97    pub fn as_str(&self) -> &str {
98        &self.0
99    }
100
101    pub fn is_empty(&self) -> bool {
102        self.0.is_empty()
103    }
104
105    pub fn from_raw(s: String) -> Self {
106        Segment(s)
107    }
108
109    pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
110        Segment(shell_words::join(words))
111    }
112
113    pub fn tokenize(&self) -> Option<Vec<Token>> {
114        shell_words::split(&self.0)
115            .ok()
116            .map(|v| v.into_iter().map(Token).collect())
117    }
118
119    pub fn has_unsafe_shell_syntax(&self) -> bool {
120        check_unsafe_shell_syntax(&self.0)
121    }
122
123    pub fn has_unsafe_redirects(&self) -> bool {
124        check_unsafe_redirects(&self.0)
125    }
126
127    pub(crate) fn extract_substitutions(&self) -> Result<(Vec<String>, String), ()> {
128        extract_substitutions(&self.0)
129    }
130
131    pub fn strip_env_prefix(&self) -> Segment {
132        Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
133    }
134
135    pub fn is_bare_assignment(&self) -> bool {
136        is_bare_assignment(&self.0)
137    }
138
139    pub fn unwrap_subshell(&self) -> Option<&str> {
140        unwrap_subshell(&self.0)
141    }
142
143    pub fn from_tokens_replacing(tokens: &[Token], find: &str, replace: &str) -> Self {
144        let words: Vec<&str> = tokens
145            .iter()
146            .map(|t| if t.as_str() == find { replace } else { t.as_str() })
147            .collect();
148        Self::from_words(&words)
149    }
150
151    pub fn strip_fd_redirects(&self) -> Segment {
152        match self.tokenize() {
153            Some(tokens) => {
154                let filtered: Vec<_> = tokens
155                    .into_iter()
156                    .filter(|t| !t.is_fd_redirect())
157                    .collect();
158                Token::join(&filtered)
159            }
160            None => Segment(self.0.clone()),
161        }
162    }
163}
164
165impl Token {
166    #[cfg(test)]
167    pub(crate) fn from_test(s: &str) -> Self {
168        Self(s.to_string())
169    }
170
171    pub fn as_str(&self) -> &str {
172        &self.0
173    }
174
175    pub fn join(tokens: &[Token]) -> Segment {
176        Segment(shell_words::join(tokens.iter().map(|t| t.as_str())))
177    }
178
179    pub fn as_command_line(&self) -> CommandLine {
180        CommandLine(self.0.clone())
181    }
182
183    pub fn command_name(&self) -> &str {
184        self.as_str().rsplit('/').next().unwrap_or(self.as_str())
185    }
186
187    pub fn is_one_of(&self, options: &[&str]) -> bool {
188        options.contains(&self.as_str())
189    }
190
191    pub fn split_value(&self, sep: &str) -> Option<&str> {
192        self.as_str().split_once(sep).map(|(_, v)| v)
193    }
194
195    pub fn content_outside_double_quotes(&self) -> String {
196        let bytes = self.as_str().as_bytes();
197        let mut result = Vec::with_capacity(bytes.len());
198        let mut i = 0;
199        while i < bytes.len() {
200            if bytes[i] == b'"' {
201                result.push(b' ');
202                i += 1;
203                while i < bytes.len() {
204                    if bytes[i] == b'\\' && i + 1 < bytes.len() {
205                        i += 2;
206                        continue;
207                    }
208                    if bytes[i] == b'"' {
209                        i += 1;
210                        break;
211                    }
212                    i += 1;
213                }
214            } else {
215                result.push(bytes[i]);
216                i += 1;
217            }
218        }
219        String::from_utf8(result).unwrap_or_default()
220    }
221
222    pub fn is_fd_redirect(&self) -> bool {
223        let s = self.as_str();
224        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
225        if rest.len() < 2 || !rest.starts_with(">&") {
226            return false;
227        }
228        let after = &rest[2..];
229        !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
230    }
231
232    pub fn is_dev_null_redirect(&self) -> bool {
233        let s = self.as_str();
234        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
235        rest.strip_prefix(">>")
236            .or_else(|| rest.strip_prefix('>'))
237            .or_else(|| rest.strip_prefix('<'))
238            .is_some_and(|after| after == "/dev/null")
239    }
240
241    pub fn is_redirect_operator(&self) -> bool {
242        let s = self.as_str();
243        let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
244        matches!(rest, ">" | ">>" | "<")
245    }
246}
247
248impl PartialEq<str> for Token {
249    fn eq(&self, other: &str) -> bool {
250        self.0 == other
251    }
252}
253
254impl PartialEq<&str> for Token {
255    fn eq(&self, other: &&str) -> bool {
256        self.0 == *other
257    }
258}
259
260impl PartialEq<Token> for str {
261    fn eq(&self, other: &Token) -> bool {
262        self == other.as_str()
263    }
264}
265
266impl PartialEq<Token> for &str {
267    fn eq(&self, other: &Token) -> bool {
268        *self == other.as_str()
269    }
270}
271
272impl std::fmt::Display for Token {
273    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
274        f.write_str(&self.0)
275    }
276}
277
278pub fn has_flag(tokens: &[Token], short: Option<&str>, long: Option<&str>) -> bool {
279    for token in &tokens[1..] {
280        if token == "--" {
281            return false;
282        }
283        if let Some(long_flag) = long
284            && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
285        {
286            return true;
287        }
288        if let Some(short_flag) = short {
289            let short_char = short_flag.trim_start_matches('-');
290            if token.starts_with('-')
291                && !token.starts_with("--")
292                && token[1..].contains(short_char)
293            {
294                return true;
295            }
296        }
297    }
298    false
299}
300
301fn split_outside_quotes(cmd: &str) -> Vec<String> {
302    let mut segments = Vec::new();
303    let mut current = String::new();
304    let mut in_single = false;
305    let mut in_double = false;
306    let mut escaped = false;
307    let mut paren_depth: u32 = 0;
308    let mut in_backtick = false;
309    let mut chars = cmd.chars().peekable();
310
311    while let Some(c) = chars.next() {
312        if escaped {
313            current.push(c);
314            escaped = false;
315            continue;
316        }
317        if c == '\\' && !in_single {
318            escaped = true;
319            current.push(c);
320            continue;
321        }
322        if c == '\'' && !in_double && paren_depth == 0 && !in_backtick {
323            in_single = !in_single;
324            current.push(c);
325            continue;
326        }
327        if c == '"' && !in_single && paren_depth == 0 && !in_backtick {
328            in_double = !in_double;
329            current.push(c);
330            continue;
331        }
332        if !in_single && !in_double {
333            if c == '`' {
334                in_backtick = !in_backtick;
335                current.push(c);
336                continue;
337            }
338            if c == '$' && chars.peek() == Some(&'(') {
339                paren_depth += 1;
340                current.push(c);
341                if let Some(open) = chars.next() {
342                    current.push(open);
343                }
344                continue;
345            }
346            if c == '(' {
347                paren_depth += 1;
348                current.push(c);
349                continue;
350            }
351            if c == ')' && paren_depth > 0 {
352                paren_depth -= 1;
353                current.push(c);
354                continue;
355            }
356            if paren_depth == 0 && !in_backtick {
357                if c == '|' {
358                    segments.push(std::mem::take(&mut current));
359                    continue;
360                }
361                if c == '&' && !current.ends_with('>') {
362                    segments.push(std::mem::take(&mut current));
363                    if chars.peek() == Some(&'&') {
364                        chars.next();
365                    }
366                    continue;
367                }
368                if c == ';' || c == '\n' {
369                    segments.push(std::mem::take(&mut current));
370                    continue;
371                }
372            }
373        }
374        current.push(c);
375    }
376    segments.push(current);
377    segments
378        .into_iter()
379        .map(|s| s.trim().to_string())
380        .filter(|s| !s.is_empty())
381        .collect()
382}
383
384fn check_unsafe_shell_syntax(segment: &str) -> bool {
385    let mut in_single = false;
386    let mut in_double = false;
387    let mut escaped = false;
388    let chars: Vec<char> = segment.chars().collect();
389    let mut skip_until = 0;
390
391    for (i, &c) in chars.iter().enumerate() {
392        if i < skip_until {
393            continue;
394        }
395        if escaped {
396            escaped = false;
397            continue;
398        }
399        if c == '\\' && !in_single {
400            escaped = true;
401            continue;
402        }
403        if c == '\'' && !in_double {
404            in_single = !in_single;
405            continue;
406        }
407        if c == '"' && !in_single {
408            in_double = !in_double;
409            continue;
410        }
411        if !in_single && !in_double {
412            if c == '>' || c == '<' {
413                if c == '<' && chars.get(i + 1) == Some(&'<') && chars.get(i + 2) == Some(&'<') {
414                    skip_until = i + 3;
415                    continue;
416                }
417                let next = chars.get(i + 1);
418                if next == Some(&'&')
419                    && chars
420                        .get(i + 2)
421                        .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
422                {
423                    continue;
424                }
425                if is_dev_null_target(&chars, i + 1, c) {
426                    continue;
427                }
428                return true;
429            }
430            if c == '`' {
431                return true;
432            }
433            if c == '$' && chars.get(i + 1) == Some(&'(') {
434                return true;
435            }
436        }
437    }
438    false
439}
440
441fn check_unsafe_redirects(segment: &str) -> bool {
442    let mut in_single = false;
443    let mut in_double = false;
444    let mut escaped = false;
445    let chars: Vec<char> = segment.chars().collect();
446    let mut skip_until = 0;
447
448    for (i, &c) in chars.iter().enumerate() {
449        if i < skip_until {
450            continue;
451        }
452        if escaped {
453            escaped = false;
454            continue;
455        }
456        if c == '\\' && !in_single {
457            escaped = true;
458            continue;
459        }
460        if c == '\'' && !in_double {
461            in_single = !in_single;
462            continue;
463        }
464        if c == '"' && !in_single {
465            in_double = !in_double;
466            continue;
467        }
468        if !in_single && !in_double && (c == '>' || c == '<') {
469            if c == '<' && chars.get(i + 1) == Some(&'<') && chars.get(i + 2) == Some(&'<') {
470                skip_until = i + 3;
471                continue;
472            }
473            let next = chars.get(i + 1);
474            if next == Some(&'&')
475                && chars
476                    .get(i + 2)
477                    .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
478            {
479                continue;
480            }
481            if is_dev_null_target(&chars, i + 1, c) {
482                continue;
483            }
484            return true;
485        }
486    }
487    false
488}
489
490fn extract_substitutions(segment: &str) -> Result<(Vec<String>, String), ()> {
491    let mut subs = Vec::new();
492    let mut cleaned = String::with_capacity(segment.len());
493    let mut in_single = false;
494    let mut in_double = false;
495    let mut escaped = false;
496    let chars: Vec<char> = segment.chars().collect();
497    let mut i = 0;
498
499    while i < chars.len() {
500        if escaped {
501            escaped = false;
502            cleaned.push(chars[i]);
503            i += 1;
504            continue;
505        }
506        if chars[i] == '\\' && !in_single {
507            escaped = true;
508            cleaned.push(chars[i]);
509            i += 1;
510            continue;
511        }
512        if chars[i] == '\'' && !in_double {
513            in_single = !in_single;
514            cleaned.push(chars[i]);
515            i += 1;
516            continue;
517        }
518        if chars[i] == '"' && !in_single {
519            in_double = !in_double;
520            cleaned.push(chars[i]);
521            i += 1;
522            continue;
523        }
524        if !in_single {
525            if chars[i] == '`' {
526                let start = i + 1;
527                let end = find_matching_backtick(&chars, start).ok_or(())?;
528                let inner: String = chars[start..end].iter().collect();
529                subs.push(inner);
530                cleaned.push('_');
531                i = end + 1;
532                continue;
533            }
534            if chars[i] == '$' && chars.get(i + 1) == Some(&'(') {
535                let start = i + 2;
536                let end = find_matching_paren(&chars, start).ok_or(())?;
537                let inner: String = chars[start..end].iter().collect();
538                subs.push(inner);
539                cleaned.push('_');
540                i = end + 1;
541                continue;
542            }
543        }
544        cleaned.push(chars[i]);
545        i += 1;
546    }
547    Ok((subs, cleaned))
548}
549
/// Returns the index of the first backtick at or after `start` that is not
/// escaped and not inside single or double quotes, or `None` if there is none.
fn find_matching_backtick(chars: &[char], start: usize) -> Option<usize> {
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut escaped = false;

    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' if !single_quoted => escaped = true,
            '\'' if !double_quoted => single_quoted = !single_quoted,
            '"' if !single_quoted => double_quoted = !double_quoted,
            '`' if !single_quoted && !double_quoted => return Some(idx),
            _ => {}
        }
    }
    None
}
583
/// Returns the index of the `)` that closes a parenthesis assumed to have
/// been opened just before `start`, or `None` if it is never closed.
///
/// Parentheses that are escaped or inside single or double quotes are
/// ignored; nested unquoted pairs are balanced.
fn find_matching_paren(chars: &[char], start: usize) -> Option<usize> {
    let mut open_count = 1u32;
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut escaped = false;

    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' if !single_quoted => escaped = true,
            '\'' if !double_quoted => single_quoted = !single_quoted,
            '"' if !single_quoted => double_quoted = !double_quoted,
            _ if single_quoted || double_quoted => {}
            '(' => open_count += 1,
            ')' => {
                open_count -= 1;
                if open_count == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }
    None
}
625
/// The characters of the path `/dev/null`.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Checks whether the redirect target beginning at `chars[start]` is exactly
/// `/dev/null`.
///
/// For a `>` redirect one additional `>` is skipped first (the `>>` form).
/// Any run of spaces before the path is skipped. The path must be followed by
/// the end of input, whitespace, or one of `;`, `|`, `&`, `)`.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut cursor = start;
    if redirect_char == '>' && chars.get(cursor) == Some(&'>') {
        cursor += 1;
    }
    while chars.get(cursor) == Some(&' ') {
        cursor += 1;
    }

    let tail = match chars.get(cursor..) {
        Some(tail) => tail,
        None => return false,
    };
    if !tail.starts_with(&DEV_NULL) {
        return false;
    }
    match tail.get(DEV_NULL.len()) {
        None => true,
        Some(&after) => after.is_whitespace() || matches!(after, ';' | '|' | '&' | ')'),
    }
}
645
/// Returns the byte offset of the first ASCII space in `s` that is neither
/// escaped by a backslash nor inside single or double quotes.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut escaped = false;

    for (offset, &byte) in s.as_bytes().iter().enumerate() {
        if escaped {
            escaped = false;
            continue;
        }
        match byte {
            b'\\' if !single_quoted => escaped = true,
            b'\'' if !double_quoted => single_quoted = !single_quoted,
            b'"' if !single_quoted => double_quoted = !double_quoted,
            b' ' if !single_quoted && !double_quoted => return Some(offset),
            _ => {}
        }
    }
    None
}
673
/// Returns `true` when `s` is non-empty, starts with an ASCII letter or `_`,
/// and otherwise contains only ASCII letters, digits and `_`.
fn is_shell_var_name(s: &str) -> bool {
    match s.as_bytes().split_first() {
        Some((&head, tail)) => {
            let head_ok = head.is_ascii_alphabetic() || head == b'_';
            head_ok && tail.iter().all(|&b| b.is_ascii_alphanumeric() || b == b'_')
        }
        None => false,
    }
}
682
683fn is_bare_assignment(segment: &str) -> bool {
684    let trimmed = segment.trim();
685    if trimmed.is_empty() {
686        return false;
687    }
688    let mut rest = trimmed;
689    let mut found = false;
690    loop {
691        let trimmed = rest.trim_start();
692        if trimmed.is_empty() {
693            return found;
694        }
695        let Some(eq_pos) = trimmed.find('=') else {
696            return false;
697        };
698        let key = &trimmed[..eq_pos];
699        if !is_shell_var_name(key) {
700            return false;
701        }
702        let after_eq = &trimmed[eq_pos..];
703        match find_unquoted_space(after_eq) {
704            Some(space_pos) => {
705                rest = &after_eq[space_pos..];
706                found = true;
707            }
708            None => return true,
709        }
710    }
711}
712
/// If the trimmed `segment` is one parenthesised group `( ... )`, returns the
/// text between the outer parentheses; otherwise returns `None`.
///
/// The inner text must have balanced parentheses, counting only those that
/// are unescaped and outside single or double quotes.
fn unwrap_subshell(segment: &str) -> Option<&str> {
    let inner = segment.trim().strip_prefix('(')?.strip_suffix(')')?;

    let mut depth: u32 = 0;
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut escaped = false;

    for ch in inner.chars() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' if !single_quoted => escaped = true,
            '\'' if !double_quoted => single_quoted = !single_quoted,
            '"' if !single_quoted => double_quoted = !double_quoted,
            _ if single_quoted || double_quoted => {}
            '(' => depth += 1,
            // A `)` with nothing open means the outer pair did not match.
            ')' => depth = depth.checked_sub(1)?,
            _ => {}
        }
    }
    (depth == 0).then_some(inner)
}
753
754fn strip_env_prefix_str(segment: &str) -> &str {
755    let mut rest = segment;
756    loop {
757        let trimmed = rest.trim_start();
758        if trimmed.is_empty() {
759            return trimmed;
760        }
761        let bytes = trimmed.as_bytes();
762        if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
763            return trimmed;
764        }
765        if let Some(eq_pos) = trimmed.find('=') {
766            let key = &trimmed[..eq_pos];
767            let valid_key = key
768                .bytes()
769                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
770            if !valid_key {
771                return trimmed;
772            }
773            if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
774                rest = &trimmed[eq_pos + space_pos..];
775                continue;
776            }
777            return trimmed;
778        }
779        return trimmed;
780    }
781}
782
783#[cfg(test)]
784mod tests {
785    use super::*;
786
787    fn seg(s: &str) -> Segment {
788        Segment(s.to_string())
789    }
790
791    fn tok(s: &str) -> Token {
792        Token(s.to_string())
793    }
794
795    fn toks(words: &[&str]) -> Vec<Token> {
796        words.iter().map(|s| tok(s)).collect()
797    }
798
799    #[test]
800    fn split_pipe() {
801        let segs = CommandLine::new("grep foo | head -5").segments();
802        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
803    }
804
805    #[test]
806    fn split_and() {
807        let segs = CommandLine::new("ls && echo done").segments();
808        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
809    }
810
811    #[test]
812    fn split_semicolon() {
813        let segs = CommandLine::new("ls; echo done").segments();
814        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
815    }
816
817    #[test]
818    fn split_preserves_quoted_pipes() {
819        let segs = CommandLine::new("echo 'a | b' foo").segments();
820        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
821    }
822
823    #[test]
824    fn split_background_operator() {
825        let segs = CommandLine::new("cat file & rm -rf /").segments();
826        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
827    }
828
829    #[test]
830    fn split_newline() {
831        let segs = CommandLine::new("echo foo\necho bar").segments();
832        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
833    }
834
835    #[test]
836    fn unsafe_redirect() {
837        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
838    }
839
840    #[test]
841    fn safe_fd_redirect_stderr_to_stdout() {
842        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
843    }
844
845    #[test]
846    fn safe_fd_redirect_close() {
847        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
848    }
849
850    #[test]
851    fn unsafe_redirect_ampersand_no_digit() {
852        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
853    }
854
855    #[test]
856    fn unsafe_backtick() {
857        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
858    }
859
860    #[test]
861    fn unsafe_command_substitution() {
862        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
863    }
864
865    #[test]
866    fn safe_quoted_dollar_paren() {
867        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
868    }
869
870    #[test]
871    fn safe_quoted_redirect() {
872        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
873    }
874
875    #[test]
876    fn safe_no_special_chars() {
877        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
878    }
879
880    #[test]
881    fn safe_redirect_to_dev_null() {
882        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
883    }
884
885    #[test]
886    fn safe_redirect_stderr_to_dev_null() {
887        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
888    }
889
890    #[test]
891    fn safe_redirect_append_to_dev_null() {
892        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
893    }
894
895    #[test]
896    fn safe_redirect_space_dev_null() {
897        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
898    }
899
900    #[test]
901    fn safe_redirect_input_dev_null() {
902        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
903    }
904
905    #[test]
906    fn safe_redirect_both_dev_null() {
907        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
908    }
909
910    #[test]
911    fn unsafe_redirect_dev_null_prefix() {
912        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
913    }
914
915    #[test]
916    fn unsafe_redirect_dev_null_path_traversal() {
917        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
918    }
919
920    #[test]
921    fn unsafe_redirect_dev_null_subpath() {
922        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
923    }
924
925    #[test]
926    fn unsafe_redirect_to_file() {
927        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
928    }
929
930    #[test]
931    fn safe_here_string() {
932        assert!(!seg("grep -c , <<< 'hello world'").has_unsafe_shell_syntax());
933    }
934
935    #[test]
936    fn safe_here_string_double_quoted() {
937        assert!(!seg("cat <<< \"some text\"").has_unsafe_shell_syntax());
938    }
939
940    #[test]
941    fn unsafe_heredoc_still_blocked() {
942        assert!(seg("cat << EOF").has_unsafe_shell_syntax());
943    }
944
945    #[test]
946    fn unsafe_input_redirect_still_blocked() {
947        assert!(seg("cmd < file.txt").has_unsafe_shell_syntax());
948    }
949
950    #[test]
951    fn has_flag_short() {
952        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
953        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
954    }
955
956    #[test]
957    fn has_flag_long_with_eq() {
958        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
959        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
960    }
961
962    #[test]
963    fn has_flag_combined_short() {
964        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
965        assert!(has_flag(&tokens, Some("-i"), Some("--in-place")));
966    }
967
968    #[test]
969    fn has_flag_stops_at_double_dash() {
970        let tokens = toks(&["cmd", "--", "-i"]);
971        assert!(!has_flag(&tokens, Some("-i"), Some("--in-place")));
972    }
973
974    #[test]
975    fn has_flag_long_only() {
976        let tokens = toks(&["sort", "--compress-program", "gzip", "file.txt"]);
977        assert!(has_flag(&tokens, None, Some("--compress-program")));
978    }
979
980    #[test]
981    fn has_flag_long_only_eq() {
982        let tokens = toks(&["sort", "--compress-program=gzip", "file.txt"]);
983        assert!(has_flag(&tokens, None, Some("--compress-program")));
984    }
985
986    #[test]
987    fn has_flag_long_only_absent() {
988        let tokens = toks(&["sort", "-r", "file.txt"]);
989        assert!(!has_flag(&tokens, None, Some("--compress-program")));
990    }
991
992    #[test]
993    fn strip_single_env_var() {
994        assert_eq!(
995            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
996            seg("bundle exec rspec")
997        );
998    }
999
1000    #[test]
1001    fn strip_multiple_env_vars() {
1002        assert_eq!(
1003            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
1004            seg("bundle exec rspec")
1005        );
1006    }
1007
1008    #[test]
1009    fn strip_no_env_var() {
1010        assert_eq!(
1011            seg("bundle exec rspec").strip_env_prefix(),
1012            seg("bundle exec rspec")
1013        );
1014    }
1015
1016    #[test]
1017    fn tokenize_simple() {
1018        assert_eq!(
1019            seg("grep foo file.txt").tokenize(),
1020            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
1021        );
1022    }
1023
1024    #[test]
1025    fn tokenize_quoted() {
1026        assert_eq!(
1027            seg("echo 'hello world'").tokenize(),
1028            Some(vec![tok("echo"), tok("hello world")])
1029        );
1030    }
1031
1032    #[test]
1033    fn strip_env_quoted_single() {
1034        assert_eq!(
1035            seg("FOO='bar baz' ls").strip_env_prefix(),
1036            seg("ls")
1037        );
1038    }
1039
1040    #[test]
1041    fn strip_env_quoted_double() {
1042        assert_eq!(
1043            seg("FOO=\"bar baz\" ls").strip_env_prefix(),
1044            seg("ls")
1045        );
1046    }
1047
1048    #[test]
1049    fn strip_env_quoted_with_equals() {
1050        assert_eq!(
1051            seg("FOO='a=b' ls").strip_env_prefix(),
1052            seg("ls")
1053        );
1054    }
1055
1056    #[test]
1057    fn strip_env_quoted_multiple() {
1058        assert_eq!(
1059            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
1060            seg("cmd")
1061        );
1062    }
1063
1064    #[test]
1065    fn unwrap_subshell_simple() {
1066        assert_eq!(seg("(echo hello)").unwrap_subshell(), Some("echo hello"));
1067    }
1068
1069    #[test]
1070    fn unwrap_subshell_nested() {
1071        assert_eq!(seg("((echo hello))").unwrap_subshell(), Some("(echo hello)"));
1072    }
1073
1074    #[test]
1075    fn unwrap_subshell_with_semicolons() {
1076        assert_eq!(seg("(echo a; echo b)").unwrap_subshell(), Some("echo a; echo b"));
1077    }
1078
1079    #[test]
1080    fn unwrap_subshell_not_subshell() {
1081        assert_eq!(seg("echo hello").unwrap_subshell(), None);
1082    }
1083
1084    #[test]
1085    fn unwrap_subshell_unbalanced() {
1086        assert_eq!(seg("(echo (hello)").unwrap_subshell(), None);
1087    }
1088
1089    #[test]
1090    fn unwrap_subshell_empty() {
1091        assert_eq!(seg("()").unwrap_subshell(), Some(""));
1092    }
1093
1094    #[test]
1095    fn bare_assignment_single() {
1096        assert!(seg("out=_").is_bare_assignment());
1097    }
1098
1099    #[test]
1100    fn bare_assignment_multiple() {
1101        assert!(seg("a=_ b=_").is_bare_assignment());
1102    }
1103
1104    #[test]
1105    fn bare_assignment_lowercase() {
1106        assert!(seg("result=hello").is_bare_assignment());
1107    }
1108
1109    #[test]
1110    fn bare_assignment_uppercase() {
1111        assert!(seg("FOO=bar").is_bare_assignment());
1112    }
1113
1114    #[test]
1115    fn bare_assignment_underscore_prefix() {
1116        assert!(seg("_foo=bar").is_bare_assignment());
1117    }
1118
1119    #[test]
1120    fn bare_assignment_quoted_value() {
1121        assert!(seg("out='hello world'").is_bare_assignment());
1122    }
1123
1124    #[test]
1125    fn not_bare_assignment_with_command() {
1126        assert!(!seg("FOO=bar ls").is_bare_assignment());
1127    }
1128
1129    #[test]
1130    fn not_bare_assignment_no_equals() {
1131        assert!(!seg("foobar").is_bare_assignment());
1132    }
1133
1134    #[test]
1135    fn not_bare_assignment_empty() {
1136        assert!(!seg("").is_bare_assignment());
1137    }
1138
1139    #[test]
1140    fn not_bare_assignment_starts_with_digit() {
1141        assert!(!seg("1foo=bar").is_bare_assignment());
1142    }
1143
1144    #[test]
1145    fn command_name_simple() {
1146        assert_eq!(tok("ls").command_name(), "ls");
1147    }
1148
1149    #[test]
1150    fn command_name_with_path() {
1151        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
1152    }
1153
1154    #[test]
1155    fn command_name_relative_path() {
1156        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
1157    }
1158
1159    #[test]
1160    fn fd_redirect_detection() {
1161        assert!(tok("2>&1").is_fd_redirect());
1162        assert!(tok(">&2").is_fd_redirect());
1163        assert!(tok("10>&1").is_fd_redirect());
1164        assert!(tok("255>&2").is_fd_redirect());
1165        assert!(tok("2>&-").is_fd_redirect());
1166        assert!(tok("2>&10").is_fd_redirect());
1167        assert!(!tok(">").is_fd_redirect());
1168        assert!(!tok("/dev/null").is_fd_redirect());
1169        assert!(!tok(">&").is_fd_redirect());
1170        assert!(!tok("").is_fd_redirect());
1171        assert!(!tok("42").is_fd_redirect());
1172        assert!(!tok("123abc").is_fd_redirect());
1173    }
1174
1175    #[test]
1176    fn dev_null_redirect_single_token() {
1177        assert!(tok(">/dev/null").is_dev_null_redirect());
1178        assert!(tok(">>/dev/null").is_dev_null_redirect());
1179        assert!(tok("2>/dev/null").is_dev_null_redirect());
1180        assert!(tok("2>>/dev/null").is_dev_null_redirect());
1181        assert!(tok("</dev/null").is_dev_null_redirect());
1182        assert!(tok("10>/dev/null").is_dev_null_redirect());
1183        assert!(tok("255>/dev/null").is_dev_null_redirect());
1184        assert!(!tok(">/tmp/file").is_dev_null_redirect());
1185        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
1186        assert!(!tok("ls").is_dev_null_redirect());
1187        assert!(!tok("").is_dev_null_redirect());
1188        assert!(!tok("42").is_dev_null_redirect());
1189        assert!(!tok("<</dev/null").is_dev_null_redirect());
1190    }
1191
1192    #[test]
1193    fn redirect_operator_detection() {
1194        assert!(tok(">").is_redirect_operator());
1195        assert!(tok(">>").is_redirect_operator());
1196        assert!(tok("<").is_redirect_operator());
1197        assert!(tok("2>").is_redirect_operator());
1198        assert!(tok("2>>").is_redirect_operator());
1199        assert!(tok("10>").is_redirect_operator());
1200        assert!(tok("255>>").is_redirect_operator());
1201        assert!(!tok("ls").is_redirect_operator());
1202        assert!(!tok(">&1").is_redirect_operator());
1203        assert!(!tok("/dev/null").is_redirect_operator());
1204        assert!(!tok("").is_redirect_operator());
1205        assert!(!tok("42").is_redirect_operator());
1206        assert!(!tok("<<").is_redirect_operator());
1207    }
1208
1209    #[test]
1210    fn reverse_partial_eq() {
1211        let t = tok("hello");
1212        assert!("hello" == t);
1213        assert!("world" != t);
1214        let s: &str = "hello";
1215        assert!(s == t);
1216    }
1217
1218    #[test]
1219    fn token_deref() {
1220        let t = tok("--flag");
1221        assert!(t.starts_with("--"));
1222        assert!(t.contains("fl"));
1223        assert_eq!(t.len(), 6);
1224        assert!(!t.is_empty());
1225        assert_eq!(t.as_bytes()[0], b'-');
1226        assert!(t.eq_ignore_ascii_case("--FLAG"));
1227        assert_eq!(t.get(2..), Some("flag"));
1228    }
1229
1230    #[test]
1231    fn token_is_one_of() {
1232        assert!(tok("-v").is_one_of(&["-v", "--verbose"]));
1233        assert!(!tok("-q").is_one_of(&["-v", "--verbose"]));
1234    }
1235
1236    #[test]
1237    fn token_split_value() {
1238        assert_eq!(tok("--method=GET").split_value("="), Some("GET"));
1239        assert_eq!(tok("--flag").split_value("="), None);
1240    }
1241
1242    #[test]
1243    fn word_set_contains() {
1244        let set = WordSet::new(&["list", "show", "view"]);
1245        assert!(set.contains(&tok("list")));
1246        assert!(set.contains(&tok("view")));
1247        assert!(!set.contains(&tok("delete")));
1248        assert!(set.contains("list"));
1249        assert!(!set.contains("delete"));
1250    }
1251
1252    #[test]
1253    fn word_set_iter() {
1254        let set = WordSet::new(&["a", "b", "c"]);
1255        let items: Vec<&str> = set.iter().collect();
1256        assert_eq!(items, vec!["a", "b", "c"]);
1257    }
1258
1259    #[test]
1260    fn token_as_command_line() {
1261        let cl = tok("ls -la | grep foo").as_command_line();
1262        let segs = cl.segments();
1263        assert_eq!(segs, vec![seg("ls -la"), seg("grep foo")]);
1264    }
1265
1266    #[test]
1267    fn segment_from_tokens_replacing() {
1268        let tokens = toks(&["find", ".", "-name", "{}", "-print"]);
1269        let result = Segment::from_tokens_replacing(&tokens, "{}", "file");
1270        assert_eq!(result.tokenize().unwrap(), toks(&["find", ".", "-name", "file", "-print"]));
1271    }
1272
1273    #[test]
1274    fn segment_strip_fd_redirects() {
1275        assert_eq!(
1276            seg("cargo test 2>&1").strip_fd_redirects(),
1277            seg("cargo test")
1278        );
1279        assert_eq!(
1280            seg("cmd 2>&1 >&2").strip_fd_redirects(),
1281            seg("cmd")
1282        );
1283        assert_eq!(
1284            seg("ls -la").strip_fd_redirects(),
1285            seg("ls -la")
1286        );
1287    }
1288
1289    #[test]
1290    fn content_outside_double_quotes_strips_string() {
1291        assert_eq!(tok(r#""system""#).content_outside_double_quotes(), " ");
1292    }
1293
1294    #[test]
1295    fn content_outside_double_quotes_preserves_code() {
1296        let result = tok(r#"{print "hello"} END{print NR}"#).content_outside_double_quotes();
1297        assert_eq!(result, r#"{print  } END{print NR}"#);
1298    }
1299
1300    #[test]
1301    fn content_outside_double_quotes_escaped() {
1302        let result = tok(r#"{print "he said \"hi\""}"#).content_outside_double_quotes();
1303        assert_eq!(result, "{print  }");
1304    }
1305
1306    #[test]
1307    fn content_outside_double_quotes_no_quotes() {
1308        assert_eq!(tok("{print $1}").content_outside_double_quotes(), "{print $1}");
1309    }
1310
1311    #[test]
1312    fn content_outside_double_quotes_empty() {
1313        assert_eq!(tok("").content_outside_double_quotes(), "");
1314    }
1315
1316    #[test]
1317    fn extract_subs_none() {
1318        let (subs, cleaned) = seg("echo hello").extract_substitutions().unwrap();
1319        assert!(subs.is_empty());
1320        assert_eq!(cleaned, "echo hello");
1321    }
1322
1323    #[test]
1324    fn extract_subs_dollar_paren() {
1325        let (subs, cleaned) = seg("echo $(ls)").extract_substitutions().unwrap();
1326        assert_eq!(subs, vec!["ls"]);
1327        assert_eq!(cleaned, "echo _");
1328    }
1329
1330    #[test]
1331    fn extract_subs_backtick() {
1332        let (subs, cleaned) = seg("ls `pwd`").extract_substitutions().unwrap();
1333        assert_eq!(subs, vec!["pwd"]);
1334        assert_eq!(cleaned, "ls _");
1335    }
1336
1337    #[test]
1338    fn extract_subs_multiple() {
1339        let (subs, cleaned) = seg("echo $(cmd1) $(cmd2)").extract_substitutions().unwrap();
1340        assert_eq!(subs, vec!["cmd1", "cmd2"]);
1341        assert_eq!(cleaned, "echo _ _");
1342    }
1343
1344    #[test]
1345    fn extract_subs_nested() {
1346        let (subs, cleaned) = seg("echo $(echo $(ls))").extract_substitutions().unwrap();
1347        assert_eq!(subs, vec!["echo $(ls)"]);
1348        assert_eq!(cleaned, "echo _");
1349    }
1350
1351    #[test]
1352    fn extract_subs_quoted_skipped() {
1353        let (subs, cleaned) = seg("echo '$(safe)' arg").extract_substitutions().unwrap();
1354        assert!(subs.is_empty());
1355        assert_eq!(cleaned, "echo '$(safe)' arg");
1356    }
1357
1358    #[test]
1359    fn extract_subs_unmatched_backtick() {
1360        assert!(seg("echo `unclosed").extract_substitutions().is_err());
1361    }
1362
1363    #[test]
1364    fn extract_subs_unmatched_paren() {
1365        assert!(seg("echo $(unclosed").extract_substitutions().is_err());
1366    }
1367
1368    #[test]
1369    fn unsafe_redirects_to_file() {
1370        assert!(seg("echo hello > file.txt").has_unsafe_redirects());
1371    }
1372
1373    #[test]
1374    fn unsafe_redirects_dev_null_ok() {
1375        assert!(!seg("cmd > /dev/null").has_unsafe_redirects());
1376    }
1377
1378    #[test]
1379    fn unsafe_redirects_fd_ok() {
1380        assert!(!seg("cmd 2>&1").has_unsafe_redirects());
1381    }
1382
1383    #[test]
1384    fn unsafe_redirects_no_backtick_check() {
1385        assert!(!seg("echo `ls`").has_unsafe_redirects());
1386    }
1387
1388    #[test]
1389    fn unsafe_redirects_no_dollar_paren_check() {
1390        assert!(!seg("echo $(ls)").has_unsafe_redirects());
1391    }
1392
1393    #[test]
1394    fn unsafe_redirects_here_string_ok() {
1395        assert!(!seg("grep -c , <<< 'hello'").has_unsafe_redirects());
1396    }
1397
1398    #[test]
1399    fn unsafe_redirects_heredoc_still_blocked() {
1400        assert!(seg("cat << EOF").has_unsafe_redirects());
1401    }
1402}