Skip to main content

safe_chains/cst/
parse.rs

1use super::*;
2use winnow::ModalResult;
3use winnow::combinator::{alt, delimited, not, opt, preceded, repeat, separated, terminated};
4use winnow::error::{ContextError, ErrMode};
5use winnow::prelude::*;
6use winnow::token::{any, take_while};
7
8pub fn parse(input: &str) -> Option<Script> {
9    reset_heredoc_queue();
10    let result = script.parse(input).ok();
11    reset_heredoc_queue();
12    result
13}
14
15fn backtrack<T>() -> ModalResult<T> {
16    Err(ErrMode::Backtrack(ContextError::new()))
17}
18
19fn comment(input: &mut &str) -> ModalResult<()> {
20    if input.starts_with('#') {
21        if let Some(pos) = input.find('\n') {
22            *input = &input[pos + 1..];
23        } else {
24            *input = "";
25        }
26    }
27    Ok(())
28}
29
30fn ws(input: &mut &str) -> ModalResult<()> {
31    loop {
32        take_while(0.., [' ', '\t']).void().parse_next(input)?;
33        if input.starts_with('#') {
34            comment(input)?;
35        } else {
36            break;
37        }
38    }
39    Ok(())
40}
41
42fn sep(input: &mut &str) -> ModalResult<()> {
43    loop {
44        take_while(0.., [' ', '\t', ';', '\n']).void().parse_next(input)?;
45        if input.starts_with('#') {
46            comment(input)?;
47        } else {
48            break;
49        }
50    }
51    Ok(())
52}
53
54fn eat_keyword(input: &mut &str, kw: &str) -> ModalResult<()> {
55    if !input.starts_with(kw) {
56        return backtrack();
57    }
58    if input
59        .as_bytes()
60        .get(kw.len())
61        .is_some_and(|&b| b.is_ascii_alphanumeric() || b == b'_')
62    {
63        return backtrack();
64    }
65    *input = &input[kw.len()..];
66    Ok(())
67}
68
/// Keywords that end the script currently being parsed when they appear where
/// a new pipeline would start.
const SCRIPT_STOPS: &[&str] = &["do", "done", "elif", "else", "fi", "then"];

/// Reports whether `input` begins with something that terminates a script:
/// a closing `)` or `}`, or one of [`SCRIPT_STOPS`] not followed by an ASCII
/// alphanumeric character or `_`.
fn at_script_stop(input: &str) -> bool {
    if matches!(input.as_bytes().first(), Some(b')' | b'}')) {
        return true;
    }
    SCRIPT_STOPS.iter().any(|kw| match input.strip_prefix(*kw) {
        Some(rest) => !rest
            .bytes()
            .next()
            .is_some_and(|b| b.is_ascii_alphanumeric() || b == b'_'),
        None => false,
    })
}
82
/// Reports whether `c` ends an unquoted word: whitespace, a list or pipe
/// operator character, a closing parenthesis, or a redirection character.
fn is_word_boundary(c: char) -> bool {
    const BOUNDARIES: [char; 9] = [' ', '\t', '\n', ';', '|', '&', ')', '>', '<'];
    BOUNDARIES.contains(&c)
}
86
87fn is_word_literal(c: char) -> bool {
88    !is_word_boundary(c) && !matches!(c, '\'' | '"' | '`' | '\\' | '(' | '$')
89}
90
/// Reports whether `c` is ordinary text inside double quotes, i.e. anything
/// except the closing quote, a backslash, a backtick, or `$`.
fn is_dq_literal(c: char) -> bool {
    const SPECIAL: [char; 4] = ['"', '\\', '`', '$'];
    !SPECIAL.contains(&c)
}
94
95// === Script ===
96
97fn script(input: &mut &str) -> ModalResult<Script> {
98    sep.parse_next(input)?;
99    let mut stmts = Vec::new();
100    while let Some(pl) = opt(pipeline).parse_next(input)? {
101        ws.parse_next(input)?;
102        let op = opt(list_op).parse_next(input)?;
103        stmts.push(Stmt { pipeline: pl, op });
104        // Drain any heredoc bodies pending from this statement before
105        // the next pipeline starts; otherwise the body would be parsed
106        // as the next statement (which would either misvalidate or
107        // misalign the line counter).
108        drain_pending_heredocs(input);
109        if op.is_none() {
110            break;
111        }
112        sep.parse_next(input)?;
113    }
114    Ok(Script(stmts))
115}
116
117fn list_op(input: &mut &str) -> ModalResult<ListOp> {
118    ws.parse_next(input)?;
119    alt((
120        "&&".value(ListOp::And),
121        "||".value(ListOp::Or),
122        '\n'.value(ListOp::Semi),
123        ';'.value(ListOp::Semi),
124        ('&', not('>')).value(ListOp::Amp),
125    ))
126    .parse_next(input)
127}
128
129fn pipe_sep(input: &mut &str) -> ModalResult<()> {
130    (ws, '|', not('|'), ws).void().parse_next(input)
131}
132
133// === Pipeline ===
134
135fn pipeline(input: &mut &str) -> ModalResult<Pipeline> {
136    ws.parse_next(input)?;
137    if at_script_stop(input) {
138        return backtrack();
139    }
140    let bang = opt(terminated('!', ws)).parse_next(input)?.is_some();
141    let commands: Vec<Cmd> = separated(1.., command, pipe_sep).parse_next(input)?;
142    Ok(Pipeline { bang, commands })
143}
144
145// === Command ===
146
147fn command(input: &mut &str) -> ModalResult<Cmd> {
148    ws.parse_next(input)?;
149    if at_script_stop(input) {
150        return backtrack();
151    }
152    alt((
153        subshell,
154        brace_group,
155        for_cmd,
156        while_cmd,
157        until_cmd,
158        if_cmd,
159        simple_cmd.map(Cmd::Simple),
160    ))
161    .parse_next(input)
162}
163
164fn trailing_redirs(input: &mut &str) -> ModalResult<Vec<Redir>> {
165    let mut redirs = Vec::new();
166    loop {
167        ws.parse_next(input)?;
168        if let Some(r) = opt(redirect).parse_next(input)? {
169            redirs.push(r);
170        } else {
171            break;
172        }
173    }
174    Ok(redirs)
175}
176
177fn subshell(input: &mut &str) -> ModalResult<Cmd> {
178    let body = delimited(('(', ws), script, (ws, ')')).parse_next(input)?;
179    let redirs = trailing_redirs(input)?;
180    Ok(Cmd::Subshell { body, redirs })
181}
182
183fn brace_group(input: &mut &str) -> ModalResult<Cmd> {
184    if !input.starts_with('{') {
185        return backtrack();
186    }
187    if !input
188        .as_bytes()
189        .get(1)
190        .is_some_and(|b| matches!(b, b' ' | b'\t' | b'\n'))
191    {
192        return backtrack();
193    }
194    *input = &input[1..];
195    sep.parse_next(input)?;
196    let body = script.parse_next(input)?;
197    if body.0.is_empty() {
198        return backtrack();
199    }
200    sep.parse_next(input)?;
201    if !input.starts_with('}') {
202        return backtrack();
203    }
204    let last_op = body.0.last().and_then(|s| s.op);
205    if last_op.is_none() {
206        return backtrack();
207    }
208    *input = &input[1..];
209    let redirs = trailing_redirs(input)?;
210    Ok(Cmd::BraceGroup { body, redirs })
211}
212
213// === Simple Command ===
214
215fn simple_cmd(input: &mut &str) -> ModalResult<SimpleCmd> {
216    let env: Vec<(String, Word)> =
217        repeat(0.., terminated(assignment, ws)).parse_next(input)?;
218    let mut words = Vec::new();
219    let mut redirs = Vec::new();
220
221    loop {
222        ws.parse_next(input)?;
223        if at_cmd_end(input) {
224            break;
225        }
226        if let Some(r) = opt(redirect).parse_next(input)? {
227            redirs.push(r);
228        } else if let Some(w) = opt(word).parse_next(input)? {
229            words.push(w);
230        } else {
231            break;
232        }
233    }
234
235    if env.is_empty() && words.is_empty() && redirs.is_empty() {
236        return backtrack();
237    }
238    Ok(SimpleCmd { env, words, redirs })
239}
240
/// Reports whether a simple command ends here: at end of input, or at a
/// newline, `;`, `|`, `&`, or `)`.
fn at_cmd_end(input: &str) -> bool {
    match input.bytes().next() {
        None => true,
        Some(first) => matches!(first, b'\n' | b';' | b'|' | b'&' | b')'),
    }
}
248
249fn assignment(input: &mut &str) -> ModalResult<(String, Word)> {
250    let n: &str = take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_')
251        .parse_next(input)?;
252    '='.parse_next(input)?;
253    let value = opt(word)
254        .parse_next(input)?
255        .unwrap_or(Word(vec![WordPart::Lit(String::new())]));
256    Ok((n.to_string(), value))
257}
258
259// === Redirect ===
260
261fn redirect(input: &mut &str) -> ModalResult<Redir> {
262    let fd = opt(fd_prefix).parse_next(input)?;
263    alt((
264        preceded("<<<", (ws, word)).map(|(_, target)| Redir::HereStr(target)),
265        heredoc,
266        preceded(">>", (ws, word)).map(move |(_, target)| Redir::Write {
267            fd: fd.unwrap_or(1),
268            target,
269            append: true,
270        }),
271        preceded(">&", fd_target).map(move |dst| Redir::DupFd {
272            src: fd.unwrap_or(1),
273            dst,
274        }),
275        preceded('>', (ws, word)).map(move |(_, target)| Redir::Write {
276            fd: fd.unwrap_or(1),
277            target,
278            append: false,
279        }),
280        preceded('<', (ws, word)).map(move |(_, target)| Redir::Read {
281            fd: fd.unwrap_or(0),
282            target,
283        }),
284    ))
285    .parse_next(input)
286}
287
288fn heredoc(input: &mut &str) -> ModalResult<Redir> {
289    "<<".parse_next(input)?;
290    let strip_tabs = opt('-').parse_next(input)?.is_some();
291    ws.parse_next(input)?;
292    let delimiter = heredoc_delimiter.parse_next(input)?;
293    // Bash semantics: the heredoc body lives on lines AFTER the
294    // command line is finished, not immediately after `<<DELIM`. The
295    // command line can continue with more redirects, a pipe, etc.
296    // Push the delimiter onto a thread-local queue; the body is
297    // drained at the next `\n`/`;` separator by drain_pending_heredocs.
298    PENDING_HEREDOCS.with(|q| {
299        q.borrow_mut().push(PendingHeredoc {
300            delimiter: delimiter.clone(),
301            strip_tabs,
302        });
303    });
304    Ok(Redir::HereDoc { delimiter, strip_tabs })
305}
306
/// A heredoc whose operator has been parsed but whose body has not been
/// skipped yet.
#[derive(Clone, Debug)]
struct PendingHeredoc {
    /// Text of the line that terminates the body.
    delimiter: String,
    /// `true` for `<<-`: leading tabs are ignored when matching the delimiter.
    strip_tabs: bool,
}

thread_local! {
    /// Per-thread queue of heredocs waiting for their bodies, in the order
    /// their operators were parsed.
    static PENDING_HEREDOCS: std::cell::RefCell<Vec<PendingHeredoc>> = const {
        std::cell::RefCell::new(Vec::new())
    };
}
317
318fn drain_pending_heredocs(input: &mut &str) {
319    let pending: Vec<PendingHeredoc> =
320        PENDING_HEREDOCS.with(|q| std::mem::take(&mut *q.borrow_mut()));
321    for h in pending {
322        if !skip_heredoc_body(input, &h.delimiter, h.strip_tabs) {
323            // Couldn't find the matching delimiter line. Leave input
324            // as-is; the parser will likely fail on the leftover body
325            // text, which is the safe outcome (we deny on parse fail).
326            return;
327        }
328    }
329}
330
/// Advances `input` past a heredoc body and its terminating delimiter line.
///
/// Scans `input` line by line for a line equal to `delimiter` (after removing
/// leading tabs when `strip_tabs` is set). On a match, `input` is moved to
/// just after that line and its newline, and `true` is returned. When no line
/// matches, `input` is left untouched and `false` is returned.
fn skip_heredoc_body(input: &mut &str, delimiter: &str, strip_tabs: bool) -> bool {
    let text = *input;
    let mut offset = 0;
    for raw_line in text.split('\n') {
        let line_end = offset + raw_line.len();
        let candidate = if strip_tabs {
            raw_line.trim_start_matches('\t')
        } else {
            raw_line
        };
        if candidate == delimiter {
            // Step over the newline too, unless the delimiter ends the input.
            let resume = if line_end < text.len() {
                line_end + 1
            } else {
                line_end
            };
            *input = &text[resume..];
            return true;
        }
        offset = line_end + 1;
    }
    false
}
361
362fn reset_heredoc_queue() {
363    PENDING_HEREDOCS.with(|q| q.borrow_mut().clear());
364}
365
366fn heredoc_delimiter(input: &mut &str) -> ModalResult<String> {
367    alt((
368        delimited('\'', take_while(0.., |c| c != '\''), '\'').map(|s: &str| s.to_string()),
369        delimited('"', take_while(0.., |c| c != '"'), '"').map(|s: &str| s.to_string()),
370        take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_').map(|s: &str| s.to_string()),
371    ))
372    .parse_next(input)
373}
374
375fn fd_prefix(input: &mut &str) -> ModalResult<u32> {
376    let b = input.as_bytes();
377    if b.len() >= 2 && b[0].is_ascii_digit() && matches!(b[1], b'>' | b'<') {
378        let d = (b[0] - b'0') as u32;
379        *input = &input[1..];
380        Ok(d)
381    } else {
382        backtrack()
383    }
384}
385
386fn fd_target(input: &mut &str) -> ModalResult<String> {
387    alt((
388        '-'.value("-".to_string()),
389        take_while(1.., |c: char| c.is_ascii_digit()).map(|s: &str| s.to_string()),
390    ))
391    .parse_next(input)
392}
393
394// === Word ===
395
396fn word(input: &mut &str) -> ModalResult<Word> {
397    repeat(1.., word_part)
398        .map(Word)
399        .parse_next(input)
400}
401
402fn word_part(input: &mut &str) -> ModalResult<WordPart> {
403    if input.is_empty() {
404        return backtrack();
405    }
406    if input.starts_with("<(") || input.starts_with(">(") {
407        return proc_sub(input);
408    }
409    if is_word_boundary(input.as_bytes()[0] as char) {
410        return backtrack();
411    }
412    alt((single_quoted, double_quoted, arith_sub, cmd_sub, backtick_part, escaped, dollar_lit(is_word_literal), lit(is_word_literal)))
413        .parse_next(input)
414}
415
416fn single_quoted(input: &mut &str) -> ModalResult<WordPart> {
417    delimited('\'', take_while(0.., |c| c != '\''), '\'')
418        .map(|s: &str| WordPart::SQuote(s.to_string()))
419        .parse_next(input)
420}
421
422fn double_quoted(input: &mut &str) -> ModalResult<WordPart> {
423    delimited('"', repeat(0.., dq_part).map(Word), '"')
424        .map(WordPart::DQuote)
425        .parse_next(input)
426}
427
428fn cmd_sub(input: &mut &str) -> ModalResult<WordPart> {
429    delimited(("$(", ws), script, (ws, ')'))
430        .map(WordPart::CmdSub)
431        .parse_next(input)
432}
433
434fn proc_sub(input: &mut &str) -> ModalResult<WordPart> {
435    if !(input.starts_with("<(") || input.starts_with(">(")) {
436        return backtrack();
437    }
438    *input = &input[1..];
439    delimited(('(', ws), script, (ws, ')'))
440        .map(WordPart::ProcSub)
441        .parse_next(input)
442}
443
444fn arith_sub(input: &mut &str) -> ModalResult<WordPart> {
445    if !input.starts_with("$((") {
446        return backtrack();
447    }
448    let body_start = 3;
449    let bytes = input.as_bytes();
450    let mut depth: i32 = 1;
451    let mut i = body_start;
452    while i < bytes.len() {
453        match bytes[i] {
454            b'(' => depth += 1,
455            b')' => {
456                if depth == 1 && i + 1 < bytes.len() && bytes[i + 1] == b')' {
457                    let body = input[body_start..i].to_string();
458                    if body.contains("$(") || body.contains('`') {
459                        return backtrack();
460                    }
461                    *input = &input[i + 2..];
462                    return Ok(WordPart::Arith(body));
463                }
464                depth -= 1;
465                if depth < 0 {
466                    return backtrack();
467                }
468            }
469            _ => {}
470        }
471        i += 1;
472    }
473    backtrack()
474}
475
476fn backtick_part(input: &mut &str) -> ModalResult<WordPart> {
477    delimited('`', backtick_inner, '`')
478        .map(WordPart::Backtick)
479        .parse_next(input)
480}
481
482fn escaped(input: &mut &str) -> ModalResult<WordPart> {
483    preceded('\\', any).map(WordPart::Escape).parse_next(input)
484}
485
486fn lit(pred: fn(char) -> bool) -> impl FnMut(&mut &str) -> ModalResult<WordPart> {
487    move |input: &mut &str| {
488        take_while(1.., pred)
489            .map(|s: &str| WordPart::Lit(s.to_string()))
490            .parse_next(input)
491    }
492}
493
494fn dollar_lit(pred: fn(char) -> bool) -> impl FnMut(&mut &str) -> ModalResult<WordPart> {
495    move |input: &mut &str| {
496        ('$', not('(')).void().parse_next(input)?;
497        let rest: &str = take_while(0.., pred).parse_next(input)?;
498        Ok(WordPart::Lit(format!("${rest}")))
499    }
500}
501
502// === Double-quoted parts ===
503
504fn dq_part(input: &mut &str) -> ModalResult<WordPart> {
505    if input.is_empty() || input.starts_with('"') {
506        return backtrack();
507    }
508    alt((dq_escape, arith_sub, cmd_sub, backtick_part, dollar_lit(is_dq_literal), lit(is_dq_literal)))
509        .parse_next(input)
510}
511
512fn dq_escape(input: &mut &str) -> ModalResult<WordPart> {
513    preceded('\\', any)
514        .map(|c: char| match c {
515            '"' | '\\' | '$' | '`' => WordPart::Escape(c),
516            _ => WordPart::Lit(format!("\\{c}")),
517        })
518        .parse_next(input)
519}
520
521// === Backtick inner content ===
522
523fn backtick_inner(input: &mut &str) -> ModalResult<String> {
524    repeat(0.., alt((bt_escape, bt_literal)))
525        .fold(String::new, |mut acc, chunk: &str| {
526            acc.push_str(chunk);
527            acc
528        })
529        .parse_next(input)
530}
531
532fn bt_escape<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
533    ('\\', any).take().parse_next(input)
534}
535
536fn bt_literal<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
537    take_while(1.., |c: char| c != '`' && c != '\\').parse_next(input)
538}
539
540// === Compound Commands ===
541
542fn for_cmd(input: &mut &str) -> ModalResult<Cmd> {
543    eat_keyword(input, "for")?;
544    ws.parse_next(input)?;
545    let var = name.parse_next(input)?;
546    ws.parse_next(input)?;
547
548    let items = if eat_keyword(input, "in").is_ok() {
549        ws.parse_next(input)?;
550        repeat(0.., terminated(word, ws)).parse_next(input)?
551    } else {
552        vec![]
553    };
554
555    let body = do_done_body.parse_next(input)?;
556    Ok(Cmd::For { var, items, body })
557}
558
559fn while_cmd(input: &mut &str) -> ModalResult<Cmd> {
560    eat_keyword(input, "while")?;
561    ws.parse_next(input)?;
562    let cond = script.parse_next(input)?;
563    let body = do_done_body.parse_next(input)?;
564    Ok(Cmd::While { cond, body })
565}
566
567fn until_cmd(input: &mut &str) -> ModalResult<Cmd> {
568    eat_keyword(input, "until")?;
569    ws.parse_next(input)?;
570    let cond = script.parse_next(input)?;
571    let body = do_done_body.parse_next(input)?;
572    Ok(Cmd::Until { cond, body })
573}
574
575fn do_done_body(input: &mut &str) -> ModalResult<Script> {
576    sep.parse_next(input)?;
577    eat_keyword(input, "do")?;
578    sep.parse_next(input)?;
579    let body = script.parse_next(input)?;
580    sep.parse_next(input)?;
581    eat_keyword(input, "done")?;
582    Ok(body)
583}
584
585fn if_cmd(input: &mut &str) -> ModalResult<Cmd> {
586    eat_keyword(input, "if")?;
587    ws.parse_next(input)?;
588    let mut branches = vec![cond_then_body.parse_next(input)?];
589    let mut else_body = None;
590
591    loop {
592        sep.parse_next(input)?;
593        if eat_keyword(input, "elif").is_ok() {
594            ws.parse_next(input)?;
595            branches.push(cond_then_body.parse_next(input)?);
596        } else if eat_keyword(input, "else").is_ok() {
597            sep.parse_next(input)?;
598            else_body = Some(script.parse_next(input)?);
599            break;
600        } else {
601            break;
602        }
603    }
604
605    sep.parse_next(input)?;
606    eat_keyword(input, "fi")?;
607    Ok(Cmd::If { branches, else_body })
608}
609
610fn cond_then_body(input: &mut &str) -> ModalResult<Branch> {
611    let cond = script.parse_next(input)?;
612    sep.parse_next(input)?;
613    eat_keyword(input, "then")?;
614    sep.parse_next(input)?;
615    let body = script.parse_next(input)?;
616    Ok(Branch { cond, body })
617}
618
619fn name(input: &mut &str) -> ModalResult<String> {
620    take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_')
621        .map(|s: &str| s.to_string())
622        .parse_next(input)
623}
624
625#[cfg(test)]
626mod tests {
627    use super::*;
628
629    fn p(input: &str) -> Script {
630        parse(input).unwrap_or_else(|| panic!("failed to parse: {input}"))
631    }
632
633    fn words(script: &Script) -> Vec<String> {
634        match &script.0[0].pipeline.commands[0] {
635            Cmd::Simple(s) => s.words.iter().map(|w| w.eval()).collect(),
636            _ => panic!("expected simple command"),
637        }
638    }
639
640    fn simple(script: &Script) -> &SimpleCmd {
641        match &script.0[0].pipeline.commands[0] {
642            Cmd::Simple(s) => s,
643            _ => panic!("expected simple command"),
644        }
645    }
646
647    #[test]
648    fn simple_command() { assert_eq!(words(&p("echo hello")), ["echo", "hello"]); }
649    #[test]
650    fn flags() { assert_eq!(words(&p("ls -la")), ["ls", "-la"]); }
651    #[test]
652    fn single_quoted() { assert_eq!(words(&p("echo 'hello world'")), ["echo", "hello world"]); }
653    #[test]
654    fn double_quoted() { assert_eq!(words(&p("echo \"hello world\"")), ["echo", "hello world"]); }
655    #[test]
656    fn mixed_quotes() { assert_eq!(words(&p("jq '.key' file.json")), ["jq", ".key", "file.json"]); }
657
658    #[test]
659    fn pipeline_test() { assert_eq!(p("grep foo | head -5").0[0].pipeline.commands.len(), 2); }
660    #[test]
661    fn sequence_and() { assert_eq!(p("ls && echo done").0[0].op, Some(ListOp::And)); }
662    #[test]
663    fn sequence_semi() { assert_eq!(p("ls; echo done").0.len(), 2); }
664    #[test]
665    fn newline_separator() { assert_eq!(p("echo foo\necho bar").0.len(), 2); }
666    #[test]
667    fn blank_line_between_statements() { assert_eq!(p("echo foo\n\necho bar").0.len(), 2); }
668    #[test]
669    fn multiple_blank_lines() { assert_eq!(p("echo foo\n\n\n\necho bar").0.len(), 2); }
670    #[test]
671    fn blank_line_with_whitespace() { assert_eq!(p("echo foo\n   \necho bar").0.len(), 2); }
672    #[test]
673    fn comment_between_statements() { assert_eq!(p("echo foo\n# comment\necho bar").0.len(), 2); }
674    #[test]
675    fn semi_then_blank() { assert_eq!(p("echo foo;\n\necho bar").0.len(), 2); }
676    #[test]
677    fn and_then_blank() { assert_eq!(p("echo foo &&\n\necho bar").0.len(), 2); }
678
679    #[test]
680    fn brace_group_simple() {
681        assert!(matches!(
682            &p("{ echo hello; }").0[0].pipeline.commands[0],
683            Cmd::BraceGroup { body, redirs } if body.0.len() == 1 && redirs.is_empty()
684        ));
685    }
686    #[test]
687    fn brace_group_multiple_stmts() {
688        if let Cmd::BraceGroup { body, .. } = &p("{ echo a; echo b; echo c; }").0[0].pipeline.commands[0] {
689            assert_eq!(body.0.len(), 3);
690        } else { panic!("expected BraceGroup"); }
691    }
692    #[test]
693    fn brace_group_with_redirect() {
694        if let Cmd::BraceGroup { redirs, .. } = &p("{ echo a; echo b; } > /tmp/out.txt").0[0].pipeline.commands[0] {
695            assert_eq!(redirs.len(), 1);
696            assert!(matches!(redirs[0], Redir::Write { .. }));
697        } else { panic!("expected BraceGroup"); }
698    }
699    #[test]
700    fn brace_group_with_append_redirect() {
701        if let Cmd::BraceGroup { redirs, .. } = &p("{ echo a; } >> log.txt").0[0].pipeline.commands[0] {
702            assert!(matches!(redirs[0], Redir::Write { append: true, .. }));
703        } else { panic!("expected BraceGroup"); }
704    }
705    #[test]
706    fn brace_group_with_stderr_redirect() {
707        if let Cmd::BraceGroup { redirs, .. } = &p("{ echo a; } 2>&1").0[0].pipeline.commands[0] {
708            assert!(matches!(redirs[0], Redir::DupFd { src: 2, .. }));
709        } else { panic!("expected BraceGroup"); }
710    }
711    #[test]
712    fn brace_group_newline_separated() {
713        if let Cmd::BraceGroup { body, .. } = &p("{\n  echo a\n  echo b\n}").0[0].pipeline.commands[0] {
714            assert_eq!(body.0.len(), 2);
715        } else { panic!("expected BraceGroup"); }
716    }
717    #[test]
718    fn brace_group_in_pipeline() {
719        let pl = &p("{ echo a; echo b; } | grep a").0[0].pipeline;
720        assert_eq!(pl.commands.len(), 2);
721        assert!(matches!(&pl.commands[0], Cmd::BraceGroup { .. }));
722    }
723    #[test]
724    fn brace_group_followed_by_other() {
725        let stmts = &p("{ echo a; }; echo b").0;
726        assert_eq!(stmts.len(), 2);
727        assert!(matches!(&stmts[0].pipeline.commands[0], Cmd::BraceGroup { .. }));
728    }
729    #[test]
730    fn brace_group_nested() {
731        if let Cmd::BraceGroup { body, .. } = &p("{ { echo inner; }; echo outer; }").0[0].pipeline.commands[0] {
732            assert_eq!(body.0.len(), 2);
733            assert!(matches!(&body.0[0].pipeline.commands[0], Cmd::BraceGroup { .. }));
734        } else { panic!("expected outer BraceGroup"); }
735    }
736    #[test]
737    fn brace_group_with_subshell_inside() {
738        if let Cmd::BraceGroup { body, .. } = &p("{ (echo sub); echo grp; }").0[0].pipeline.commands[0] {
739            assert_eq!(body.0.len(), 2);
740            assert!(matches!(&body.0[0].pipeline.commands[0], Cmd::Subshell { .. }));
741        } else { panic!("expected BraceGroup"); }
742    }
743    #[test]
744    fn brace_open_requires_whitespace() {
745        // {echo (no space) is NOT a brace group; it's a literal word
746        // that becomes part of a simple command. Parser should not
747        // treat it as a brace group.
748        let cmds = &p("{echo a}").0;
749        // Either parsed as a simple_cmd with a literal `{echo` token,
750        // or fails. Either way, it should NOT be a BraceGroup.
751        if !cmds.is_empty() {
752            assert!(!matches!(&cmds[0].pipeline.commands[0], Cmd::BraceGroup { .. }));
753        }
754    }
755    #[test]
756    fn subshell_with_redirect() {
757        if let Cmd::Subshell { redirs, .. } = &p("(echo hello) > /tmp/out.txt").0[0].pipeline.commands[0] {
758            assert_eq!(redirs.len(), 1);
759        } else { panic!("expected Subshell with redir"); }
760    }
761    #[test]
762    fn background() { assert_eq!(p("ls & echo done").0[0].op, Some(ListOp::Amp)); }
763
764    #[test]
765    fn redirect_dev_null() {
766        let s = p("echo hello > /dev/null");
767        let cmd = simple(&s);
768        assert_eq!(cmd.words.len(), 2);
769        assert!(matches!(&cmd.redirs[0], Redir::Write { fd: 1, append: false, .. }));
770    }
771    #[test]
772    fn redirect_stderr() {
773        assert!(matches!(&simple(&p("echo hello 2>&1")).redirs[0], Redir::DupFd { src: 2, dst } if dst == "1"));
774    }
775    #[test]
776    fn here_string() {
777        assert!(matches!(&simple(&p("grep -c , <<< 'hello,world,test'")).redirs[0], Redir::HereStr(_)));
778    }
779    #[test]
780    fn heredoc_bare() {
781        assert!(matches!(&simple(&p("cat <<EOF")).redirs[0], Redir::HereDoc { delimiter, strip_tabs: false } if delimiter == "EOF"));
782    }
783    #[test]
784    fn heredoc_with_content() {
785        let s = p("cat <<EOF\nhello world\nEOF");
786        assert!(matches!(&simple(&s).redirs[0], Redir::HereDoc { delimiter, .. } if delimiter == "EOF"));
787    }
788    #[test]
789    fn heredoc_quoted_delimiter() {
790        assert!(matches!(&simple(&p("cat <<'EOF'")).redirs[0], Redir::HereDoc { delimiter, .. } if delimiter == "EOF"));
791    }
792    #[test]
793    fn heredoc_strip_tabs() {
794        assert!(matches!(&simple(&p("cat <<-EOF")).redirs[0], Redir::HereDoc { strip_tabs: true, .. }));
795    }
796    #[test]
797    fn heredoc_pipe_on_command_line() {
798        // Correct bash: pipe is on the command line BEFORE the body,
799        // body terminator is on its own line.
800        let s = p("cat <<EOF | grep hello\nhello\nEOF");
801        assert_eq!(s.0[0].pipeline.commands.len(), 2);
802    }
803    #[test]
804    fn heredoc_body_does_not_swallow_pipe() {
805        // Regression for the `cat <<EOF | bash\n...\nEOF` bypass: the
806        // heredoc parser must NOT consume the pipe + downstream
807        // commands as part of the body.
808        let s = p("cat <<EOF | bash\nrm\nEOF");
809        assert_eq!(
810            s.0[0].pipeline.commands.len(),
811            2,
812            "pipeline must keep `bash` as a second command"
813        );
814    }
815    #[test]
816    fn heredoc_followed_by_next_statement() {
817        // After the heredoc body terminator, the script can continue
818        // with another statement.
819        let s = p("cat <<EOF\nhello\nEOF\nls");
820        assert_eq!(s.0.len(), 2);
821    }
822
823    #[test]
824    fn env_prefix() {
825        let s = p("FOO='bar baz' ls -la");
826        let cmd = simple(&s);
827        assert_eq!(cmd.env[0].0, "FOO");
828        assert_eq!(cmd.env[0].1.eval(), "bar baz");
829    }
830    #[test]
831    fn cmd_substitution() { assert!(matches!(&simple(&p("echo $(ls)")).words[1].0[0], WordPart::CmdSub(_))); }
832    #[test]
833    fn backtick_substitution() { assert_eq!(simple(&p("ls `pwd`")).words[1].eval(), "__SAFE_CHAINS_SUB__"); }
834    #[test]
835    fn nested_substitution() {
836        if let WordPart::CmdSub(inner) = &simple(&p("echo $(echo $(ls))")).words[1].0[0] {
837            assert!(matches!(&simple(inner).words[1].0[0], WordPart::CmdSub(_)));
838        } else { panic!("expected CmdSub"); }
839    }
840
841    #[test]
842    fn subshell_test() { assert!(matches!(&p("(echo hello)").0[0].pipeline.commands[0], Cmd::Subshell { .. })); }
843    #[test]
844    fn negation() { assert!(p("! echo hello").0[0].pipeline.bang); }
845
846    #[test]
847    fn for_loop() { assert!(matches!(&p("for x in 1 2 3; do echo $x; done").0[0].pipeline.commands[0], Cmd::For { var, .. } if var == "x")); }
848    #[test]
849    fn while_loop() { assert!(matches!(&p("while test -f /tmp/foo; do sleep 1; done").0[0].pipeline.commands[0], Cmd::While { .. })); }
850    #[test]
851    fn if_then_fi() {
852        if let Cmd::If { branches, else_body } = &p("if test -f foo; then echo exists; fi").0[0].pipeline.commands[0] {
853            assert_eq!(branches.len(), 1);
854            assert!(else_body.is_none());
855        } else { panic!("expected If"); }
856    }
857    #[test]
858    fn if_elif_else() {
859        if let Cmd::If { branches, else_body } = &p("if test -f a; then echo a; elif test -f b; then echo b; else echo c; fi").0[0].pipeline.commands[0] {
860            assert_eq!(branches.len(), 2);
861            assert!(else_body.is_some());
862        } else { panic!("expected If"); }
863    }
864
865    #[test]
866    fn escaped_outside_quotes() { assert_eq!(words(&p("echo hello\\ world")), ["echo", "hello world"]); }
867    #[test]
868    fn double_quoted_escape() { assert_eq!(words(&p("echo \"hello\\\"world\"")), ["echo", "hello\"world"]); }
869    #[test]
870    fn assign_subst() { assert_eq!(simple(&p("out=$(ls)")).env[0].0, "out"); }
871
872    #[test]
873    fn unmatched_single_quote_fails() { assert!(parse("echo 'hello").is_none()); }
874    #[test]
875    fn unmatched_double_quote_fails() { assert!(parse("echo \"hello").is_none()); }
876    #[test]
877    fn unclosed_subshell_fails() { assert!(parse("(echo hello").is_none()); }
878    #[test]
879    fn unclosed_cmd_sub_fails() { assert!(parse("echo $(ls").is_none()); }
880    #[test]
881    fn for_missing_do_fails() { assert!(parse("for x in 1 2 3; echo $x; done").is_none()); }
882    #[test]
883    fn if_missing_fi_fails() { assert!(parse("if true; then echo hello").is_none()); }
884
885    #[test]
886    fn subshell_for() {
887        if let Cmd::Subshell { body, .. } = &p("(for x in 1 2; do echo $x; done)").0[0].pipeline.commands[0] {
888            assert!(matches!(&body.0[0].pipeline.commands[0], Cmd::For { .. }));
889        } else { panic!("expected Subshell"); }
890    }
891    #[test]
892    fn proc_sub_input() {
893        let s = p("diff <(sort a.txt) <(sort b.txt)");
894        let cmd = simple(&s);
895        assert_eq!(cmd.words.len(), 3);
896        assert!(matches!(&cmd.words[1].0[0], WordPart::ProcSub(_)));
897        assert!(matches!(&cmd.words[2].0[0], WordPart::ProcSub(_)));
898    }
899    #[test]
900    fn proc_sub_output() {
901        let s = p("tee >(grep error > /dev/null)");
902        let cmd = simple(&s);
903        assert_eq!(cmd.words.len(), 2);
904        assert!(matches!(&cmd.words[1].0[0], WordPart::ProcSub(_)));
905    }
906    #[test]
907    fn comment_only() {
908        let s = p("# just a comment");
909        assert!(s.0.is_empty());
910    }
911    #[test]
912    fn comment_before_command() {
913        let s = p("# comment\necho hello");
914        assert_eq!(words(&s), ["echo", "hello"]);
915    }
916    #[test]
917    fn inline_comment() {
918        let s = p("echo hello # this is a comment");
919        assert_eq!(words(&s), ["echo", "hello"]);
920    }
921    #[test]
922    fn comment_between_commands() {
923        let s = p("echo hello\n# middle comment\necho world");
924        assert_eq!(s.0.len(), 2);
925    }
926    #[test]
927    fn comment_after_semicolon() {
928        let s = p("echo hello; # comment\necho world");
929        assert_eq!(s.0.len(), 2);
930    }
931    #[test]
932    fn comment_in_for_loop() {
933        assert!(parse("for x in 1 2; do\n# loop body\necho $x\ndone").is_some());
934    }
935    #[test]
936    fn quoted_redirect_in_echo() {
937        let s = p("echo 'greater > than' test");
938        let cmd = simple(&s);
939        assert_eq!(cmd.words.len(), 3);
940        assert_eq!(cmd.redirs.len(), 0);
941    }
942
943    #[test]
944    fn parses_all_safe_commands() {
945        let cmds = [
946            "grep foo file.txt", "cat /etc/hosts", "jq '.key' file.json", "base64 -d",
947            "ls -la", "wc -l file.txt", "ps aux", "echo hello", "cat file.txt",
948            "echo $(ls)", "ls `pwd`", "echo $(echo $(ls))", "echo \"$(ls)\"",
949            "out=$(ls)", "out=$(git status)", "a=$(ls) b=$(pwd)",
950            "(echo hello)", "(ls)", "(ls && echo done)", "(echo hello; echo world)",
951            "(ls | grep foo)", "(echo hello) | grep hello", "(ls) && echo done",
952            "((echo hello))", "(for x in 1 2; do echo $x; done)",
953            "echo 'greater > than' test", "echo '$(safe)' arg",
954            "FOO='bar baz' ls -la", "FOO=\"bar baz\" ls -la",
955            "RACK_ENV=test bundle exec rspec spec/foo_spec.rb",
956            "grep foo file.txt | head -5", "cat file | sort | uniq",
957            "ls && echo done", "ls; echo done", "ls & echo done",
958            "grep -c , <<< 'hello,world,test'",
959            "cat <<EOF\nhello world\nEOF",
960            "cat <<'MARKER'\nsome text\nMARKER",
961            "cat <<-EOF\n\thello\nEOF",
962            "echo foo\necho bar", "ls\ncat file.txt",
963            "git log --oneline -20 | head -5",
964            "echo hello > /dev/null", "echo hello 2> /dev/null",
965            "echo hello >> /dev/null", "git log > /dev/null 2>&1",
966            "ls 2>&1", "cargo clippy 2>&1", "git log < /dev/null",
967            "for x in 1 2 3; do echo $x; done",
968            "for f in *.txt; do cat $f | grep pattern; done",
969            "for x in 1 2 3; do; done",
970            "for x in 1 2; do echo $x; done; for y in a b; do echo $y; done",
971            "for x in 1 2; do for y in a b; do echo $x $y; done; done",
972            "for x in 1 2; do echo $x; done && echo finished",
973            "for x in $(seq 1 5); do echo $x; done",
974            "while test -f /tmp/foo; do sleep 1; done",
975            "while ! test -f /tmp/done; do sleep 1; done",
976            "until test -f /tmp/ready; do sleep 1; done",
977            "if test -f foo; then echo exists; fi",
978            "if test -f foo; then echo yes; else echo no; fi",
979            "if test -f a; then echo a; elif test -f b; then echo b; else echo c; fi",
980            "for x in 1 2; do if test $x = 1; then echo one; fi; done",
981            "if true; then for x in 1 2; do echo $x; done; fi",
982            "diff <(sort a.txt) <(sort b.txt)",
983            "comm -23 file.txt <(sort other.txt)",
984            "cat <(echo hello)",
985            "# comment only",
986            "# comment\necho hello",
987            "echo hello # inline comment",
988            "echo one\n# between\necho two",
989            "! echo hello", "! test -f foo",
990            "echo for; echo done; echo if; echo fi",
991        ];
992        let mut failures = Vec::new();
993        for cmd in &cmds {
994            if parse(cmd).is_none() { failures.push(*cmd); }
995        }
996        assert!(failures.is_empty(), "failed on {} commands:\n{}", failures.len(), failures.join("\n"));
997    }
998}