Skip to main content

safe_chains/cst/
parse.rs

1use super::*;
2use winnow::ModalResult;
3use winnow::combinator::{alt, delimited, not, opt, preceded, repeat, separated, terminated};
4use winnow::error::{ContextError, ErrMode};
5use winnow::prelude::*;
6use winnow::token::{any, take_while};
7
8pub fn parse(input: &str) -> Option<Script> {
9    reset_heredoc_queue();
10    let result = script.parse(input).ok();
11    reset_heredoc_queue();
12    result
13}
14
15fn backtrack<T>() -> ModalResult<T> {
16    Err(ErrMode::Backtrack(ContextError::new()))
17}
18
19fn comment(input: &mut &str) -> ModalResult<()> {
20    if input.starts_with('#') {
21        if let Some(pos) = input.find('\n') {
22            *input = &input[pos + 1..];
23        } else {
24            *input = "";
25        }
26    }
27    Ok(())
28}
29
30fn ws(input: &mut &str) -> ModalResult<()> {
31    loop {
32        take_while(0.., [' ', '\t']).void().parse_next(input)?;
33        if input.starts_with('#') {
34            comment(input)?;
35        } else {
36            break;
37        }
38    }
39    Ok(())
40}
41
42fn sep(input: &mut &str) -> ModalResult<()> {
43    loop {
44        take_while(0.., [' ', '\t', ';', '\n']).void().parse_next(input)?;
45        if input.starts_with('#') {
46            comment(input)?;
47        } else {
48            break;
49        }
50    }
51    Ok(())
52}
53
54fn eat_keyword(input: &mut &str, kw: &str) -> ModalResult<()> {
55    if !input.starts_with(kw) {
56        return backtrack();
57    }
58    if input
59        .as_bytes()
60        .get(kw.len())
61        .is_some_and(|&b| b.is_ascii_alphanumeric() || b == b'_')
62    {
63        return backtrack();
64    }
65    *input = &input[kw.len()..];
66    Ok(())
67}
68
/// Reserved words that end the statement list of an enclosing compound command.
const SCRIPT_STOPS: &[&str] = &["do", "done", "elif", "else", "fi", "then"];

/// Reports whether `input` starts with something that ends the current
/// statement list: a closing `)` or `}`, or one of `SCRIPT_STOPS` standing as
/// a whole word (not followed by an ASCII letter, digit, or `_`).
fn at_script_stop(input: &str) -> bool {
    if matches!(input.as_bytes().first(), Some(b')' | b'}')) {
        return true;
    }
    SCRIPT_STOPS.iter().any(|kw| match input.strip_prefix(kw) {
        None => false,
        Some(rest) => !matches!(
            rest.as_bytes().first(),
            Some(b) if b.is_ascii_alphanumeric() || *b == b'_'
        ),
    })
}
82
/// Reports whether `c` ends an unquoted word: blank, newline, `;`, `|`, `&`,
/// `)`, `>` or `<`.
fn is_word_boundary(c: char) -> bool {
    const BOUNDARIES: [char; 9] = [' ', '\t', '\n', ';', '|', '&', ')', '>', '<'];
    BOUNDARIES.contains(&c)
}
86
87fn is_word_literal(c: char) -> bool {
88    !is_word_boundary(c) && !matches!(c, '\'' | '"' | '`' | '\\' | '(' | '$')
89}
90
/// Reports whether `c` is plain text inside double quotes, i.e. anything
/// except `"`, `\`, `` ` `` and `$`.
fn is_dq_literal(c: char) -> bool {
    const SPECIAL: [char; 4] = ['"', '\\', '`', '$'];
    !SPECIAL.contains(&c)
}
94
95// === Script ===
96
97fn script(input: &mut &str) -> ModalResult<Script> {
98    sep.parse_next(input)?;
99    let mut stmts = Vec::new();
100    while let Some(pl) = opt(pipeline).parse_next(input)? {
101        ws.parse_next(input)?;
102        let op = opt(list_op).parse_next(input)?;
103        stmts.push(Stmt { pipeline: pl, op });
104        // Drain any heredoc bodies pending from this statement before
105        // the next pipeline starts; otherwise the body would be parsed
106        // as the next statement (which would either misvalidate or
107        // misalign the line counter).
108        drain_pending_heredocs(input);
109        if op.is_none() {
110            break;
111        }
112        sep.parse_next(input)?;
113    }
114    Ok(Script(stmts))
115}
116
117fn list_op(input: &mut &str) -> ModalResult<ListOp> {
118    ws.parse_next(input)?;
119    alt((
120        "&&".value(ListOp::And),
121        "||".value(ListOp::Or),
122        '\n'.value(ListOp::Semi),
123        ';'.value(ListOp::Semi),
124        ('&', not('>')).value(ListOp::Amp),
125    ))
126    .parse_next(input)
127}
128
129fn pipe_sep(input: &mut &str) -> ModalResult<()> {
130    (ws, '|', not('|'), ws).void().parse_next(input)
131}
132
133// === Pipeline ===
134
135fn pipeline(input: &mut &str) -> ModalResult<Pipeline> {
136    ws.parse_next(input)?;
137    if at_script_stop(input) {
138        return backtrack();
139    }
140    let bang = opt(terminated('!', ws)).parse_next(input)?.is_some();
141    let commands: Vec<Cmd> = separated(1.., command, pipe_sep).parse_next(input)?;
142    Ok(Pipeline { bang, commands })
143}
144
145// === Command ===
146
147fn command(input: &mut &str) -> ModalResult<Cmd> {
148    ws.parse_next(input)?;
149    if at_script_stop(input) {
150        return backtrack();
151    }
152    alt((
153        subshell,
154        brace_group,
155        for_cmd,
156        while_cmd,
157        until_cmd,
158        if_cmd,
159        double_bracket_cmd,
160        simple_cmd.map(Cmd::Simple),
161    ))
162    .parse_next(input)
163}
164
165fn trailing_redirs(input: &mut &str) -> ModalResult<Vec<Redir>> {
166    let mut redirs = Vec::new();
167    loop {
168        ws.parse_next(input)?;
169        if let Some(r) = opt(redirect).parse_next(input)? {
170            redirs.push(r);
171        } else {
172            break;
173        }
174    }
175    Ok(redirs)
176}
177
178fn subshell(input: &mut &str) -> ModalResult<Cmd> {
179    let body = delimited(('(', ws), script, (ws, ')')).parse_next(input)?;
180    let redirs = trailing_redirs(input)?;
181    Ok(Cmd::Subshell { body, redirs })
182}
183
184fn brace_group(input: &mut &str) -> ModalResult<Cmd> {
185    if !input.starts_with('{') {
186        return backtrack();
187    }
188    if !input
189        .as_bytes()
190        .get(1)
191        .is_some_and(|b| matches!(b, b' ' | b'\t' | b'\n'))
192    {
193        return backtrack();
194    }
195    *input = &input[1..];
196    sep.parse_next(input)?;
197    let body = script.parse_next(input)?;
198    if body.0.is_empty() {
199        return backtrack();
200    }
201    sep.parse_next(input)?;
202    if !input.starts_with('}') {
203        return backtrack();
204    }
205    let last_op = body.0.last().and_then(|s| s.op);
206    if last_op.is_none() {
207        return backtrack();
208    }
209    *input = &input[1..];
210    let redirs = trailing_redirs(input)?;
211    Ok(Cmd::BraceGroup { body, redirs })
212}
213
214// === Simple Command ===
215
216fn simple_cmd(input: &mut &str) -> ModalResult<SimpleCmd> {
217    let env: Vec<(String, Word)> =
218        repeat(0.., terminated(assignment, ws)).parse_next(input)?;
219    let mut words = Vec::new();
220    let mut redirs = Vec::new();
221
222    loop {
223        ws.parse_next(input)?;
224        if at_cmd_end(input) {
225            break;
226        }
227        if let Some(r) = opt(redirect).parse_next(input)? {
228            redirs.push(r);
229        } else if let Some(w) = opt(word).parse_next(input)? {
230            words.push(w);
231        } else {
232            break;
233        }
234    }
235
236    if env.is_empty() && words.is_empty() && redirs.is_empty() {
237        return backtrack();
238    }
239    Ok(SimpleCmd { env, words, redirs })
240}
241
/// Reports whether a simple command ends here: at end of input, or at a
/// newline, `;`, `|`, `&` or `)`.
fn at_cmd_end(input: &str) -> bool {
    match input.as_bytes().first() {
        None => true,
        Some(&b) => matches!(b, b'\n' | b';' | b'|' | b'&' | b')'),
    }
}
249
250fn assignment(input: &mut &str) -> ModalResult<(String, Word)> {
251    let n: &str = take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_')
252        .parse_next(input)?;
253    '='.parse_next(input)?;
254    let value = opt(word)
255        .parse_next(input)?
256        .unwrap_or(Word(vec![WordPart::Lit(String::new())]));
257    Ok((n.to_string(), value))
258}
259
260// === Redirect ===
261
262fn redirect(input: &mut &str) -> ModalResult<Redir> {
263    let fd = opt(fd_prefix).parse_next(input)?;
264    alt((
265        preceded("<<<", (ws, word)).map(|(_, target)| Redir::HereStr(target)),
266        heredoc,
267        preceded(">>", (ws, word)).map(move |(_, target)| Redir::Write {
268            fd: fd.unwrap_or(1),
269            target,
270            append: true,
271        }),
272        preceded(">&", fd_target).map(move |dst| Redir::DupFd {
273            src: fd.unwrap_or(1),
274            dst,
275        }),
276        preceded('>', (ws, word)).map(move |(_, target)| Redir::Write {
277            fd: fd.unwrap_or(1),
278            target,
279            append: false,
280        }),
281        preceded('<', (ws, word)).map(move |(_, target)| Redir::Read {
282            fd: fd.unwrap_or(0),
283            target,
284        }),
285    ))
286    .parse_next(input)
287}
288
289fn heredoc(input: &mut &str) -> ModalResult<Redir> {
290    "<<".parse_next(input)?;
291    let strip_tabs = opt('-').parse_next(input)?.is_some();
292    ws.parse_next(input)?;
293    let delimiter = heredoc_delimiter.parse_next(input)?;
294    // Bash semantics: the heredoc body lives on lines AFTER the
295    // command line is finished, not immediately after `<<DELIM`. The
296    // command line can continue with more redirects, a pipe, etc.
297    // Push the delimiter onto a thread-local queue; the body is
298    // drained at the next `\n`/`;` separator by drain_pending_heredocs.
299    PENDING_HEREDOCS.with(|q| {
300        q.borrow_mut().push(PendingHeredoc {
301            delimiter: delimiter.clone(),
302            strip_tabs,
303        });
304    });
305    Ok(Redir::HereDoc { delimiter, strip_tabs })
306}
307
/// A heredoc whose `<<DELIM` operator has been parsed but whose body has not
/// yet been skipped in the input.
#[derive(Debug, Clone)]
struct PendingHeredoc {
    /// Text of the line that terminates the body (surrounding quotes, if any,
    /// already removed by `heredoc_delimiter`).
    delimiter: String,
    /// `true` for the `<<-` form: leading tabs are trimmed from a line before
    /// it is compared with `delimiter`.
    strip_tabs: bool,
}
313
thread_local! {
    // Per-thread queue of heredocs still waiting for their bodies, in the
    // order their operators were parsed. Filled by `heredoc`; emptied by
    // `drain_pending_heredocs` and `reset_heredoc_queue`.
    static PENDING_HEREDOCS: std::cell::RefCell<Vec<PendingHeredoc>> =
        const { std::cell::RefCell::new(Vec::new()) };
}
318
319fn drain_pending_heredocs(input: &mut &str) {
320    let pending: Vec<PendingHeredoc> =
321        PENDING_HEREDOCS.with(|q| std::mem::take(&mut *q.borrow_mut()));
322    for h in pending {
323        if !skip_heredoc_body(input, &h.delimiter, h.strip_tabs) {
324            // Couldn't find the matching delimiter line. Leave input
325            // as-is; the parser will likely fail on the leftover body
326            // text, which is the safe outcome (we deny on parse fail).
327            return;
328        }
329    }
330}
331
/// Advances `input` past a heredoc body, up to and including the line that
/// equals `delimiter` (and that line's newline, when present).
///
/// With `strip_tabs`, leading tab characters are removed from each line
/// before it is compared with `delimiter`.
///
/// Returns `true` when the delimiter line was found. Returns `false`, with
/// `input` unchanged, when no line matches.
fn skip_heredoc_body(input: &mut &str, delimiter: &str, strip_tabs: bool) -> bool {
    let text: &str = *input;
    // Byte offset in `text` at which the current line starts.
    let mut line_start = 0;
    for raw_line in text.split('\n') {
        let line_end = line_start + raw_line.len();
        let candidate = if strip_tabs {
            raw_line.trim_start_matches('\t')
        } else {
            raw_line
        };
        if candidate == delimiter {
            // Step over the newline too, unless the delimiter ends the input.
            let resume = if line_end < text.len() {
                line_end + 1
            } else {
                line_end
            };
            *input = &text[resume..];
            return true;
        }
        line_start = line_end + 1;
    }
    false
}
362
363fn reset_heredoc_queue() {
364    PENDING_HEREDOCS.with(|q| q.borrow_mut().clear());
365}
366
367fn heredoc_delimiter(input: &mut &str) -> ModalResult<String> {
368    alt((
369        delimited('\'', take_while(0.., |c| c != '\''), '\'').map(|s: &str| s.to_string()),
370        delimited('"', take_while(0.., |c| c != '"'), '"').map(|s: &str| s.to_string()),
371        take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_').map(|s: &str| s.to_string()),
372    ))
373    .parse_next(input)
374}
375
376fn fd_prefix(input: &mut &str) -> ModalResult<u32> {
377    let b = input.as_bytes();
378    if b.len() >= 2 && b[0].is_ascii_digit() && matches!(b[1], b'>' | b'<') {
379        let d = (b[0] - b'0') as u32;
380        *input = &input[1..];
381        Ok(d)
382    } else {
383        backtrack()
384    }
385}
386
387fn fd_target(input: &mut &str) -> ModalResult<String> {
388    alt((
389        '-'.value("-".to_string()),
390        take_while(1.., |c: char| c.is_ascii_digit()).map(|s: &str| s.to_string()),
391    ))
392    .parse_next(input)
393}
394
395// === Word ===
396
397fn word(input: &mut &str) -> ModalResult<Word> {
398    repeat(1.., word_part)
399        .map(Word)
400        .parse_next(input)
401}
402
403fn word_part(input: &mut &str) -> ModalResult<WordPart> {
404    if input.is_empty() {
405        return backtrack();
406    }
407    if input.starts_with("<(") || input.starts_with(">(") {
408        return proc_sub(input);
409    }
410    if is_word_boundary(input.as_bytes()[0] as char) {
411        return backtrack();
412    }
413    alt((single_quoted, double_quoted, arith_sub, cmd_sub, backtick_part, escaped, dollar_lit(is_word_literal), lit(is_word_literal)))
414        .parse_next(input)
415}
416
417fn single_quoted(input: &mut &str) -> ModalResult<WordPart> {
418    delimited('\'', take_while(0.., |c| c != '\''), '\'')
419        .map(|s: &str| WordPart::SQuote(s.to_string()))
420        .parse_next(input)
421}
422
423fn double_quoted(input: &mut &str) -> ModalResult<WordPart> {
424    delimited('"', repeat(0.., dq_part).map(Word), '"')
425        .map(WordPart::DQuote)
426        .parse_next(input)
427}
428
429fn cmd_sub(input: &mut &str) -> ModalResult<WordPart> {
430    delimited(("$(", ws), script, (ws, ')'))
431        .map(WordPart::CmdSub)
432        .parse_next(input)
433}
434
435fn proc_sub(input: &mut &str) -> ModalResult<WordPart> {
436    if !(input.starts_with("<(") || input.starts_with(">(")) {
437        return backtrack();
438    }
439    *input = &input[1..];
440    delimited(('(', ws), script, (ws, ')'))
441        .map(WordPart::ProcSub)
442        .parse_next(input)
443}
444
445fn arith_sub(input: &mut &str) -> ModalResult<WordPart> {
446    if !input.starts_with("$((") {
447        return backtrack();
448    }
449    let body_start = 3;
450    let bytes = input.as_bytes();
451    let mut depth: i32 = 1;
452    let mut i = body_start;
453    while i < bytes.len() {
454        match bytes[i] {
455            b'(' => depth += 1,
456            b')' => {
457                if depth == 1 && i + 1 < bytes.len() && bytes[i + 1] == b')' {
458                    let body = input[body_start..i].to_string();
459                    if body.contains("$(") || body.contains('`') {
460                        return backtrack();
461                    }
462                    *input = &input[i + 2..];
463                    return Ok(WordPart::Arith(body));
464                }
465                depth -= 1;
466                if depth < 0 {
467                    return backtrack();
468                }
469            }
470            _ => {}
471        }
472        i += 1;
473    }
474    backtrack()
475}
476
477fn backtick_part(input: &mut &str) -> ModalResult<WordPart> {
478    delimited('`', backtick_inner, '`')
479        .map(WordPart::Backtick)
480        .parse_next(input)
481}
482
483fn escaped(input: &mut &str) -> ModalResult<WordPart> {
484    preceded('\\', any).map(WordPart::Escape).parse_next(input)
485}
486
487fn lit(pred: fn(char) -> bool) -> impl FnMut(&mut &str) -> ModalResult<WordPart> {
488    move |input: &mut &str| {
489        take_while(1.., pred)
490            .map(|s: &str| WordPart::Lit(s.to_string()))
491            .parse_next(input)
492    }
493}
494
495fn dollar_lit(pred: fn(char) -> bool) -> impl FnMut(&mut &str) -> ModalResult<WordPart> {
496    move |input: &mut &str| {
497        ('$', not('(')).void().parse_next(input)?;
498        let rest: &str = take_while(0.., pred).parse_next(input)?;
499        Ok(WordPart::Lit(format!("${rest}")))
500    }
501}
502
503// === Double-quoted parts ===
504
505fn dq_part(input: &mut &str) -> ModalResult<WordPart> {
506    if input.is_empty() || input.starts_with('"') {
507        return backtrack();
508    }
509    alt((dq_escape, arith_sub, cmd_sub, backtick_part, dollar_lit(is_dq_literal), lit(is_dq_literal)))
510        .parse_next(input)
511}
512
513fn dq_escape(input: &mut &str) -> ModalResult<WordPart> {
514    preceded('\\', any)
515        .map(|c: char| match c {
516            '"' | '\\' | '$' | '`' => WordPart::Escape(c),
517            _ => WordPart::Lit(format!("\\{c}")),
518        })
519        .parse_next(input)
520}
521
522// === Backtick inner content ===
523
524fn backtick_inner(input: &mut &str) -> ModalResult<String> {
525    repeat(0.., alt((bt_escape, bt_literal)))
526        .fold(String::new, |mut acc, chunk: &str| {
527            acc.push_str(chunk);
528            acc
529        })
530        .parse_next(input)
531}
532
533fn bt_escape<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
534    ('\\', any).take().parse_next(input)
535}
536
537fn bt_literal<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
538    take_while(1.., |c: char| c != '`' && c != '\\').parse_next(input)
539}
540
541// === Compound Commands ===
542
543fn for_cmd(input: &mut &str) -> ModalResult<Cmd> {
544    eat_keyword(input, "for")?;
545    ws.parse_next(input)?;
546    let var = name.parse_next(input)?;
547    ws.parse_next(input)?;
548
549    let items = if eat_keyword(input, "in").is_ok() {
550        ws.parse_next(input)?;
551        repeat(0.., terminated(word, ws)).parse_next(input)?
552    } else {
553        vec![]
554    };
555
556    let body = do_done_body.parse_next(input)?;
557    Ok(Cmd::For { var, items, body })
558}
559
560fn while_cmd(input: &mut &str) -> ModalResult<Cmd> {
561    eat_keyword(input, "while")?;
562    ws.parse_next(input)?;
563    let cond = script.parse_next(input)?;
564    let body = do_done_body.parse_next(input)?;
565    Ok(Cmd::While { cond, body })
566}
567
568fn until_cmd(input: &mut &str) -> ModalResult<Cmd> {
569    eat_keyword(input, "until")?;
570    ws.parse_next(input)?;
571    let cond = script.parse_next(input)?;
572    let body = do_done_body.parse_next(input)?;
573    Ok(Cmd::Until { cond, body })
574}
575
576fn do_done_body(input: &mut &str) -> ModalResult<Script> {
577    sep.parse_next(input)?;
578    eat_keyword(input, "do")?;
579    sep.parse_next(input)?;
580    let body = script.parse_next(input)?;
581    sep.parse_next(input)?;
582    eat_keyword(input, "done")?;
583    Ok(body)
584}
585
586fn if_cmd(input: &mut &str) -> ModalResult<Cmd> {
587    eat_keyword(input, "if")?;
588    ws.parse_next(input)?;
589    let mut branches = vec![cond_then_body.parse_next(input)?];
590    let mut else_body = None;
591
592    loop {
593        sep.parse_next(input)?;
594        if eat_keyword(input, "elif").is_ok() {
595            ws.parse_next(input)?;
596            branches.push(cond_then_body.parse_next(input)?);
597        } else if eat_keyword(input, "else").is_ok() {
598            sep.parse_next(input)?;
599            else_body = Some(script.parse_next(input)?);
600            break;
601        } else {
602            break;
603        }
604    }
605
606    sep.parse_next(input)?;
607    eat_keyword(input, "fi")?;
608    Ok(Cmd::If { branches, else_body })
609}
610
611fn cond_then_body(input: &mut &str) -> ModalResult<Branch> {
612    let cond = script.parse_next(input)?;
613    sep.parse_next(input)?;
614    eat_keyword(input, "then")?;
615    sep.parse_next(input)?;
616    let body = script.parse_next(input)?;
617    Ok(Branch { cond, body })
618}
619
620fn double_bracket_cmd(input: &mut &str) -> ModalResult<Cmd> {
621    if !input.starts_with("[[") {
622        return backtrack();
623    }
624    let bytes = input.as_bytes();
625    if bytes.len() < 3 || !matches!(bytes[2], b' ' | b'\t' | b'\n') {
626        return backtrack();
627    }
628    *input = &input[2..];
629
630    let mut words: Vec<Word> = Vec::new();
631    loop {
632        ws.parse_next(input)?;
633        if at_double_bracket_end(input) {
634            *input = &input[2..];
635            let redirs = trailing_redirs(input)?;
636            return Ok(Cmd::DoubleBracket { words, redirs });
637        }
638        if input.is_empty() {
639            return backtrack();
640        }
641        let w = bracket_word.parse_next(input)?;
642        words.push(w);
643    }
644}
645
/// Reports whether `input` starts with a closing `]]`: the two brackets
/// followed by end of input, a blank, a newline, or one of `;`, `&`, `|`,
/// `)`, `>`, `<`.
fn at_double_bracket_end(input: &str) -> bool {
    let Some(after) = input.strip_prefix("]]") else {
        return false;
    };
    match after.chars().next() {
        None => true,
        Some(c) => matches!(c, ' ' | '\t' | '\n' | ';' | '&' | '|' | ')' | '>' | '<'),
    }
}
656
657fn bracket_word(input: &mut &str) -> ModalResult<Word> {
658    repeat(1.., bracket_word_part).map(Word).parse_next(input)
659}
660
661fn bracket_word_part(input: &mut &str) -> ModalResult<WordPart> {
662    if input.is_empty() {
663        return backtrack();
664    }
665    if matches!(input.as_bytes()[0], b' ' | b'\t' | b'\n') {
666        return backtrack();
667    }
668    if at_double_bracket_end(input) {
669        return backtrack();
670    }
671    alt((
672        single_quoted,
673        double_quoted,
674        arith_sub,
675        cmd_sub,
676        backtick_part,
677        escaped,
678        dollar_lit(is_bracket_literal),
679        bracket_lit,
680    ))
681    .parse_next(input)
682}
683
/// Reports whether `c` is plain text inside `[[ ... ]]`: anything except a
/// quote, backtick, backslash, `$`, space, tab or newline.
fn is_bracket_literal(c: char) -> bool {
    const STOPS: [char; 8] = ['\'', '"', '`', '\\', '$', ' ', '\t', '\n'];
    !STOPS.contains(&c)
}
687
688fn bracket_lit(input: &mut &str) -> ModalResult<WordPart> {
689    // Byte-by-byte scan relies on every stop char being single-byte ASCII —
690    // multibyte UTF-8 continuation bytes always pass `is_bracket_literal` and
691    // get consumed as part of the same `Lit`, so `end` only lands on a char
692    // boundary.
693    let bytes = input.as_bytes();
694    let mut end = 0;
695    while end < bytes.len() {
696        let c = bytes[end] as char;
697        if !is_bracket_literal(c) {
698            break;
699        }
700        if c == ']' && at_double_bracket_end(&input[end..]) {
701            break;
702        }
703        end += 1;
704    }
705    if end == 0 {
706        return backtrack();
707    }
708    let lit = input[..end].to_string();
709    *input = &input[end..];
710    Ok(WordPart::Lit(lit))
711}
712
713fn name(input: &mut &str) -> ModalResult<String> {
714    take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_')
715        .map(|s: &str| s.to_string())
716        .parse_next(input)
717}
718
719#[cfg(test)]
720mod tests {
721    use super::*;
722
723    fn p(input: &str) -> Script {
724        parse(input).unwrap_or_else(|| panic!("failed to parse: {input}"))
725    }
726
727    fn words(script: &Script) -> Vec<String> {
728        match &script.0[0].pipeline.commands[0] {
729            Cmd::Simple(s) => s.words.iter().map(|w| w.eval()).collect(),
730            _ => panic!("expected simple command"),
731        }
732    }
733
734    fn simple(script: &Script) -> &SimpleCmd {
735        match &script.0[0].pipeline.commands[0] {
736            Cmd::Simple(s) => s,
737            _ => panic!("expected simple command"),
738        }
739    }
740
741    #[test]
742    fn simple_command() { assert_eq!(words(&p("echo hello")), ["echo", "hello"]); }
743    #[test]
744    fn flags() { assert_eq!(words(&p("ls -la")), ["ls", "-la"]); }
745    #[test]
746    fn single_quoted() { assert_eq!(words(&p("echo 'hello world'")), ["echo", "hello world"]); }
747    #[test]
748    fn double_quoted() { assert_eq!(words(&p("echo \"hello world\"")), ["echo", "hello world"]); }
749    #[test]
750    fn mixed_quotes() { assert_eq!(words(&p("jq '.key' file.json")), ["jq", ".key", "file.json"]); }
751
752    #[test]
753    fn pipeline_test() { assert_eq!(p("grep foo | head -5").0[0].pipeline.commands.len(), 2); }
754    #[test]
755    fn sequence_and() { assert_eq!(p("ls && echo done").0[0].op, Some(ListOp::And)); }
756    #[test]
757    fn sequence_semi() { assert_eq!(p("ls; echo done").0.len(), 2); }
758    #[test]
759    fn newline_separator() { assert_eq!(p("echo foo\necho bar").0.len(), 2); }
760    #[test]
761    fn blank_line_between_statements() { assert_eq!(p("echo foo\n\necho bar").0.len(), 2); }
762    #[test]
763    fn multiple_blank_lines() { assert_eq!(p("echo foo\n\n\n\necho bar").0.len(), 2); }
764    #[test]
765    fn blank_line_with_whitespace() { assert_eq!(p("echo foo\n   \necho bar").0.len(), 2); }
766    #[test]
767    fn comment_between_statements() { assert_eq!(p("echo foo\n# comment\necho bar").0.len(), 2); }
768    #[test]
769    fn semi_then_blank() { assert_eq!(p("echo foo;\n\necho bar").0.len(), 2); }
770    #[test]
771    fn and_then_blank() { assert_eq!(p("echo foo &&\n\necho bar").0.len(), 2); }
772
773    #[test]
774    fn brace_group_simple() {
775        assert!(matches!(
776            &p("{ echo hello; }").0[0].pipeline.commands[0],
777            Cmd::BraceGroup { body, redirs } if body.0.len() == 1 && redirs.is_empty()
778        ));
779    }
780    #[test]
781    fn brace_group_multiple_stmts() {
782        if let Cmd::BraceGroup { body, .. } = &p("{ echo a; echo b; echo c; }").0[0].pipeline.commands[0] {
783            assert_eq!(body.0.len(), 3);
784        } else { panic!("expected BraceGroup"); }
785    }
786    #[test]
787    fn brace_group_with_redirect() {
788        if let Cmd::BraceGroup { redirs, .. } = &p("{ echo a; echo b; } > /tmp/out.txt").0[0].pipeline.commands[0] {
789            assert_eq!(redirs.len(), 1);
790            assert!(matches!(redirs[0], Redir::Write { .. }));
791        } else { panic!("expected BraceGroup"); }
792    }
793    #[test]
794    fn brace_group_with_append_redirect() {
795        if let Cmd::BraceGroup { redirs, .. } = &p("{ echo a; } >> log.txt").0[0].pipeline.commands[0] {
796            assert!(matches!(redirs[0], Redir::Write { append: true, .. }));
797        } else { panic!("expected BraceGroup"); }
798    }
799    #[test]
800    fn brace_group_with_stderr_redirect() {
801        if let Cmd::BraceGroup { redirs, .. } = &p("{ echo a; } 2>&1").0[0].pipeline.commands[0] {
802            assert!(matches!(redirs[0], Redir::DupFd { src: 2, .. }));
803        } else { panic!("expected BraceGroup"); }
804    }
805    #[test]
806    fn brace_group_newline_separated() {
807        if let Cmd::BraceGroup { body, .. } = &p("{\n  echo a\n  echo b\n}").0[0].pipeline.commands[0] {
808            assert_eq!(body.0.len(), 2);
809        } else { panic!("expected BraceGroup"); }
810    }
811    #[test]
812    fn brace_group_in_pipeline() {
813        let pl = &p("{ echo a; echo b; } | grep a").0[0].pipeline;
814        assert_eq!(pl.commands.len(), 2);
815        assert!(matches!(&pl.commands[0], Cmd::BraceGroup { .. }));
816    }
817    #[test]
818    fn brace_group_followed_by_other() {
819        let stmts = &p("{ echo a; }; echo b").0;
820        assert_eq!(stmts.len(), 2);
821        assert!(matches!(&stmts[0].pipeline.commands[0], Cmd::BraceGroup { .. }));
822    }
823    #[test]
824    fn brace_group_nested() {
825        if let Cmd::BraceGroup { body, .. } = &p("{ { echo inner; }; echo outer; }").0[0].pipeline.commands[0] {
826            assert_eq!(body.0.len(), 2);
827            assert!(matches!(&body.0[0].pipeline.commands[0], Cmd::BraceGroup { .. }));
828        } else { panic!("expected outer BraceGroup"); }
829    }
830    #[test]
831    fn brace_group_with_subshell_inside() {
832        if let Cmd::BraceGroup { body, .. } = &p("{ (echo sub); echo grp; }").0[0].pipeline.commands[0] {
833            assert_eq!(body.0.len(), 2);
834            assert!(matches!(&body.0[0].pipeline.commands[0], Cmd::Subshell { .. }));
835        } else { panic!("expected BraceGroup"); }
836    }
837    #[test]
838    fn brace_open_requires_whitespace() {
839        // {echo (no space) is NOT a brace group; it's a literal word
840        // that becomes part of a simple command. Parser should not
841        // treat it as a brace group.
842        let cmds = &p("{echo a}").0;
843        // Either parsed as a simple_cmd with a literal `{echo` token,
844        // or fails. Either way, it should NOT be a BraceGroup.
845        if !cmds.is_empty() {
846            assert!(!matches!(&cmds[0].pipeline.commands[0], Cmd::BraceGroup { .. }));
847        }
848    }
849    #[test]
850    fn subshell_with_redirect() {
851        if let Cmd::Subshell { redirs, .. } = &p("(echo hello) > /tmp/out.txt").0[0].pipeline.commands[0] {
852            assert_eq!(redirs.len(), 1);
853        } else { panic!("expected Subshell with redir"); }
854    }
855    #[test]
856    fn background() { assert_eq!(p("ls & echo done").0[0].op, Some(ListOp::Amp)); }
857
858    #[test]
859    fn redirect_dev_null() {
860        let s = p("echo hello > /dev/null");
861        let cmd = simple(&s);
862        assert_eq!(cmd.words.len(), 2);
863        assert!(matches!(&cmd.redirs[0], Redir::Write { fd: 1, append: false, .. }));
864    }
865    #[test]
866    fn redirect_stderr() {
867        assert!(matches!(&simple(&p("echo hello 2>&1")).redirs[0], Redir::DupFd { src: 2, dst } if dst == "1"));
868    }
869    #[test]
870    fn here_string() {
871        assert!(matches!(&simple(&p("grep -c , <<< 'hello,world,test'")).redirs[0], Redir::HereStr(_)));
872    }
873    #[test]
874    fn heredoc_bare() {
875        assert!(matches!(&simple(&p("cat <<EOF")).redirs[0], Redir::HereDoc { delimiter, strip_tabs: false } if delimiter == "EOF"));
876    }
877    #[test]
878    fn heredoc_with_content() {
879        let s = p("cat <<EOF\nhello world\nEOF");
880        assert!(matches!(&simple(&s).redirs[0], Redir::HereDoc { delimiter, .. } if delimiter == "EOF"));
881    }
882    #[test]
883    fn heredoc_quoted_delimiter() {
884        assert!(matches!(&simple(&p("cat <<'EOF'")).redirs[0], Redir::HereDoc { delimiter, .. } if delimiter == "EOF"));
885    }
886    #[test]
887    fn heredoc_strip_tabs() {
888        assert!(matches!(&simple(&p("cat <<-EOF")).redirs[0], Redir::HereDoc { strip_tabs: true, .. }));
889    }
890    #[test]
891    fn heredoc_pipe_on_command_line() {
892        // Correct bash: pipe is on the command line BEFORE the body,
893        // body terminator is on its own line.
894        let s = p("cat <<EOF | grep hello\nhello\nEOF");
895        assert_eq!(s.0[0].pipeline.commands.len(), 2);
896    }
897    #[test]
898    fn heredoc_body_does_not_swallow_pipe() {
899        // Regression for the `cat <<EOF | bash\n...\nEOF` bypass: the
900        // heredoc parser must NOT consume the pipe + downstream
901        // commands as part of the body.
902        let s = p("cat <<EOF | bash\nrm\nEOF");
903        assert_eq!(
904            s.0[0].pipeline.commands.len(),
905            2,
906            "pipeline must keep `bash` as a second command"
907        );
908    }
909    #[test]
910    fn heredoc_followed_by_next_statement() {
911        // After the heredoc body terminator, the script can continue
912        // with another statement.
913        let s = p("cat <<EOF\nhello\nEOF\nls");
914        assert_eq!(s.0.len(), 2);
915    }
916
917    #[test]
918    fn env_prefix() {
919        let s = p("FOO='bar baz' ls -la");
920        let cmd = simple(&s);
921        assert_eq!(cmd.env[0].0, "FOO");
922        assert_eq!(cmd.env[0].1.eval(), "bar baz");
923    }
924    #[test]
925    fn cmd_substitution() { assert!(matches!(&simple(&p("echo $(ls)")).words[1].0[0], WordPart::CmdSub(_))); }
926    #[test]
927    fn backtick_substitution() { assert_eq!(simple(&p("ls `pwd`")).words[1].eval(), "__SAFE_CHAINS_SUB__"); }
928    #[test]
929    fn nested_substitution() {
930        if let WordPart::CmdSub(inner) = &simple(&p("echo $(echo $(ls))")).words[1].0[0] {
931            assert!(matches!(&simple(inner).words[1].0[0], WordPart::CmdSub(_)));
932        } else { panic!("expected CmdSub"); }
933    }
934
935    #[test]
936    fn subshell_test() { assert!(matches!(&p("(echo hello)").0[0].pipeline.commands[0], Cmd::Subshell { .. })); }
937    #[test]
938    fn negation() { assert!(p("! echo hello").0[0].pipeline.bang); }
939
940    #[test]
941    fn for_loop() { assert!(matches!(&p("for x in 1 2 3; do echo $x; done").0[0].pipeline.commands[0], Cmd::For { var, .. } if var == "x")); }
942    #[test]
943    fn while_loop() { assert!(matches!(&p("while test -f /tmp/foo; do sleep 1; done").0[0].pipeline.commands[0], Cmd::While { .. })); }
944    #[test]
945    fn if_then_fi() {
946        if let Cmd::If { branches, else_body } = &p("if test -f foo; then echo exists; fi").0[0].pipeline.commands[0] {
947            assert_eq!(branches.len(), 1);
948            assert!(else_body.is_none());
949        } else { panic!("expected If"); }
950    }
951    #[test]
952    fn if_elif_else() {
953        if let Cmd::If { branches, else_body } = &p("if test -f a; then echo a; elif test -f b; then echo b; else echo c; fi").0[0].pipeline.commands[0] {
954            assert_eq!(branches.len(), 2);
955            assert!(else_body.is_some());
956        } else { panic!("expected If"); }
957    }
958
959    #[test]
960    fn escaped_outside_quotes() { assert_eq!(words(&p("echo hello\\ world")), ["echo", "hello world"]); }
961    #[test]
962    fn double_quoted_escape() { assert_eq!(words(&p("echo \"hello\\\"world\"")), ["echo", "hello\"world"]); }
963    #[test]
964    fn assign_subst() { assert_eq!(simple(&p("out=$(ls)")).env[0].0, "out"); }
965
966    #[test]
967    fn unmatched_single_quote_fails() { assert!(parse("echo 'hello").is_none()); }
968    #[test]
969    fn unmatched_double_quote_fails() { assert!(parse("echo \"hello").is_none()); }
970    #[test]
971    fn unclosed_subshell_fails() { assert!(parse("(echo hello").is_none()); }
972    #[test]
973    fn unclosed_cmd_sub_fails() { assert!(parse("echo $(ls").is_none()); }
974    #[test]
975    fn for_missing_do_fails() { assert!(parse("for x in 1 2 3; echo $x; done").is_none()); }
976    #[test]
977    fn if_missing_fi_fails() { assert!(parse("if true; then echo hello").is_none()); }
978
979    #[test]
980    fn subshell_for() {
981        if let Cmd::Subshell { body, .. } = &p("(for x in 1 2; do echo $x; done)").0[0].pipeline.commands[0] {
982            assert!(matches!(&body.0[0].pipeline.commands[0], Cmd::For { .. }));
983        } else { panic!("expected Subshell"); }
984    }
985    #[test]
986    fn proc_sub_input() {
987        let s = p("diff <(sort a.txt) <(sort b.txt)");
988        let cmd = simple(&s);
989        assert_eq!(cmd.words.len(), 3);
990        assert!(matches!(&cmd.words[1].0[0], WordPart::ProcSub(_)));
991        assert!(matches!(&cmd.words[2].0[0], WordPart::ProcSub(_)));
992    }
993    #[test]
994    fn proc_sub_output() {
995        let s = p("tee >(grep error > /dev/null)");
996        let cmd = simple(&s);
997        assert_eq!(cmd.words.len(), 2);
998        assert!(matches!(&cmd.words[1].0[0], WordPart::ProcSub(_)));
999    }
1000    #[test]
1001    fn comment_only() {
1002        let s = p("# just a comment");
1003        assert!(s.0.is_empty());
1004    }
1005    #[test]
1006    fn comment_before_command() {
1007        let s = p("# comment\necho hello");
1008        assert_eq!(words(&s), ["echo", "hello"]);
1009    }
1010    #[test]
1011    fn inline_comment() {
1012        let s = p("echo hello # this is a comment");
1013        assert_eq!(words(&s), ["echo", "hello"]);
1014    }
1015    #[test]
1016    fn comment_between_commands() {
1017        let s = p("echo hello\n# middle comment\necho world");
1018        assert_eq!(s.0.len(), 2);
1019    }
1020    #[test]
1021    fn comment_after_semicolon() {
1022        let s = p("echo hello; # comment\necho world");
1023        assert_eq!(s.0.len(), 2);
1024    }
1025    #[test]
1026    fn comment_in_for_loop() {
1027        assert!(parse("for x in 1 2; do\n# loop body\necho $x\ndone").is_some());
1028    }
1029    #[test]
1030    fn quoted_redirect_in_echo() {
1031        let s = p("echo 'greater > than' test");
1032        let cmd = simple(&s);
1033        assert_eq!(cmd.words.len(), 3);
1034        assert_eq!(cmd.redirs.len(), 0);
1035    }
1036
1037    #[test]
1038    fn parses_all_safe_commands() {
1039        let cmds = [
1040            "grep foo file.txt", "cat /etc/hosts", "jq '.key' file.json", "base64 -d",
1041            "ls -la", "wc -l file.txt", "ps aux", "echo hello", "cat file.txt",
1042            "echo $(ls)", "ls `pwd`", "echo $(echo $(ls))", "echo \"$(ls)\"",
1043            "out=$(ls)", "out=$(git status)", "a=$(ls) b=$(pwd)",
1044            "(echo hello)", "(ls)", "(ls && echo done)", "(echo hello; echo world)",
1045            "(ls | grep foo)", "(echo hello) | grep hello", "(ls) && echo done",
1046            "((echo hello))", "(for x in 1 2; do echo $x; done)",
1047            "echo 'greater > than' test", "echo '$(safe)' arg",
1048            "FOO='bar baz' ls -la", "FOO=\"bar baz\" ls -la",
1049            "RACK_ENV=test bundle exec rspec spec/foo_spec.rb",
1050            "grep foo file.txt | head -5", "cat file | sort | uniq",
1051            "ls && echo done", "ls; echo done", "ls & echo done",
1052            "grep -c , <<< 'hello,world,test'",
1053            "cat <<EOF\nhello world\nEOF",
1054            "cat <<'MARKER'\nsome text\nMARKER",
1055            "cat <<-EOF\n\thello\nEOF",
1056            "echo foo\necho bar", "ls\ncat file.txt",
1057            "git log --oneline -20 | head -5",
1058            "echo hello > /dev/null", "echo hello 2> /dev/null",
1059            "echo hello >> /dev/null", "git log > /dev/null 2>&1",
1060            "ls 2>&1", "cargo clippy 2>&1", "git log < /dev/null",
1061            "for x in 1 2 3; do echo $x; done",
1062            "for f in *.txt; do cat $f | grep pattern; done",
1063            "for x in 1 2 3; do; done",
1064            "for x in 1 2; do echo $x; done; for y in a b; do echo $y; done",
1065            "for x in 1 2; do for y in a b; do echo $x $y; done; done",
1066            "for x in 1 2; do echo $x; done && echo finished",
1067            "for x in $(seq 1 5); do echo $x; done",
1068            "while test -f /tmp/foo; do sleep 1; done",
1069            "while ! test -f /tmp/done; do sleep 1; done",
1070            "until test -f /tmp/ready; do sleep 1; done",
1071            "if test -f foo; then echo exists; fi",
1072            "if test -f foo; then echo yes; else echo no; fi",
1073            "if test -f a; then echo a; elif test -f b; then echo b; else echo c; fi",
1074            "for x in 1 2; do if test $x = 1; then echo one; fi; done",
1075            "if true; then for x in 1 2; do echo $x; done; fi",
1076            "diff <(sort a.txt) <(sort b.txt)",
1077            "comm -23 file.txt <(sort other.txt)",
1078            "cat <(echo hello)",
1079            "# comment only",
1080            "# comment\necho hello",
1081            "echo hello # inline comment",
1082            "echo one\n# between\necho two",
1083            "! echo hello", "! test -f foo",
1084            "echo for; echo done; echo if; echo fi",
1085        ];
1086        let mut failures = Vec::new();
1087        for cmd in &cmds {
1088            if parse(cmd).is_none() { failures.push(*cmd); }
1089        }
1090        assert!(failures.is_empty(), "failed on {} commands:\n{}", failures.len(), failures.join("\n"));
1091    }
1092}