Skip to main content

reef/
parser.rs

1//! Recursive-descent parser for bash syntax.
2//!
3//! Produces an AST of [`Cmd`] nodes that borrow from the input string.
4//! Uses Pratt parsing for arithmetic expressions.
5
6use std::borrow::Cow;
7
8use crate::ast::*;
9use crate::lexer::{Lexer, ParseError, is_meta};
10
/// Recursive-descent parser for bash syntax. Produces an AST of [`Cmd`] nodes.
///
/// Construct one with [`Parser::new`] and consume it with [`Parser::parse`].
pub struct Parser<'a> {
    /// Byte-oriented cursor over the input being parsed.
    lex: Lexer<'a>,
    /// Position the lexer should jump to once the current command has been
    /// parsed, so that a heredoc body is skipped. `None` when nothing is
    /// pending.
    /// NOTE(review): the code that fills this in is outside this view — confirm.
    heredoc_resume: Option<usize>,
}
16
17impl<'a> Parser<'a> {
18    /// Create a parser for the given bash input.
19    ///
20    /// # Examples
21    ///
22    /// ```
23    /// use reef::parser::Parser;
24    /// let parser = Parser::new("echo hello && echo world");
25    /// let cmds = parser.parse().unwrap();
26    /// assert_eq!(cmds.len(), 1); // one and-or list
27    /// ```
28    #[must_use]
29    pub fn new(input: &'a str) -> Self {
30        Parser {
31            lex: Lexer::new(input),
32            heredoc_resume: None,
33        }
34    }
35
36    /// Parse the input into a list of commands.
37    ///
38    /// Returns a `Vec<Cmd>` representing the top-level command list. Each
39    /// command borrows from the input string — no copying occurs.
40    ///
41    /// # Errors
42    ///
43    /// Returns [`ParseError`] when the input contains invalid or unsupported
44    /// bash syntax — for example, unmatched delimiters, unexpected tokens,
45    /// or unterminated strings.
46    ///
47    /// # Panics
48    ///
49    /// Panics (via internal `.expect()`) if the parser's own invariants are
50    /// violated — for example, consuming a single-element `Vec` that was
51    /// just checked to have exactly one item. These are logic errors, not
52    /// input-dependent, so well-formed callers will never trigger them.
53    ///
54    /// # Examples
55    ///
56    /// ```
57    /// use reef::parser::Parser;
58    /// let cmds = Parser::new("echo hello").parse().unwrap();
59    /// assert_eq!(cmds.len(), 1);
60    /// ```
61    #[must_use = "parsing produces a result that should be inspected"]
62    pub fn parse(mut self) -> Result<Vec<Cmd<'a>>, ParseError> {
63        self.cmd_list(&[])
64    }
65
66    /// Parse a heredoc body with variable/command expansion (unquoted delimiter).
67    /// Similar to double-quoted parsing but stops at EOF.
68    pub(crate) fn parse_heredoc_body(mut self) -> Result<Vec<Atom<'a>>, ParseError> {
69        let mut atoms = Vec::new();
70        let mut lit_start = self.lex.pos();
71
72        while !self.lex.is_eof() {
73            match self.lex.peek() {
74                b'$' => {
75                    if self.lex.pos() > lit_start {
76                        atoms.push(Atom::Lit(self.lex.slice(lit_start)));
77                    }
78                    atoms.push(self.dollar()?);
79                    lit_start = self.lex.pos();
80                }
81                b'\\' => {
82                    // In heredocs, only \$, \\, \`, and \newline are special
83                    let next = self.lex.peek_at(1);
84                    if matches!(next, b'$' | b'\\' | b'`' | b'\n') {
85                        if self.lex.pos() > lit_start {
86                            atoms.push(Atom::Lit(self.lex.slice(lit_start)));
87                        }
88                        self.lex.bump(); // skip backslash
89                        if self.lex.peek() == b'\n' {
90                            // line continuation — skip newline
91                            self.lex.bump();
92                        } else {
93                            let esc_start = self.lex.pos();
94                            self.lex.bump();
95                            atoms.push(Atom::Escaped(Cow::Borrowed(self.lex.slice(esc_start))));
96                        }
97                        lit_start = self.lex.pos();
98                    } else {
99                        // Not a special escape — keep the backslash as literal
100                        self.lex.bump();
101                    }
102                }
103                b'`' => {
104                    if self.lex.pos() > lit_start {
105                        atoms.push(Atom::Lit(self.lex.slice(lit_start)));
106                    }
107                    atoms.push(self.backtick()?);
108                    lit_start = self.lex.pos();
109                }
110                _ => {
111                    self.lex.bump();
112                }
113            }
114        }
115
116        if self.lex.pos() > lit_start {
117            atoms.push(Atom::Lit(self.lex.slice(lit_start)));
118        }
119        Ok(atoms)
120    }
121
122    // -----------------------------------------------------------------------
123    // Command lists
124    // -----------------------------------------------------------------------
125
126    /// Parse a sequence of commands separated by `;`, `\n`, or `&`.
127    /// Stops when a keyword in `terminators` is found or at EOF.
128    fn cmd_list(&mut self, terminators: &[&[u8]]) -> Result<Vec<Cmd<'a>>, ParseError> {
129        let mut cmds = Vec::new();
130        loop {
131            self.skip_separators();
132            if self.lex.is_eof() {
133                break;
134            }
135            if !terminators.is_empty() && self.lex.at_any_keyword(terminators) {
136                break;
137            }
138            if self.lex.peek() == b'#' {
139                self.lex.skip_comment();
140                continue;
141            }
142            let before = self.lex.pos();
143            cmds.push(self.cmd()?);
144            if self.lex.pos() == before {
145                return Err(self.lex.err("unexpected token"));
146            }
147        }
148        Ok(cmds)
149    }
150
151    /// Parse a single complete command (and-or list, possibly backgrounded).
152    fn cmd(&mut self) -> Result<Cmd<'a>, ParseError> {
153        let list = self.and_or()?;
154        self.lex.skip_blanks();
155        // If a heredoc was encountered, jump past its body
156        if let Some(pos) = self.heredoc_resume.take() {
157            self.lex.set_pos(pos);
158        }
159        if self.lex.eat(b'&') && self.lex.peek() != b'&' && self.lex.peek() != b'>' {
160            Ok(Cmd::Job(list))
161        } else {
162            Ok(Cmd::List(list))
163        }
164    }
165
166    /// Parse an and-or list: `pipeline ( && pipeline | || pipeline )*`
167    fn and_or(&mut self) -> Result<AndOrList<'a>, ParseError> {
168        let first = self.pipeline()?;
169        let mut rest = Vec::new();
170        loop {
171            self.lex.skip_blanks();
172            if self.lex.peek() == b'&' && self.lex.peek_at(1) == b'&' {
173                self.lex.bump_n(2);
174                self.skip_separators();
175                rest.push(AndOr::And(self.pipeline()?));
176            } else if self.lex.peek() == b'|' && self.lex.peek_at(1) == b'|' {
177                self.lex.bump_n(2);
178                self.skip_separators();
179                rest.push(AndOr::Or(self.pipeline()?));
180            } else {
181                break;
182            }
183        }
184        Ok(AndOrList { first, rest })
185    }
186
187    /// Parse a pipeline: `[!] executable ( | executable )*`
188    fn pipeline(&mut self) -> Result<Pipeline<'a>, ParseError> {
189        self.lex.skip_blanks();
190        let negated = self.lex.peek() == b'!' && is_meta(self.lex.peek_at(1));
191        if negated {
192            self.lex.bump();
193            self.lex.skip_blanks();
194        }
195        let first = self.executable()?;
196        self.lex.skip_blanks();
197        if self.lex.peek() != b'|' || self.lex.peek_at(1) == b'|' {
198            return if negated {
199                Ok(Pipeline::Pipe(true, vec![first]))
200            } else {
201                Ok(Pipeline::Single(first))
202            };
203        }
204        let mut cmds = vec![first];
205        while self.lex.peek() == b'|' && self.lex.peek_at(1) != b'|' {
206            // Check for |& (pipe stderr too) — treat as 2>&1 |
207            let pipe_stderr = self.lex.peek_at(1) == b'&';
208            self.lex.bump(); // skip |
209            if pipe_stderr {
210                self.lex.bump(); // skip &
211                // Add 2>&1 redirect to previous command
212                let redir_2to1 =
213                    Redir::DupWrite(Some(2), Word::Simple(WordPart::Bare(Atom::Lit("1"))));
214                Self::add_redirect_to_exec(
215                    cmds.last_mut().expect("pipe has at least one command"),
216                    redir_2to1,
217                );
218            }
219            self.skip_separators();
220            cmds.push(self.executable()?);
221            self.lex.skip_blanks();
222        }
223        Ok(Pipeline::Pipe(negated, cmds))
224    }
225
226    fn add_redirect_to_exec(exec: &mut Executable<'a>, redir: Redir<'a>) {
227        match exec {
228            Executable::Simple(cmd) => {
229                cmd.suffix.push(CmdSuffix::Redirect(redir));
230            }
231            Executable::Compound(cmd) | Executable::FuncDef(_, cmd) => {
232                cmd.redirects.push(redir);
233            }
234        }
235    }
236
237    /// Parse a single executable: compound command, function def, or simple command.
238    fn executable(&mut self) -> Result<Executable<'a>, ParseError> {
239        self.lex.skip_blanks();
240
241        // Standalone (( )) arithmetic
242        if self.lex.peek() == b'(' && self.lex.peek_at(1) == b'(' {
243            let kind = self.standalone_arith()?;
244            return self.wrap_compound(kind);
245        }
246
247        // Compound commands by keyword / delimiter
248        let b = self.lex.peek();
249        if b == b'{' && is_meta(self.lex.peek_at(1)) {
250            let kind = self.brace_group()?;
251            return self.wrap_compound(kind);
252        }
253        if b == b'(' && self.lex.peek_at(1) != b'(' {
254            let kind = self.subshell()?;
255            return self.wrap_compound(kind);
256        }
257        if self.lex.at_keyword(b"for") {
258            let kind = self.for_cmd()?;
259            return self.wrap_compound(kind);
260        }
261        if self.lex.at_keyword(b"while") {
262            let kind = self.while_cmd()?;
263            return self.wrap_compound(kind);
264        }
265        if self.lex.at_keyword(b"until") {
266            let kind = self.until_cmd()?;
267            return self.wrap_compound(kind);
268        }
269        if self.lex.at_keyword(b"if") {
270            let kind = self.if_cmd()?;
271            return self.wrap_compound(kind);
272        }
273        if self.lex.at_keyword(b"case") {
274            let kind = self.case_cmd()?;
275            return self.wrap_compound(kind);
276        }
277        if self.lex.at_keyword(b"select") {
278            return Err(self.lex.err("unsupported: select loop"));
279        }
280        if self.lex.at_keyword(b"[[") {
281            let kind = self.double_bracket()?;
282            return self.wrap_compound(kind);
283        }
284
285        // Check for function definition: name()
286        if self.at_func_def() {
287            return self.func_def();
288        }
289
290        // Simple command
291        Ok(Executable::Simple(self.simple_cmd()?))
292    }
293
294    /// Wrap a compound kind with trailing redirects into an Executable.
295    fn wrap_compound(&mut self, kind: CompoundKind<'a>) -> Result<Executable<'a>, ParseError> {
296        Ok(Executable::Compound(CompoundCmd {
297            kind,
298            redirects: self.collect_redirects()?,
299        }))
300    }
301
302    // -----------------------------------------------------------------------
303    // Compound commands
304    // -----------------------------------------------------------------------
305
306    fn for_cmd(&mut self) -> Result<CompoundKind<'a>, ParseError> {
307        self.lex.eat_str(b"for");
308        self.lex.skip_blanks();
309
310        // C-style for (( init; cond; step ))
311        if self.lex.peek() == b'(' && self.lex.peek_at(1) == b'(' {
312            return self.c_style_for();
313        }
314
315        let var = self.lex.read_name();
316        if var.is_empty() {
317            return Err(self.lex.err("expected variable name after 'for'"));
318        }
319        self.lex.skip_blanks();
320
321        let words = if self.lex.at_keyword(b"in") {
322            self.lex.eat_str(b"in");
323            self.lex.skip_blanks();
324            let mut words = Vec::new();
325            while !self.at_terminator() && !self.lex.at_keyword(b"do") {
326                words.push(self.word()?);
327                self.lex.skip_blanks();
328            }
329            Some(words)
330        } else {
331            None
332        };
333
334        self.eat_separator();
335        self.expect(b"do", "expected 'do' after for loop header")?;
336        let body = self.cmd_list(&[b"done"])?;
337        self.expect(b"done", "expected 'done' to close for loop")?;
338
339        Ok(CompoundKind::For { var, words, body })
340    }
341
342    fn c_style_for(&mut self) -> Result<CompoundKind<'a>, ParseError> {
343        self.lex.bump_n(2); // skip ((
344        self.lex.skip_blanks();
345
346        let init = if self.lex.peek() == b';' {
347            None
348        } else {
349            Some(self.arith(0)?)
350        };
351        self.lex.skip_blanks();
352        if !self.lex.eat(b';') {
353            return Err(self.lex.err("expected ';' in C-style for"));
354        }
355        self.lex.skip_blanks();
356
357        let cond = if self.lex.peek() == b';' {
358            None
359        } else {
360            Some(self.arith(0)?)
361        };
362        self.lex.skip_blanks();
363        if !self.lex.eat(b';') {
364            return Err(self.lex.err("expected ';' in C-style for"));
365        }
366        self.lex.skip_blanks();
367
368        let step = if self.lex.peek() == b')' && self.lex.peek_at(1) == b')' {
369            None
370        } else {
371            Some(self.arith(0)?)
372        };
373        self.lex.skip_blanks();
374
375        if !(self.lex.peek() == b')' && self.lex.peek_at(1) == b')') {
376            return Err(self.lex.err("expected '))' in C-style for"));
377        }
378        self.lex.bump_n(2);
379
380        self.eat_separator();
381        self.expect(b"do", "expected 'do' after for((...)) header")?;
382        let body = self.cmd_list(&[b"done"])?;
383        self.expect(b"done", "expected 'done' to close for loop")?;
384
385        Ok(CompoundKind::CFor {
386            init,
387            cond,
388            step,
389            body,
390        })
391    }
392
393    fn while_cmd(&mut self) -> Result<CompoundKind<'a>, ParseError> {
394        self.lex.eat_str(b"while");
395        self.skip_separators();
396        let guard = self.cmd_list(&[b"do"])?;
397        self.expect(b"do", "expected 'do' after while condition")?;
398        let body = self.cmd_list(&[b"done"])?;
399        self.expect(b"done", "expected 'done' to close while loop")?;
400        Ok(CompoundKind::While(GuardBody { guard, body }))
401    }
402
403    fn until_cmd(&mut self) -> Result<CompoundKind<'a>, ParseError> {
404        self.lex.eat_str(b"until");
405        self.skip_separators();
406        let guard = self.cmd_list(&[b"do"])?;
407        self.expect(b"do", "expected 'do' after until condition")?;
408        let body = self.cmd_list(&[b"done"])?;
409        self.expect(b"done", "expected 'done' to close until loop")?;
410        Ok(CompoundKind::Until(GuardBody { guard, body }))
411    }
412
413    fn if_cmd(&mut self) -> Result<CompoundKind<'a>, ParseError> {
414        self.lex.eat_str(b"if");
415        self.skip_separators();
416
417        let mut conditionals = Vec::new();
418        let guard = self.cmd_list(&[b"then"])?;
419        self.expect(b"then", "expected 'then' after if condition")?;
420        let body = self.cmd_list(&[b"elif", b"else", b"fi"])?;
421        conditionals.push(GuardBody { guard, body });
422
423        while self.lex.at_keyword(b"elif") {
424            self.lex.eat_str(b"elif");
425            self.skip_separators();
426            let guard = self.cmd_list(&[b"then"])?;
427            self.expect(b"then", "expected 'then' after elif condition")?;
428            let body = self.cmd_list(&[b"elif", b"else", b"fi"])?;
429            conditionals.push(GuardBody { guard, body });
430        }
431
432        let else_branch = if self.lex.at_keyword(b"else") {
433            self.lex.eat_str(b"else");
434            self.skip_separators();
435            Some(self.cmd_list(&[b"fi"])?)
436        } else {
437            None
438        };
439
440        self.expect(b"fi", "expected 'fi' to close if statement")?;
441        Ok(CompoundKind::If {
442            conditionals,
443            else_branch,
444        })
445    }
446
447    fn case_cmd(&mut self) -> Result<CompoundKind<'a>, ParseError> {
448        self.lex.eat_str(b"case");
449        self.lex.skip_blanks();
450        let word = self.word()?;
451        self.lex.skip_blanks();
452        self.expect(b"in", "expected 'in' after case word")?;
453        self.skip_separators();
454
455        let mut arms = Vec::new();
456        while !self.lex.at_keyword(b"esac") && !self.lex.is_eof() {
457            // Optional ( before patterns
458            self.lex.skip_blanks();
459            self.lex.eat(b'(');
460            self.lex.skip_blanks();
461
462            let mut patterns = Vec::new();
463            patterns.push(self.word()?);
464            self.lex.skip_blanks();
465            while self.lex.eat(b'|') {
466                self.lex.skip_blanks();
467                patterns.push(self.word()?);
468                self.lex.skip_blanks();
469            }
470
471            self.lex.skip_blanks();
472            self.lex.eat(b')');
473            // Only skip whitespace/newlines here — NOT semicolons.
474            // A bare ;; right after ) means an empty body; skip_separators
475            // would eat the ;; and break the terminator check.
476            self.lex.skip_blanks();
477            while self.lex.peek() == b'\n' {
478                self.lex.bump();
479                self.lex.skip_blanks();
480            }
481
482            let body = self.case_body()?;
483
484            // Eat ;; if present, error on ;& and ;;&
485            self.lex.skip_blanks();
486            if self.lex.peek() == b';' && self.lex.peek_at(1) == b';' {
487                if self.lex.peek_at(2) == b'&' {
488                    return Err(self.lex.err("unsupported: case ;;&"));
489                }
490                self.lex.bump_n(2);
491            } else if self.lex.peek() == b';' && self.lex.peek_at(1) == b'&' {
492                return Err(self.lex.err("unsupported: case fallthrough ;&"));
493            }
494            self.skip_separators();
495
496            arms.push(CaseArm { patterns, body });
497        }
498
499        self.expect(b"esac", "expected 'esac' to close case statement")?;
500        Ok(CompoundKind::Case { word, arms })
501    }
502
503    /// Parse case arm body — like `cmd_list` but stops at `;;`, `;&`, `;;&` and `esac`.
504    fn case_body(&mut self) -> Result<Vec<Cmd<'a>>, ParseError> {
505        let mut cmds = Vec::new();
506        loop {
507            // Skip separators but preserve ;; and ;&
508            loop {
509                self.lex.skip_blanks();
510                if self.lex.peek() == b';' && matches!(self.lex.peek_at(1), b';' | b'&') {
511                    break;
512                }
513                match self.lex.peek() {
514                    b';' | b'\n' => self.lex.bump(),
515                    b'#' => self.lex.skip_comment(),
516                    _ => break,
517                }
518            }
519            if self.lex.is_eof() || self.lex.at_keyword(b"esac") {
520                break;
521            }
522            if self.lex.peek() == b';' && matches!(self.lex.peek_at(1), b';' | b'&') {
523                break;
524            }
525            cmds.push(self.cmd()?);
526        }
527        Ok(cmds)
528    }
529
530    fn brace_group(&mut self) -> Result<CompoundKind<'a>, ParseError> {
531        self.lex.eat(b'{');
532        self.skip_separators();
533        let body = self.cmd_list(&[b"}"])?;
534        self.lex.skip_blanks();
535        if !self.lex.eat(b'}') {
536            return Err(self.lex.err("expected '}'"));
537        }
538        Ok(CompoundKind::Brace(body))
539    }
540
541    fn subshell(&mut self) -> Result<CompoundKind<'a>, ParseError> {
542        self.lex.eat(b'(');
543        self.skip_separators();
544        let body = self.cmd_list(&[b")"])?;
545        self.lex.skip_blanks();
546        if !self.lex.eat(b')') {
547            return Err(self.lex.err("expected ')'"));
548        }
549        Ok(CompoundKind::Subshell(body))
550    }
551
552    /// Parse `[[ ... ]]` — split internal `&&`/`||` into an and-or list.
553    fn double_bracket(&mut self) -> Result<CompoundKind<'a>, ParseError> {
554        fn build_test_cmd(words: Vec<Word<'_>>) -> Cmd<'_> {
555            let mut suffix = Vec::new();
556            suffix.push(CmdSuffix::Word(Word::Simple(WordPart::Bare(Atom::Lit(
557                "[[",
558            )))));
559            for w in words {
560                suffix.push(CmdSuffix::Word(w));
561            }
562            suffix.push(CmdSuffix::Word(Word::Simple(WordPart::Bare(Atom::Lit(
563                "]]",
564            )))));
565            Cmd::List(AndOrList {
566                first: Pipeline::Single(Executable::Simple(SimpleCmd {
567                    prefix: Vec::new(),
568                    suffix,
569                })),
570                rest: Vec::new(),
571            })
572        }
573
574        self.lex.eat_str(b"[[");
575        self.lex.skip_blanks();
576
577        // Collect all tokens inside [[ ]] as a command list.
578        // We need to handle && and || inside [[ ]] as splitting points.
579        let mut segments: Vec<(Vec<Word<'a>>, Option<&'a str>)> = Vec::new();
580        let mut current_words = Vec::new();
581
582        loop {
583            self.lex.skip_blanks();
584            if self.lex.is_eof() {
585                return Err(self.lex.err("unterminated [["));
586            }
587            // Check for ]]
588            if self.lex.peek() == b']' && self.lex.peek_at(1) == b']' {
589                self.lex.bump_n(2);
590                segments.push((current_words, None));
591                break;
592            }
593            // Check for && or || inside [[ ]]
594            if self.lex.peek() == b'&' && self.lex.peek_at(1) == b'&' {
595                segments.push((current_words, Some("&&")));
596                current_words = Vec::new();
597                self.lex.bump_n(2);
598                continue;
599            }
600            if self.lex.peek() == b'|' && self.lex.peek_at(1) == b'|' {
601                segments.push((current_words, Some("||")));
602                current_words = Vec::new();
603                self.lex.bump_n(2);
604                continue;
605            }
606            current_words.push(self.word_bracket()?);
607        }
608
609        if segments.len() == 1 {
610            let (words, _) = segments.into_iter().next().expect("len checked == 1");
611            return Ok(CompoundKind::DoubleBracket(vec![build_test_cmd(words)]));
612        }
613
614        // Multiple segments — build an and-or list
615        let mut iter = segments.into_iter();
616        let (first_words, first_op) = iter.next().expect("segments is non-empty");
617        let first_cmd = build_test_cmd(first_words);
618
619        let first_pipeline = Pipeline::Single(Executable::Compound(CompoundCmd {
620            kind: CompoundKind::DoubleBracket(vec![first_cmd]),
621            redirects: Vec::new(),
622        }));
623
624        let mut rest = Vec::new();
625        let mut pending_op = first_op;
626
627        for (words, op) in iter {
628            let test_cmd = build_test_cmd(words);
629            let pipe = Pipeline::Single(Executable::Compound(CompoundCmd {
630                kind: CompoundKind::DoubleBracket(vec![test_cmd]),
631                redirects: Vec::new(),
632            }));
633            match pending_op {
634                Some("||") => rest.push(AndOr::Or(pipe)),
635                _ => rest.push(AndOr::And(pipe)),
636            }
637            pending_op = op;
638        }
639
640        // Wrap the whole and-or list as a single command
641        let combined = Cmd::List(AndOrList {
642            first: first_pipeline,
643            rest,
644        });
645
646        Ok(CompoundKind::DoubleBracket(vec![combined]))
647    }
648
649    /// Parse `(( expr ))` at command position.
650    fn standalone_arith(&mut self) -> Result<CompoundKind<'a>, ParseError> {
651        self.lex.bump_n(2); // skip ((
652        self.lex.skip_blanks();
653
654        let arith = self.arith(0)?;
655
656        self.lex.skip_blanks();
657        if self.lex.peek() == b')' && self.lex.peek_at(1) == b')' {
658            self.lex.bump_n(2);
659            Ok(CompoundKind::Arithmetic(arith))
660        } else {
661            Err(self.lex.err("expected '))'"))
662        }
663    }
664
665    fn func_def(&mut self) -> Result<Executable<'a>, ParseError> {
666        // Optional 'function' keyword
667        if self.lex.at_keyword(b"function") {
668            self.lex.eat_str(b"function");
669            self.lex.skip_blanks();
670        }
671        let name = self.lex.read_name();
672        if name.is_empty() {
673            return Err(self.lex.err("expected function name"));
674        }
675        self.lex.skip_blanks();
676        // Eat ()
677        if self.lex.eat(b'(') {
678            self.lex.skip_blanks();
679            if !self.lex.eat(b')') {
680                return Err(self.lex.err("expected ')' in function definition"));
681            }
682        }
683        self.skip_separators();
684
685        // Body must be a compound command (usually { ... })
686        let kind = if self.lex.peek() == b'{' && is_meta(self.lex.peek_at(1)) {
687            self.brace_group()?
688        } else if self.lex.eat(b'(') {
689            self.subshell()?
690        } else {
691            return Err(self.lex.err("expected '{' or '(' after function name"));
692        };
693
694        Ok(Executable::FuncDef(
695            name,
696            CompoundCmd {
697                kind,
698                redirects: self.collect_redirects()?,
699            },
700        ))
701    }
702
703    // -----------------------------------------------------------------------
704    // Simple command
705    // -----------------------------------------------------------------------
706
707    fn simple_cmd(&mut self) -> Result<SimpleCmd<'a>, ParseError> {
708        let mut prefix = Vec::new();
709        let mut suffix = Vec::new();
710        let mut saw_word = false;
711
712        loop {
713            self.lex.skip_blanks();
714            if self.at_terminator() {
715                break;
716            }
717
718            // Try redirect
719            if let Some(redir) = self.try_redirect()? {
720                if saw_word {
721                    suffix.push(CmdSuffix::Redirect(redir));
722                } else {
723                    prefix.push(CmdPrefix::Redirect(redir));
724                }
725                continue;
726            }
727
728            // Before the command name: assignments are possible
729            if !saw_word && let Some(assign) = self.try_assignment()? {
730                prefix.push(assign);
731                continue;
732            }
733
734            // Regular word
735            suffix.push(CmdSuffix::Word(self.word()?));
736            saw_word = true;
737        }
738
739        Ok(SimpleCmd { prefix, suffix })
740    }
741
742    /// Try to parse an assignment: `NAME=value`, `NAME=(word ...)`, or `NAME+=(word ...)`.
743    /// Returns None if not at an assignment (doesn't consume anything).
744    fn try_assignment(&mut self) -> Result<Option<CmdPrefix<'a>>, ParseError> {
745        let start = self.lex.pos();
746        let name = self.lex.read_name();
747        if name.is_empty() {
748            self.rewind(start);
749            return Ok(None);
750        }
751
752        // Check for += (array append)
753        let is_append = self.lex.peek() == b'+' && self.lex.peek_at(1) == b'=';
754        if is_append {
755            self.lex.bump_n(2); // skip +=
756        } else if self.lex.peek() == b'=' {
757            self.lex.bump(); // skip =
758        } else {
759            self.rewind(start);
760            return Ok(None);
761        }
762
763        // Array assignment: NAME=(word ...) or NAME+=(word ...)
764        if self.lex.peek() == b'(' {
765            self.lex.bump(); // skip (
766            let words = self.array_elements()?;
767            if is_append {
768                return Ok(Some(CmdPrefix::ArrayAppend(name, words)));
769            }
770            return Ok(Some(CmdPrefix::ArrayAssign(name, words)));
771        }
772
773        // NAME=value or NAME+=value — parse the value if present
774        let value = if self.lex.peek() == 0 || is_meta(self.lex.peek()) {
775            None
776        } else {
777            Some(self.word()?)
778        };
779        Ok(Some(CmdPrefix::Assign(name, value)))
780    }
781
782    /// Parse array elements inside `(...)`.
783    fn array_elements(&mut self) -> Result<Vec<Word<'a>>, ParseError> {
784        let mut words = Vec::new();
785        loop {
786            self.lex.skip_blanks();
787            if self.lex.peek() == b')' {
788                self.lex.bump();
789                break;
790            }
791            if self.lex.is_eof() {
792                return Err(self.lex.err("unterminated array"));
793            }
794            words.push(self.word()?);
795        }
796        Ok(words)
797    }
798
799    // -----------------------------------------------------------------------
800    // Words
801    // -----------------------------------------------------------------------
802
803    /// Parse a complete word (may be a concatenation of multiple parts).
804    /// Parse a word inside `[[ ]]` — `(` and `)` are not metacharacters here.
805    fn word_bracket(&mut self) -> Result<Word<'a>, ParseError> {
806        let mut parts = Vec::new();
807        loop {
808            if self.lex.is_eof() {
809                break;
810            }
811            let b = self.lex.peek();
812            // Inside [[ ]], ( and ) are literal (used in regex patterns)
813            if b == b'(' || b == b')' {
814                let start = self.lex.pos();
815                self.lex.bump();
816                parts.push(WordPart::Bare(Atom::Lit(self.lex.slice(start))));
817                continue;
818            }
819            if is_meta(b) {
820                break;
821            }
822            parts.push(self.word_part()?);
823        }
824        if parts.is_empty() {
825            return Err(self.lex.err("expected word"));
826        }
827        if parts.len() == 1 {
828            Ok(Word::Simple(parts.into_iter().next().expect("len checked == 1")))
829        } else {
830            Ok(Word::Concat(parts))
831        }
832    }
833
834    fn word(&mut self) -> Result<Word<'a>, ParseError> {
835        let mut parts = Vec::new();
836        loop {
837            if self.lex.is_eof() {
838                break;
839            }
840            let b = self.lex.peek();
841            // Process substitution <( or >( — parse even though < and > are meta
842            if b == b'<' && self.lex.peek_at(1) == b'(' {
843                self.lex.bump_n(2);
844                let cmds = self.cmd_list(&[b")"])?;
845                self.lex.skip_blanks();
846                if !self.lex.eat(b')') {
847                    return Err(self.lex.err("expected ')' for process substitution"));
848                }
849                parts.push(WordPart::Bare(Atom::ProcSubIn(cmds)));
850                continue;
851            }
852            if b == b'>' && self.lex.peek_at(1) == b'(' {
853                return Err(self
854                    .lex
855                    .err("unsupported: output process substitution >(...)"));
856            }
857            if is_meta(b) {
858                break;
859            }
860            parts.push(self.word_part()?);
861        }
862        if parts.is_empty() {
863            return Err(self.lex.err("expected word"));
864        }
865        if parts.len() == 1 {
866            Ok(Word::Simple(parts.into_iter().next().expect("len checked == 1")))
867        } else {
868            Ok(Word::Concat(parts))
869        }
870    }
871
872    /// Parse a single word part: bare atoms, double-quoted, or single-quoted.
873    fn word_part(&mut self) -> Result<WordPart<'a>, ParseError> {
874        match self.lex.peek() {
875            b'"' => {
876                self.lex.bump();
877                let atoms = self.dquoted()?;
878                Ok(WordPart::DQuoted(atoms))
879            }
880            b'\'' => {
881                self.lex.bump();
882                let content = self.lex.scan_squote()?;
883                Ok(WordPart::SQuoted(content))
884            }
885            _ => {
886                let atom = self.atom()?;
887                Ok(WordPart::Bare(atom))
888            }
889        }
890    }
891
892    /// Parse atoms inside double quotes until closing `"`.
893    fn dquoted(&mut self) -> Result<Vec<Atom<'a>>, ParseError> {
894        let mut atoms = Vec::new();
895        let mut lit_start = self.lex.pos();
896
897        while !self.lex.is_eof() && self.lex.peek() != b'"' {
898            match self.lex.peek() {
899                b'$' => {
900                    // Flush accumulated literal
901                    if self.lex.pos() > lit_start {
902                        atoms.push(Atom::Lit(self.lex.slice(lit_start)));
903                    }
904                    atoms.push(self.dollar()?);
905                    lit_start = self.lex.pos();
906                }
907                b'\\' => {
908                    // Flush accumulated literal
909                    if self.lex.pos() > lit_start {
910                        atoms.push(Atom::Lit(self.lex.slice(lit_start)));
911                    }
912                    self.lex.bump(); // skip backslash
913                    if self.lex.is_eof() {
914                        break;
915                    }
916                    let escaped_start = self.lex.pos();
917                    self.lex.bump();
918                    atoms.push(Atom::Escaped(Cow::Borrowed(self.lex.slice(escaped_start))));
919                    lit_start = self.lex.pos();
920                }
921                b'`' => {
922                    if self.lex.pos() > lit_start {
923                        atoms.push(Atom::Lit(self.lex.slice(lit_start)));
924                    }
925                    atoms.push(self.backtick()?);
926                    lit_start = self.lex.pos();
927                }
928                _ => {
929                    self.lex.bump();
930                }
931            }
932        }
933
934        // Flush trailing literal
935        if self.lex.pos() > lit_start {
936            atoms.push(Atom::Lit(self.lex.slice(lit_start)));
937        }
938
939        if !self.lex.eat(b'"') {
940            return Err(self.lex.err("unterminated double quote"));
941        }
942        Ok(atoms)
943    }
944
945    /// Parse a single atom in an unquoted context.
946    fn atom(&mut self) -> Result<Atom<'a>, ParseError> {
947        match self.lex.peek() {
948            b'$' => self.dollar(),
949            b'\\' => {
950                self.lex.bump();
951                if self.lex.is_eof() {
952                    Ok(Atom::Lit(""))
953                } else {
954                    let start = self.lex.pos();
955                    self.lex.bump();
956                    Ok(Atom::Escaped(Cow::Borrowed(self.lex.slice(start))))
957                }
958            }
959            b'*' => {
960                self.lex.bump();
961                Ok(Atom::Star)
962            }
963            b'?' => {
964                self.lex.bump();
965                Ok(Atom::Question)
966            }
967            b'[' => {
968                self.lex.bump();
969                Ok(Atom::SquareOpen)
970            }
971            b']' => {
972                self.lex.bump();
973                Ok(Atom::SquareClose)
974            }
975            b'~' => {
976                self.lex.bump();
977                Ok(Atom::Tilde)
978            }
979            b'{' => {
980                // Try brace range {1..5}
981                if let Some(br) = self.try_brace_range() {
982                    Ok(br)
983                } else {
984                    // Adjacent brace expansion check: }{
985                    let start = self.lex.pos();
986                    self.lex.bump();
987                    Ok(Atom::Lit(self.lex.slice(start)))
988                }
989            }
990            b'`' => self.backtick(),
991            _ => {
992                // Read a run of literal characters
993                let start = self.lex.pos();
994                while !self.lex.is_eof() {
995                    let b = self.lex.peek();
996                    if is_meta(b)
997                        || matches!(
998                            b,
999                            b'"' | b'\''
1000                                | b'$'
1001                                | b'\\'
1002                                | b'*'
1003                                | b'?'
1004                                | b'['
1005                                | b']'
1006                                | b'~'
1007                                | b'{'
1008                                | b'`'
1009                        )
1010                    {
1011                        break;
1012                    }
1013                    self.lex.bump();
1014                }
1015                let s = self.lex.slice(start);
1016                if s.is_empty() {
1017                    return Err(self.lex.err("unexpected character"));
1018                }
1019                Ok(Atom::Lit(s))
1020            }
1021        }
1022    }
1023
1024    /// Parse `$...` expansion: `$var`, `${...}`, `$(...)`, `$((...))`, or special param.
1025    fn dollar(&mut self) -> Result<Atom<'a>, ParseError> {
1026        self.lex.bump(); // skip $
1027
1028        match self.lex.peek() {
1029            b'{' => {
1030                self.lex.bump(); // skip {
1031                // ${!var} — indirect expansion, ${!var[@]} — array keys, ${!prefix*} — prefix list
1032                if self.lex.peek() == b'!' {
1033                    self.lex.bump();
1034                    let name = self.lex.read_name();
1035                    if !name.is_empty() && self.lex.peek() == b'[' {
1036                        self.lex.bump(); // skip [
1037                        let idx_byte = self.lex.peek();
1038                        if (idx_byte == b'@' || idx_byte == b'*') && self.lex.peek_at(1) == b']' {
1039                            self.lex.bump_n(2); // skip @] or *]
1040                            if !self.lex.eat(b'}') {
1041                                return Err(self.lex.err("expected '}'"));
1042                            }
1043                            return Err(self.lex.err("unsupported: ${!arr[@]} indirect/keys"));
1044                        }
1045                    }
1046                    // ${!prefix*} or ${!prefix@} — list variable names matching prefix
1047                    if !name.is_empty() && matches!(self.lex.peek(), b'*' | b'@') {
1048                        self.lex.bump(); // skip * or @
1049                        if !self.lex.eat(b'}') {
1050                            return Err(self.lex.err("expected '}'"));
1051                        }
1052                        return Ok(Atom::Subst(Box::new(Subst::PrefixList(name))));
1053                    }
1054                    if name.is_empty() {
1055                        return Err(self.lex.err("expected variable name after ${!"));
1056                    }
1057                    if !self.lex.eat(b'}') {
1058                        return Err(self.lex.err("expected '}'"));
1059                    }
1060                    return Ok(Atom::Subst(Box::new(Subst::Indirect(name))));
1061                }
1062                // ${#param} or ${#arr[@]} — length
1063                if self.lex.peek() == b'#' && self.lex.peek_at(1) != b'}' {
1064                    self.lex.bump();
1065                    let param = self.read_param()?;
1066                    // Check for ${#arr[@]} — array length
1067                    if let Param::Var(name) = param
1068                        && self.lex.peek() == b'['
1069                    {
1070                        self.lex.bump(); // skip [
1071                        let idx_byte = self.lex.peek();
1072                        if (idx_byte == b'@' || idx_byte == b'*') && self.lex.peek_at(1) == b']' {
1073                            self.lex.bump_n(2); // skip @] or *]
1074                            if !self.lex.eat(b'}') {
1075                                return Err(self.lex.err("expected '}'"));
1076                            }
1077                            return Ok(Atom::Subst(Box::new(Subst::ArrayLen(name))));
1078                        }
1079                        return Err(self.lex.err("expected '@]' or '*]' after '#arr['"));
1080                    }
1081                    if !self.lex.eat(b'}') {
1082                        return Err(self.lex.err("expected '}'"));
1083                    }
1084                    return Ok(Atom::Subst(Box::new(Subst::Len(param))));
1085                }
1086                let param = self.read_param()?;
1087                // Check for array indexing: ${arr[...]}
1088                if let Param::Var(name) = param
1089                    && self.lex.peek() == b'['
1090                {
1091                    return self.brace_array_op(name);
1092                }
1093                if self.lex.peek() == b'}' {
1094                    // Bare ${var} — same as $var
1095                    self.lex.bump();
1096                    return Ok(Atom::Param(param));
1097                }
1098                let subst = self.brace_param_op(param)?;
1099                Ok(Atom::Subst(Box::new(subst)))
1100            }
1101            b'(' => {
1102                if self.lex.peek_at(1) == b'(' {
1103                    // $(( arithmetic ))
1104                    self.lex.bump_n(2); // skip ((
1105                    let subst = self.arith_subst()?;
1106                    Ok(Atom::Subst(Box::new(subst)))
1107                } else {
1108                    // $( command )
1109                    self.lex.bump(); // skip (
1110                    let subst = self.cmd_subst()?;
1111                    Ok(Atom::Subst(Box::new(subst)))
1112                }
1113            }
1114            b'\'' => {
1115                // $'...' ANSI-C quoting — scan to closing ', handling \'
1116                self.lex.bump(); // skip opening '
1117                let start = self.lex.pos();
1118                loop {
1119                    if self.lex.is_eof() {
1120                        return Err(self.lex.err("unterminated ANSI-C quote"));
1121                    }
1122                    if self.lex.peek() == b'\\' {
1123                        self.lex.bump();
1124                        if !self.lex.is_eof() {
1125                            self.lex.bump();
1126                        }
1127                        continue;
1128                    }
1129                    if self.lex.peek() == b'\'' {
1130                        let content = self.lex.slice(start);
1131                        self.lex.bump();
1132                        return Ok(Atom::AnsiCQuoted(content));
1133                    }
1134                    self.lex.bump();
1135                }
1136            }
1137            b'@' => {
1138                self.lex.bump();
1139                Ok(Atom::Param(Param::At))
1140            }
1141            b'*' => {
1142                self.lex.bump();
1143                Ok(Atom::Param(Param::Star))
1144            }
1145            b'#' => {
1146                self.lex.bump();
1147                Ok(Atom::Param(Param::Pound))
1148            }
1149            b'?' => {
1150                self.lex.bump();
1151                Ok(Atom::Param(Param::Status))
1152            }
1153            b'$' => {
1154                self.lex.bump();
1155                Ok(Atom::Param(Param::Pid))
1156            }
1157            b'!' => {
1158                self.lex.bump();
1159                Ok(Atom::Param(Param::Bang))
1160            }
1161            b'-' => {
1162                self.lex.bump();
1163                Ok(Atom::Param(Param::Dash))
1164            }
1165            b'0'..=b'9' => {
1166                let start = self.lex.pos();
1167                self.lex.bump();
1168                // Multi-digit only for ${N} syntax, bare $N is single digit
1169                let s = self.lex.slice(start);
1170                let n: u32 = s.parse().unwrap_or(0);
1171                Ok(Atom::Param(Param::Positional(n)))
1172            }
1173            _ => {
1174                // $NAME
1175                let name = self.lex.read_name();
1176                if name.is_empty() {
1177                    // Bare $ — emit as literal
1178                    Ok(Atom::Lit("$"))
1179                } else {
1180                    Ok(Atom::Param(Param::Var(name)))
1181                }
1182            }
1183        }
1184    }
1185
1186    /// Parse the operator part of `${param OP word}`. The param has already been
1187    /// read; cursor is on the operator byte.
1188    fn brace_param_op(&mut self, param: Param<'a>) -> Result<Subst<'a>, ParseError> {
1189        // Colon prefix: ${var:-word}, ${var:=word}, etc. or substring ${var:offset:length}
1190        if self.lex.peek() == b':' {
1191            self.lex.bump();
1192            match self.lex.peek() {
1193                b'-' | b'=' | b'?' | b'+' => {}
1194                _ => {
1195                    // Substring: ${var:offset} or ${var:offset:length}
1196                    let offset_start = self.lex.pos();
1197                    self.scan_substring_part();
1198                    let offset = self.lex.slice(offset_start);
1199                    let length = if self.lex.eat(b':') {
1200                        let len_start = self.lex.pos();
1201                        self.scan_substring_part();
1202                        Some(self.lex.slice(len_start))
1203                    } else {
1204                        None
1205                    };
1206                    if !self.lex.eat(b'}') {
1207                        return Err(self.lex.err("expected '}'"));
1208                    }
1209                    return Ok(Subst::Substring(param, offset, length));
1210                }
1211            }
1212        }
1213
1214        match self.lex.peek() {
1215            // Default/assign/error/alt — shared logic for colon and non-colon forms
1216            b'-' | b'=' | b'?' | b'+' => {
1217                let op = self.lex.peek();
1218                self.lex.bump();
1219                let word = self.brace_param_word()?;
1220                if !self.lex.eat(b'}') {
1221                    return Err(self.lex.err("expected '}'"));
1222                }
1223                match op {
1224                    b'-' => Ok(Subst::Default(param, word)),
1225                    b'=' => Ok(Subst::Assign(param, word)),
1226                    b'?' => Ok(Subst::Error(param, word)),
1227                    b'+' => Ok(Subst::Alt(param, word)),
1228                    _ => unreachable!(),
1229                }
1230            }
1231            b'%' => {
1232                self.lex.bump();
1233                let large = self.lex.eat(b'%');
1234                let word = self.brace_param_word()?;
1235                if !self.lex.eat(b'}') {
1236                    return Err(self.lex.err("expected '}'"));
1237                }
1238                if large {
1239                    Ok(Subst::TrimSuffixLarge(param, word))
1240                } else {
1241                    Ok(Subst::TrimSuffixSmall(param, word))
1242                }
1243            }
1244            b'#' => {
1245                self.lex.bump();
1246                let large = self.lex.eat(b'#');
1247                let word = self.brace_param_word()?;
1248                if !self.lex.eat(b'}') {
1249                    return Err(self.lex.err("expected '}'"));
1250                }
1251                if large {
1252                    Ok(Subst::TrimPrefixLarge(param, word))
1253                } else {
1254                    Ok(Subst::TrimPrefixSmall(param, word))
1255                }
1256            }
1257            b'^' => {
1258                self.lex.bump();
1259                let all = self.lex.eat(b'^');
1260                if !self.lex.eat(b'}') {
1261                    return Err(self
1262                        .lex
1263                        .err("expected '}' (patterned case modification unsupported)"));
1264                }
1265                Ok(Subst::Upper(all, param))
1266            }
1267            b',' => {
1268                self.lex.bump();
1269                let all = self.lex.eat(b',');
1270                if !self.lex.eat(b'}') {
1271                    return Err(self
1272                        .lex
1273                        .err("expected '}' (patterned case modification unsupported)"));
1274                }
1275                Ok(Subst::Lower(all, param))
1276            }
1277            b'/' => {
1278                self.lex.bump();
1279                let (all, prefix, suffix) = match self.lex.peek() {
1280                    b'/' => {
1281                        self.lex.bump();
1282                        (true, false, false)
1283                    }
1284                    b'#' => {
1285                        self.lex.bump();
1286                        (false, true, false)
1287                    }
1288                    b'%' => {
1289                        self.lex.bump();
1290                        (false, false, true)
1291                    }
1292                    _ => (false, false, false),
1293                };
1294                let pattern = self.brace_param_word_until_slash()?;
1295                let replacement = if self.lex.eat(b'/') {
1296                    self.brace_param_word()?
1297                } else {
1298                    None
1299                };
1300                if !self.lex.eat(b'}') {
1301                    return Err(self.lex.err("expected '}'"));
1302                }
1303                if prefix {
1304                    Ok(Subst::ReplacePrefix(param, pattern, replacement))
1305                } else if suffix {
1306                    Ok(Subst::ReplaceSuffix(param, pattern, replacement))
1307                } else if all {
1308                    Ok(Subst::ReplaceAll(param, pattern, replacement))
1309                } else {
1310                    Ok(Subst::Replace(param, pattern, replacement))
1311                }
1312            }
1313            b'@' => {
1314                self.lex.bump();
1315                let op = self.lex.peek();
1316                if !matches!(
1317                    op,
1318                    b'Q' | b'E' | b'P' | b'A' | b'K' | b'a' | b'u' | b'U' | b'L'
1319                ) {
1320                    return Err(self
1321                        .lex
1322                        .err("unsupported parameter transformation operator"));
1323                }
1324                self.lex.bump();
1325                let Param::Var(name) = param else {
1326                    return Err(self
1327                        .lex
1328                        .err("parameter transformation requires a named variable"));
1329                };
1330                if !self.lex.eat(b'}') {
1331                    return Err(self.lex.err("expected '}'"));
1332                }
1333                Ok(Subst::Transform(name, op))
1334            }
1335            _ => Err(self.lex.err("unsupported parameter expansion")),
1336        }
1337    }
1338
1339    /// Parse array indexing after `${name[`.
1340    /// Handles `${arr[n]}`, `${arr[@]}`, `${arr[*]}`, `${arr[@]:offset:len}`.
1341    fn brace_array_op(&mut self, name: &'a str) -> Result<Atom<'a>, ParseError> {
1342        self.lex.bump(); // skip [
1343
1344        let idx_byte = self.lex.peek();
1345        if idx_byte == b'@' || idx_byte == b'*' {
1346            self.lex.bump();
1347            if !self.lex.eat(b']') {
1348                return Err(self.lex.err("expected ']'"));
1349            }
1350            // Check for slice: ${arr[@]:offset:length}
1351            if self.lex.peek() == b':' {
1352                self.lex.bump(); // skip :
1353                let offset = self.read_brace_number()?;
1354                let length = if self.lex.eat(b':') {
1355                    Some(self.read_brace_number()?)
1356                } else {
1357                    None
1358                };
1359                if !self.lex.eat(b'}') {
1360                    return Err(self.lex.err("expected '}'"));
1361                }
1362                return Ok(Atom::Subst(Box::new(Subst::ArraySlice(
1363                    name, offset, length,
1364                ))));
1365            }
1366            if !self.lex.eat(b'}') {
1367                return Err(self.lex.err("expected '}'"));
1368            }
1369            return Ok(Atom::Subst(Box::new(Subst::ArrayAll(name))));
1370        }
1371
1372        // Numeric or expression index: ${arr[n]} or ${arr[$((expr))]}
1373        // Read index as a word (supports $var, $((expr)), etc.)
1374        let idx_word = self.array_index_word()?;
1375        if !self.lex.eat(b']') {
1376            return Err(self.lex.err("expected ']'"));
1377        }
1378        if !self.lex.eat(b'}') {
1379            return Err(self.lex.err("expected '}'"));
1380        }
1381        Ok(Atom::Subst(Box::new(Subst::ArrayElement(name, idx_word))))
1382    }
1383
1384    /// Read a number in `${arr[@]:offset:length}` context.
1385    fn read_brace_number(&mut self) -> Result<&'a str, ParseError> {
1386        let start = self.lex.pos();
1387        // Allow optional leading minus
1388        if self.lex.peek() == b'-' {
1389            self.lex.bump();
1390        }
1391        while self.lex.peek().is_ascii_digit() {
1392            self.lex.bump();
1393        }
1394        let s = self.lex.slice(start);
1395        if s.is_empty() || s == "-" {
1396            return Err(self.lex.err("expected number in array slice"));
1397        }
1398        Ok(s)
1399    }
1400
1401    /// Parse an array index word (inside `[...]`), stopping at `]`.
1402    fn array_index_word(&mut self) -> Result<Word<'a>, ParseError> {
1403        let mut parts = Vec::new();
1404        loop {
1405            let b = self.lex.peek();
1406            if self.lex.is_eof() || b == b']' {
1407                break;
1408            }
1409            match b {
1410                b'$' => {
1411                    parts.push(WordPart::Bare(self.dollar()?));
1412                }
1413                b'"' => {
1414                    self.lex.bump();
1415                    parts.push(WordPart::DQuoted(self.dquoted()?));
1416                }
1417                _ => {
1418                    let start = self.lex.pos();
1419                    while !self.lex.is_eof()
1420                        && self.lex.peek() != b']'
1421                        && self.lex.peek() != b'$'
1422                        && self.lex.peek() != b'"'
1423                    {
1424                        self.lex.bump();
1425                    }
1426                    let s = self.lex.slice(start);
1427                    if !s.is_empty() {
1428                        parts.push(WordPart::Bare(Atom::Lit(s)));
1429                    }
1430                }
1431            }
1432        }
1433        if parts.is_empty() {
1434            return Err(self.lex.err("empty array index"));
1435        }
1436        if parts.len() == 1 {
1437            Ok(Word::Simple(parts.into_iter().next().expect("len checked == 1")))
1438        } else {
1439            Ok(Word::Concat(parts))
1440        }
1441    }
1442
1443    /// Read a Param from the current position (for ${...} parsing).
1444    fn read_param(&mut self) -> Result<Param<'a>, ParseError> {
1445        match self.lex.peek() {
1446            b'@' => {
1447                self.lex.bump();
1448                Ok(Param::At)
1449            }
1450            b'*' => {
1451                self.lex.bump();
1452                Ok(Param::Star)
1453            }
1454            b'#' => {
1455                self.lex.bump();
1456                Ok(Param::Pound)
1457            }
1458            b'?' => {
1459                self.lex.bump();
1460                Ok(Param::Status)
1461            }
1462            b'$' => {
1463                self.lex.bump();
1464                Ok(Param::Pid)
1465            }
1466            b'!' => {
1467                self.lex.bump();
1468                Ok(Param::Bang)
1469            }
1470            b'-' => {
1471                self.lex.bump();
1472                Ok(Param::Dash)
1473            }
1474            b'0'..=b'9' => {
1475                let num = self.lex.read_number();
1476                let n: u32 = num.parse().unwrap_or(0);
1477                Ok(Param::Positional(n))
1478            }
1479            _ => {
1480                let name = self.lex.read_name();
1481                if name.is_empty() {
1482                    Err(self.lex.err("expected parameter name"))
1483                } else {
1484                    Ok(Param::Var(name))
1485                }
1486            }
1487        }
1488    }
1489
1490    /// Parse a word inside `${...}` — stops at unquoted `}`.
1491    #[inline]
1492    fn brace_param_word(&mut self) -> Result<Option<Word<'a>>, ParseError> {
1493        self.brace_param_word_until(b'\0') // NUL never appears — no extra stop
1494    }
1495
1496    /// Like `brace_param_word` but also stops at unquoted `/`.
1497    #[inline]
1498    fn brace_param_word_until_slash(&mut self) -> Result<Option<Word<'a>>, ParseError> {
1499        self.brace_param_word_until(b'/')
1500    }
1501
1502    /// Core: parse a word inside `${...}`, stopping at `}` or `extra_stop`.
1503    fn brace_param_word_until(&mut self, extra: u8) -> Result<Option<Word<'a>>, ParseError> {
1504        if self.lex.peek() == b'}' || (extra != 0 && self.lex.peek() == extra) {
1505            return Ok(None);
1506        }
1507        let mut parts = Vec::new();
1508        loop {
1509            let b = self.lex.peek();
1510            if self.lex.is_eof() || b == b'}' || (extra != 0 && b == extra) {
1511                break;
1512            }
1513            match b {
1514                b'"' => {
1515                    self.lex.bump();
1516                    parts.push(WordPart::DQuoted(self.dquoted()?));
1517                }
1518                b'\'' => {
1519                    self.lex.bump();
1520                    parts.push(WordPart::SQuoted(self.lex.scan_squote()?));
1521                }
1522                b'$' => {
1523                    parts.push(WordPart::Bare(self.dollar()?));
1524                }
1525                b'\\' => {
1526                    self.lex.bump();
1527                    if self.lex.is_eof() {
1528                        break;
1529                    }
1530                    let start = self.lex.pos();
1531                    self.lex.bump();
1532                    parts.push(WordPart::Bare(Atom::Escaped(Cow::Borrowed(
1533                        self.lex.slice(start),
1534                    ))));
1535                }
1536                b'*' => {
1537                    self.lex.bump();
1538                    parts.push(WordPart::Bare(Atom::Star));
1539                }
1540                b'?' => {
1541                    self.lex.bump();
1542                    parts.push(WordPart::Bare(Atom::Question));
1543                }
1544                _ => {
1545                    let start = self.lex.pos();
1546                    while !self.lex.is_eof() {
1547                        let c = self.lex.peek();
1548                        if c == b'}'
1549                            || c == b'"'
1550                            || c == b'\''
1551                            || c == b'$'
1552                            || c == b'\\'
1553                            || c == b'*'
1554                            || c == b'?'
1555                            || (extra != 0 && c == extra)
1556                        {
1557                            break;
1558                        }
1559                        self.lex.bump();
1560                    }
1561                    if self.lex.pos() > start {
1562                        parts.push(WordPart::Bare(Atom::Lit(self.lex.slice(start))));
1563                    }
1564                }
1565            }
1566        }
1567        match parts.len() {
1568            0 => Ok(None),
1569            1 => Ok(Some(Word::Simple(parts.into_iter().next().expect("len checked == 1")))),
1570            _ => Ok(Some(Word::Concat(parts))),
1571        }
1572    }
1573
1574    /// Scan substring offset/length — stops at unquoted `:` or `}`, tracking nesting.
1575    /// Skips quoted strings so that `:` or `}` inside quotes are not treated as
1576    /// delimiters.
1577    fn scan_substring_part(&mut self) {
1578        let mut depth: i32 = 0;
1579        while !self.lex.is_eof() {
1580            let b = self.lex.peek();
1581            if depth == 0 && (b == b':' || b == b'}') {
1582                break;
1583            }
1584            match b {
1585                b'\'' => {
1586                    self.lex.bump();
1587                    while !self.lex.is_eof() && self.lex.peek() != b'\'' {
1588                        self.lex.bump();
1589                    }
1590                    if !self.lex.is_eof() {
1591                        self.lex.bump(); // closing '
1592                    }
1593                }
1594                b'"' => {
1595                    self.lex.bump();
1596                    while !self.lex.is_eof() && self.lex.peek() != b'"' {
1597                        if self.lex.peek() == b'\\' {
1598                            self.lex.bump(); // skip escape
1599                        }
1600                        self.lex.bump();
1601                    }
1602                    if !self.lex.is_eof() {
1603                        self.lex.bump(); // closing "
1604                    }
1605                }
1606                b'(' | b'{' => {
1607                    depth += 1;
1608                    self.lex.bump();
1609                }
1610                b')' | b'}' => {
1611                    depth -= 1;
1612                    self.lex.bump();
1613                }
1614                _ => self.lex.bump(),
1615            }
1616        }
1617    }
1618
1619    /// Parse `$(command)` — cursor is after `$(`.
1620    fn cmd_subst(&mut self) -> Result<Subst<'a>, ParseError> {
1621        let cmds = self.cmd_list(&[b")"])?;
1622        self.lex.skip_blanks();
1623        if !self.lex.eat(b')') {
1624            return Err(self.lex.err("expected ')' for command substitution"));
1625        }
1626        Ok(Subst::Cmd(cmds))
1627    }
1628
1629    /// Parse `$((expr))` — cursor is after `$((`.
1630    fn arith_subst(&mut self) -> Result<Subst<'a>, ParseError> {
1631        self.lex.skip_blanks();
1632        if self.lex.peek() == b')' && self.lex.peek_at(1) == b')' {
1633            self.lex.bump_n(2);
1634            return Ok(Subst::Arith(None));
1635        }
1636        let expr = self.arith(0)?;
1637        self.lex.skip_blanks();
1638        if self.lex.peek() == b')' && self.lex.peek_at(1) == b')' {
1639            self.lex.bump_n(2);
1640            Ok(Subst::Arith(Some(expr)))
1641        } else {
1642            Err(self.lex.err("expected '))' for arithmetic"))
1643        }
1644    }
1645
1646    /// Parse backtick command substitution: `` `...` ``
1647    fn backtick(&mut self) -> Result<Atom<'a>, ParseError> {
1648        self.lex.bump(); // skip opening `
1649        let start = self.lex.pos();
1650        while !self.lex.is_eof() && self.lex.peek() != b'`' {
1651            if self.lex.peek() == b'\\' {
1652                self.lex.bump(); // skip escaped char
1653            }
1654            self.lex.bump();
1655        }
1656        let content = self.lex.slice(start);
1657        if !self.lex.eat(b'`') {
1658            return Err(self.lex.err("unterminated backtick"));
1659        }
1660        // Re-parse the content as a command
1661        let sub_parser = Parser::new(content);
1662        let cmds = sub_parser.parse()?;
1663        Ok(Atom::Subst(Box::new(Subst::Cmd(cmds))))
1664    }
1665
1666    /// Try to parse `{start..end[..step]}` brace range.
1667    /// Returns None if not a brace range (doesn't consume).
1668    fn try_brace_range(&mut self) -> Option<Atom<'a>> {
1669        fn valid_range_val(s: &str) -> bool {
1670            s.parse::<i64>().is_ok() || (s.len() == 1 && s.as_bytes()[0].is_ascii_alphabetic())
1671        }
1672
1673        let start_pos = self.lex.pos();
1674        if self.lex.peek() != b'{' {
1675            return None;
1676        }
1677
1678        // Scan ahead to find } and check for ..
1679        let src = &self.lex.remaining().as_bytes()[1..]; // after {
1680        let close = src.iter().position(|&b| b == b'}')?;
1681        let inner_start = start_pos + 1;
1682        let inner_end = inner_start + close;
1683        let inner = self.lex.slice_range(inner_start, inner_end);
1684
1685        // Must contain ..
1686        let dot_pos = inner.find("..")?;
1687        if dot_pos == 0 {
1688            return None;
1689        }
1690        let first = &inner[..dot_pos];
1691        let rest = &inner[dot_pos + 2..];
1692        if rest.is_empty() {
1693            return None;
1694        }
1695
1696        // Check for optional step: first..end..step
1697        let (end_val, step_val) = if let Some(dot2) = rest.find("..") {
1698            if dot2 == 0 || dot2 + 2 >= rest.len() {
1699                return None;
1700            }
1701            (&rest[..dot2], Some(&rest[dot2 + 2..]))
1702        } else {
1703            (rest, None)
1704        };
1705
1706        // Validate: start and end must be integers or single alpha chars
1707        if !valid_range_val(first) || !valid_range_val(end_val) {
1708            return None;
1709        }
1710        if let Some(step) = step_val
1711            && step.parse::<i64>().is_err()
1712        {
1713            return None;
1714        }
1715
1716        // We need to return &'a str slices into the original input.
1717        // The inner string is a slice of input, so first/end_val/step_val are too.
1718        // But they were created from `inner` which is a slice. We need to compute
1719        // the actual input offsets.
1720        let first_start = inner_start;
1721        let first_end = inner_start + dot_pos;
1722        let end_start = inner_start + dot_pos + 2;
1723        let end_end = if step_val.is_some() {
1724            end_start + rest.find("..").expect("step_val implies second '..' exists")
1725        } else {
1726            inner_end
1727        };
1728        let step_range = step_val.map(|_| {
1729            let s = end_end + 2;
1730            (s, inner_end)
1731        });
1732
1733        // Advance past the }
1734        self.lex.bump_n(inner_end + 1 - start_pos);
1735
1736        // Check for adjacent brace expansion }{
1737        if self.lex.peek() == b'{' {
1738            self.rewind(start_pos);
1739            return None; // will be caught as unsupported
1740        }
1741
1742        let start_slice = self.lex.slice_range(first_start, first_end);
1743        let end_slice = self.lex.slice_range(end_start, end_end);
1744        let step_slice = step_range.map(|(s, e)| self.lex.slice_range(s, e));
1745
1746        Some(Atom::BraceRange {
1747            start: start_slice,
1748            end: end_slice,
1749            step: step_slice,
1750        })
1751    }
1752
1753    // -----------------------------------------------------------------------
1754    // Redirects
1755    // -----------------------------------------------------------------------
1756
1757    /// Try to parse a redirect at the current position.
1758    /// Returns None if not at a redirect operator.
1759    fn try_redirect(&mut self) -> Result<Option<Redir<'a>>, ParseError> {
1760        self.lex.skip_blanks();
1761
1762        // Read optional fd number
1763        let start = self.lex.pos();
1764        let fd_str = self.lex.read_number();
1765        let fd: Option<u16> = if fd_str.is_empty() {
1766            None
1767        } else {
1768            fd_str.parse().ok()
1769        };
1770
1771        let b = self.lex.peek();
1772        let b2 = self.lex.peek_at(1);
1773
1774        match (b, b2) {
1775            (b'<', b'<') if self.lex.peek_at(2) == b'<' => {
1776                // <<<  here-string
1777                self.lex.bump_n(3);
1778                self.lex.skip_blanks();
1779                let word = self.word()?;
1780                let _ = fd; // fd always 0 for here-strings
1781                Ok(Some(Redir::HereString(word)))
1782            }
1783            (b'<', b'<') => {
1784                // << or <<- heredoc
1785                self.lex.bump_n(2);
1786                let strip_tabs = self.lex.eat(b'-');
1787                self.lex.skip_blanks();
1788
1789                // Parse delimiter — check if quoted
1790                let (tag, quoted) = self.read_heredoc_delimiter()?;
1791
1792                // Save position, scan ahead to find body
1793                let save_pos = self.lex.pos();
1794                // Skip to end of current line
1795                while !self.lex.is_eof() && self.lex.peek() != b'\n' {
1796                    self.lex.bump();
1797                }
1798                if self.lex.peek() == b'\n' {
1799                    self.lex.bump();
1800                }
1801                // Read lines until delimiter
1802                let body_start = self.lex.pos();
1803                let mut body_end = body_start;
1804                let mut found = false;
1805                while !self.lex.is_eof() {
1806                    let line_start = self.lex.pos();
1807                    while !self.lex.is_eof() && self.lex.peek() != b'\n' {
1808                        self.lex.bump();
1809                    }
1810                    let line = self.lex.slice(line_start);
1811                    let trimmed = if strip_tabs {
1812                        line.trim_start_matches('\t')
1813                    } else {
1814                        line
1815                    };
1816                    if trimmed == tag {
1817                        body_end = line_start;
1818                        if self.lex.peek() == b'\n' {
1819                            self.lex.bump();
1820                        }
1821                        found = true;
1822                        break;
1823                    }
1824                    if self.lex.peek() == b'\n' {
1825                        self.lex.bump();
1826                    }
1827                }
1828                if !found {
1829                    return Err(self.lex.err("unterminated heredoc"));
1830                }
1831                let body = self.lex.slice_range(body_start, body_end);
1832                let after_heredoc = self.lex.pos();
1833                self.lex.set_pos(save_pos);
1834                self.heredoc_resume = Some(after_heredoc);
1835
1836                let heredoc_body = if quoted {
1837                    HeredocBody::Literal(body)
1838                } else {
1839                    // Parse body for variable/command expansions
1840                    let atoms = Parser::new(body).parse_heredoc_body()?;
1841                    HeredocBody::Interpolated(atoms)
1842                };
1843                let _ = (fd, strip_tabs); // fd always 0, tab-stripping not translated
1844                Ok(Some(Redir::Heredoc(heredoc_body)))
1845            }
1846            (b'<', b'>') => {
1847                // <>
1848                self.lex.bump_n(2);
1849                self.lex.skip_blanks();
1850                let word = self.word()?;
1851                Ok(Some(Redir::ReadWrite(fd, word)))
1852            }
1853            (b'<', b'&') => {
1854                // <&
1855                self.lex.bump_n(2);
1856                self.lex.skip_blanks();
1857                let word = self.word()?;
1858                Ok(Some(Redir::DupRead(fd, word)))
1859            }
1860            (b'<', b'(') if fd.is_none() => {
1861                // <( — process substitution, not a redirect
1862                self.rewind(start);
1863                Ok(None)
1864            }
1865            (b'<', _) => {
1866                // <
1867                self.lex.bump();
1868                self.lex.skip_blanks();
1869                let word = self.word()?;
1870                Ok(Some(Redir::Read(fd, word)))
1871            }
1872            (b'>', b'>') if self.lex.peek_at(2) == b'|' => {
1873                // Unusual, treat as >>|
1874                self.rewind(start);
1875                Ok(None)
1876            }
1877            (b'>', b'>') => {
1878                // >>
1879                self.lex.bump_n(2);
1880                self.lex.skip_blanks();
1881                let word = self.word()?;
1882                Ok(Some(Redir::Append(fd, word)))
1883            }
1884            (b'>', b'|') => {
1885                // >|
1886                self.lex.bump_n(2);
1887                self.lex.skip_blanks();
1888                let word = self.word()?;
1889                Ok(Some(Redir::Clobber(fd, word)))
1890            }
1891            (b'>', b'&') => {
1892                // >&
1893                self.lex.bump_n(2);
1894                self.lex.skip_blanks();
1895                let word = self.word()?;
1896                Ok(Some(Redir::DupWrite(fd, word)))
1897            }
1898            (b'>', b'(') if fd.is_none() => {
1899                // >( — process substitution, not a redirect
1900                self.rewind(start);
1901                Ok(None)
1902            }
1903            (b'>', _) => {
1904                // >
1905                self.lex.bump();
1906                self.lex.skip_blanks();
1907                let word = self.word()?;
1908                Ok(Some(Redir::Write(fd, word)))
1909            }
1910            (b'&', b'>') if fd.is_none() => {
1911                // &> or &>>
1912                self.lex.bump_n(2);
1913                if self.lex.eat(b'>') {
1914                    // &>>
1915                    self.lex.skip_blanks();
1916                    let word = self.word()?;
1917                    Ok(Some(Redir::AppendAll(word)))
1918                } else {
1919                    // &>
1920                    self.lex.skip_blanks();
1921                    let word = self.word()?;
1922                    Ok(Some(Redir::WriteAll(word)))
1923                }
1924            }
1925            _ => {
1926                // Not a redirect — rewind any consumed fd digits
1927                self.rewind(start);
1928                Ok(None)
1929            }
1930        }
1931    }
1932
1933    /// Collect any trailing redirects after a compound command.
1934    fn collect_redirects(&mut self) -> Result<Vec<Redir<'a>>, ParseError> {
1935        let mut redirects = Vec::new();
1936        loop {
1937            self.lex.skip_blanks();
1938            let Some(redir) = self.try_redirect()? else {
1939                break;
1940            };
1941            redirects.push(redir);
1942        }
1943        Ok(redirects)
1944    }
1945
1946    // -----------------------------------------------------------------------
1947    // Arithmetic (Pratt parser)
1948    // -----------------------------------------------------------------------
1949
1950    /// Parse an arithmetic expression with minimum precedence `min_prec`.
1951    pub(crate) fn arith(&mut self, min_prec: u8) -> Result<Arith<'a>, ParseError> {
1952        let mut left = self.arith_atom()?;
1953
1954        loop {
1955            self.lex.skip_blanks();
1956            if let Some((prec, op_len, constructor)) = self.arith_infix_op() {
1957                if prec < min_prec {
1958                    break;
1959                }
1960                self.lex.bump_n(op_len);
1961                let op_end = self.lex.pos();
1962                self.lex.skip_blanks();
1963
1964                // Ternary special case
1965                if op_len == 1 && self.lex.slice_range(op_end - 1, op_end) == "?" {
1966                    let then_val = self.arith(0)?;
1967                    self.lex.skip_blanks();
1968                    if !self.lex.eat(b':') {
1969                        return Err(self.lex.err("expected ':' in ternary"));
1970                    }
1971                    self.lex.skip_blanks();
1972                    let else_val = self.arith(0)?;
1973                    left = Arith::Ternary(Box::new(left), Box::new(then_val), Box::new(else_val));
1974                    continue;
1975                }
1976
1977                // Assignment special case
1978                if op_len == 1 && self.lex.slice_range(op_end - 1, op_end) == "=" {
1979                    if let Arith::Var(name) = left {
1980                        let right = self.arith(prec)?;
1981                        left = Arith::Assign(name, Box::new(right));
1982                        continue;
1983                    }
1984                    return Err(self.lex.err("expected variable for assignment"));
1985                }
1986
1987                let right = self.arith(prec + 1)?;
1988                left = constructor(Box::new(left), Box::new(right));
1989            } else {
1990                // Compound assignment: +=, -=, *=, /=, %=
1991                let b1 = self.lex.peek();
1992                let b2 = self.lex.peek_at(1);
1993                if b2 == b'='
1994                    && matches!(b1, b'+' | b'-' | b'*' | b'/' | b'%')
1995                    && let Arith::Var(name) = &left
1996                {
1997                    let name = *name;
1998                    let make_op: fn(Box<Arith<'a>>, Box<Arith<'a>>) -> Arith<'a> = match b1 {
1999                        b'+' => Arith::Add,
2000                        b'-' => Arith::Sub,
2001                        b'*' => Arith::Mul,
2002                        b'/' => Arith::Div,
2003                        _ => Arith::Rem,
2004                    };
2005                    self.lex.bump_n(2);
2006                    self.lex.skip_blanks();
2007                    let right = self.arith(0)?;
2008                    left = Arith::Assign(
2009                        name,
2010                        Box::new(make_op(Box::new(Arith::Var(name)), Box::new(right))),
2011                    );
2012                    continue;
2013                }
2014                break;
2015            }
2016        }
2017
2018        Ok(left)
2019    }
2020
2021    /// Parse an arithmetic atom (number, variable, prefix op, grouping).
2022    fn arith_atom(&mut self) -> Result<Arith<'a>, ParseError> {
2023        self.lex.skip_blanks();
2024
2025        match self.lex.peek() {
2026            b'(' => {
2027                self.lex.bump();
2028                let expr = self.arith(0)?;
2029                self.lex.skip_blanks();
2030                if !self.lex.eat(b')') {
2031                    return Err(self.lex.err("expected ')' in arithmetic"));
2032                }
2033                Ok(expr)
2034            }
2035            b'+' if self.lex.peek_at(1) == b'+' => {
2036                // ++var
2037                self.lex.bump_n(2);
2038                self.lex.skip_blanks();
2039                let name = self.lex.read_name();
2040                if name.is_empty() {
2041                    return Err(self.lex.err("expected variable after '++'"));
2042                }
2043                Ok(Arith::PreInc(name))
2044            }
2045            b'-' if self.lex.peek_at(1) == b'-' => {
2046                // --var
2047                self.lex.bump_n(2);
2048                self.lex.skip_blanks();
2049                let name = self.lex.read_name();
2050                if name.is_empty() {
2051                    return Err(self.lex.err("expected variable after '--'"));
2052                }
2053                Ok(Arith::PreDec(name))
2054            }
2055            b'+' => {
2056                self.lex.bump();
2057                let e = self.arith_atom()?;
2058                Ok(Arith::Pos(Box::new(e)))
2059            }
2060            b'-' => {
2061                self.lex.bump();
2062                let e = self.arith_atom()?;
2063                Ok(Arith::Neg(Box::new(e)))
2064            }
2065            b'!' => {
2066                self.lex.bump();
2067                let e = self.arith_atom()?;
2068                Ok(Arith::LogNot(Box::new(e)))
2069            }
2070            b'~' => {
2071                self.lex.bump();
2072                let e = self.arith_atom()?;
2073                Ok(Arith::BitNot(Box::new(e)))
2074            }
2075            b'$' => {
2076                self.lex.bump();
2077                // Handle $((expr)) as nested arithmetic substitution
2078                if self.lex.peek() == b'(' && self.lex.peek_at(1) == b'(' {
2079                    self.lex.bump_n(2);
2080                    let expr = self.arith(0)?;
2081                    self.lex.skip_blanks();
2082                    if !self.lex.eat(b')') || !self.lex.eat(b')') {
2083                        return Err(self.lex.err("expected '))' in nested arithmetic"));
2084                    }
2085                    return Ok(expr);
2086                }
2087                // Handle $(cmd) as command substitution in arithmetic — unsupported
2088                if self.lex.peek() == b'(' {
2089                    return Err(self
2090                        .lex
2091                        .err("unsupported: command substitution in arithmetic"));
2092                }
2093                let name = self.lex.read_name();
2094                if !name.is_empty() {
2095                    Ok(self.check_postfix(name))
2096                } else if self.lex.peek().is_ascii_digit() {
2097                    // Positional parameters: $1, $2, etc.
2098                    let start = self.lex.pos();
2099                    while self.lex.peek().is_ascii_digit() {
2100                        self.lex.bump();
2101                    }
2102                    Ok(Arith::Var(self.lex.slice(start)))
2103                } else {
2104                    Err(self.lex.err("expected variable after '$' in arithmetic"))
2105                }
2106            }
2107            b'0'..=b'9' => {
2108                let start = self.lex.pos();
2109                // Handle hex (0x/0X), octal (0), binary (0b/0B) prefixes
2110                if self.lex.peek() == b'0' {
2111                    self.lex.bump();
2112                    match self.lex.peek() {
2113                        b'x' | b'X' => {
2114                            self.lex.bump();
2115                            while self.lex.peek().is_ascii_hexdigit() {
2116                                self.lex.bump();
2117                            }
2118                            let s = self.lex.slice(start);
2119                            let n = i64::from_str_radix(&s[2..], 16).unwrap_or(0);
2120                            return Ok(Arith::Lit(n));
2121                        }
2122                        b'b' | b'B' => {
2123                            self.lex.bump();
2124                            while matches!(self.lex.peek(), b'0' | b'1') {
2125                                self.lex.bump();
2126                            }
2127                            let s = self.lex.slice(start);
2128                            let n = i64::from_str_radix(&s[2..], 2).unwrap_or(0);
2129                            return Ok(Arith::Lit(n));
2130                        }
2131                        _ => {} // fall through to read remaining digits (octal or decimal 0)
2132                    }
2133                }
2134                while self.lex.peek().is_ascii_digit() {
2135                    self.lex.bump();
2136                }
2137                let num_str = self.lex.slice(start);
2138                let n: i64 = if num_str.starts_with('0') && num_str.len() > 1 {
2139                    i64::from_str_radix(num_str, 8).unwrap_or(0) // octal
2140                } else {
2141                    num_str.parse().unwrap_or(0)
2142                };
2143                Ok(Arith::Lit(n))
2144            }
2145            _ => {
2146                let name = self.lex.read_name();
2147                if name.is_empty() {
2148                    Err(self.lex.err("expected arithmetic expression"))
2149                } else {
2150                    Ok(self.check_postfix(name))
2151                }
2152            }
2153        }
2154    }
2155
2156    /// Check for postfix ++ or -- after a variable name.
2157    #[inline]
2158    fn check_postfix(&mut self, name: &'a str) -> Arith<'a> {
2159        if self.lex.peek() == b'+' && self.lex.peek_at(1) == b'+' {
2160            self.lex.bump_n(2);
2161            Arith::PostInc(name)
2162        } else if self.lex.peek() == b'-' && self.lex.peek_at(1) == b'-' {
2163            self.lex.bump_n(2);
2164            Arith::PostDec(name)
2165        } else {
2166            Arith::Var(name)
2167        }
2168    }
2169
2170    /// Return the precedence, operator length, and constructor for a binary
2171    /// arithmetic infix operator at the current position.
2172    // Return type encodes (precedence, operator length, constructor) in one tuple
2173    // to avoid splitting into multiple functions that repeat the same match arms.
2174    #[allow(clippy::type_complexity)]
2175    fn arith_infix_op(
2176        &self,
2177    ) -> Option<(u8, usize, fn(Box<Arith<'a>>, Box<Arith<'a>>) -> Arith<'a>)> {
2178        let b1 = self.lex.peek();
2179        let b2 = self.lex.peek_at(1);
2180        let b3 = self.lex.peek_at(2);
2181
2182        // 3-char ops
2183        match (b1, b2, b3) {
2184            (b'<', b'<', b'=') | (b'>', b'>', b'=') => return None, // compound assignment, bail
2185            _ => {}
2186        }
2187
2188        // 2-char ops (check before 1-char)
2189        match (b1, b2) {
2190            (b'|', b'|') => return Some((1, 2, |l, r| Arith::LogOr(l, r))),
2191            (b'&', b'&') => return Some((2, 2, |l, r| Arith::LogAnd(l, r))),
2192            (b'=', b'=') => return Some((7, 2, |l, r| Arith::Eq(l, r))),
2193            (b'!', b'=') => return Some((7, 2, |l, r| Arith::Ne(l, r))),
2194            (b'<', b'=') => return Some((8, 2, |l, r| Arith::Le(l, r))),
2195            (b'>', b'=') => return Some((8, 2, |l, r| Arith::Ge(l, r))),
2196            (b'<', b'<') => return Some((9, 2, |l, r| Arith::Shl(l, r))),
2197            (b'>', b'>') => return Some((9, 2, |l, r| Arith::Shr(l, r))),
2198            (b'*', b'*') => return Some((13, 2, |l, r| Arith::Pow(l, r))),
2199            (b'+' | b'-' | b'*' | b'/' | b'%', b'=') => {
2200                return None; // compound assignment, bail
2201            }
2202            _ => {}
2203        }
2204
2205        // 1-char ops
2206        match b1 {
2207            b'|' => Some((3, 1, |l, r| Arith::BitOr(l, r))),
2208            b'^' => Some((4, 1, |l, r| Arith::BitXor(l, r))),
2209            b'&' => Some((5, 1, |l, r| Arith::BitAnd(l, r))),
2210            b'<' => Some((8, 1, |l, r| Arith::Lt(l, r))),
2211            b'>' => Some((8, 1, |l, r| Arith::Gt(l, r))),
2212            b'+' if b2 != b'+' => Some((10, 1, |l, r| Arith::Add(l, r))),
2213            b'-' if b2 != b'-' => Some((10, 1, |l, r| Arith::Sub(l, r))),
2214            b'*' if b2 != b'*' => Some((11, 1, |l, r| Arith::Mul(l, r))),
2215            b'/' => Some((11, 1, |l, r| Arith::Div(l, r))),
2216            b'%' => Some((11, 1, |l, r| Arith::Rem(l, r))),
2217            b'?' => Some((0, 1, |l, _| *l)), // placeholder — ternary handled in arith()
2218            b'=' if b2 != b'=' => Some((0, 1, |l, _| *l)), // placeholder — assignment handled in arith()
2219            _ => None,
2220        }
2221    }
2222
2223    // -----------------------------------------------------------------------
2224    // Helpers
2225    // -----------------------------------------------------------------------
2226
2227    #[inline]
2228    fn rewind(&mut self, pos: usize) {
2229        self.lex.set_pos(pos);
2230    }
2231
2232    fn expect(&mut self, kw: &[u8], msg: &'static str) -> Result<(), ParseError> {
2233        self.lex.skip_blanks();
2234        if self.lex.eat_str(kw) {
2235            Ok(())
2236        } else {
2237            Err(self.lex.err(msg))
2238        }
2239    }
2240
2241    #[inline]
2242    fn eat_separator(&mut self) {
2243        self.lex.skip_blanks();
2244        if self.lex.peek() == b';' || self.lex.peek() == b'\n' {
2245            self.lex.bump();
2246        }
2247    }
2248
2249    fn skip_separators(&mut self) {
2250        loop {
2251            self.lex.skip_blanks();
2252            match self.lex.peek() {
2253                b';' | b'\n' => self.lex.bump(),
2254                b'#' => self.lex.skip_comment(),
2255                _ => break,
2256            }
2257        }
2258    }
2259
2260    /// Read a heredoc delimiter. Returns `(tag, quoted)`.
2261    /// Quoted delimiters (`'EOF'`, `"EOF"`) suppress variable expansion.
2262    fn read_heredoc_delimiter(&mut self) -> Result<(&'a str, bool), ParseError> {
2263        match self.lex.peek() {
2264            b'\'' => {
2265                self.lex.bump();
2266                let tag = self.lex.scan_squote()?;
2267                Ok((tag, true))
2268            }
2269            b'"' => {
2270                self.lex.bump();
2271                let start = self.lex.pos();
2272                while !self.lex.is_eof() && self.lex.peek() != b'"' {
2273                    if self.lex.peek() == b'\\' {
2274                        self.lex.bump();
2275                    }
2276                    self.lex.bump();
2277                }
2278                let tag = self.lex.slice(start);
2279                if !self.lex.eat(b'"') {
2280                    return Err(self.lex.err("unterminated heredoc delimiter"));
2281                }
2282                Ok((tag, true))
2283            }
2284            _ => {
2285                let start = self.lex.pos();
2286                while !self.lex.is_eof() && !is_meta(self.lex.peek()) {
2287                    self.lex.bump();
2288                }
2289                let tag = self.lex.slice(start);
2290                if tag.is_empty() {
2291                    return Err(self.lex.err("expected heredoc delimiter"));
2292                }
2293                Ok((tag, false))
2294            }
2295        }
2296    }
2297
2298    #[inline]
2299    fn at_terminator(&self) -> bool {
2300        let b = self.lex.peek();
2301        b == 0
2302            || b == b'\n'
2303            || b == b';'
2304            || b == b')'
2305            || b == b'}'
2306            || b == b'|'
2307            || (b == b'&' && self.lex.peek_at(1) != b'>')
2308    }
2309
2310    /// Check if we're at a function definition: `NAME ()` or `function NAME`.
2311    fn at_func_def(&self) -> bool {
2312        if self.lex.at_keyword(b"function") {
2313            return true;
2314        }
2315        // Check for NAME() pattern
2316        let src = self.lex.remaining().as_bytes();
2317        if src.is_empty() || !(src[0].is_ascii_alphabetic() || src[0] == b'_') {
2318            return false;
2319        }
2320        let mut j = 1;
2321        while j < src.len() && (src[j].is_ascii_alphanumeric() || src[j] == b'_') {
2322            j += 1;
2323        }
2324        while j < src.len() && (src[j] == b' ' || src[j] == b'\t') {
2325            j += 1;
2326        }
2327        j < src.len() && src[j] == b'('
2328    }
2329}
2330
2331#[cfg(test)]
2332mod tests {
2333    use super::*;
2334
2335    fn parse(input: &str) -> Vec<Cmd<'_>> {
2336        Parser::new(input).parse().unwrap()
2337    }
2338
2339    fn parse_err(input: &str) -> ParseError {
2340        Parser::new(input).parse().unwrap_err()
2341    }
2342
2343    #[test]
2344    fn simple_command() {
2345        let cmds = parse("echo hello world");
2346        assert_eq!(cmds.len(), 1);
2347    }
2348
2349    #[test]
2350    fn pipeline() {
2351        let cmds = parse("cat file | grep foo | wc -l");
2352        assert_eq!(cmds.len(), 1);
2353    }
2354
2355    #[test]
2356    fn and_or_chain() {
2357        let cmds = parse("cmd1 && cmd2 || cmd3");
2358        assert_eq!(cmds.len(), 1);
2359        if let Cmd::List(ref list) = cmds[0] {
2360            assert_eq!(list.rest.len(), 2);
2361        } else {
2362            panic!("expected list");
2363        }
2364    }
2365
2366    #[test]
2367    fn background_job() {
2368        let cmds = parse("sleep 10 &");
2369        assert_eq!(cmds.len(), 1);
2370        assert!(matches!(cmds[0], Cmd::Job(_)));
2371    }
2372
2373    #[test]
2374    fn semicolon_separated() {
2375        let cmds = parse("echo a; echo b; echo c");
2376        assert_eq!(cmds.len(), 3);
2377    }
2378
2379    #[test]
2380    fn assignment() {
2381        let cmds = parse("FOO=bar");
2382        assert_eq!(cmds.len(), 1);
2383    }
2384
2385    #[test]
2386    fn export_assignment() {
2387        let cmds = parse("export PATH=/usr/bin:$PATH");
2388        assert_eq!(cmds.len(), 1);
2389    }
2390
2391    #[test]
2392    fn for_loop() {
2393        let cmds = parse("for i in a b c; do echo $i; done");
2394        assert_eq!(cmds.len(), 1);
2395    }
2396
2397    #[test]
2398    fn while_loop() {
2399        let cmds = parse("while true; do echo loop; done");
2400        assert_eq!(cmds.len(), 1);
2401    }
2402
2403    #[test]
2404    fn if_then_fi() {
2405        let cmds = parse("if test -f foo; then echo yes; fi");
2406        assert_eq!(cmds.len(), 1);
2407    }
2408
2409    #[test]
2410    fn if_else() {
2411        let cmds = parse("if test -f foo; then echo yes; else echo no; fi");
2412        assert_eq!(cmds.len(), 1);
2413    }
2414
2415    #[test]
2416    fn case_statement() {
2417        let cmds = parse("case $1 in foo) echo foo;; bar) echo bar;; esac");
2418        assert_eq!(cmds.len(), 1);
2419    }
2420
2421    #[test]
2422    fn brace_group() {
2423        let cmds = parse("{ echo a; echo b; }");
2424        assert_eq!(cmds.len(), 1);
2425    }
2426
2427    #[test]
2428    fn command_substitution() {
2429        let cmds = parse("echo $(whoami)");
2430        assert_eq!(cmds.len(), 1);
2431    }
2432
2433    #[test]
2434    fn arithmetic_substitution() {
2435        let cmds = parse("echo $((2 + 3))");
2436        assert_eq!(cmds.len(), 1);
2437    }
2438
2439    #[test]
2440    fn parameter_expansion_default() {
2441        let cmds = parse("echo ${HOME:-/tmp}");
2442        assert_eq!(cmds.len(), 1);
2443    }
2444
2445    #[test]
2446    fn single_quoted_word() {
2447        let cmds = parse("echo 'hello world'");
2448        assert_eq!(cmds.len(), 1);
2449    }
2450
2451    #[test]
2452    fn double_quoted_word() {
2453        let cmds = parse("echo \"hello $USER\"");
2454        assert_eq!(cmds.len(), 1);
2455    }
2456
2457    #[test]
2458    fn redirect_output() {
2459        let cmds = parse("echo hello >file.txt");
2460        assert_eq!(cmds.len(), 1);
2461    }
2462
2463    #[test]
2464    fn redirect_stderr() {
2465        let cmds = parse("cmd 2>/dev/null");
2466        assert_eq!(cmds.len(), 1);
2467    }
2468
2469    #[test]
2470    fn redirect_dup_write() {
2471        let cmds = parse("cmd 2>&1");
2472        assert_eq!(cmds.len(), 1);
2473    }
2474
2475    #[test]
2476    fn here_string() {
2477        let cmds = parse("cat <<< 'hello'");
2478        assert_eq!(cmds.len(), 1);
2479    }
2480
2481    #[test]
2482    fn write_all_redirect() {
2483        let cmds = parse("cmd &>file");
2484        assert_eq!(cmds.len(), 1);
2485    }
2486
2487    #[test]
2488    fn negated_pipeline() {
2489        let cmds = parse("! grep -q pattern file");
2490        assert_eq!(cmds.len(), 1);
2491    }
2492
2493    #[test]
2494    fn special_params() {
2495        let cmds = parse("echo $? $@ $# $$ $!");
2496        assert_eq!(cmds.len(), 1);
2497    }
2498
2499    #[test]
2500    fn positional_param() {
2501        let cmds = parse("echo $1 $2");
2502        assert_eq!(cmds.len(), 1);
2503    }
2504
2505    #[test]
2506    fn brace_range() {
2507        let cmds = parse("echo {1..5}");
2508        assert_eq!(cmds.len(), 1);
2509    }
2510
2511    #[test]
2512    fn brace_range_alpha() {
2513        let cmds = parse("echo {a..z}");
2514        assert_eq!(cmds.len(), 1);
2515    }
2516
2517    #[test]
2518    fn brace_range_with_step() {
2519        let cmds = parse("echo {1..10..2}");
2520        assert_eq!(cmds.len(), 1);
2521    }
2522
2523    #[test]
2524    fn double_bracket() {
2525        let cmds = parse("[[ -f /etc/hosts ]]");
2526        assert_eq!(cmds.len(), 1);
2527    }
2528
2529    #[test]
2530    fn double_bracket_with_and() {
2531        let cmds = parse("[[ -f a && -f b ]]");
2532        assert_eq!(cmds.len(), 1);
2533    }
2534
2535    #[test]
2536    fn standalone_arith() {
2537        let cmds = parse("(( i++ ))");
2538        assert_eq!(cmds.len(), 1);
2539    }
2540
2541    #[test]
2542    fn standalone_arith_assign() {
2543        let cmds = parse("(( x = 5 + 3 ))");
2544        assert_eq!(cmds.len(), 1);
2545    }
2546
2547    #[test]
2548    fn ansi_c_quoting() {
2549        let cmds = parse("echo $'hello\\nworld'");
2550        assert_eq!(cmds.len(), 1);
2551    }
2552
2553    #[test]
2554    fn ansi_c_quoting_escaped_squote() {
2555        let cmds = parse("echo $'it\\'s'");
2556        assert_eq!(cmds.len(), 1);
2557    }
2558
2559    #[test]
2560    fn env_prefix_command() {
2561        let cmds = parse("FOO=bar command");
2562        assert_eq!(cmds.len(), 1);
2563    }
2564
2565    #[test]
2566    fn nested_command_substitution() {
2567        let cmds = parse("echo $(basename $(pwd))");
2568        assert_eq!(cmds.len(), 1);
2569    }
2570
2571    #[test]
2572    fn glob_characters() {
2573        let cmds = parse("ls *.txt");
2574        assert_eq!(cmds.len(), 1);
2575    }
2576
2577    #[test]
2578    fn escaped_character() {
2579        let cmds = parse("echo hello\\ world");
2580        assert_eq!(cmds.len(), 1);
2581    }
2582
2583    #[test]
2584    fn param_expansion_trim_suffix() {
2585        let cmds = parse("echo ${file%.*}");
2586        assert_eq!(cmds.len(), 1);
2587    }
2588
2589    #[test]
2590    fn param_expansion_trim_prefix() {
2591        let cmds = parse("echo ${file##*/}");
2592        assert_eq!(cmds.len(), 1);
2593    }
2594
2595    #[test]
2596    fn comment() {
2597        let cmds = parse("echo hello # this is a comment\necho world");
2598        assert_eq!(cmds.len(), 2);
2599    }
2600
2601    #[test]
2602    fn function_def() {
2603        let cmds = parse("foo() { echo hello; }");
2604        assert_eq!(cmds.len(), 1);
2605    }
2606
2607    #[test]
2608    fn arithmetic_complex() {
2609        let cmds = parse("echo $((5 * (3 + 2)))");
2610        assert_eq!(cmds.len(), 1);
2611    }
2612
2613    #[test]
2614    fn param_expansion_len() {
2615        let cmds = parse("echo ${#HOME}");
2616        assert_eq!(cmds.len(), 1);
2617    }
2618
2619    #[test]
2620    fn append_redirect() {
2621        let cmds = parse("echo hello >>file.txt");
2622        assert_eq!(cmds.len(), 1);
2623    }
2624
2625    #[test]
2626    fn case_modification_upper() {
2627        let cmds = parse("echo ${var^^}");
2628        assert_eq!(cmds.len(), 1);
2629    }
2630
2631    #[test]
2632    fn case_modification_lower() {
2633        let cmds = parse("echo ${var,,}");
2634        assert_eq!(cmds.len(), 1);
2635    }
2636
2637    #[test]
2638    fn replace_first() {
2639        let cmds = parse("echo ${var/foo/bar}");
2640        assert_eq!(cmds.len(), 1);
2641    }
2642
2643    #[test]
2644    fn replace_all() {
2645        let cmds = parse("echo ${var//foo/bar}");
2646        assert_eq!(cmds.len(), 1);
2647    }
2648
2649    #[test]
2650    fn replace_prefix() {
2651        let cmds = parse("echo ${var/#foo/bar}");
2652        assert_eq!(cmds.len(), 1);
2653    }
2654
2655    #[test]
2656    fn replace_suffix() {
2657        let cmds = parse("echo ${var/%foo/bar}");
2658        assert_eq!(cmds.len(), 1);
2659    }
2660
2661    #[test]
2662    fn replace_delete() {
2663        let cmds = parse("echo ${var/foo}");
2664        assert_eq!(cmds.len(), 1);
2665    }
2666
2667    #[test]
2668    fn substring_offset() {
2669        let cmds = parse("echo ${var:2}");
2670        assert_eq!(cmds.len(), 1);
2671    }
2672
2673    #[test]
2674    fn substring_offset_length() {
2675        let cmds = parse("echo ${var:2:5}");
2676        assert_eq!(cmds.len(), 1);
2677    }
2678
2679    #[test]
2680    fn process_substitution() {
2681        let cmds = parse("diff <(sort a) <(sort b)");
2682        assert_eq!(cmds.len(), 1);
2683    }
2684
2685    #[test]
2686    fn process_substitution_out_error() {
2687        let err = parse_err("tee >(grep foo)");
2688        assert!(err.message().contains("output process substitution"));
2689    }
2690
2691    #[test]
2692    fn c_style_for() {
2693        let cmds = parse("for (( i=0; i<10; i++ )); do echo $i; done");
2694        assert_eq!(cmds.len(), 1);
2695    }
2696
2697    #[test]
2698    fn heredoc_quoted() {
2699        let cmds = parse("cat <<'EOF'\nhello world\nEOF");
2700        assert_eq!(cmds.len(), 1);
2701    }
2702
2703    #[test]
2704    fn heredoc_double_quoted() {
2705        let cmds = parse("cat <<\"EOF\"\nhello world\nEOF");
2706        assert_eq!(cmds.len(), 1);
2707    }
2708
2709    #[test]
2710    fn heredoc_unquoted() {
2711        let cmds = parse("cat <<EOF\nhello $NAME\nEOF");
2712        assert_eq!(cmds.len(), 1);
2713    }
2714
2715    #[test]
2716    fn case_fallthrough_error() {
2717        let err = parse_err("case $x in a) echo a;& b) echo b;; esac");
2718        assert!(err.message().contains("fallthrough"));
2719    }
2720
2721    #[test]
2722    fn case_continue_error() {
2723        let err = parse_err("case $x in a) echo a;;& b) echo b;; esac");
2724        assert!(err.message().contains(";;&"));
2725    }
2726
2727    #[test]
2728    fn prefix_list_star() {
2729        let cmds = parse("echo ${!BASH_*}");
2730        assert_eq!(cmds.len(), 1);
2731    }
2732
2733    #[test]
2734    fn prefix_list_at() {
2735        let cmds = parse("echo ${!MY@}");
2736        assert_eq!(cmds.len(), 1);
2737    }
2738
2739    #[test]
2740    fn select_error() {
2741        let err = parse_err("select opt in a b c; do echo $opt; done");
2742        assert!(err.message().contains("select"));
2743    }
2744}