Skip to main content

frost_parser/
parser.rs

//! Recursive descent parser for zsh-compatible shell grammar.
//!
//! Transforms a flat token stream from `frost-lexer` into an AST
//! defined in [`crate::ast`]. The parser is tolerant of errors —
//! it never panics and produces the best AST it can from any input.
6
7use compact_str::CompactString;
8
9use frost_lexer::{Span, Token, TokenKind};
10
11use crate::ast::*;
12
13/// Recursive descent parser.
14pub struct Parser<'a> {
15    tokens: &'a [Token],
16    pos: usize,
17}
18
19impl<'a> Parser<'a> {
20    pub fn new(tokens: &'a [Token]) -> Self {
21        Self { tokens, pos: 0 }
22    }
23
24    // ── Helpers ──────────────────────────────────────────────────────
25
26    /// Peek at the current token without consuming it.
27    fn peek(&self) -> &Token {
28        &self.tokens[self.pos.min(self.tokens.len() - 1)]
29    }
30
31    /// Current token kind.
32    fn peek_kind(&self) -> TokenKind {
33        self.peek().kind
34    }
35
36    /// Peek N tokens ahead.
37    fn peek_nth(&self, n: usize) -> TokenKind {
38        self.tokens
39            .get(self.pos + n)
40            .map_or(TokenKind::Eof, |t| t.kind)
41    }
42
43    /// Whether we're at the given token kind.
44    fn at(&self, kind: TokenKind) -> bool {
45        self.peek_kind() == kind
46    }
47
48    /// Whether we're at EOF.
49    fn at_eof(&self) -> bool {
50        self.at(TokenKind::Eof)
51    }
52
53    /// Advance to the next token, returning the current one.
54    fn advance(&mut self) -> &'a Token {
55        let tok = &self.tokens[self.pos.min(self.tokens.len() - 1)];
56        if self.pos < self.tokens.len() - 1 {
57            self.pos += 1;
58        }
59        tok
60    }
61
62    /// Consume the current token if it matches `kind`.
63    fn eat(&mut self, kind: TokenKind) -> bool {
64        if self.at(kind) {
65            self.advance();
66            true
67        } else {
68            false
69        }
70    }
71
72    /// Expect and consume the current token. No-op if it doesn't match.
73    fn expect(&mut self, kind: TokenKind) -> bool {
74        if self.eat(kind) {
75            true
76        } else {
77            // Error recovery: don't advance — let caller decide.
78            false
79        }
80    }
81
82    /// Consume a keyword that might appear as either its dedicated TokenKind
83    /// or as a plain Word with matching text (the lexer only recognizes
84    /// reserved words in command position; in other positions they come
85    /// through as Word).
86    fn eat_keyword(&mut self, kind: TokenKind, text: &str) -> bool {
87        if self.at(kind) || (self.at(TokenKind::Word) && self.peek().text == text) {
88            self.advance();
89            true
90        } else {
91            false
92        }
93    }
94
95    /// Skip newlines and comments (linebreak in the grammar).
96    fn skip_newlines(&mut self) {
97        while matches!(self.peek_kind(), TokenKind::Newline | TokenKind::Comment) {
98            self.advance();
99        }
100    }
101
102    /// Parse a sub-program from tokens[start..self.pos], appending an Eof
103    /// token so the sub-parser terminates properly.
104    fn sub_parse(&self, start: usize) -> Program {
105        let inner = &self.tokens[start..self.pos];
106        if inner.is_empty() {
107            return Program { commands: Vec::new() };
108        }
109        // Build a vec with the inner tokens plus an Eof sentinel.
110        let mut sub_tokens: Vec<Token> = inner.to_vec();
111        let last_end = sub_tokens.last().map(|t| t.span.end).unwrap_or(0);
112        sub_tokens.push(Token {
113            kind: TokenKind::Eof,
114            text: CompactString::default(),
115            span: Span::new(last_end, last_end),
116        });
117        let mut sub_parser = Parser::new(&sub_tokens);
118        sub_parser.parse()
119    }
120
121    /// Eat separators (`;`, `\n`, comments) between commands.
122    fn eat_separators(&mut self) {
123        while matches!(
124            self.peek_kind(),
125            TokenKind::Semi | TokenKind::Newline | TokenKind::Comment
126        ) {
127            self.advance();
128        }
129    }
130
131    /// Whether we're at a token that can start a word.
132    fn at_word(&self) -> bool {
133        is_word_start(self.peek_kind())
134    }
135
136    /// Whether we're at a redirect operator.
137    fn at_redirect(&self) -> bool {
138        is_redirect_op(self.peek_kind())
139    }
140
141    /// Whether we're at a keyword that ends a compound command body.
142    fn at_compound_end(&self) -> bool {
143        let kind = self.peek_kind();
144        let text = self.peek().text.as_str();
145        matches!(
146            kind,
147            TokenKind::Then
148                | TokenKind::Elif
149                | TokenKind::Else
150                | TokenKind::Fi
151                | TokenKind::Do
152                | TokenKind::Done
153                | TokenKind::Esac
154                | TokenKind::RightBrace
155                | TokenKind::RightParen
156                | TokenKind::DoubleSemi
157                | TokenKind::SemiAnd
158                | TokenKind::SemiPipe
159        ) || (kind == TokenKind::Word
160            && matches!(
161                text,
162                "then" | "elif" | "else" | "fi" | "do" | "done" | "esac"
163            ))
164    }
165
166    /// Whether we're at a token that can begin a new command.
167    fn at_command_start(&self) -> bool {
168        self.at_word()
169            || self.at_redirect()
170            || matches!(
171                self.peek_kind(),
172                TokenKind::If
173                    | TokenKind::For
174                    | TokenKind::While
175                    | TokenKind::Until
176                    | TokenKind::Case
177                    | TokenKind::Select
178                    | TokenKind::Repeat
179                    | TokenKind::Function
180                    | TokenKind::Time
181                    | TokenKind::Coproc
182                    | TokenKind::LeftParen
183                    | TokenKind::LeftBrace
184                    | TokenKind::Bang
185            )
186            || (self.peek_kind() == TokenKind::Word
187                && matches!(
188                    self.peek().text.as_str(),
189                    "if" | "for" | "while" | "until" | "case" | "select" | "repeat"
190                        | "function" | "time" | "coproc"
191                ))
192    }
193
194    /// Check if the current position looks like `name() ...` (function def).
195    fn is_function_def_ahead(&self) -> bool {
196        self.peek_kind() == TokenKind::Word
197            && self.peek_nth(1) == TokenKind::LeftParen
198            && self.peek_nth(2) == TokenKind::RightParen
199    }
200
201    /// Check if the current position looks like an assignment: `NAME=...`
202    /// (word immediately followed by `=` with no whitespace).
203    fn is_assignment_ahead(&self) -> bool {
204        if self.peek_kind() != TokenKind::Word {
205            return false;
206        }
207        if !is_identifier(&self.peek().text) {
208            return false;
209        }
210        // Next token must be Equals, adjacent to the word (no space).
211        if self.peek_nth(1) != TokenKind::Equals {
212            return false;
213        }
214        // Adjacency check: word ends where equals starts.
215        if let Some(eq_tok) = self.tokens.get(self.pos + 1) {
216            self.peek().span.end == eq_tok.span.start
217        } else {
218            false
219        }
220    }
221
222    // ── Grammar Rules ───────────────────────────────────────────────
223
224    /// program → newline_list? complete_command_list? EOF
225    pub fn parse(&mut self) -> Program {
226        let mut commands = Vec::new();
227        self.skip_newlines();
228
229        while !self.at_eof() {
230            if let Some(cmd) = self.parse_complete_command() {
231                commands.push(cmd);
232            } else {
233                break;
234            }
235            self.eat_separators();
236        }
237
238        Program { commands }
239    }
240
241    /// compound_list — used inside compound commands (more lenient
242    /// with newlines as separators).
243    fn parse_compound_list(&mut self) -> Vec<CompleteCommand> {
244        let mut commands = Vec::new();
245        self.skip_newlines();
246
247        while !self.at_eof() && !self.at_compound_end() {
248            if let Some(cmd) = self.parse_complete_command() {
249                commands.push(cmd);
250            } else {
251                break;
252            }
253            self.eat_separators();
254        }
255
256        commands
257    }
258
259    /// complete_command → list [`&`]
260    fn parse_complete_command(&mut self) -> Option<CompleteCommand> {
261        self.skip_newlines();
262        if self.at_eof() || self.at_compound_end() {
263            return None;
264        }
265        if !self.at_command_start() {
266            return None;
267        }
268
269        let list = self.parse_list();
270        let is_async = self.eat(TokenKind::Ampersand) || self.eat(TokenKind::Disown);
271
272        Some(CompleteCommand { list, is_async })
273    }
274
275    /// list → pipeline ((`&&` | `||`) newline_list pipeline)*
276    fn parse_list(&mut self) -> List {
277        let first = self.parse_pipeline();
278        let mut rest = Vec::new();
279
280        loop {
281            let op = match self.peek_kind() {
282                TokenKind::AndAnd => ListOp::And,
283                TokenKind::OrOr => ListOp::Or,
284                _ => break,
285            };
286            self.advance();
287            self.skip_newlines();
288            let pipeline = self.parse_pipeline();
289            rest.push((op, pipeline));
290        }
291
292        List { first, rest }
293    }
294
295    /// pipeline → [`!`] command (`|` newline_list command)*
296    fn parse_pipeline(&mut self) -> Pipeline {
297        let bang = self.eat(TokenKind::Bang);
298        let first = self.parse_command();
299        let mut commands = vec![first];
300        let mut pipe_stderr = Vec::new();
301
302        loop {
303            match self.peek_kind() {
304                TokenKind::Pipe => {
305                    self.advance();
306                    pipe_stderr.push(false);
307                }
308                TokenKind::PipeAmpersand => {
309                    self.advance();
310                    pipe_stderr.push(true);
311                }
312                _ => break,
313            }
314            self.skip_newlines();
315            commands.push(self.parse_command());
316        }
317
318        Pipeline {
319            bang,
320            commands,
321            pipe_stderr,
322        }
323    }
324
325    /// command → compound_command | function_def | simple_command
326    fn parse_command(&mut self) -> Command {
327        // Check for compound commands by token kind or word text.
328        let kind = self.peek_kind();
329        let text = self.peek().text.as_str();
330
331        match kind {
332            TokenKind::If => return Command::If(Box::new(self.parse_if())),
333            TokenKind::For => return self.parse_for_or_arith_for(),
334            TokenKind::While => return Command::While(Box::new(self.parse_while())),
335            TokenKind::Until => return Command::Until(Box::new(self.parse_until())),
336            TokenKind::Case => return Command::Case(Box::new(self.parse_case())),
337            TokenKind::Select => return Command::Select(Box::new(self.parse_select())),
338            TokenKind::Repeat => return Command::Repeat(Box::new(self.parse_repeat())),
339            TokenKind::LeftParen => {
340                // Check if this is (( ... )) arithmetic command.
341                if self.pos + 1 < self.tokens.len()
342                    && self.tokens[self.pos + 1].kind == TokenKind::LeftParen
343                    && self.tokens[self.pos + 1].span.start == self.tokens[self.pos].span.end
344                {
345                    return self.parse_arith_command();
346                }
347                return Command::Subshell(self.parse_subshell());
348            }
349            TokenKind::LeftBrace => {
350                let bg = self.parse_brace_group();
351                // Check for `{ ... } always { ... }` construct.
352                if self.eat_keyword(TokenKind::Word, "always") {
353                    let always_bg = self.parse_brace_group();
354                    return Command::Always(Box::new(AlwaysClause {
355                        try_body: bg.body,
356                        always_body: always_bg.body,
357                        redirects: always_bg.redirects,
358                    }));
359                }
360                return Command::BraceGroup(bg);
361            }
362            TokenKind::Function => {
363                return Command::FunctionDef(Box::new(self.parse_function_keyword()));
364            }
365            TokenKind::Time => return Command::Time(Box::new(self.parse_time())),
366            TokenKind::Coproc => return Command::Coproc(Box::new(self.parse_coproc())),
367            TokenKind::DoubleLeftBracket | TokenKind::CondStart => {
368                return Command::Simple(self.parse_cond_command());
369            }
370            _ => {}
371        }
372
373        // Words that look like compound-command keywords but came through
374        // as Word (lexer didn't recognise them in non-command position).
375        if kind == TokenKind::Word {
376            match text {
377                "if" => return Command::If(Box::new(self.parse_if())),
378                "for" => return self.parse_for_or_arith_for(),
379                "while" => return Command::While(Box::new(self.parse_while())),
380                "until" => return Command::Until(Box::new(self.parse_until())),
381                "case" => return Command::Case(Box::new(self.parse_case())),
382                "select" => return Command::Select(Box::new(self.parse_select())),
383                "repeat" => return Command::Repeat(Box::new(self.parse_repeat())),
384                "function" => {
385                    return Command::FunctionDef(Box::new(self.parse_function_keyword()));
386                }
387                "time" => return Command::Time(Box::new(self.parse_time())),
388                "coproc" => return Command::Coproc(Box::new(self.parse_coproc())),
389                "[[" => return Command::Simple(self.parse_cond_command()),
390                _ => {}
391            }
392        }
393
394        // Check for `name() body` function definition.
395        if self.is_function_def_ahead() {
396            return Command::FunctionDef(Box::new(self.parse_function_shorthand()));
397        }
398
399        Command::Simple(self.parse_simple_command())
400    }
401
402    // ── Simple commands ─────────────────────────────────────────────
403
404    /// simple_command → assignment* (word | redirect)*
405    fn parse_simple_command(&mut self) -> SimpleCommand {
406        let mut assignments = Vec::new();
407        let mut words = Vec::new();
408        let mut redirects = Vec::new();
409
410        // Leading assignments (before any non-assignment word).
411        while self.is_assignment_ahead() {
412            assignments.push(self.parse_assignment());
413        }
414
415        // Words and redirects (interleaved).
416        loop {
417            if self.at_redirect() {
418                redirects.push(self.parse_redirect());
419            } else if self.at_word() || self.at(TokenKind::Equals) {
420                words.push(self.parse_word());
421            } else {
422                break;
423            }
424        }
425
426        SimpleCommand {
427            assignments,
428            words,
429            redirects,
430        }
431    }
432
433    /// Parse a variable assignment: `NAME=value` or `NAME+=value`.
434    fn parse_assignment(&mut self) -> Assignment {
435        let name_tok = self.advance(); // Word (identifier)
436        let name = name_tok.text.clone();
437        let span_start = name_tok.span.start;
438
439        // Consume `=`.
440        self.advance();
441        let op = AssignOp::Assign;
442
443        // Value is optional (bare `FOO=` is valid).
444        let eq_end = self.tokens[self.pos - 1].span.end;
445        let value = if self.pos < self.tokens.len()
446            && self.tokens[self.pos].span.start == eq_end
447            && (is_word_start(self.peek_kind()) || self.at(TokenKind::Equals))
448        {
449            Some(self.parse_word())
450        } else {
451            None
452        };
453
454        let span_end = self.peek().span.start;
455        Assignment {
456            name,
457            op,
458            value,
459            span: Span::new(span_start, span_end),
460        }
461    }
462
463    // ── Words ───────────────────────────────────────────────────────
464
465    /// Parse a word, joining adjacent tokens into a single Word node.
466    fn parse_word(&mut self) -> Word {
467        let start_span = self.peek().span;
468        let mut parts = Vec::new();
469
470        // Parse the first word part.
471        parts.push(self.parse_word_part());
472
473        // Continue if the next token is directly adjacent (no whitespace).
474        loop {
475            if self.pos >= self.tokens.len() || self.pos == 0 {
476                break;
477            }
478            let prev_end = self.tokens[self.pos - 1]
479                .span
480                .end;
481            let next_start = self.tokens[self.pos].span.start;
482
483            if prev_end != next_start {
484                break; // whitespace gap — separate word
485            }
486            if !is_word_start(self.peek_kind()) && !self.at(TokenKind::Equals) {
487                break; // not a word-like token
488            }
489
490            parts.push(self.parse_word_part());
491        }
492
493        Word {
494            parts,
495            span: start_span,
496        }
497    }
498
499    /// Parse a single word part from the current token.
500    fn parse_word_part(&mut self) -> WordPart {
501        match self.peek_kind() {
502            TokenKind::Dollar => {
503                let dollar = self.advance();
504                // Check if next token is adjacent and is a variable name or special char.
505                if self.pos < self.tokens.len()
506                    && self.tokens[self.pos].span.start == dollar.span.end
507                {
508                    let next = &self.tokens[self.pos];
509                    match next.kind {
510                        TokenKind::Word | TokenKind::Number => {
511                            let name_tok = self.advance();
512                            WordPart::DollarVar(name_tok.text.clone())
513                        }
514                        // Special variables: $?, $#, $@, $*, $!, $-, $$
515                        TokenKind::Question => {
516                            self.advance();
517                            WordPart::DollarVar(CompactString::new("?"))
518                        }
519                        TokenKind::Bang => {
520                            self.advance();
521                            WordPart::DollarVar(CompactString::new("!"))
522                        }
523                        TokenKind::Dollar => {
524                            self.advance();
525                            WordPart::DollarVar(CompactString::new("$"))
526                        }
527                        TokenKind::Star => {
528                            self.advance();
529                            WordPart::DollarVar(CompactString::new("*"))
530                        }
531                        TokenKind::At => {
532                            self.advance();
533                            WordPart::DollarVar(CompactString::new("@"))
534                        }
535                        _ => {
536                            // Check text for # and - which may be in Word tokens
537                            if next.text.as_str() == "#" || next.text.as_str() == "-" {
538                                let tok = self.advance();
539                                WordPart::DollarVar(tok.text.clone())
540                            } else {
541                                WordPart::Literal(CompactString::new("$"))
542                            }
543                        }
544                    }
545                } else {
546                    WordPart::Literal(CompactString::new("$"))
547                }
548            }
549
550            TokenKind::DollarBrace => {
551                self.advance(); // consume ${
552                let mut raw = CompactString::default();
553                // Consume tokens until }.
554                while !self.at(TokenKind::RightBrace) && !self.at_eof() {
555                    let tok = self.advance();
556                    raw.push_str(&tok.text);
557                }
558                self.eat(TokenKind::RightBrace);
559                // Handle ${#param} for string length.
560                if raw.starts_with('#') && raw.len() > 1 {
561                    let inner = &raw[1..];
562                    // ${#param} — string length
563                    WordPart::DollarBrace {
564                        param: CompactString::new(inner),
565                        operator: Some(CompactString::new("length")),
566                        arg: None,
567                    }
568                } else if let Some(op_pos) = find_param_operator(&raw) {
569                    let param = CompactString::new(&raw[..op_pos]);
570                    let (op, arg_start) = extract_operator(&raw[op_pos..]);
571                    let arg_str = &raw[op_pos + arg_start..];
572                    WordPart::DollarBrace {
573                        param,
574                        operator: Some(CompactString::new(op)),
575                        arg: if arg_str.is_empty() {
576                            None
577                        } else {
578                            Some(Box::new(Word {
579                                parts: vec![WordPart::Literal(CompactString::new(arg_str))],
580                                span: Span::new(0, 0),
581                            }))
582                        },
583                    }
584                } else {
585                    WordPart::DollarVar(CompactString::new(&raw))
586                }
587            }
588
589            TokenKind::DollarParen => {
590                self.advance(); // consume $(
591                // Find the matching ) — track nesting depth.
592                let start = self.pos;
593                let mut depth: u32 = 1;
594                while self.pos < self.tokens.len() {
595                    match self.peek_kind() {
596                        TokenKind::LeftParen | TokenKind::DollarParen => {
597                            depth += 1;
598                            self.advance();
599                        }
600                        TokenKind::RightParen => {
601                            depth -= 1;
602                            if depth == 0 {
603                                break;
604                            }
605                            self.advance();
606                        }
607                        TokenKind::Eof => break,
608                        _ => {
609                            self.advance();
610                        }
611                    }
612                }
613                // Parse the inner tokens as a sub-program.
614                // Must append an Eof token so the sub-parser terminates.
615                let program = self.sub_parse(start);
616                self.eat(TokenKind::RightParen);
617                WordPart::CommandSub(Box::new(program))
618            }
619
620            TokenKind::DollarDoubleParen => {
621                self.advance(); // consume $((
622                let mut expr = String::new();
623                // Consume until `)` `)`.
624                loop {
625                    if self.at_eof() {
626                        break;
627                    }
628                    if self.at(TokenKind::RightParen)
629                        && self.peek_nth(1) == TokenKind::RightParen
630                    {
631                        self.advance(); // first )
632                        self.advance(); // second )
633                        break;
634                    }
635                    expr.push_str(&self.advance().text);
636                }
637                WordPart::ArithSub(CompactString::new(&expr))
638            }
639
640            TokenKind::Backtick => {
641                self.advance(); // opening `
642                let start = self.pos;
643                while !self.at(TokenKind::Backtick) && !self.at_eof() {
644                    self.advance();
645                }
646                let program = self.sub_parse(start);
647                self.eat(TokenKind::Backtick);
648                WordPart::CommandSub(Box::new(program))
649            }
650
651            TokenKind::SingleQuoted => {
652                let tok = self.advance();
653                WordPart::SingleQuoted(strip_quotes(&tok.text, '\''))
654            }
655
656            TokenKind::DoubleQuoted => {
657                let tok = self.advance();
658                let inner = strip_quotes(&tok.text, '"');
659                let parts = parse_dq_interior(&inner);
660                WordPart::DoubleQuoted(parts)
661            }
662
663            TokenKind::DollarSingleQuoted => {
664                let tok = self.advance();
665                let s = &tok.text;
666                let inner = if s.len() >= 3 {
667                    CompactString::new(&s[2..s.len() - 1])
668                } else {
669                    CompactString::default()
670                };
671                WordPart::SingleQuoted(inner)
672            }
673
674            TokenKind::Tilde => {
675                self.advance();
676                WordPart::Tilde(CompactString::default())
677            }
678
679            TokenKind::Star => {
680                self.advance();
681                WordPart::Glob(GlobKind::Star)
682            }
683
684            TokenKind::Question => {
685                self.advance();
686                WordPart::Glob(GlobKind::Question)
687            }
688
689            TokenKind::At => {
690                self.advance();
691                WordPart::Glob(GlobKind::At)
692            }
693
694            TokenKind::Equals => {
695                self.advance();
696                WordPart::Literal(CompactString::new("="))
697            }
698
699            TokenKind::Bang => {
700                self.advance();
701                WordPart::Literal(CompactString::new("!"))
702            }
703
704            _ => {
705                let tok = self.advance();
706                WordPart::Literal(tok.text.clone())
707            }
708        }
709    }
710
711    // ── Redirections ────────────────────────────────────────────────
712
713    /// Parse a redirect operator and its target word.
714    fn parse_redirect(&mut self) -> Redirect {
715        let tok = self.advance();
716        let span = tok.span;
717
718        let (fd, op) = match tok.kind {
719            TokenKind::Less => (None, RedirectOp::Less),
720            TokenKind::Greater => (None, RedirectOp::Greater),
721            TokenKind::DoubleGreater => (None, RedirectOp::DoubleGreater),
722            TokenKind::GreaterPipe => (None, RedirectOp::GreaterPipe),
723            TokenKind::GreaterBang => (None, RedirectOp::GreaterBang),
724            TokenKind::AmpGreater => (None, RedirectOp::AmpGreater),
725            TokenKind::AmpDoubleGreater => (None, RedirectOp::AmpDoubleGreater),
726            TokenKind::DoubleLess => (None, RedirectOp::DoubleLess),
727            TokenKind::TripleLess => (None, RedirectOp::TripleLess),
728            TokenKind::DoubleLessDash => (None, RedirectOp::DoubleLessDash),
729            TokenKind::LessGreater => (None, RedirectOp::LessGreater),
730            TokenKind::FdGreater => {
731                let fd_num = tok.text.trim_end_matches('>').parse().ok();
732                (fd_num, RedirectOp::Greater)
733            }
734            TokenKind::FdLess => {
735                let fd_num = tok.text.trim_end_matches('<').parse().ok();
736                (fd_num, RedirectOp::Less)
737            }
738            TokenKind::FdDoubleGreater => {
739                let fd_num = tok.text.trim_end_matches(">>").parse().ok();
740                (fd_num, RedirectOp::DoubleGreater)
741            }
742            TokenKind::FdDup => {
743                let fd_num = tok.text.split(">&").next().and_then(|s| s.parse().ok());
744                (fd_num, RedirectOp::FdDup)
745            }
746            _ => (None, RedirectOp::Greater), // shouldn't happen
747        };
748
749        let target = if self.at_word() {
750            self.parse_word()
751        } else {
752            // Missing redirect target.
753            Word {
754                parts: vec![],
755                span: self.peek().span,
756            }
757        };
758
759        Redirect {
760            fd,
761            op,
762            target,
763            span,
764        }
765    }
766
767    /// Collect trailing redirects after a compound command.
768    fn parse_trailing_redirects(&mut self) -> Vec<Redirect> {
769        let mut redirects = Vec::new();
770        while self.at_redirect() {
771            redirects.push(self.parse_redirect());
772        }
773        redirects
774    }
775
776    // ── Compound commands ───────────────────────────────────────────
777
778    /// if_clause → `if` compound_list `then` compound_list
779    ///             (`elif` compound_list `then` compound_list)*
780    ///             [`else` compound_list] `fi`
781    fn parse_if(&mut self) -> IfClause {
782        self.advance(); // consume `if` (keyword or word)
783        let condition = self.parse_compound_list();
784        self.eat_keyword(TokenKind::Then, "then");
785        let then_body = self.parse_compound_list();
786
787        let mut elifs = Vec::new();
788        while self.eat_keyword(TokenKind::Elif, "elif") {
789            let elif_cond = self.parse_compound_list();
790            self.eat_keyword(TokenKind::Then, "then");
791            let elif_body = self.parse_compound_list();
792            elifs.push((elif_cond, elif_body));
793        }
794
795        let else_body = if self.eat_keyword(TokenKind::Else, "else") {
796            Some(self.parse_compound_list())
797        } else {
798            None
799        };
800
801        self.eat_keyword(TokenKind::Fi, "fi");
802        let redirects = self.parse_trailing_redirects();
803
804        IfClause {
805            condition,
806            then_body,
807            elifs,
808            else_body,
809            redirects,
810        }
811    }
812
813    /// Dispatch between `for var in ...` and `for (( ... ))`.
814    fn parse_for_or_arith_for(&mut self) -> Command {
815        // After `for`, check if the next tokens are `((` (adjacent parens).
816        let after_for = self.pos + 1;
817        let is_arith = after_for + 1 < self.tokens.len()
818            && self.tokens[after_for].kind == TokenKind::LeftParen
819            && self.tokens[after_for + 1].kind == TokenKind::LeftParen
820            && self.tokens[after_for + 1].span.start == self.tokens[after_for].span.end;
821
822        if is_arith {
823            Command::ArithFor(Box::new(self.parse_arith_for()))
824        } else {
825            Command::For(Box::new(self.parse_for()))
826        }
827    }
828
829    /// arith_for → `for` `((` init `;` cond `;` step `))` separator `do` compound_list `done`
830    fn parse_arith_for(&mut self) -> ArithForClause {
831        self.advance(); // consume `for`
832        self.advance(); // first (
833        self.advance(); // second (
834
835        // Collect tokens into three expressions separated by `;`.
836        // We track paren depth to handle nested parens within expressions.
837        let mut parts: Vec<String> = Vec::new();
838        let mut current = String::new();
839        let mut depth: u32 = 0;
840
841        loop {
842            if self.at_eof() {
843                break;
844            }
845            let kind = self.peek_kind();
846            match kind {
847                TokenKind::LeftParen => {
848                    depth += 1;
849                    current.push('(');
850                    self.advance();
851                }
852                TokenKind::RightParen => {
853                    if depth == 0 {
854                        // End of (( ... )) — push remaining expression
855                        parts.push(std::mem::take(&mut current));
856                        self.advance(); // first )
857                        if self.peek_kind() == TokenKind::RightParen {
858                            self.advance(); // second )
859                        }
860                        break;
861                    }
862                    depth -= 1;
863                    current.push(')');
864                    self.advance();
865                }
866                TokenKind::Semi => {
867                    if depth == 0 {
868                        // Separator between init/cond/step
869                        parts.push(std::mem::take(&mut current));
870                        self.advance();
871                    } else {
872                        current.push(';');
873                        self.advance();
874                    }
875                }
876                _ => {
877                    let tok = self.advance();
878                    // Add spacing between tokens, but not between adjacent
879                    // operator characters (same logic as parse_arith_command).
880                    if !current.is_empty() {
881                        let last = current.as_bytes()[current.len() - 1];
882                        let first = tok.text.as_bytes().first().copied().unwrap_or(0);
883                        let is_op = |c: u8| {
884                            matches!(
885                                c,
886                                b'=' | b'!'
887                                    | b'<'
888                                    | b'>'
889                                    | b'+'
890                                    | b'-'
891                                    | b'*'
892                                    | b'/'
893                                    | b'%'
894                                    | b'&'
895                                    | b'|'
896                                    | b'^'
897                                    | b'~'
898                            )
899                        };
900                        if !(is_op(last) && is_op(first)) && !current.ends_with(' ') {
901                            current.push(' ');
902                        }
903                    }
904                    current.push_str(&tok.text);
905                }
906            }
907        }
908
909        // Ensure we have exactly 3 parts (init, condition, step).
910        while parts.len() < 3 {
911            parts.push(String::new());
912        }
913
914        // Eat optional separator (`;` or newline) between `))` and `do`.
915        if !self.eat(TokenKind::Semi) {
916            self.skip_newlines();
917        }
918        self.skip_newlines();
919
920        self.eat_keyword(TokenKind::Do, "do");
921        let body = self.parse_compound_list();
922        self.eat_keyword(TokenKind::Done, "done");
923        let redirects = self.parse_trailing_redirects();
924
925        ArithForClause {
926            init: CompactString::new(parts[0].trim()),
927            condition: CompactString::new(parts[1].trim()),
928            step: CompactString::new(parts[2].trim()),
929            body,
930            redirects,
931        }
932    }
933
934    /// for_clause → `for` name [`in` word*] separator `do` compound_list `done`
935    fn parse_for(&mut self) -> ForClause {
936        self.advance(); // consume `for`
937        let var_tok = self.advance();
938        let var = var_tok.text.clone();
939        self.skip_newlines();
940
941        let words = if self.eat_keyword(TokenKind::In, "in") {
942            let mut words = Vec::new();
943            while self.at_word() {
944                words.push(self.parse_word());
945            }
946            Some(words)
947        } else {
948            None
949        };
950
951        // Eat separator between word list and `do`.
952        if !self.eat(TokenKind::Semi) {
953            self.skip_newlines();
954        }
955        self.skip_newlines();
956
957        self.eat_keyword(TokenKind::Do, "do");
958        let body = self.parse_compound_list();
959        self.eat_keyword(TokenKind::Done, "done");
960        let redirects = self.parse_trailing_redirects();
961
962        ForClause {
963            var,
964            words,
965            body,
966            redirects,
967        }
968    }
969
970    /// while_clause → `while` compound_list `do` compound_list `done`
971    fn parse_while(&mut self) -> WhileClause {
972        self.advance(); // consume `while`
973        let condition = self.parse_compound_list();
974        self.eat_keyword(TokenKind::Do, "do");
975        let body = self.parse_compound_list();
976        self.eat_keyword(TokenKind::Done, "done");
977        let redirects = self.parse_trailing_redirects();
978
979        WhileClause {
980            condition,
981            body,
982            redirects,
983        }
984    }
985
986    /// until_clause → `until` compound_list `do` compound_list `done`
987    fn parse_until(&mut self) -> UntilClause {
988        self.advance(); // consume `until`
989        let condition = self.parse_compound_list();
990        self.eat_keyword(TokenKind::Do, "do");
991        let body = self.parse_compound_list();
992        self.eat_keyword(TokenKind::Done, "done");
993        let redirects = self.parse_trailing_redirects();
994
995        UntilClause {
996            condition,
997            body,
998            redirects,
999        }
1000    }
1001
1002    /// case_clause → `case` word newline_list `in` newline_list case_item* `esac`
1003    fn parse_case(&mut self) -> CaseClause {
1004        self.advance(); // consume `case`
1005        let word = self.parse_word();
1006        self.skip_newlines();
1007        self.eat_keyword(TokenKind::In, "in");
1008        self.skip_newlines();
1009
1010        let mut items = Vec::new();
1011        while !self.eat_keyword(TokenKind::Esac, "esac") && !self.at_eof() {
1012            items.push(self.parse_case_item());
1013            self.skip_newlines();
1014        }
1015
1016        let redirects = self.parse_trailing_redirects();
1017
1018        CaseClause {
1019            word,
1020            items,
1021            redirects,
1022        }
1023    }
1024
1025    /// case_item → [`(`] pattern (`|` pattern)* `)` compound_list? (`;;`|`;&`|`;|`)
1026    fn parse_case_item(&mut self) -> CaseItem {
1027        self.eat(TokenKind::LeftParen); // optional leading (
1028
1029        let mut patterns = vec![self.parse_word()];
1030        while self.eat(TokenKind::Pipe) {
1031            patterns.push(self.parse_word());
1032        }
1033
1034        self.expect(TokenKind::RightParen);
1035        self.skip_newlines();
1036
1037        let body = self.parse_compound_list();
1038
1039        let terminator = match self.peek_kind() {
1040            TokenKind::DoubleSemi => {
1041                self.advance();
1042                CaseTerminator::DoubleSemi
1043            }
1044            TokenKind::SemiAnd => {
1045                self.advance();
1046                CaseTerminator::SemiAnd
1047            }
1048            TokenKind::SemiPipe => {
1049                self.advance();
1050                CaseTerminator::SemiPipe
1051            }
1052            _ => CaseTerminator::DoubleSemi,
1053        };
1054
1055        CaseItem {
1056            patterns,
1057            body,
1058            terminator,
1059        }
1060    }
1061
1062    /// select_clause → `select` name [`in` word*] separator `do` compound_list `done`
1063    fn parse_select(&mut self) -> SelectClause {
1064        self.advance(); // consume `select`
1065        let var_tok = self.advance();
1066        let var = var_tok.text.clone();
1067        self.skip_newlines();
1068
1069        let words = if self.eat_keyword(TokenKind::In, "in") {
1070            let mut words = Vec::new();
1071            while self.at_word() {
1072                words.push(self.parse_word());
1073            }
1074            Some(words)
1075        } else {
1076            None
1077        };
1078
1079        if !self.eat(TokenKind::Semi) {
1080            self.skip_newlines();
1081        }
1082        self.skip_newlines();
1083
1084        self.eat_keyword(TokenKind::Do, "do");
1085        let body = self.parse_compound_list();
1086        self.eat_keyword(TokenKind::Done, "done");
1087        let redirects = self.parse_trailing_redirects();
1088
1089        SelectClause {
1090            var,
1091            words,
1092            body,
1093            redirects,
1094        }
1095    }
1096
1097    /// repeat_clause → `repeat` word separator `do` compound_list `done`
1098    fn parse_repeat(&mut self) -> RepeatClause {
1099        self.advance(); // consume `repeat`
1100        let count = self.parse_word();
1101        self.eat_separators();
1102
1103        self.eat_keyword(TokenKind::Do, "do");
1104        let body = self.parse_compound_list();
1105        self.eat_keyword(TokenKind::Done, "done");
1106        let redirects = self.parse_trailing_redirects();
1107
1108        RepeatClause {
1109            count,
1110            body,
1111            redirects,
1112        }
1113    }
1114
1115    /// cond_command → `[[` ... `]]` — parse as a simple command.
1116    fn parse_cond_command(&mut self) -> SimpleCommand {
1117        let start_span = self.peek().span;
1118        self.advance(); // consume [[
1119
1120        let mut words = vec![Word {
1121            parts: vec![WordPart::Literal(CompactString::new("[["))],
1122            span: start_span,
1123        }];
1124
1125        // Consume tokens until ]] or EOF.
1126        loop {
1127            if self.at_eof() {
1128                break;
1129            }
1130            let kind = self.peek_kind();
1131            if kind == TokenKind::DoubleRightBracket || kind == TokenKind::CondEnd {
1132                let tok = self.advance();
1133                words.push(Word {
1134                    parts: vec![WordPart::Literal(CompactString::new("]]"))],
1135                    span: tok.span,
1136                });
1137                break;
1138            }
1139            // Check for word text "]]"
1140            if self.peek().text.as_str() == "]]" {
1141                let tok = self.advance();
1142                words.push(Word {
1143                    parts: vec![WordPart::Literal(CompactString::new("]]"))],
1144                    span: tok.span,
1145                });
1146                break;
1147            }
1148            words.push(self.parse_word());
1149        }
1150
1151        let redirects = self.parse_trailing_redirects();
1152
1153        SimpleCommand {
1154            assignments: Vec::new(),
1155            words,
1156            redirects,
1157        }
1158    }
1159
1160    /// arith_command → `((` ... `))` — parse as a simple command.
1161    fn parse_arith_command(&mut self) -> Command {
1162        let start_span = self.peek().span;
1163        self.advance(); // first (
1164        self.advance(); // second (
1165
1166        // Collect the expression text until ))
1167        let mut expr = String::new();
1168        let mut depth: u32 = 0;
1169        loop {
1170            if self.at_eof() {
1171                break;
1172            }
1173            let kind = self.peek_kind();
1174            match kind {
1175                TokenKind::LeftParen => {
1176                    depth += 1;
1177                    expr.push('(');
1178                    self.advance();
1179                }
1180                TokenKind::RightParen => {
1181                    if depth == 0 {
1182                        self.advance(); // first )
1183                        // Expect second )
1184                        if self.peek_kind() == TokenKind::RightParen {
1185                            self.advance();
1186                        }
1187                        break;
1188                    }
1189                    depth -= 1;
1190                    expr.push(')');
1191                    self.advance();
1192                }
1193                _ => {
1194                    let tok = self.advance();
1195                    // Add a space between tokens, BUT not between adjacent
1196                    // operator characters that form compound operators like
1197                    // ==, !=, <=, >=, +=, -=, *=, /=, %=, &&, ||, ++, --, <<, >>
1198                    if !expr.is_empty() {
1199                        let last = expr.as_bytes()[expr.len() - 1];
1200                        let first = tok.text.as_bytes().first().copied().unwrap_or(0);
1201                        let is_op = |c: u8| matches!(c, b'=' | b'!' | b'<' | b'>' | b'+' | b'-' | b'*' | b'/' | b'%' | b'&' | b'|' | b'^' | b'~');
1202                        if !(is_op(last) && is_op(first)) && !expr.ends_with(' ') {
1203                            expr.push(' ');
1204                        }
1205                    }
1206                    expr.push_str(&tok.text);
1207                }
1208            }
1209        }
1210
1211        // Create a simple command: (( expr ))
1212        let words = vec![
1213            Word {
1214                parts: vec![WordPart::Literal(CompactString::new("(("))],
1215                span: start_span,
1216            },
1217            Word {
1218                parts: vec![WordPart::Literal(CompactString::new(&expr))],
1219                span: start_span,
1220            },
1221            Word {
1222                parts: vec![WordPart::Literal(CompactString::new("))"))],
1223                span: start_span,
1224            },
1225        ];
1226
1227        Command::Simple(SimpleCommand {
1228            assignments: Vec::new(),
1229            words,
1230            redirects: Vec::new(),
1231        })
1232    }
1233
1234    /// subshell → `(` compound_list `)`
1235    fn parse_subshell(&mut self) -> Subshell {
1236        self.expect(TokenKind::LeftParen);
1237        let body = self.parse_compound_list();
1238        self.expect(TokenKind::RightParen);
1239        let redirects = self.parse_trailing_redirects();
1240        Subshell { body, redirects }
1241    }
1242
1243    /// brace_group → `{` compound_list `}`
1244    fn parse_brace_group(&mut self) -> BraceGroup {
1245        self.expect(TokenKind::LeftBrace);
1246        let body = self.parse_compound_list();
1247        self.expect(TokenKind::RightBrace);
1248        let redirects = self.parse_trailing_redirects();
1249        BraceGroup { body, redirects }
1250    }
1251
1252    /// function_def → `function` name [`(` `)`] newline_list command
1253    fn parse_function_keyword(&mut self) -> FunctionDef {
1254        self.advance(); // consume `function`
1255        let name_tok = self.advance();
1256        let name = name_tok.text.clone();
1257
1258        // Optional ( )
1259        if self.eat(TokenKind::LeftParen) {
1260            self.expect(TokenKind::RightParen);
1261        }
1262        self.skip_newlines();
1263
1264        let body = self.parse_command();
1265        let redirects = self.parse_trailing_redirects();
1266
1267        FunctionDef {
1268            name,
1269            body,
1270            redirects,
1271        }
1272    }
1273
1274    /// function_def → name `(` `)` newline_list command
1275    fn parse_function_shorthand(&mut self) -> FunctionDef {
1276        let name_tok = self.advance();
1277        let name = name_tok.text.clone();
1278        self.expect(TokenKind::LeftParen);
1279        self.expect(TokenKind::RightParen);
1280        self.skip_newlines();
1281
1282        let body = self.parse_command();
1283        let redirects = self.parse_trailing_redirects();
1284
1285        FunctionDef {
1286            name,
1287            body,
1288            redirects,
1289        }
1290    }
1291
1292    /// time_clause → `time` pipeline
1293    fn parse_time(&mut self) -> TimeClause {
1294        self.advance(); // consume `time`
1295        let pipeline = self.parse_pipeline();
1296        TimeClause { pipeline }
1297    }
1298
1299    /// coproc → `coproc` [name] command
1300    fn parse_coproc(&mut self) -> Coproc {
1301        self.advance(); // consume `coproc`
1302
1303        let name = if self.peek_kind() == TokenKind::Word
1304            && is_identifier(&self.peek().text)
1305            && !matches!(
1306                self.peek_nth(1),
1307                TokenKind::Eof
1308                    | TokenKind::Semi
1309                    | TokenKind::Newline
1310                    | TokenKind::Pipe
1311                    | TokenKind::AndAnd
1312                    | TokenKind::OrOr
1313                    | TokenKind::Ampersand
1314            )
1315        {
1316            let name_tok = self.advance();
1317            Some(name_tok.text.clone())
1318        } else {
1319            None
1320        };
1321
1322        let command = self.parse_command();
1323        Coproc { name, command }
1324    }
1325}
1326
1327// ── Free functions ──────────────────────────────────────────────────
1328
1329/// Whether a token kind can begin a word or be part of one.
1330fn is_word_start(kind: TokenKind) -> bool {
1331    matches!(
1332        kind,
1333        TokenKind::Word
1334            | TokenKind::SingleQuoted
1335            | TokenKind::DoubleQuoted
1336            | TokenKind::DollarSingleQuoted
1337            | TokenKind::Number
1338            | TokenKind::Dollar
1339            | TokenKind::DollarBrace
1340            | TokenKind::DollarParen
1341            | TokenKind::DollarDoubleParen
1342            | TokenKind::Backtick
1343            | TokenKind::Tilde
1344            | TokenKind::Star
1345            | TokenKind::Question
1346            | TokenKind::At
1347            | TokenKind::Bang
1348    )
1349}
1350
1351/// Whether a token kind is a redirect operator.
1352fn is_redirect_op(kind: TokenKind) -> bool {
1353    matches!(
1354        kind,
1355        TokenKind::Less
1356            | TokenKind::Greater
1357            | TokenKind::DoubleGreater
1358            | TokenKind::GreaterPipe
1359            | TokenKind::GreaterBang
1360            | TokenKind::AmpGreater
1361            | TokenKind::AmpDoubleGreater
1362            | TokenKind::DoubleLess
1363            | TokenKind::TripleLess
1364            | TokenKind::DoubleLessDash
1365            | TokenKind::LessGreater
1366            | TokenKind::FdGreater
1367            | TokenKind::FdLess
1368            | TokenKind::FdDoubleGreater
1369            | TokenKind::FdDup
1370    )
1371}
1372
/// Returns `true` when `s` matches `[a-zA-Z_][a-zA-Z0-9_]*`.
///
/// The empty string is not an identifier. Any non-ASCII character makes the
/// result `false`, because every byte of such a character lies outside the
/// ASCII range.
fn is_identifier(s: &str) -> bool {
    let is_start = |b: u8| b == b'_' || b.is_ascii_alphabetic();
    let is_continue = |b: u8| b == b'_' || b.is_ascii_alphanumeric();

    match s.as_bytes().split_first() {
        Some((&head, tail)) => is_start(head) && tail.iter().all(|&b| is_continue(b)),
        None => false,
    }
}
1382
1383/// Strip matching quote characters from the start and end of a string.
1384fn strip_quotes(s: &str, quote: char) -> CompactString {
1385    if s.starts_with(quote) && s.ends_with(quote) && s.len() >= 2 {
1386        CompactString::new(&s[1..s.len() - 1])
1387    } else {
1388        CompactString::new(s)
1389    }
1390}
1391
1392/// Parse the interior of a double-quoted string, extracting `$VAR`,
1393/// `${param}`, `$(cmd)`, `$((expr))`, and backtick command substitutions.
1394fn parse_dq_interior(s: &str) -> Vec<WordPart> {
1395    let mut parts = Vec::new();
1396    let bytes = s.as_bytes();
1397    let mut i = 0;
1398    let mut literal_start = 0;
1399
1400    while i < bytes.len() {
1401        if bytes[i] == b'\\' && i + 1 < bytes.len() {
1402            // Escaped char — include it literally.
1403            i += 2;
1404            continue;
1405        }
1406
1407        if bytes[i] == b'$' {
1408            // Flush accumulated literal.
1409            if i > literal_start {
1410                parts.push(WordPart::Literal(CompactString::new(&s[literal_start..i])));
1411            }
1412
1413            if i + 1 >= bytes.len() {
1414                // Bare `$` at end.
1415                parts.push(WordPart::Literal(CompactString::new("$")));
1416                i += 1;
1417                literal_start = i;
1418                continue;
1419            }
1420
1421            match bytes[i + 1] {
1422                b'{' => {
1423                    // ${...}
1424                    let start = i + 2;
1425                    let mut depth = 1;
1426                    let mut j = start;
1427                    while j < bytes.len() && depth > 0 {
1428                        if bytes[j] == b'{' {
1429                            depth += 1;
1430                        } else if bytes[j] == b'}' {
1431                            depth -= 1;
1432                        }
1433                        if depth > 0 {
1434                            j += 1;
1435                        }
1436                    }
1437                    let inner = &s[start..j];
1438                    // Parse operator if present.
1439                    if let Some(op_pos) = find_param_operator(inner) {
1440                        let param = CompactString::new(&inner[..op_pos]);
1441                        let (op, arg_start) = extract_operator(&inner[op_pos..]);
1442                        let arg_str = &inner[op_pos + arg_start..];
1443                        parts.push(WordPart::DollarBrace {
1444                            param,
1445                            operator: Some(CompactString::new(op)),
1446                            arg: if arg_str.is_empty() {
1447                                None
1448                            } else {
1449                                Some(Box::new(Word {
1450                                    parts: vec![WordPart::Literal(CompactString::new(arg_str))],
1451                                    span: Span::new(0, 0),
1452                                }))
1453                            },
1454                        });
1455                    } else {
1456                        parts.push(WordPart::DollarVar(CompactString::new(inner)));
1457                    }
1458                    i = j + 1;
1459                    literal_start = i;
1460                }
1461                b'(' => {
1462                    if i + 2 < bytes.len() && bytes[i + 2] == b'(' {
1463                        // $((...)) arithmetic substitution.
1464                        let start = i + 3;
1465                        let mut j = start;
1466                        while j + 1 < bytes.len() && !(bytes[j] == b')' && bytes[j + 1] == b')') {
1467                            j += 1;
1468                        }
1469                        let expr = &s[start..j];
1470                        parts.push(WordPart::ArithSub(CompactString::new(expr)));
1471                        i = j + 2;
1472                    } else {
1473                        // $(...) command substitution.
1474                        let start = i + 2;
1475                        let mut depth = 1;
1476                        let mut j = start;
1477                        while j < bytes.len() && depth > 0 {
1478                            if bytes[j] == b'(' {
1479                                depth += 1;
1480                            } else if bytes[j] == b')' {
1481                                depth -= 1;
1482                            }
1483                            if depth > 0 {
1484                                j += 1;
1485                            }
1486                        }
1487                        let inner = &s[start..j];
1488                        // Parse the inner command as a sub-program.
1489                        let tokens = frost_lexer::lexer::tokenize(inner.as_bytes());
1490                        let mut sub_parser = Parser::new(&tokens);
1491                        let program = sub_parser.parse();
1492                        parts.push(WordPart::CommandSub(Box::new(program)));
1493                        i = j + 1;
1494                    }
1495                    literal_start = i;
1496                }
1497                c if c.is_ascii_alphanumeric() || c == b'_' || c == b'?' || c == b'#'
1498                    || c == b'@' || c == b'*' || c == b'!' || c == b'-' || c == b'$' => {
1499                    // $VAR or special var ($?, $$, $#, $@, $*, $!, $-)
1500                    if matches!(c, b'?' | b'#' | b'@' | b'*' | b'!' | b'-' | b'$') {
1501                        parts.push(WordPart::DollarVar(CompactString::new(
1502                            &s[i + 1..i + 2],
1503                        )));
1504                        i += 2;
1505                    } else {
1506                        let start = i + 1;
1507                        let mut j = start;
1508                        while j < bytes.len()
1509                            && (bytes[j].is_ascii_alphanumeric() || bytes[j] == b'_')
1510                        {
1511                            j += 1;
1512                        }
1513                        parts.push(WordPart::DollarVar(CompactString::new(&s[start..j])));
1514                        i = j;
1515                    }
1516                    literal_start = i;
1517                }
1518                _ => {
1519                    // Bare `$` followed by something we don't recognize.
1520                    parts.push(WordPart::Literal(CompactString::new("$")));
1521                    i += 1;
1522                    literal_start = i;
1523                }
1524            }
1525        } else if bytes[i] == b'`' {
1526            // Backtick command substitution.
1527            if i > literal_start {
1528                parts.push(WordPart::Literal(CompactString::new(&s[literal_start..i])));
1529            }
1530            let start = i + 1;
1531            let mut j = start;
1532            while j < bytes.len() && bytes[j] != b'`' {
1533                if bytes[j] == b'\\' {
1534                    j += 1; // skip escaped char
1535                }
1536                j += 1;
1537            }
1538            let inner = &s[start..j];
1539            let tokens = frost_lexer::lexer::tokenize(inner.as_bytes());
1540            let mut sub_parser = Parser::new(&tokens);
1541            let program = sub_parser.parse();
1542            parts.push(WordPart::CommandSub(Box::new(program)));
1543            i = j + 1;
1544            literal_start = i;
1545        } else {
1546            i += 1;
1547        }
1548    }
1549
1550    // Flush remaining literal.
1551    if literal_start < bytes.len() {
1552        parts.push(WordPart::Literal(CompactString::new(&s[literal_start..])));
1553    }
1554
1555    if parts.is_empty() {
1556        parts.push(WordPart::Literal(CompactString::default()));
1557    }
1558
1559    parts
1560}
1561
/// Locates the operator inside the text of a `${...}` expansion.
///
/// Returns the byte offset of the first operator character, or `None` when:
/// - `s` starts with `#` (treated as the `${#param}` length form),
/// - no parameter name precedes the operator position, or
/// - the byte right after the name is not one of
///   `:` `#` `%` `-` `+` `=` `?` `/` `,` `^`.
///
/// The name is either one special character (`?`, `@`, `*`, `!`, `-`, `$`)
/// or a run of ASCII alphanumerics and underscores.
fn find_param_operator(s: &str) -> Option<usize> {
    let bytes = s.as_bytes();

    // Length of the parameter name at the front of `s`.
    let name_len = match bytes.first().copied() {
        Some(b'#') => return None,
        Some(c) if matches!(c, b'?' | b'@' | b'*' | b'!' | b'-' | b'$') => 1,
        _ => bytes
            .iter()
            .take_while(|&&b| b.is_ascii_alphanumeric() || b == b'_')
            .count(),
    };
    if name_len == 0 {
        return None;
    }

    // The byte right after the name must be able to start an operator.
    match bytes.get(name_len).copied() {
        Some(c)
            if matches!(
                c,
                b':' | b'#' | b'%' | b'-' | b'+' | b'=' | b'?' | b'/' | b',' | b'^'
            ) =>
        {
            Some(name_len)
        }
        _ => None,
    }
}
1590
/// Splits the operator off the front of `s`, where `s` starts at the
/// operator position of a `${...}` expansion.
///
/// Returns the operator text and its byte length, which is also the offset
/// at which the argument begins. Returns `("", 0)` when `s` does not start
/// with any known operator.
fn extract_operator(s: &str) -> (&str, usize) {
    // Tried in this order; every doubled form is listed before its
    // single-character form so the longer operator is matched first.
    const OPERATORS: &[&str] = &[
        ":-", ":+", ":=", ":?", "##", "%%", "#", "%", "-", "+", "=", "?", "//", "/", ",,", ",",
        "^^", "^",
    ];

    OPERATORS
        .iter()
        .copied()
        .find(|op| s.starts_with(*op))
        .map_or(("", 0), |op| (op, op.len()))
}
1613
1614// ── Tests ───────────────────────────────────────────────────────────
1615
#[cfg(test)]
mod tests {
    use super::*;
    use frost_lexer::lexer::tokenize;
    use pretty_assertions::assert_eq;

    /// Tokenizes `src` and parses the resulting token stream into a [`Program`].
    fn parse_str(src: &str) -> Program {
        let tokens = tokenize(src.as_bytes());
        Parser::new(&tokens).parse()
    }

    /// Borrows the first command of the first pipeline of the first
    /// top-level entry. Indexing panics if any of those levels is empty.
    fn first_command(program: &Program) -> &Command {
        &program.commands[0].list.first.commands[0]
    }

    /// Returns the leading command as a [`SimpleCommand`], panicking with the
    /// actual variant when it is anything else.
    fn first_simple(program: &Program) -> &SimpleCommand {
        match first_command(program) {
            Command::Simple(simple) => simple,
            other => panic!("expected Simple, got {other:?}"),
        }
    }

    /// Flattens a word into plain text for assertions.
    ///
    /// Literal and single-quoted parts are copied verbatim, a `DollarVar` is
    /// rendered as `$name`, and a double-quoted part contributes only its
    /// literal pieces. Every other part kind contributes nothing.
    fn word_text(w: &Word) -> String {
        let mut text = String::new();
        for part in w.parts.iter() {
            match part {
                WordPart::Literal(s) | WordPart::SingleQuoted(s) => text.push_str(s),
                WordPart::DoubleQuoted(inner) => {
                    for piece in inner.iter() {
                        if let WordPart::Literal(s) = piece {
                            text.push_str(s);
                        }
                    }
                }
                WordPart::DollarVar(name) => text.push_str(&format!("${name}")),
                _ => {}
            }
        }
        text
    }

    // ── Simple commands ─────────────────────────────────────────

    #[test]
    fn parse_single_word() {
        let program = parse_str("true");
        assert_eq!(program.commands.len(), 1);

        let cmd = first_simple(&program);
        assert_eq!(cmd.words.len(), 1);
        assert_eq!(word_text(&cmd.words[0]), "true");
    }

    #[test]
    fn parse_two_words() {
        let program = parse_str("echo hello");
        let cmd = first_simple(&program);

        // Comparing the whole list checks the count and both texts at once.
        let texts: Vec<String> = cmd.words.iter().map(word_text).collect();
        assert_eq!(texts, ["echo", "hello"]);
    }

    #[test]
    fn parse_three_words() {
        let program = parse_str("echo hello world");
        let cmd = first_simple(&program);
        assert_eq!(cmd.words.len(), 3);
        assert_eq!(word_text(&cmd.words[2]), "world");
    }

    #[test]
    fn parse_single_quoted() {
        let program = parse_str("echo 'hello world'");
        let cmd = first_simple(&program);
        assert_eq!(cmd.words.len(), 2);

        let quoted = &cmd.words[1].parts[0];
        assert!(matches!(quoted, WordPart::SingleQuoted(s) if s == "hello world"));
    }

    #[test]
    fn parse_double_quoted() {
        let program = parse_str(r#"echo "hello""#);
        let cmd = first_simple(&program);
        assert_eq!(cmd.words.len(), 2);

        let quoted = &cmd.words[1].parts[0];
        assert!(matches!(quoted, WordPart::DoubleQuoted(_)));
    }

    // ── Pipelines ───────────────────────────────────────────────

    #[test]
    fn parse_pipeline() {
        let program = parse_str("echo a | cat");
        let pipe = &program.commands[0].list.first;
        assert_eq!(pipe.commands.len(), 2);
        assert!(!pipe.bang);
    }

    #[test]
    fn parse_bang_pipeline() {
        let program = parse_str("! false");
        let pipe = &program.commands[0].list.first;
        assert!(pipe.bang);
        assert_eq!(pipe.commands.len(), 1);
    }

    // ── Lists ───────────────────────────────────────────────────

    #[test]
    fn parse_and_list() {
        let program = parse_str("true && echo yes");
        let tail = &program.commands[0].list.rest;
        assert_eq!(tail.len(), 1);
        assert_eq!(tail[0].0, ListOp::And);
    }

    #[test]
    fn parse_or_list() {
        let program = parse_str("false || echo fallback");
        let tail = &program.commands[0].list.rest;
        assert_eq!(tail.len(), 1);
        assert_eq!(tail[0].0, ListOp::Or);
    }

    // ── Semicolons / Multiple commands ──────────────────────────

    #[test]
    fn parse_semicolon_separated() {
        let program = parse_str("echo a; echo b");
        assert_eq!(program.commands.len(), 2);
    }

    #[test]
    fn parse_newline_separated() {
        let program = parse_str("echo a\necho b");
        assert_eq!(program.commands.len(), 2);
    }

    // ── Assignments ─────────────────────────────────────────────

    #[test]
    fn parse_assignment() {
        let program = parse_str("FOO=bar");
        let cmd = first_simple(&program);
        assert_eq!(cmd.assignments.len(), 1);
        assert_eq!(cmd.assignments[0].name, "FOO");
        assert!(cmd.words.is_empty());
    }

    #[test]
    fn parse_assignment_with_command() {
        let program = parse_str("FOO=bar echo hello");
        let cmd = first_simple(&program);
        assert_eq!(cmd.assignments.len(), 1);
        assert_eq!(cmd.words.len(), 2);
    }

    // ── Redirects ───────────────────────────────────────────────

    #[test]
    fn parse_output_redirect() {
        let program = parse_str("echo hello > out.txt");
        let cmd = first_simple(&program);
        assert_eq!(cmd.redirects.len(), 1);

        let redirect = &cmd.redirects[0];
        assert_eq!(redirect.op, RedirectOp::Greater);
        assert_eq!(word_text(&redirect.target), "out.txt");
    }

    // ── Compound commands ───────────────────────────────────────

    #[test]
    fn parse_if_then_fi() {
        let program = parse_str("if true; then echo yes; fi");
        assert_eq!(program.commands.len(), 1);

        let clause = match first_command(&program) {
            Command::If(clause) => clause,
            other => panic!("expected If, got {other:?}"),
        };
        assert!(!clause.condition.is_empty());
        assert!(!clause.then_body.is_empty());
        assert!(clause.else_body.is_none());
    }

    #[test]
    fn parse_if_else() {
        let program = parse_str("if false; then echo no; else echo yes; fi");

        let clause = match first_command(&program) {
            Command::If(clause) => clause,
            other => panic!("expected If, got {other:?}"),
        };
        assert!(clause.else_body.is_some());
    }

    #[test]
    fn parse_for_loop() {
        let program = parse_str("for x in a b c; do echo $x; done");

        let clause = match first_command(&program) {
            Command::For(clause) => clause,
            other => panic!("expected For, got {other:?}"),
        };
        assert_eq!(clause.var, "x");
        assert_eq!(clause.words.as_ref().unwrap().len(), 3);
        assert!(!clause.body.is_empty());
    }

    #[test]
    fn parse_arith_for_loop() {
        let program = parse_str("for ((i=0; i<3; i++)); do echo $i; done");

        let clause = match first_command(&program) {
            Command::ArithFor(clause) => clause,
            other => panic!("expected ArithFor, got {other:?}"),
        };

        // `=` arrives from the lexer as its own token, so the parser emits
        // spaced expressions such as `i = 0` (the executor's
        // eval_arith_with_assignment copes with that). Hence the checks
        // look for fragments rather than exact text.
        for needle in &["i", "0"] {
            assert!(clause.init.contains(*needle));
        }
        for needle in &["i", "3"] {
            assert!(clause.condition.contains(*needle));
        }
        for needle in &["i", "++"] {
            assert!(clause.step.contains(*needle));
        }
        assert!(!clause.body.is_empty());
    }

    #[test]
    fn parse_arith_for_loop_multiline() {
        let program = parse_str("for ((x=1; x<=5; x++))\ndo\n  echo $x\ndone");

        let clause = match first_command(&program) {
            Command::ArithFor(clause) => clause,
            other => panic!("expected ArithFor, got {other:?}"),
        };

        // Fragment checks only; see `parse_arith_for_loop` input for the
        // single-line form of the same header.
        for needle in &["x", "1"] {
            assert!(clause.init.contains(*needle));
        }
        for needle in &["x", "5"] {
            assert!(clause.condition.contains(*needle));
        }
        for needle in &["x", "++"] {
            assert!(clause.step.contains(*needle));
        }
        assert!(!clause.body.is_empty());
    }

    #[test]
    fn parse_while_loop() {
        let program = parse_str("while true; do echo loop; done");

        let clause = match first_command(&program) {
            Command::While(clause) => clause,
            other => panic!("expected While, got {other:?}"),
        };
        assert!(!clause.condition.is_empty());
        assert!(!clause.body.is_empty());
    }

    #[test]
    fn parse_subshell() {
        let program = parse_str("(echo sub)");

        let sub = match first_command(&program) {
            Command::Subshell(sub) => sub,
            other => panic!("expected Subshell, got {other:?}"),
        };
        assert!(!sub.body.is_empty());
    }

    #[test]
    fn parse_brace_group() {
        let program = parse_str("{ echo group; }");

        let group = match first_command(&program) {
            Command::BraceGroup(group) => group,
            other => panic!("expected BraceGroup, got {other:?}"),
        };
        assert!(!group.body.is_empty());
    }

    #[test]
    fn parse_function_def() {
        let program = parse_str("myfn() { echo hello; }");

        let def = match first_command(&program) {
            Command::FunctionDef(def) => def,
            other => panic!("expected FunctionDef, got {other:?}"),
        };
        assert_eq!(def.name, "myfn");
    }

    #[test]
    fn parse_function_keyword() {
        let program = parse_str("function myfn { echo hello; }");

        let def = match first_command(&program) {
            Command::FunctionDef(def) => def,
            other => panic!("expected FunctionDef, got {other:?}"),
        };
        assert_eq!(def.name, "myfn");
    }

    // ── Async / background ──────────────────────────────────────

    #[test]
    fn parse_background() {
        let program = parse_str("sleep 10 &");
        assert!(program.commands[0].is_async);
    }

    // ── Empty / edge cases ──────────────────────────────────────

    #[test]
    fn parse_empty() {
        let program = parse_str("");
        assert!(program.commands.is_empty());
    }

    #[test]
    fn parse_only_newlines() {
        let program = parse_str("\n\n\n");
        assert!(program.commands.is_empty());
    }

    #[test]
    fn parse_comment_only() {
        let program = parse_str("# this is a comment\n");
        assert!(program.commands.is_empty());
    }
}