Skip to main content

wasmsh_parse/
lib.rs

1//! Handwritten recursive-descent parser for the wasmsh shell.
2//!
3//! Consumes tokens from `wasmsh-lex` and produces an AST defined
4//! in `wasmsh-ast`. No parser generators are used.
5
6mod word_parser;
7
8use std::collections::VecDeque;
9
10use wasmsh_ast::{
11    AndOrList, AndOrOp, ArithCommandNode, ArithForCommand, Assignment, CaseCommand, CaseItem,
12    CaseTerminator, Command, CompleteCommand, DoubleBracketCommand, ElifClause, ForCommand,
13    FunctionDef, GroupCommand, HereDocBody, IfCommand, Pipeline, Program, Redirection,
14    RedirectionOp, SelectCommand, SimpleCommand, Span, SubshellCommand, UntilCommand, WhileCommand,
15    Word, WordPart,
16};
17use wasmsh_lex::{Lexer, Token, TokenKind};
18
/// Error produced when shell source cannot be parsed.
///
/// Carries a human-readable message together with the source offset at which
/// the problem was detected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Description of what went wrong.
    pub message: String,
    /// Offset into the source text where the error was detected.
    pub offset: u32,
}

impl std::fmt::Display for ParseError {
    /// Renders the error as `parse error at <offset>: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "parse error at {}: {}", self.offset, self.message)
    }
}

impl std::error::Error for ParseError {}
33
34/// Parse a complete shell source string into a `Program` AST.
35pub fn parse(source: &str) -> Result<Program, ParseError> {
36    let mut parser = Parser::new(source)?;
37    parser.parse_program()
38}
39
// Words that terminate compound-list bodies (not command-starters).
// `at_command_start` returns false when the current word is one of these, and
// the `in word...` lists of `for`/`select` stop collecting when they reach one.
const TERMINATOR_WORDS: &[&str] = &["then", "elif", "else", "fi", "do", "done", "esac", "}"];
42
/// A pending here-doc that needs its body read after the command line.
struct PendingHereDoc {
    /// Delimiter word of the here-doc.
    // NOTE(review): presumably the line that ends the body — confirm against the
    // code that resolves pending here-docs (not visible in this section).
    delimiter: String,
    /// Whether tabs should be stripped from the body.
    // NOTE(review): presumably set for the `<<-` operator — confirm at the push site.
    strip_tabs: bool,
}
48
/// Recursive-descent parser state over a single source string.
struct Parser<'src> {
    /// Full source text; token text and raw slices are taken from it.
    source: &'src str,
    /// Token producer; also repositioned directly when raw arithmetic text is scanned.
    lexer: Lexer<'src>,
    /// The token currently under inspection (not yet consumed).
    current: Token,
    /// Lookahead buffer: filled by `peek_nth`, drained front-first by `advance`.
    peeked: VecDeque<Token>,
    /// End offset of the most recently consumed token; `span_from` uses it as span end.
    prev_end: u32,
    /// Here-docs seen on the current command line whose bodies are still unread.
    pending_heredocs: Vec<PendingHereDoc>,
}
57
58impl<'src> Parser<'src> {
59    fn new(source: &'src str) -> Result<Self, ParseError> {
60        let mut lexer = Lexer::new(source);
61        let current = lexer.next_token().map_err(lex_err)?;
62        Ok(Self {
63            source,
64            lexer,
65            current,
66            peeked: VecDeque::new(),
67            prev_end: 0,
68            pending_heredocs: Vec::new(),
69        })
70    }
71
72    fn advance(&mut self) -> Result<Token, ParseError> {
73        let prev = self.current.clone();
74        self.prev_end = prev.span.end;
75        self.current = if let Some(tok) = self.peeked.pop_front() {
76            tok
77        } else {
78            self.lexer.next_token().map_err(lex_err)?
79        };
80        Ok(prev)
81    }
82
83    /// Peek at the nth token ahead (0 = next token after current).
84    fn peek_nth(&mut self, n: usize) -> Result<&Token, ParseError> {
85        while self.peeked.len() <= n {
86            self.peeked
87                .push_back(self.lexer.next_token().map_err(lex_err)?);
88        }
89        Ok(&self.peeked[n])
90    }
91
92    fn at(&self, kind: &TokenKind) -> bool {
93        self.current.kind == *kind
94    }
95
96    fn at_word(&self) -> bool {
97        matches!(self.current.kind, TokenKind::Word { .. })
98    }
99
100    fn at_word_eq(&self, text: &str) -> bool {
101        self.at_word() && self.current_text() == text
102    }
103
104    // at_process_substitution removed — use inline peek_is_lparen check instead
105
106    fn parse_process_substitution_word(&mut self) -> Result<Word, ParseError> {
107        use wasmsh_ast::WordPart;
108
109        let start = self.current.span.start;
110        let is_input = self.current.kind == TokenKind::Less;
111        self.advance()?; // consume < or >
112        self.advance()?; // consume (
113
114        // Collect tokens until matching )
115        let inner_start = self.current.span.start;
116        let mut depth = 1u32;
117        while depth > 0 {
118            match self.current.kind {
119                TokenKind::LParen => depth += 1,
120                TokenKind::RParen => {
121                    depth -= 1;
122                    if depth == 0 {
123                        break;
124                    }
125                }
126                TokenKind::Eof => {
127                    return Err(ParseError {
128                        message: "unterminated process substitution".into(),
129                        offset: start,
130                    });
131                }
132                _ => {}
133            }
134            self.advance()?;
135        }
136        let inner_end = self.current.span.start;
137        let inner_text = self.source[inner_start as usize..inner_end as usize]
138            .trim()
139            .to_string();
140        self.advance()?; // consume )
141
142        let part = if is_input {
143            WordPart::ProcessSubstIn(inner_text.into())
144        } else {
145            WordPart::ProcessSubstOut(inner_text.into())
146        };
147        Ok(Word {
148            parts: vec![part],
149            span: self.span_from(start),
150        })
151    }
152
153    fn at_redirection(&self) -> bool {
154        matches!(
155            self.current.kind,
156            TokenKind::Less
157                | TokenKind::Greater
158                | TokenKind::GreaterGreater
159                | TokenKind::LessLess
160                | TokenKind::LessLessDash
161                | TokenKind::LessLessLess
162                | TokenKind::LessGreater
163                | TokenKind::AmpGreater
164        )
165    }
166
167    /// Check if the current word token is a single digit and the next token
168    /// is a redirection operator. If so, this is an fd-prefix redirection.
169    fn at_fd_prefix_redirection(&mut self) -> bool {
170        if !self.at_word() {
171            return false;
172        }
173        let text = self.current_text();
174        if text.len() != 1 || !text.as_bytes()[0].is_ascii_digit() {
175            return false;
176        }
177        if let Ok(next) = self.peek_nth(0) {
178            matches!(
179                next.kind,
180                TokenKind::Less
181                    | TokenKind::Greater
182                    | TokenKind::GreaterGreater
183                    | TokenKind::LessLess
184                    | TokenKind::LessLessDash
185                    | TokenKind::LessLessLess
186                    | TokenKind::LessGreater
187            )
188        } else {
189            false
190        }
191    }
192
193    /// Check if the next token (after current) is `LParen`.
194    fn peek_is_lparen(&mut self) -> bool {
195        matches!(self.peek_nth(0), Ok(tok) if tok.kind == TokenKind::LParen)
196    }
197
198    /// Parse a word like `arr=(x y z)` where the current token is `arr=`
199    /// and the next token is `LParen`. Combines the name= with compound value.
200    fn parse_compound_assign_word(&mut self) -> Result<Word, ParseError> {
201        let tok = self.advance()?; // consume `arr=`
202        let text = tok.text(self.source);
203        self.advance()?; // consume '('
204        let mut elements = Vec::new();
205        while !self.at(&TokenKind::RParen) && !self.at(&TokenKind::Eof) {
206            if self.at(&TokenKind::Newline) {
207                self.advance()?;
208                continue;
209            }
210            if self.at_word() {
211                let w = self.advance()?;
212                elements.push(w.text(self.source).to_string());
213            } else {
214                break;
215            }
216        }
217        let end_span = if self.at(&TokenKind::RParen) {
218            self.advance()?.span.end
219        } else {
220            self.current.span.end
221        };
222        // Build a synthetic word like "arr=(x y z)"
223        let compound = format!("{text}({})", elements.join(" "));
224        Ok(Word {
225            parts: vec![WordPart::Literal(compound.into())],
226            span: Span {
227                start: tok.span.start,
228                end: end_span,
229            },
230        })
231    }
232
233    /// True if the current token can start a new command.
234    /// Check for `;;` (two consecutive semicolons — case item terminator).
235    fn is_double_semi(&mut self) -> bool {
236        if !self.at(&TokenKind::Semi) {
237            return false;
238        }
239        matches!(self.peek_nth(0), Ok(tok) if tok.kind == TokenKind::Semi)
240    }
241
242    /// Check for `;&` (case fall-through): `;` followed by `&`.
243    fn is_case_fallthrough(&mut self) -> bool {
244        if !self.at(&TokenKind::Semi) {
245            return false;
246        }
247        matches!(self.peek_nth(0), Ok(tok) if tok.kind == TokenKind::Amp)
248    }
249
250    /// Check for `;;&` (case continue-testing): `;;` followed by `&`.
251    fn is_case_continue_testing(&mut self) -> bool {
252        if !self.at(&TokenKind::Semi) {
253            return false;
254        }
255        if !matches!(self.peek_nth(0), Ok(tok) if tok.kind == TokenKind::Semi) {
256            return false;
257        }
258        matches!(self.peek_nth(1), Ok(tok) if tok.kind == TokenKind::Amp)
259    }
260
261    fn at_command_start(&self) -> bool {
262        if self.at(&TokenKind::LParen) || self.at(&TokenKind::DblLBracket) {
263            return true;
264        }
265        if self.at_word() {
266            let text = self.current_text();
267            return !TERMINATOR_WORDS.contains(&text);
268        }
269        self.at_redirection()
270    }
271
272    fn current_text(&self) -> &str {
273        self.current.text(self.source)
274    }
275
276    fn skip_newlines(&mut self) -> Result<(), ParseError> {
277        while self.at(&TokenKind::Newline) {
278            self.advance()?;
279        }
280        Ok(())
281    }
282
283    fn span_from(&self, start: u32) -> Span {
284        Span {
285            start,
286            end: self.prev_end,
287        }
288    }
289
290    fn expect_word(&mut self, expected: &str) -> Result<Token, ParseError> {
291        if self.at_word_eq(expected) {
292            self.advance()
293        } else {
294            Err(ParseError {
295                message: format!("expected '{}', got '{}'", expected, self.current_text()),
296                offset: self.current.span.start,
297            })
298        }
299    }
300
301    // ---- Grammar rules ----
302
303    fn parse_program(&mut self) -> Result<Program, ParseError> {
304        self.skip_newlines()?;
305        let mut commands = Vec::new();
306        while self.at_command_start() {
307            commands.push(self.parse_complete_command()?);
308            self.skip_newlines()?;
309        }
310        if !self.at(&TokenKind::Eof) {
311            return Err(ParseError {
312                message: format!("unexpected token: {:?}", self.current.kind),
313                offset: self.current.span.start,
314            });
315        }
316        Ok(Program { commands })
317    }
318
319    /// Parse a compound list (body of compound commands).
320    /// Stops at terminator words, `)`, or EOF.
321    fn parse_compound_list(&mut self) -> Result<Vec<CompleteCommand>, ParseError> {
322        self.skip_newlines()?;
323        let mut commands = Vec::new();
324        while self.at_command_start() {
325            commands.push(self.parse_complete_command()?);
326            self.skip_newlines()?;
327        }
328        Ok(commands)
329    }
330
331    fn parse_complete_command(&mut self) -> Result<CompleteCommand, ParseError> {
332        let start = self.current.span.start;
333        let mut list = Vec::new();
334        list.push(self.parse_and_or()?);
335
336        while self.at(&TokenKind::Semi)
337            && !self.is_double_semi()
338            && !self.is_case_fallthrough()
339            && !self.is_case_continue_testing()
340        {
341            self.advance()?;
342            // Don't skip newlines here if there are pending heredocs
343            if self.pending_heredocs.is_empty() {
344                self.skip_newlines()?;
345            }
346            if self.at_command_start() {
347                list.push(self.parse_and_or()?);
348            }
349        }
350
351        let mut cc = CompleteCommand {
352            list,
353            span: self.span_from(start),
354        };
355
356        // Resolve pending here-docs: read bodies from source after the newline
357        if !self.pending_heredocs.is_empty() && self.at(&TokenKind::Newline) {
358            self.resolve_heredocs(&mut cc)?;
359        }
360
361        Ok(cc)
362    }
363
364    fn parse_and_or(&mut self) -> Result<AndOrList, ParseError> {
365        let first = self.parse_pipeline()?;
366        let mut rest = Vec::new();
367
368        loop {
369            let op = if self.at(&TokenKind::AndAnd) {
370                self.advance()?;
371                AndOrOp::And
372            } else if self.at(&TokenKind::OrOr) {
373                self.advance()?;
374                AndOrOp::Or
375            } else {
376                break;
377            };
378            self.skip_newlines()?;
379            rest.push((op, self.parse_pipeline()?));
380        }
381
382        Ok(AndOrList { first, rest })
383    }
384
385    fn parse_pipeline(&mut self) -> Result<Pipeline, ParseError> {
386        let negated = if self.at_word_eq("!") {
387            self.advance()?;
388            true
389        } else {
390            false
391        };
392
393        let mut commands = Vec::new();
394        let mut pipe_stderr = Vec::new();
395        commands.push(self.parse_command()?);
396
397        loop {
398            if self.at(&TokenKind::Pipe) {
399                pipe_stderr.push(false);
400                self.advance()?;
401                self.skip_newlines()?;
402                commands.push(self.parse_command()?);
403            } else if self.at(&TokenKind::PipeAmp) {
404                pipe_stderr.push(true);
405                self.advance()?;
406                self.skip_newlines()?;
407                commands.push(self.parse_command()?);
408            } else {
409                break;
410            }
411        }
412
413        Ok(Pipeline {
414            negated,
415            commands,
416            pipe_stderr,
417        })
418    }
419
420    fn parse_command(&mut self) -> Result<Command, ParseError> {
421        if self.at(&TokenKind::LParen) {
422            return self.parse_command_lparen();
423        }
424        if self.at(&TokenKind::DblLBracket) {
425            return self.parse_double_bracket();
426        }
427        if self.at_word() {
428            if let Some(cmd) = self.try_parse_compound_keyword()? {
429                return Ok(cmd);
430            }
431        }
432        Ok(Command::Simple(self.parse_simple_command()?))
433    }
434
435    /// Dispatch `(( expr ))` arithmetic command or `( list )` subshell.
436    fn parse_command_lparen(&mut self) -> Result<Command, ParseError> {
437        if let Ok(next) = self.peek_nth(0) {
438            if next.kind == TokenKind::LParen && next.span.start == self.current.span.end {
439                return self.parse_arith_command();
440            }
441        }
442        self.parse_subshell()
443    }
444
445    /// If the current word is a compound keyword, parse and return it.
446    fn try_parse_compound_keyword(&mut self) -> Result<Option<Command>, ParseError> {
447        let text = self.current_text();
448        match text {
449            "{" => Ok(Some(self.parse_group()?)),
450            "if" => Ok(Some(self.parse_if()?)),
451            "while" => Ok(Some(self.parse_while()?)),
452            "until" => Ok(Some(self.parse_until()?)),
453            "for" => Ok(Some(self.parse_for()?)),
454            "case" => Ok(Some(self.parse_case()?)),
455            "select" => Ok(Some(self.parse_select()?)),
456            "function" => Ok(Some(self.parse_function_bash()?)),
457            _ => {
458                if self.peek_nth(0)?.kind == TokenKind::LParen
459                    && self.peek_nth(1)?.kind == TokenKind::RParen
460                {
461                    return Ok(Some(self.parse_function_posix()?));
462                }
463                Ok(None)
464            }
465        }
466    }
467
468    // ---- Compound commands ----
469
470    fn parse_subshell(&mut self) -> Result<Command, ParseError> {
471        let start = self.current.span.start;
472        self.advance()?; // consume (
473        let body = self.parse_compound_list()?;
474        if !self.at(&TokenKind::RParen) {
475            return Err(ParseError {
476                message: "expected ')' to close subshell".into(),
477                offset: self.current.span.start,
478            });
479        }
480        self.advance()?; // consume )
481        Ok(Command::Subshell(SubshellCommand {
482            body,
483            span: self.span_from(start),
484        }))
485    }
486
487    fn parse_group(&mut self) -> Result<Command, ParseError> {
488        let start = self.current.span.start;
489        self.expect_word("{")?;
490        let body = self.parse_compound_list()?;
491        self.expect_word("}")?;
492        Ok(Command::Group(GroupCommand {
493            body,
494            span: self.span_from(start),
495        }))
496    }
497
498    fn parse_if(&mut self) -> Result<Command, ParseError> {
499        let start = self.current.span.start;
500        self.expect_word("if")?;
501        let condition = self.parse_compound_list()?;
502        self.expect_word("then")?;
503        let then_body = self.parse_compound_list()?;
504
505        let mut elifs = Vec::new();
506        while self.at_word_eq("elif") {
507            self.advance()?;
508            let elif_cond = self.parse_compound_list()?;
509            self.expect_word("then")?;
510            let elif_body = self.parse_compound_list()?;
511            elifs.push(ElifClause {
512                condition: elif_cond,
513                then_body: elif_body,
514            });
515        }
516
517        let else_body = if self.at_word_eq("else") {
518            self.advance()?;
519            Some(self.parse_compound_list()?)
520        } else {
521            None
522        };
523
524        self.expect_word("fi")?;
525        Ok(Command::If(IfCommand {
526            condition,
527            then_body,
528            elifs,
529            else_body,
530            span: self.span_from(start),
531        }))
532    }
533
534    fn parse_while(&mut self) -> Result<Command, ParseError> {
535        let start = self.current.span.start;
536        self.expect_word("while")?;
537        let condition = self.parse_compound_list()?;
538        self.expect_word("do")?;
539        let body = self.parse_compound_list()?;
540        self.expect_word("done")?;
541        Ok(Command::While(WhileCommand {
542            condition,
543            body,
544            span: self.span_from(start),
545        }))
546    }
547
548    fn parse_until(&mut self) -> Result<Command, ParseError> {
549        let start = self.current.span.start;
550        self.expect_word("until")?;
551        let condition = self.parse_compound_list()?;
552        self.expect_word("do")?;
553        let body = self.parse_compound_list()?;
554        self.expect_word("done")?;
555        Ok(Command::Until(UntilCommand {
556            condition,
557            body,
558            span: self.span_from(start),
559        }))
560    }
561
562    fn parse_for(&mut self) -> Result<Command, ParseError> {
563        let start = self.current.span.start;
564        self.expect_word("for")?;
565
566        if self.is_arith_for_start() {
567            return self.parse_arith_for(start);
568        }
569
570        if !self.at_word() {
571            return Err(ParseError {
572                message: "expected variable name after 'for'".into(),
573                offset: self.current.span.start,
574            });
575        }
576        let var_name = self.current_text().into();
577        self.advance()?;
578
579        let words = self.parse_loop_words_clause()?;
580
581        self.skip_newlines()?;
582        self.expect_word("do")?;
583        let body = self.parse_compound_list()?;
584        self.expect_word("done")?;
585
586        Ok(Command::For(ForCommand {
587            var_name,
588            words,
589            body,
590            span: self.span_from(start),
591        }))
592    }
593
594    fn is_arith_for_start(&mut self) -> bool {
595        let current_end = self.current.span.end;
596        self.at(&TokenKind::LParen)
597            && self
598                .peek_nth(0)
599                .is_ok_and(|next| next.kind == TokenKind::LParen && next.span.start == current_end)
600    }
601
602    fn parse_loop_words_clause(&mut self) -> Result<Option<Vec<Word>>, ParseError> {
603        if !self.at_word_eq("in") {
604            self.consume_optional_semi()?;
605            return Ok(None);
606        }
607
608        self.advance()?;
609        let mut words = Vec::new();
610        while self.at_word()
611            && !self.at_word_eq("do")
612            && !TERMINATOR_WORDS.contains(&self.current_text())
613        {
614            words.push(self.parse_word()?);
615        }
616        self.consume_optional_semi()?;
617        Ok(Some(words))
618    }
619
620    fn consume_optional_semi(&mut self) -> Result<(), ParseError> {
621        if self.at(&TokenKind::Semi) {
622            self.advance()?;
623        }
624        Ok(())
625    }
626
627    /// Parse `select name [in word ...]; do body; done`.
628    fn parse_select(&mut self) -> Result<Command, ParseError> {
629        let start = self.current.span.start;
630        self.expect_word("select")?;
631
632        if !self.at_word() {
633            return Err(ParseError {
634                message: "expected variable name after 'select'".into(),
635                offset: self.current.span.start,
636            });
637        }
638        let var_name = self.current_text().into();
639        self.advance()?;
640
641        // Optional `in word...` clause
642        let words = if self.at_word_eq("in") {
643            self.advance()?;
644            let mut words = Vec::new();
645            while self.at_word()
646                && !self.at_word_eq("do")
647                && !TERMINATOR_WORDS.contains(&self.current_text())
648            {
649                words.push(self.parse_word()?);
650            }
651            if self.at(&TokenKind::Semi) {
652                self.advance()?;
653            }
654            Some(words)
655        } else {
656            if self.at(&TokenKind::Semi) {
657                self.advance()?;
658            }
659            None
660        };
661
662        self.skip_newlines()?;
663        self.expect_word("do")?;
664        let body = self.parse_compound_list()?;
665        self.expect_word("done")?;
666
667        // Collect trailing redirections (e.g., `done <<< "input"`)
668        let mut redirections = Vec::new();
669        while self.at_redirection() || self.at_fd_prefix_redirection() {
670            if self.at_fd_prefix_redirection() {
671                let fd_text = self.current_text();
672                let fd: u32 = fd_text.parse().unwrap_or(0);
673                self.advance()?;
674                let mut redir = self.parse_redirection()?;
675                redir.fd = Some(fd);
676                redirections.push(redir);
677            } else {
678                redirections.push(self.parse_redirection()?);
679            }
680        }
681
682        Ok(Command::Select(SelectCommand {
683            var_name,
684            words,
685            body,
686            redirections,
687            span: self.span_from(start),
688        }))
689    }
690
691    /// Parse `(( expr ))` arithmetic command.
692    /// The lexer tokenizes `((` as two `LParen` tokens. We consume them, then
693    /// collect raw source characters until the matching `))`.
694    fn parse_arith_command(&mut self) -> Result<Command, ParseError> {
695        let start = self.current.span.start;
696        self.advance()?; // first (
697        self.advance()?; // second (
698
699        let expr = self.collect_arith_expr()?;
700
701        Ok(Command::ArithCommand(ArithCommandNode {
702            expr: expr.into(),
703            span: self.span_from(start),
704        }))
705    }
706
707    /// Parse C-style for loop: `for (( init; cond; step )) do body done`.
708    /// Called after `for` has been consumed. `start` is the span start of `for`.
709    fn parse_arith_for(&mut self, start: u32) -> Result<Command, ParseError> {
710        self.advance()?; // first (
711        self.advance()?; // second (
712
713        // Collect three semicolon-separated expressions until ))
714        let inner = self.collect_arith_expr()?;
715
716        // Split inner on ';' to get init, cond, step
717        let parts: Vec<&str> = inner.splitn(3, ';').collect();
718        let init = parts.first().map_or("", |s| s.trim());
719        let cond = parts.get(1).map_or("", |s| s.trim());
720        let step = parts.get(2).map_or("", |s| s.trim());
721
722        // Optional ; or newline before `do`
723        if self.at(&TokenKind::Semi) {
724            self.advance()?;
725        }
726        self.skip_newlines()?;
727        self.expect_word("do")?;
728        let body = self.parse_compound_list()?;
729        self.expect_word("done")?;
730
731        Ok(Command::ArithFor(ArithForCommand {
732            init: init.into(),
733            cond: cond.into(),
734            step: step.into(),
735            body,
736            span: self.span_from(start),
737        }))
738    }
739
740    /// Collect raw source text for an arithmetic expression until `))` is found.
741    /// Handles nested parentheses. Consumes the closing `))`.
742    fn collect_arith_expr(&mut self) -> Result<String, ParseError> {
743        // We need to read raw source text until we find ))
744        // The current token is the first token after ((.
745        // Strategy: track byte position in source and scan for )).
746        let expr_start = self.current.span.start as usize;
747        let src = self.source;
748        let bytes = src.as_bytes();
749        let mut pos = expr_start;
750        let mut depth: u32 = 0;
751
752        while pos < bytes.len() {
753            if bytes[pos] == b'(' {
754                depth += 1;
755                pos += 1;
756            } else if bytes[pos] == b')' {
757                if depth > 0 {
758                    depth -= 1;
759                    pos += 1;
760                } else if pos + 1 < bytes.len() && bytes[pos + 1] == b')' {
761                    // Found ))
762                    let expr = src[expr_start..pos].trim().to_string();
763                    let end_pos = pos + 2;
764                    // Reposition lexer past ))
765                    self.lexer.set_position(end_pos);
766                    self.peeked.clear();
767                    self.prev_end = end_pos as u32;
768                    self.current = self.lexer.next_token().map_err(lex_err)?;
769                    return Ok(expr);
770                } else {
771                    return Err(ParseError {
772                        message: "expected '))' to close arithmetic expression".into(),
773                        offset: pos as u32,
774                    });
775                }
776            } else {
777                pos += 1;
778            }
779        }
780
781        Err(ParseError {
782            message: "unterminated arithmetic expression, expected '))'".into(),
783            offset: expr_start as u32,
784        })
785    }
786
787    /// Parse `case word in pattern) body ;; ... esac`.
788    fn parse_case(&mut self) -> Result<Command, ParseError> {
789        let start = self.current.span.start;
790        self.expect_word("case")?;
791
792        if !self.at_word() {
793            return Err(ParseError {
794                message: "expected word after 'case'".into(),
795                offset: self.current.span.start,
796            });
797        }
798        let word = self.parse_word()?;
799        self.skip_newlines()?;
800        self.expect_word("in")?;
801        self.skip_newlines()?;
802
803        let mut items = Vec::new();
804        while !self.at_word_eq("esac") && !self.at(&TokenKind::Eof) {
805            let patterns = self.parse_case_patterns()?;
806            self.skip_newlines()?;
807
808            let body = self.parse_case_body()?;
809            let terminator = self.parse_case_terminator()?;
810            self.skip_newlines()?;
811
812            items.push(CaseItem {
813                patterns,
814                body,
815                terminator,
816            });
817        }
818
819        self.expect_word("esac")?;
820        Ok(Command::Case(CaseCommand {
821            word,
822            items,
823            span: self.span_from(start),
824        }))
825    }
826
827    fn parse_case_patterns(&mut self) -> Result<Vec<Word>, ParseError> {
828        if self.at(&TokenKind::LParen) {
829            self.advance()?;
830        }
831        let mut patterns = vec![self.parse_word()?];
832        while self.at(&TokenKind::Pipe) {
833            self.advance()?;
834            patterns.push(self.parse_word()?);
835        }
836        if !self.at(&TokenKind::RParen) {
837            return Err(ParseError {
838                message: "expected ')' after case pattern".into(),
839                offset: self.current.span.start,
840            });
841        }
842        self.advance()?;
843        Ok(patterns)
844    }
845
846    fn parse_case_body(&mut self) -> Result<Vec<CompleteCommand>, ParseError> {
847        let mut body = Vec::new();
848        while self.at_command_start() && !self.is_double_semi() && !self.is_case_fallthrough() {
849            body.push(self.parse_complete_command()?);
850            self.skip_newlines()?;
851        }
852        Ok(body)
853    }
854
855    fn parse_case_terminator(&mut self) -> Result<CaseTerminator, ParseError> {
856        if self.is_case_continue_testing() {
857            self.advance()?;
858            self.advance()?;
859            self.advance()?;
860            return Ok(CaseTerminator::ContinueTesting);
861        }
862        if self.is_double_semi() {
863            self.advance()?;
864            self.advance()?;
865            return Ok(CaseTerminator::Break);
866        }
867        if self.is_case_fallthrough() {
868            self.advance()?;
869            self.advance()?;
870            return Ok(CaseTerminator::Fallthrough);
871        }
872        Ok(CaseTerminator::Break)
873    }
874
875    /// Parse `[[ expression ]]` extended test command.
876    fn parse_double_bracket(&mut self) -> Result<Command, ParseError> {
877        let start = self.current.span.start;
878        self.advance()?; // consume [[
879
880        let mut words = Vec::new();
881        loop {
882            if self.at(&TokenKind::DblRBracket) {
883                self.advance()?; // consume ]]
884                break;
885            }
886            if self.at(&TokenKind::Eof) {
887                return Err(ParseError {
888                    message: "expected ']]' to close extended test".into(),
889                    offset: self.current.span.start,
890                });
891            }
892            // Collect tokens as words (operators inside [[ ]] are expression tokens)
893            if self.at_word() {
894                words.push(self.parse_word()?);
895            } else {
896                // Operator tokens (&&, ||, <, >, (, )) become literal words
897                let tok = self.advance()?;
898                let text = tok.text(self.source);
899                words.push(Word {
900                    parts: vec![WordPart::Literal(text.into())],
901                    span: tok.span,
902                });
903            }
904        }
905
906        Ok(Command::DoubleBracket(DoubleBracketCommand {
907            words,
908            span: self.span_from(start),
909        }))
910    }
911
912    /// Parse POSIX-style function: `name ( ) compound_command`
913    fn parse_function_posix(&mut self) -> Result<Command, ParseError> {
914        let start = self.current.span.start;
915        let name = self.current_text().into();
916        self.advance()?; // name
917        self.advance()?; // (
918        self.advance()?; // )
919        self.skip_newlines()?;
920        let body = Box::new(self.parse_command()?);
921        Ok(Command::FunctionDef(FunctionDef {
922            name,
923            body,
924            span: self.span_from(start),
925        }))
926    }
927
928    /// Parse bash-style function: `function name [( )] compound_command`
929    fn parse_function_bash(&mut self) -> Result<Command, ParseError> {
930        let start = self.current.span.start;
931        self.expect_word("function")?;
932
933        if !self.at_word() {
934            return Err(ParseError {
935                message: "expected function name after 'function'".into(),
936                offset: self.current.span.start,
937            });
938        }
939        let name = self.current_text().into();
940        self.advance()?;
941
942        // Optional ( )
943        if self.at(&TokenKind::LParen) {
944            self.advance()?;
945            if !self.at(&TokenKind::RParen) {
946                return Err(ParseError {
947                    message: "expected ')' after '(' in function definition".into(),
948                    offset: self.current.span.start,
949                });
950            }
951            self.advance()?;
952        }
953
954        self.skip_newlines()?;
955        let body = Box::new(self.parse_command()?);
956        Ok(Command::FunctionDef(FunctionDef {
957            name,
958            body,
959            span: self.span_from(start),
960        }))
961    }
962
963    // ---- Simple commands ----
964
965    fn parse_simple_command(&mut self) -> Result<SimpleCommand, ParseError> {
966        let start = self.current.span.start;
967        let mut assignments = Vec::new();
968        let mut words = Vec::new();
969        let mut redirections = Vec::new();
970        let mut past_assignments = false;
971
972        loop {
973            if !self.parse_simple_command_part(
974                &mut assignments,
975                &mut words,
976                &mut redirections,
977                &mut past_assignments,
978            )? {
979                break;
980            }
981        }
982
983        if assignments.is_empty() && words.is_empty() && redirections.is_empty() {
984            return Err(ParseError {
985                message: "expected a command".into(),
986                offset: self.current.span.start,
987            });
988        }
989
990        Ok(SimpleCommand {
991            assignments,
992            words,
993            redirections,
994            span: self.span_from(start),
995        })
996    }
997
998    fn parse_simple_command_part(
999        &mut self,
1000        assignments: &mut Vec<Assignment>,
1001        words: &mut Vec<Word>,
1002        redirections: &mut Vec<Redirection>,
1003        past_assignments: &mut bool,
1004    ) -> Result<bool, ParseError> {
1005        if self.at_fd_prefix_redirection() {
1006            redirections.push(self.parse_fd_prefixed_redirection()?);
1007            return Ok(true);
1008        }
1009        // Process substitution <(cmd) / >(cmd) is a word argument, not a redirection.
1010        if matches!(self.current.kind, TokenKind::Less | TokenKind::Greater)
1011            && self.peek_is_lparen()
1012        {
1013            let word = self.parse_process_substitution_word()?;
1014            words.push(word);
1015            *past_assignments = true;
1016            return Ok(true);
1017        }
1018        if self.at_redirection() {
1019            redirections.push(self.parse_redirection()?);
1020            return Ok(true);
1021        }
1022        if self.at_word() {
1023            self.parse_simple_word_part(assignments, words, past_assignments)?;
1024            return Ok(true);
1025        }
1026        Ok(false)
1027    }
1028
1029    fn parse_fd_prefixed_redirection(&mut self) -> Result<Redirection, ParseError> {
1030        let fd_text = self.current_text();
1031        let fd: u32 = fd_text.parse().unwrap_or(0);
1032        self.advance()?;
1033        let mut redir = self.parse_redirection()?;
1034        redir.fd = Some(fd);
1035        Ok(redir)
1036    }
1037
1038    fn parse_simple_word_part(
1039        &mut self,
1040        assignments: &mut Vec<Assignment>,
1041        words: &mut Vec<Word>,
1042        past_assignments: &mut bool,
1043    ) -> Result<(), ParseError> {
1044        let text = self.current_text();
1045        if !*past_assignments && is_assignment_text(text) {
1046            assignments.push(self.parse_assignment()?);
1047            return Ok(());
1048        }
1049        *past_assignments = true;
1050        if text.ends_with('=') && self.peek_is_lparen() {
1051            words.push(self.parse_compound_assign_word()?);
1052        } else {
1053            words.push(self.parse_word()?);
1054        }
1055        Ok(())
1056    }
1057
1058    fn parse_word(&mut self) -> Result<Word, ParseError> {
1059        let tok = self.advance()?;
1060        let text = tok.text(self.source);
1061        let parts = word_parser::parse_word_parts(text);
1062        Ok(Word {
1063            parts,
1064            span: tok.span,
1065        })
1066    }
1067
1068    fn parse_assignment(&mut self) -> Result<Assignment, ParseError> {
1069        let tok = self.advance()?;
1070        let text = tok.text(self.source);
1071        let eq_pos = text.find('=').expect("assignment must contain '='");
1072        let name = &text[..eq_pos];
1073        let val_str = &text[eq_pos + 1..];
1074
1075        let value = if val_str.is_empty() {
1076            self.parse_assignment_compound_value(&tok, eq_pos)?
1077        } else {
1078            Some(self.make_assignment_word(&tok, eq_pos, val_str))
1079        };
1080
1081        Ok(Assignment {
1082            name: name.into(),
1083            value,
1084            span: tok.span,
1085        })
1086    }
1087
1088    fn parse_assignment_compound_value(
1089        &mut self,
1090        tok: &Token,
1091        eq_pos: usize,
1092    ) -> Result<Option<Word>, ParseError> {
1093        if !self.at(&TokenKind::LParen) {
1094            return Ok(None);
1095        }
1096        self.advance()?;
1097        let paren_start = tok.span.start + eq_pos as u32 + 1;
1098        let mut elements = Vec::new();
1099        while !self.at(&TokenKind::RParen) && !self.at(&TokenKind::Eof) {
1100            if self.at(&TokenKind::Newline) {
1101                self.advance()?;
1102                continue;
1103            }
1104            if !self.at_word() {
1105                break;
1106            }
1107            let word = self.advance()?;
1108            elements.push(word.text(self.source).to_string());
1109        }
1110        let end_span = if self.at(&TokenKind::RParen) {
1111            self.advance()?.span.end
1112        } else {
1113            self.current.span.end
1114        };
1115        Ok(Some(Word {
1116            parts: vec![WordPart::Literal(
1117                format!("({})", elements.join(" ")).into(),
1118            )],
1119            span: Span {
1120                start: paren_start,
1121                end: end_span,
1122            },
1123        }))
1124    }
1125
1126    #[allow(clippy::unused_self)]
1127    fn make_assignment_word(&self, tok: &Token, eq_pos: usize, val_str: &str) -> Word {
1128        let val_start = tok.span.start + eq_pos as u32 + 1;
1129        Word {
1130            parts: word_parser::parse_word_parts(val_str),
1131            span: Span {
1132                start: val_start,
1133                end: tok.span.end,
1134            },
1135        }
1136    }
1137
1138    fn parse_redirection(&mut self) -> Result<Redirection, ParseError> {
1139        let op_tok = self.advance()?;
1140        let (op, is_heredoc) = match op_tok.kind {
1141            TokenKind::Less => (RedirectionOp::Input, false),
1142            TokenKind::Greater | TokenKind::AmpGreater => (RedirectionOp::Output, false),
1143            TokenKind::GreaterGreater => (RedirectionOp::Append, false),
1144            TokenKind::LessGreater => (RedirectionOp::ReadWrite, false),
1145            TokenKind::LessLess => (RedirectionOp::HereDoc, true),
1146            TokenKind::LessLessDash => (RedirectionOp::HereDocStrip, true),
1147            TokenKind::LessLessLess => (RedirectionOp::HereString, false),
1148            _ => {
1149                return Err(ParseError {
1150                    message: format!("unexpected redirection operator: {:?}", op_tok.kind),
1151                    offset: op_tok.span.start,
1152                });
1153            }
1154        };
1155
1156        // For &>, we treat it as both stdout and stderr to file
1157        let is_amp_greater = op_tok.kind == TokenKind::AmpGreater;
1158
1159        // Check for >&N or <&N (fd duplication): > followed by & followed by digit word
1160        if self.at(&TokenKind::Amp) && matches!(op_tok.kind, TokenKind::Greater | TokenKind::Less) {
1161            self.advance()?; // consume &
1162            if self.at_word() {
1163                let target = self.parse_word()?;
1164                let span = Span {
1165                    start: op_tok.span.start,
1166                    end: target.span.end,
1167                };
1168                let dup_op = if op_tok.kind == TokenKind::Greater {
1169                    RedirectionOp::DupOutput
1170                } else {
1171                    RedirectionOp::DupInput
1172                };
1173                return Ok(Redirection {
1174                    fd: None,
1175                    op: dup_op,
1176                    target,
1177                    here_doc_body: None,
1178                    span,
1179                });
1180            }
1181            return Err(ParseError {
1182                message: "expected fd number after >&".into(),
1183                offset: self.current.span.start,
1184            });
1185        }
1186
1187        if !self.at_word() {
1188            return Err(ParseError {
1189                message: "expected word after redirection operator".into(),
1190                offset: self.current.span.start,
1191            });
1192        }
1193        let target = self.parse_word()?;
1194        let span = Span {
1195            start: op_tok.span.start,
1196            end: target.span.end,
1197        };
1198
1199        if is_heredoc {
1200            // Extract delimiter text (strip quotes from the delimiter word)
1201            let delim = heredoc_delimiter(&target);
1202            self.pending_heredocs.push(PendingHereDoc {
1203                delimiter: delim,
1204                strip_tabs: op == RedirectionOp::HereDocStrip,
1205            });
1206        }
1207
1208        // For &>, produce a fd=None redirection (handled specially in runtime)
1209        // We encode this by using fd = Some(u32::MAX) as a sentinel for &>
1210        let fd = if is_amp_greater {
1211            Some(u32::MAX) // sentinel: redirect both stdout and stderr
1212        } else {
1213            None
1214        };
1215
1216        Ok(Redirection {
1217            fd,
1218            op,
1219            target,
1220            here_doc_body: None,
1221            span,
1222        })
1223    }
1224
1225    /// Read here-doc bodies from source and attach them to the AST.
1226    fn resolve_heredocs(&mut self, cc: &mut CompleteCommand) -> Result<(), ParseError> {
1227        let newline_end = self.current.span.end as usize;
1228        let mut scan_pos = newline_end;
1229
1230        let mut bodies = Vec::new();
1231        for hd in &self.pending_heredocs {
1232            let body_start = scan_pos;
1233            loop {
1234                let (line_start, line_end, line) = self.heredoc_line(scan_pos);
1235                let check_line = self.heredoc_check_line(line, hd.strip_tabs);
1236                if check_line == hd.delimiter {
1237                    bodies.push(self.build_heredoc_body(body_start, line_start, hd.strip_tabs));
1238                    scan_pos = self.advance_heredoc_scan(line_end);
1239                    break;
1240                }
1241
1242                if line_end >= self.source.len() {
1243                    return Err(ParseError {
1244                        message: format!("unterminated here-doc, expected '{}'", hd.delimiter),
1245                        offset: body_start as u32,
1246                    });
1247                }
1248                scan_pos = line_end + 1;
1249            }
1250        }
1251
1252        // Walk the AST and assign bodies in order
1253        let mut body_iter = bodies.into_iter();
1254        assign_heredoc_bodies_cc(cc, &mut body_iter);
1255
1256        self.pending_heredocs.clear();
1257
1258        self.lexer.set_position(scan_pos);
1259        self.peeked.clear();
1260        self.current = self.lexer.next_token().map_err(lex_err)?;
1261
1262        Ok(())
1263    }
1264
1265    fn heredoc_line(&self, scan_pos: usize) -> (usize, usize, &str) {
1266        let line_start = scan_pos;
1267        let line_end = self.source[scan_pos..]
1268            .find('\n')
1269            .map_or(self.source.len(), |i| scan_pos + i);
1270        (line_start, line_end, &self.source[line_start..line_end])
1271    }
1272
1273    #[allow(clippy::unused_self)]
1274    fn heredoc_check_line<'a>(&self, line: &'a str, strip_tabs: bool) -> &'a str {
1275        if strip_tabs {
1276            line.trim_start_matches('\t')
1277        } else {
1278            line
1279        }
1280    }
1281
1282    fn build_heredoc_body(
1283        &self,
1284        body_start: usize,
1285        line_start: usize,
1286        strip_tabs: bool,
1287    ) -> HereDocBody {
1288        let raw_body = &self.source[body_start..line_start];
1289        let content = if strip_tabs {
1290            raw_body
1291                .lines()
1292                .map(|line| line.trim_start_matches('\t'))
1293                .collect::<Vec<_>>()
1294                .join("\n")
1295                + if raw_body.ends_with('\n') { "\n" } else { "" }
1296        } else {
1297            raw_body.to_string()
1298        };
1299        HereDocBody {
1300            content: content.into(),
1301            span: Span {
1302                start: body_start as u32,
1303                end: line_start as u32,
1304            },
1305        }
1306    }
1307
1308    fn advance_heredoc_scan(&self, line_end: usize) -> usize {
1309        if line_end < self.source.len() {
1310            line_end + 1
1311        } else {
1312            line_end
1313        }
1314    }
1315}
1316
1317/// Walk a `CompleteCommand` and assign here-doc bodies to here-doc redirections in source order.
1318fn assign_heredoc_bodies_cc(
1319    cc: &mut CompleteCommand,
1320    bodies: &mut impl Iterator<Item = HereDocBody>,
1321) {
1322    for and_or in &mut cc.list {
1323        assign_heredoc_bodies_pipeline(&mut and_or.first, bodies);
1324        for (_, pipeline) in &mut and_or.rest {
1325            assign_heredoc_bodies_pipeline(pipeline, bodies);
1326        }
1327    }
1328}
1329
1330fn assign_heredoc_bodies_pipeline(
1331    pipeline: &mut Pipeline,
1332    bodies: &mut impl Iterator<Item = HereDocBody>,
1333) {
1334    for cmd in &mut pipeline.commands {
1335        assign_heredoc_bodies_cmd(cmd, bodies);
1336    }
1337}
1338
1339fn assign_heredoc_bodies_cmd(cmd: &mut Command, bodies: &mut impl Iterator<Item = HereDocBody>) {
1340    if let Command::Simple(sc) = cmd {
1341        for redir in &mut sc.redirections {
1342            if matches!(
1343                redir.op,
1344                RedirectionOp::HereDoc | RedirectionOp::HereDocStrip
1345            ) && redir.here_doc_body.is_none()
1346            {
1347                redir.here_doc_body = bodies.next();
1348            }
1349        }
1350    }
1351}
1352
1353/// Extract the delimiter string from a here-doc target word, stripping quotes.
1354fn heredoc_delimiter(word: &Word) -> String {
1355    let mut result = String::new();
1356    for part in &word.parts {
1357        match part {
1358            WordPart::Literal(s) | WordPart::SingleQuoted(s) => result.push_str(s),
1359            WordPart::DoubleQuoted(parts) => {
1360                for p in parts {
1361                    if let WordPart::Literal(s) = p {
1362                        result.push_str(s);
1363                    }
1364                }
1365            }
1366            _ => {}
1367        }
1368    }
1369    result
1370}
1371
/// Decide whether a word's raw text has the shape of a variable assignment.
///
/// The text is split at its first `=`. The part before it must be a name,
/// optionally followed by a `[...]` subscript and/or a single trailing `+`
/// (for `+=`). The name must be non-empty, start with an ASCII letter or
/// `_`, and continue with ASCII letters, digits or `_`.
///
/// The subscript check is shallow: the left-hand side only has to contain a
/// `[` and end with `]`; whatever lies between the first `[` and the end is
/// not examined.
fn is_assignment_text(text: &str) -> bool {
    let Some((lhs, _)) = text.split_once('=') else {
        return false;
    };

    // `NAME+=value`: drop a single trailing '+'.
    let lhs = lhs.strip_suffix('+').unwrap_or(lhs);

    // `NAME[subscript]=value`: keep only what precedes the first '['.
    let ident = match lhs.find('[') {
        Some(open) if lhs.ends_with(']') => &lhs[..open],
        Some(_) => return false,
        None => lhs,
    };

    let mut rest = ident.chars();
    match rest.next() {
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            rest.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
1405
1406fn lex_err(e: wasmsh_lex::LexerError) -> ParseError {
1407    ParseError {
1408        message: e.message,
1409        offset: e.span.start,
1410    }
1411}
1412
1413#[cfg(test)]
1414mod tests {
1415    use super::*;
1416
1417    fn parse_ok(source: &str) -> Program {
1418        parse(source).unwrap_or_else(|e| panic!("parse failed for {source:?}: {e}"))
1419    }
1420
1421    fn first_simple(source: &str) -> SimpleCommand {
1422        let prog = parse_ok(source);
1423        let cmd = &prog.commands[0].list[0].first.commands[0];
1424        match cmd {
1425            Command::Simple(sc) => sc.clone(),
1426            other => panic!("expected simple command, got {other:?}"),
1427        }
1428    }
1429
1430    fn first_command(source: &str) -> Command {
1431        let prog = parse_ok(source);
1432        prog.commands[0].list[0].first.commands[0].clone()
1433    }
1434
1435    fn word_texts(sc: &SimpleCommand) -> Vec<&str> {
1436        sc.words
1437            .iter()
1438            .map(|w| match &w.parts[0] {
1439                WordPart::Literal(s) => s.as_str(),
1440                _ => panic!("expected literal word part"),
1441            })
1442            .collect()
1443    }
1444
1445    // ---- Simple commands (from prompt 02) ----
1446
1447    #[test]
1448    fn parse_empty_input() {
1449        let program = parse_ok("");
1450        assert!(program.commands.is_empty());
1451    }
1452
1453    #[test]
1454    fn parse_echo_hi() {
1455        let sc = first_simple("echo hi");
1456        assert_eq!(word_texts(&sc), vec!["echo", "hi"]);
1457    }
1458
1459    #[test]
1460    fn parse_assignment_prefix() {
1461        let sc = first_simple("FOO=1 BAR=2 env");
1462        assert_eq!(sc.assignments.len(), 2);
1463        assert_eq!(word_texts(&sc), vec!["env"]);
1464    }
1465
1466    #[test]
1467    fn parse_redirections() {
1468        let sc = first_simple("cat < in > out");
1469        assert_eq!(word_texts(&sc), vec!["cat"]);
1470        assert_eq!(sc.redirections.len(), 2);
1471    }
1472
1473    #[test]
1474    fn parse_pipeline() {
1475        let prog = parse_ok("a | b | c");
1476        assert_eq!(prog.commands[0].list[0].first.commands.len(), 3);
1477    }
1478
1479    #[test]
1480    fn parse_and_or_semicolons() {
1481        let prog = parse_ok("false && echo x; true || echo y");
1482        assert_eq!(prog.commands[0].list.len(), 2);
1483    }
1484
1485    #[test]
1486    fn parse_spans_preserved() {
1487        let sc = first_simple("echo hello");
1488        assert_eq!(sc.span, Span { start: 0, end: 10 });
1489    }
1490
1491    // ---- Subshell ----
1492
1493    #[test]
1494    fn parse_subshell() {
1495        let cmd = first_command("(echo hi)");
1496        let Command::Subshell(sub) = cmd else {
1497            panic!("expected subshell");
1498        };
1499        assert_eq!(sub.body.len(), 1);
1500    }
1501
1502    #[test]
1503    fn parse_subshell_with_semicolons() {
1504        let cmd = first_command("(echo a; echo b)");
1505        let Command::Subshell(sub) = cmd else {
1506            panic!("expected subshell");
1507        };
1508        // one complete command with two and_or entries
1509        assert_eq!(sub.body[0].list.len(), 2);
1510    }
1511
1512    // ---- Group ----
1513
1514    #[test]
1515    fn parse_group() {
1516        let cmd = first_command("{ echo hi; }");
1517        let Command::Group(grp) = cmd else {
1518            panic!("expected group");
1519        };
1520        assert_eq!(grp.body.len(), 1);
1521    }
1522
1523    // ---- If ----
1524
1525    #[test]
1526    fn parse_if_then_fi() {
1527        let cmd = first_command("if true; then echo yes; fi");
1528        let Command::If(if_cmd) = cmd else {
1529            panic!("expected if");
1530        };
1531        assert_eq!(if_cmd.condition.len(), 1);
1532        assert_eq!(if_cmd.then_body.len(), 1);
1533        assert!(if_cmd.elifs.is_empty());
1534        assert!(if_cmd.else_body.is_none());
1535    }
1536
1537    #[test]
1538    fn parse_if_else() {
1539        let cmd = first_command("if true; then echo yes; else echo no; fi");
1540        let Command::If(if_cmd) = cmd else {
1541            panic!("expected if");
1542        };
1543        assert!(if_cmd.else_body.is_some());
1544    }
1545
1546    #[test]
1547    fn parse_if_elif_else() {
1548        let cmd = first_command("if a; then b; elif c; then d; elif e; then f; else g; fi");
1549        let Command::If(if_cmd) = cmd else {
1550            panic!("expected if");
1551        };
1552        assert_eq!(if_cmd.elifs.len(), 2);
1553        assert!(if_cmd.else_body.is_some());
1554    }
1555
1556    // ---- While ----
1557
1558    #[test]
1559    fn parse_while() {
1560        let cmd = first_command("while true; do echo loop; done");
1561        let Command::While(w) = cmd else {
1562            panic!("expected while");
1563        };
1564        assert_eq!(w.condition.len(), 1);
1565        assert_eq!(w.body.len(), 1);
1566    }
1567
1568    // ---- Until ----
1569
1570    #[test]
1571    fn parse_until() {
1572        let cmd = first_command("until false; do echo loop; done");
1573        let Command::Until(u) = cmd else {
1574            panic!("expected until");
1575        };
1576        assert_eq!(u.condition.len(), 1);
1577        assert_eq!(u.body.len(), 1);
1578    }
1579
1580    // ---- For ----
1581
1582    #[test]
1583    fn parse_for_in() {
1584        let cmd = first_command("for x in a b c; do echo x; done");
1585        let Command::For(f) = cmd else {
1586            panic!("expected for");
1587        };
1588        assert_eq!(f.var_name.as_str(), "x");
1589        let words = f.words.as_ref().unwrap();
1590        assert_eq!(words.len(), 3);
1591        assert_eq!(f.body.len(), 1);
1592    }
1593
1594    #[test]
1595    fn parse_for_no_in() {
1596        let cmd = first_command("for x; do echo x; done");
1597        let Command::For(f) = cmd else {
1598            panic!("expected for");
1599        };
1600        assert!(f.words.is_none());
1601    }
1602
1603    #[test]
1604    fn parse_for_newline_before_do() {
1605        let cmd = first_command("for x in a b c\ndo\necho x\ndone");
1606        let Command::For(f) = cmd else {
1607            panic!("expected for");
1608        };
1609        assert_eq!(f.words.as_ref().unwrap().len(), 3);
1610    }
1611
1612    // ---- Case ----
1613
1614    #[test]
1615    fn parse_case() {
1616        let cmd = first_command("case x in\na) echo a;;\nb) echo b;;\nesac");
1617        let Command::Case(c) = cmd else {
1618            panic!("expected case");
1619        };
1620        assert_eq!(c.items.len(), 2);
1621    }
1622
1623    #[test]
1624    fn parse_case_wildcard() {
1625        let cmd = first_command("case x in\n*) echo default;;\nesac");
1626        let Command::Case(c) = cmd else {
1627            panic!("expected case");
1628        };
1629        assert_eq!(c.items.len(), 1);
1630    }
1631
1632    // ---- Function definitions ----
1633
1634    #[test]
1635    fn parse_function_posix() {
1636        let cmd = first_command("greet() { echo hello; }");
1637        let Command::FunctionDef(fd) = cmd else {
1638            panic!("expected function def");
1639        };
1640        assert_eq!(fd.name.as_str(), "greet");
1641        assert!(matches!(*fd.body, Command::Group(_)));
1642    }
1643
1644    #[test]
1645    fn parse_function_bash() {
1646        let cmd = first_command("function greet { echo hello; }");
1647        let Command::FunctionDef(fd) = cmd else {
1648            panic!("expected function def");
1649        };
1650        assert_eq!(fd.name.as_str(), "greet");
1651    }
1652
1653    #[test]
1654    fn parse_function_bash_with_parens() {
1655        let cmd = first_command("function greet() { echo hello; }");
1656        let Command::FunctionDef(fd) = cmd else {
1657            panic!("expected function def");
1658        };
1659        assert_eq!(fd.name.as_str(), "greet");
1660    }
1661
1662    // ---- Reserved words are context-sensitive ----
1663
1664    #[test]
1665    fn reserved_word_as_argument() {
1666        // `if` as an argument, not as a keyword
1667        let sc = first_simple("echo if then else");
1668        assert_eq!(word_texts(&sc), vec!["echo", "if", "then", "else"]);
1669    }
1670
1671    // ---- Here-docs ----
1672
1673    #[test]
1674    fn parse_heredoc() {
1675        let source = "cat <<EOF\nhello world\nEOF\n";
1676        let sc = first_simple(source);
1677        assert_eq!(sc.redirections.len(), 1);
1678        assert_eq!(sc.redirections[0].op, RedirectionOp::HereDoc);
1679        let body = sc.redirections[0].here_doc_body.as_ref().unwrap();
1680        assert_eq!(body.content.as_str(), "hello world\n");
1681    }
1682
1683    #[test]
1684    fn parse_heredoc_multiline() {
1685        let source = "cat <<EOF\nline1\nline2\nline3\nEOF\n";
1686        let sc = first_simple(source);
1687        let body = sc.redirections[0].here_doc_body.as_ref().unwrap();
1688        assert_eq!(body.content.as_str(), "line1\nline2\nline3\n");
1689    }
1690
1691    #[test]
1692    fn parse_heredoc_strip_tabs() {
1693        let source = "cat <<-EOF\n\thello\n\tworld\n\tEOF\n";
1694        let sc = first_simple(source);
1695        assert_eq!(sc.redirections[0].op, RedirectionOp::HereDocStrip);
1696        let body = sc.redirections[0].here_doc_body.as_ref().unwrap();
1697        assert_eq!(body.content.as_str(), "hello\nworld\n");
1698    }
1699
1700    #[test]
1701    fn parse_multiple_heredocs() {
1702        let source = "cmd <<A <<B\nbody_a\nA\nbody_b\nB\n";
1703        let sc = first_simple(source);
1704        assert_eq!(sc.redirections.len(), 2);
1705        let body_a = sc.redirections[0].here_doc_body.as_ref().unwrap();
1706        assert_eq!(body_a.content.as_str(), "body_a\n");
1707        let body_b = sc.redirections[1].here_doc_body.as_ref().unwrap();
1708        assert_eq!(body_b.content.as_str(), "body_b\n");
1709    }
1710
1711    #[test]
1712    fn parse_heredoc_in_pipeline() {
1713        let source = "cat <<EOF | wc -l\nhello\nEOF\n";
1714        let prog = parse_ok(source);
1715        let pipeline = &prog.commands[0].list[0].first;
1716        assert_eq!(pipeline.commands.len(), 2);
1717        if let Command::Simple(sc) = &pipeline.commands[0] {
1718            let body = sc.redirections[0].here_doc_body.as_ref().unwrap();
1719            assert_eq!(body.content.as_str(), "hello\n");
1720        } else {
1721            panic!("expected simple command");
1722        }
1723    }
1724
1725    #[test]
1726    fn parse_heredoc_followed_by_command() {
1727        let source = "cat <<EOF\nhello\nEOF\necho done";
1728        let prog = parse_ok(source);
1729        assert_eq!(prog.commands.len(), 2);
1730    }
1731
1732    #[test]
1733    fn parse_error_unterminated_heredoc() {
1734        assert!(parse("cat <<EOF\nhello\n").is_err());
1735    }
1736
1737    // ---- Error cases ----
1738
1739    #[test]
1740    fn parse_error_on_lone_pipe() {
1741        assert!(parse("|").is_err());
1742    }
1743
1744    #[test]
1745    fn parse_error_on_lone_semicolon() {
1746        assert!(parse(";").is_err());
1747    }
1748
1749    #[test]
1750    fn parse_error_redirect_no_target() {
1751        assert!(parse("echo >").is_err());
1752    }
1753
1754    #[test]
1755    fn parse_error_unclosed_if() {
1756        assert!(parse("if true; then echo x").is_err());
1757    }
1758
1759    #[test]
1760    fn parse_error_unclosed_group() {
1761        assert!(parse("{ echo x").is_err());
1762    }
1763
1764    #[test]
1765    fn parse_error_unclosed_subshell() {
1766        assert!(parse("(echo x").is_err());
1767    }
1768
1769    // ---- Here-string ----
1770
1771    #[test]
1772    fn parse_here_string() {
1773        let sc = first_simple("cat <<< hello");
1774        assert_eq!(word_texts(&sc), vec!["cat"]);
1775        assert_eq!(sc.redirections.len(), 1);
1776        assert_eq!(sc.redirections[0].op, RedirectionOp::HereString);
1777    }
1778
1779    // ---- Fd-prefix redirection ----
1780
1781    #[test]
1782    fn parse_stderr_redirect() {
1783        let sc = first_simple("cmd 2> file");
1784        assert_eq!(sc.redirections.len(), 1);
1785        assert_eq!(sc.redirections[0].fd, Some(2));
1786        assert_eq!(sc.redirections[0].op, RedirectionOp::Output);
1787    }
1788
1789    #[test]
1790    fn parse_fd_dup_output() {
1791        let sc = first_simple("cmd 2>&1");
1792        assert_eq!(sc.redirections.len(), 1);
1793        assert_eq!(sc.redirections[0].fd, Some(2));
1794        assert_eq!(sc.redirections[0].op, RedirectionOp::DupOutput);
1795    }
1796
1797    #[test]
1798    fn parse_amp_greater() {
1799        let sc = first_simple("cmd &> file");
1800        assert_eq!(sc.redirections.len(), 1);
1801        // &> is encoded as fd=MAX, op=Output
1802        assert_eq!(sc.redirections[0].fd, Some(u32::MAX));
1803        assert_eq!(sc.redirections[0].op, RedirectionOp::Output);
1804    }
1805
1806    // ---- Double bracket ----
1807
1808    #[test]
1809    fn parse_double_bracket_basic() {
1810        let cmd = first_command("[[ hello == world ]]");
1811        let Command::DoubleBracket(db) = cmd else {
1812            panic!("expected DoubleBracket, got {cmd:?}");
1813        };
1814        assert_eq!(db.words.len(), 3);
1815    }
1816
1817    #[test]
1818    fn parse_double_bracket_with_var() {
1819        let cmd = first_command("[[ $x == hello ]]");
1820        let Command::DoubleBracket(db) = cmd else {
1821            panic!("expected DoubleBracket");
1822        };
1823        assert_eq!(db.words.len(), 3);
1824    }
1825
/// `&&` (and `||`) inside `[[ ]]` must be captured as expression tokens of
/// the double-bracket command rather than ending it.
#[test]
fn parse_double_bracket_logical_ops() {
    let cmd = first_command("[[ a == a && b == b ]]");
    let Command::DoubleBracket(db) = cmd else {
        // Report the command that was actually parsed, matching the
        // diagnostics of the sibling double-bracket test.
        panic!("expected DoubleBracket, got {cmd:?}");
    };
    // a, ==, a, &&, b, ==, b
    assert_eq!(db.words.len(), 7);
}
1836
/// A `[[ ... ]]` condition is accepted inside `if`; the outermost command
/// of the program is still the `If` node.
#[test]
fn parse_double_bracket_in_if() {
    let program = parse_ok("if [[ x == x ]]; then echo yes; fi");

    // Walk: first complete command -> first and-or list -> its first pipeline.
    let first_pipeline = &program.commands[0].list[0].first;
    let cmd = &first_pipeline.commands[0];
    assert!(matches!(cmd, Command::If(_)));
}
1843
/// `[[ ... ]]` can be the left-hand side of an `&&` list, and it must still
/// be parsed as a `DoubleBracket` command in that position.
#[test]
fn parse_double_bracket_in_pipeline() {
    let prog = parse_ok("[[ x == x ]] && echo yes");
    assert!(!prog.commands.is_empty());

    // Non-emptiness alone would also hold if `[[ ... ]]` were parsed as some
    // other command kind, so check the variant of the first command too.
    let cmd = &prog.commands[0].list[0].first.commands[0];
    assert!(matches!(cmd, Command::DoubleBracket(_)));
}
1849
1850    // ---- Arithmetic command (( )) ----
1851
/// `((1+2))` parses as an arithmetic command whose expression text is the
/// content between the double parentheses.
#[test]
fn parse_arith_command_basic() {
    match first_command("((1+2))") {
        Command::ArithCommand(arith) => assert_eq!(arith.expr.as_str(), "1+2"),
        cmd => panic!("expected ArithCommand, got {cmd:?}"),
    }
}
1860
/// Padding just inside `((` and `))` is not part of the stored expression;
/// the inner spacing is kept as written.
#[test]
fn parse_arith_command_with_spaces() {
    let parsed = first_command("(( x = 1 + 2 ))");
    if let Command::ArithCommand(arith) = parsed {
        let expr_text = arith.expr.as_str();
        assert_eq!(expr_text, "x = 1 + 2");
    } else {
        panic!("expected ArithCommand, got {parsed:?}");
    }
}
1869
/// Nested parentheses inside `(( ))` belong to the expression and must not
/// be taken for the closing delimiter.
#[test]
fn parse_arith_command_with_parens() {
    let source = "(( (1+2) * 3 ))";
    match first_command(source) {
        Command::ArithCommand(arith) => {
            assert_eq!(arith.expr.as_str(), "(1+2) * 3");
        }
        cmd => panic!("expected ArithCommand, got {cmd:?}"),
    }
}
1878
/// `(( ... ))` is accepted as the condition of an `if`; the outermost
/// command of the program is still the `If` node.
#[test]
fn parse_arith_command_in_if() {
    let program = parse_ok("if (( x > 0 )); then echo yes; fi");

    // First complete command -> first and-or list -> first pipeline command.
    let and_or = &program.commands[0].list[0];
    let cmd = &and_or.first.commands[0];
    assert!(matches!(cmd, Command::If(_)));
}
1885
/// `(( ... ))` can be the left-hand side of an `&&` list, and it must still
/// be parsed as an `ArithCommand` in that position.
#[test]
fn parse_arith_command_in_and_or() {
    let prog = parse_ok("(( x > 0 )) && echo yes");
    assert!(!prog.commands.is_empty());

    // Non-emptiness alone would also hold if `(( ... ))` were parsed as some
    // other command kind, so check the variant of the first command too.
    let cmd = &prog.commands[0].list[0].first.commands[0];
    assert!(matches!(cmd, Command::ArithCommand(_)));
}
1891
1892    // ---- C-style for (( )) ----
1893
/// A C-style `for ((init; cond; step))` loop parses as `ArithFor`, exposing
/// the three header expressions and a one-entry body.
#[test]
fn parse_arith_for_basic() {
    match first_command("for ((i=0; i<10; i++)) do echo $i; done") {
        Command::ArithFor(arith_for) => {
            assert_eq!(arith_for.init.as_str(), "i=0");
            assert_eq!(arith_for.cond.as_str(), "i<10");
            assert_eq!(arith_for.step.as_str(), "i++");
            assert_eq!(arith_for.body.len(), 1);
        }
        cmd => panic!("expected ArithFor, got {cmd:?}"),
    }
}
1905
/// Whitespace around the header expressions of a C-style `for` is trimmed at
/// the edges, while spacing inside each expression is kept.
#[test]
fn parse_arith_for_with_spaces() {
    let parsed = first_command("for (( i = 0; i < 5; i++ )) do echo $i; done");
    let Command::ArithFor(arith_for) = parsed else {
        panic!("expected ArithFor, got {parsed:?}");
    };

    // Check init, cond and step in header order.
    let header = [
        (arith_for.init.as_str(), "i = 0"),
        (arith_for.cond.as_str(), "i < 5"),
        (arith_for.step.as_str(), "i++"),
    ];
    for (actual, expected) in header {
        assert_eq!(actual, expected);
    }
}
1916
/// A `;` between the `(( ))` header and `do` is accepted and does not affect
/// the parsed header expressions.
#[test]
fn parse_arith_for_with_semicolon_before_do() {
    let source = "for ((i=0; i<3; i++)); do echo $i; done";
    match first_command(source) {
        Command::ArithFor(arith_for) => {
            assert_eq!(arith_for.init.as_str(), "i=0");
            assert_eq!(arith_for.cond.as_str(), "i<3");
            assert_eq!(arith_for.step.as_str(), "i++");
        }
        cmd => panic!("expected ArithFor, got {cmd:?}"),
    }
}
1927
/// Newlines between the `(( ))` header and `do`, and around the body, are
/// accepted and do not affect the parsed header expressions.
#[test]
fn parse_arith_for_newline_before_do() {
    let cmd = first_command("for ((i=0; i<3; i++))\ndo\necho $i\ndone");
    let Command::ArithFor(af) = cmd else {
        panic!("expected ArithFor, got {cmd:?}");
    };
    assert_eq!(af.init.as_str(), "i=0");
    // The header is the same as in the `;`-before-`do` variant, so the
    // remaining two expressions are checked as well, not just `init`.
    assert_eq!(af.cond.as_str(), "i<3");
    assert_eq!(af.step.as_str(), "i++");
}
1936
/// A single-parenthesis group such as `( echo hi )` is a subshell and must
/// not be confused with the `(( ))` arithmetic command.
#[test]
fn parse_subshell_not_confused_with_arith() {
    let source = "(echo hi)";
    let cmd = first_command(source);
    assert!(matches!(cmd, Command::Subshell(_)));
}
1943}