Skip to main content

wasmsh_parse/
lib.rs

1//! Handwritten recursive-descent parser for the wasmsh shell.
2//!
3//! Consumes tokens from `wasmsh-lex` and produces an AST defined
4//! in `wasmsh-ast`. No parser generators are used.
5
6mod word_parser;
7
8use std::collections::VecDeque;
9
10use wasmsh_ast::{
11    AndOrList, AndOrOp, ArithCommandNode, ArithForCommand, Assignment, CaseCommand, CaseItem,
12    CaseTerminator, Command, CompleteCommand, DoubleBracketCommand, ElifClause, ForCommand,
13    FunctionDef, GroupCommand, HereDocBody, IfCommand, Pipeline, Program, Redirection,
14    RedirectionOp, SelectCommand, SimpleCommand, Span, SubshellCommand, UntilCommand, WhileCommand,
15    Word, WordPart,
16};
17use wasmsh_lex::{Lexer, Token, TokenKind};
18
/// A parse failure together with the source offset it was reported at.
#[derive(Debug, Clone, PartialEq)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Offset into the source at which the error was reported.
    pub offset: u32,
}

impl std::fmt::Display for ParseError {
    /// Renders as `parse error at <offset>: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { message, offset } = self;
        write!(f, "parse error at {offset}: {message}")
    }
}

impl std::error::Error for ParseError {}
33
34/// Parse a complete shell source string into a `Program` AST.
35pub fn parse(source: &str) -> Result<Program, ParseError> {
36    let mut parser = Parser::new(source)?;
37    parser.parse_program()
38}
39
/// Words that close or continue a compound-list body; a word listed here is
/// never treated as the start of a new command.
const TERMINATOR_WORDS: &[&str] = &[
    "then", "elif", "else", "fi", // `if` construct
    "do", "done", // loop bodies
    "esac", // `case` construct
    "}", // brace group
];
42
/// Bookkeeping for a here-document whose body has not been read yet; the
/// body is picked up after the command line that introduced it.
struct PendingHereDoc {
    /// Delimiter associated with this here-doc.
    delimiter: String,
    /// Tab-stripping flag for this here-doc.
    strip_tabs: bool,
}
48
49struct Parser<'src> {
50    source: &'src str,
51    lexer: Lexer<'src>,
52    current: Token,
53    peeked: VecDeque<Token>,
54    prev_end: u32,
55    pending_heredocs: Vec<PendingHereDoc>,
56}
57
58impl<'src> Parser<'src> {
59    fn new(source: &'src str) -> Result<Self, ParseError> {
60        let mut lexer = Lexer::new(source);
61        let current = lexer.next_token().map_err(lex_err)?;
62        Ok(Self {
63            source,
64            lexer,
65            current,
66            peeked: VecDeque::new(),
67            prev_end: 0,
68            pending_heredocs: Vec::new(),
69        })
70    }
71
72    fn advance(&mut self) -> Result<Token, ParseError> {
73        let prev = self.current.clone();
74        self.prev_end = prev.span.end;
75        self.current = if let Some(tok) = self.peeked.pop_front() {
76            tok
77        } else {
78            self.lexer.next_token().map_err(lex_err)?
79        };
80        Ok(prev)
81    }
82
83    /// Peek at the nth token ahead (0 = next token after current).
84    fn peek_nth(&mut self, n: usize) -> Result<&Token, ParseError> {
85        while self.peeked.len() <= n {
86            self.peeked
87                .push_back(self.lexer.next_token().map_err(lex_err)?);
88        }
89        Ok(&self.peeked[n])
90    }
91
92    fn at(&self, kind: &TokenKind) -> bool {
93        self.current.kind == *kind
94    }
95
96    fn at_word(&self) -> bool {
97        matches!(self.current.kind, TokenKind::Word { .. })
98    }
99
100    fn at_word_eq(&self, text: &str) -> bool {
101        self.at_word() && self.current_text() == text
102    }
103
104    fn at_redirection(&self) -> bool {
105        matches!(
106            self.current.kind,
107            TokenKind::Less
108                | TokenKind::Greater
109                | TokenKind::GreaterGreater
110                | TokenKind::LessLess
111                | TokenKind::LessLessDash
112                | TokenKind::LessLessLess
113                | TokenKind::LessGreater
114                | TokenKind::AmpGreater
115        )
116    }
117
118    /// Check if the current word token is a single digit and the next token
119    /// is a redirection operator. If so, this is an fd-prefix redirection.
120    fn at_fd_prefix_redirection(&mut self) -> bool {
121        if !self.at_word() {
122            return false;
123        }
124        let text = self.current_text();
125        if text.len() != 1 || !text.as_bytes()[0].is_ascii_digit() {
126            return false;
127        }
128        if let Ok(next) = self.peek_nth(0) {
129            matches!(
130                next.kind,
131                TokenKind::Less
132                    | TokenKind::Greater
133                    | TokenKind::GreaterGreater
134                    | TokenKind::LessLess
135                    | TokenKind::LessLessDash
136                    | TokenKind::LessLessLess
137                    | TokenKind::LessGreater
138            )
139        } else {
140            false
141        }
142    }
143
144    /// Check if the next token (after current) is `LParen`.
145    fn peek_is_lparen(&mut self) -> bool {
146        matches!(self.peek_nth(0), Ok(tok) if tok.kind == TokenKind::LParen)
147    }
148
149    /// Parse a word like `arr=(x y z)` where the current token is `arr=`
150    /// and the next token is `LParen`. Combines the name= with compound value.
151    fn parse_compound_assign_word(&mut self) -> Result<Word, ParseError> {
152        let tok = self.advance()?; // consume `arr=`
153        let text = tok.text(self.source);
154        self.advance()?; // consume '('
155        let mut elements = Vec::new();
156        while !self.at(&TokenKind::RParen) && !self.at(&TokenKind::Eof) {
157            if self.at(&TokenKind::Newline) {
158                self.advance()?;
159                continue;
160            }
161            if self.at_word() {
162                let w = self.advance()?;
163                elements.push(w.text(self.source).to_string());
164            } else {
165                break;
166            }
167        }
168        let end_span = if self.at(&TokenKind::RParen) {
169            self.advance()?.span.end
170        } else {
171            self.current.span.end
172        };
173        // Build a synthetic word like "arr=(x y z)"
174        let compound = format!("{text}({})", elements.join(" "));
175        Ok(Word {
176            parts: vec![WordPart::Literal(compound.into())],
177            span: Span {
178                start: tok.span.start,
179                end: end_span,
180            },
181        })
182    }
183
184    /// True if the current token can start a new command.
185    /// Check for `;;` (two consecutive semicolons — case item terminator).
186    fn is_double_semi(&mut self) -> bool {
187        if !self.at(&TokenKind::Semi) {
188            return false;
189        }
190        matches!(self.peek_nth(0), Ok(tok) if tok.kind == TokenKind::Semi)
191    }
192
193    /// Check for `;&` (case fall-through): `;` followed by `&`.
194    fn is_case_fallthrough(&mut self) -> bool {
195        if !self.at(&TokenKind::Semi) {
196            return false;
197        }
198        matches!(self.peek_nth(0), Ok(tok) if tok.kind == TokenKind::Amp)
199    }
200
201    /// Check for `;;&` (case continue-testing): `;;` followed by `&`.
202    fn is_case_continue_testing(&mut self) -> bool {
203        if !self.at(&TokenKind::Semi) {
204            return false;
205        }
206        if !matches!(self.peek_nth(0), Ok(tok) if tok.kind == TokenKind::Semi) {
207            return false;
208        }
209        matches!(self.peek_nth(1), Ok(tok) if tok.kind == TokenKind::Amp)
210    }
211
212    fn at_command_start(&self) -> bool {
213        if self.at(&TokenKind::LParen) || self.at(&TokenKind::DblLBracket) {
214            return true;
215        }
216        if self.at_word() {
217            let text = self.current_text();
218            return !TERMINATOR_WORDS.contains(&text);
219        }
220        self.at_redirection()
221    }
222
223    fn current_text(&self) -> &str {
224        self.current.text(self.source)
225    }
226
227    fn skip_newlines(&mut self) -> Result<(), ParseError> {
228        while self.at(&TokenKind::Newline) {
229            self.advance()?;
230        }
231        Ok(())
232    }
233
234    fn span_from(&self, start: u32) -> Span {
235        Span {
236            start,
237            end: self.prev_end,
238        }
239    }
240
241    fn expect_word(&mut self, expected: &str) -> Result<Token, ParseError> {
242        if self.at_word_eq(expected) {
243            self.advance()
244        } else {
245            Err(ParseError {
246                message: format!("expected '{}', got '{}'", expected, self.current_text()),
247                offset: self.current.span.start,
248            })
249        }
250    }
251
252    // ---- Grammar rules ----
253
254    fn parse_program(&mut self) -> Result<Program, ParseError> {
255        self.skip_newlines()?;
256        let mut commands = Vec::new();
257        while self.at_command_start() {
258            commands.push(self.parse_complete_command()?);
259            self.skip_newlines()?;
260        }
261        if !self.at(&TokenKind::Eof) {
262            return Err(ParseError {
263                message: format!("unexpected token: {:?}", self.current.kind),
264                offset: self.current.span.start,
265            });
266        }
267        Ok(Program { commands })
268    }
269
270    /// Parse a compound list (body of compound commands).
271    /// Stops at terminator words, `)`, or EOF.
272    fn parse_compound_list(&mut self) -> Result<Vec<CompleteCommand>, ParseError> {
273        self.skip_newlines()?;
274        let mut commands = Vec::new();
275        while self.at_command_start() {
276            commands.push(self.parse_complete_command()?);
277            self.skip_newlines()?;
278        }
279        Ok(commands)
280    }
281
282    fn parse_complete_command(&mut self) -> Result<CompleteCommand, ParseError> {
283        let start = self.current.span.start;
284        let mut list = Vec::new();
285        list.push(self.parse_and_or()?);
286
287        while self.at(&TokenKind::Semi)
288            && !self.is_double_semi()
289            && !self.is_case_fallthrough()
290            && !self.is_case_continue_testing()
291        {
292            self.advance()?;
293            // Don't skip newlines here if there are pending heredocs
294            if self.pending_heredocs.is_empty() {
295                self.skip_newlines()?;
296            }
297            if self.at_command_start() {
298                list.push(self.parse_and_or()?);
299            }
300        }
301
302        let mut cc = CompleteCommand {
303            list,
304            span: self.span_from(start),
305        };
306
307        // Resolve pending here-docs: read bodies from source after the newline
308        if !self.pending_heredocs.is_empty() && self.at(&TokenKind::Newline) {
309            self.resolve_heredocs(&mut cc)?;
310        }
311
312        Ok(cc)
313    }
314
315    fn parse_and_or(&mut self) -> Result<AndOrList, ParseError> {
316        let first = self.parse_pipeline()?;
317        let mut rest = Vec::new();
318
319        loop {
320            let op = if self.at(&TokenKind::AndAnd) {
321                self.advance()?;
322                AndOrOp::And
323            } else if self.at(&TokenKind::OrOr) {
324                self.advance()?;
325                AndOrOp::Or
326            } else {
327                break;
328            };
329            self.skip_newlines()?;
330            rest.push((op, self.parse_pipeline()?));
331        }
332
333        Ok(AndOrList { first, rest })
334    }
335
336    fn parse_pipeline(&mut self) -> Result<Pipeline, ParseError> {
337        let negated = if self.at_word_eq("!") {
338            self.advance()?;
339            true
340        } else {
341            false
342        };
343
344        let mut commands = Vec::new();
345        let mut pipe_stderr = Vec::new();
346        commands.push(self.parse_command()?);
347
348        loop {
349            if self.at(&TokenKind::Pipe) {
350                pipe_stderr.push(false);
351                self.advance()?;
352                self.skip_newlines()?;
353                commands.push(self.parse_command()?);
354            } else if self.at(&TokenKind::PipeAmp) {
355                pipe_stderr.push(true);
356                self.advance()?;
357                self.skip_newlines()?;
358                commands.push(self.parse_command()?);
359            } else {
360                break;
361            }
362        }
363
364        Ok(Pipeline {
365            negated,
366            commands,
367            pipe_stderr,
368        })
369    }
370
371    fn parse_command(&mut self) -> Result<Command, ParseError> {
372        if self.at(&TokenKind::LParen) {
373            return self.parse_command_lparen();
374        }
375        if self.at(&TokenKind::DblLBracket) {
376            return self.parse_double_bracket();
377        }
378        if self.at_word() {
379            if let Some(cmd) = self.try_parse_compound_keyword()? {
380                return Ok(cmd);
381            }
382        }
383        Ok(Command::Simple(self.parse_simple_command()?))
384    }
385
386    /// Dispatch `(( expr ))` arithmetic command or `( list )` subshell.
387    fn parse_command_lparen(&mut self) -> Result<Command, ParseError> {
388        if let Ok(next) = self.peek_nth(0) {
389            if next.kind == TokenKind::LParen && next.span.start == self.current.span.end {
390                return self.parse_arith_command();
391            }
392        }
393        self.parse_subshell()
394    }
395
396    /// If the current word is a compound keyword, parse and return it.
397    fn try_parse_compound_keyword(&mut self) -> Result<Option<Command>, ParseError> {
398        let text = self.current_text();
399        match text {
400            "{" => Ok(Some(self.parse_group()?)),
401            "if" => Ok(Some(self.parse_if()?)),
402            "while" => Ok(Some(self.parse_while()?)),
403            "until" => Ok(Some(self.parse_until()?)),
404            "for" => Ok(Some(self.parse_for()?)),
405            "case" => Ok(Some(self.parse_case()?)),
406            "select" => Ok(Some(self.parse_select()?)),
407            "function" => Ok(Some(self.parse_function_bash()?)),
408            _ => {
409                if self.peek_nth(0)?.kind == TokenKind::LParen
410                    && self.peek_nth(1)?.kind == TokenKind::RParen
411                {
412                    return Ok(Some(self.parse_function_posix()?));
413                }
414                Ok(None)
415            }
416        }
417    }
418
419    // ---- Compound commands ----
420
421    fn parse_subshell(&mut self) -> Result<Command, ParseError> {
422        let start = self.current.span.start;
423        self.advance()?; // consume (
424        let body = self.parse_compound_list()?;
425        if !self.at(&TokenKind::RParen) {
426            return Err(ParseError {
427                message: "expected ')' to close subshell".into(),
428                offset: self.current.span.start,
429            });
430        }
431        self.advance()?; // consume )
432        Ok(Command::Subshell(SubshellCommand {
433            body,
434            span: self.span_from(start),
435        }))
436    }
437
438    fn parse_group(&mut self) -> Result<Command, ParseError> {
439        let start = self.current.span.start;
440        self.expect_word("{")?;
441        let body = self.parse_compound_list()?;
442        self.expect_word("}")?;
443        Ok(Command::Group(GroupCommand {
444            body,
445            span: self.span_from(start),
446        }))
447    }
448
449    fn parse_if(&mut self) -> Result<Command, ParseError> {
450        let start = self.current.span.start;
451        self.expect_word("if")?;
452        let condition = self.parse_compound_list()?;
453        self.expect_word("then")?;
454        let then_body = self.parse_compound_list()?;
455
456        let mut elifs = Vec::new();
457        while self.at_word_eq("elif") {
458            self.advance()?;
459            let elif_cond = self.parse_compound_list()?;
460            self.expect_word("then")?;
461            let elif_body = self.parse_compound_list()?;
462            elifs.push(ElifClause {
463                condition: elif_cond,
464                then_body: elif_body,
465            });
466        }
467
468        let else_body = if self.at_word_eq("else") {
469            self.advance()?;
470            Some(self.parse_compound_list()?)
471        } else {
472            None
473        };
474
475        self.expect_word("fi")?;
476        Ok(Command::If(IfCommand {
477            condition,
478            then_body,
479            elifs,
480            else_body,
481            span: self.span_from(start),
482        }))
483    }
484
485    fn parse_while(&mut self) -> Result<Command, ParseError> {
486        let start = self.current.span.start;
487        self.expect_word("while")?;
488        let condition = self.parse_compound_list()?;
489        self.expect_word("do")?;
490        let body = self.parse_compound_list()?;
491        self.expect_word("done")?;
492        Ok(Command::While(WhileCommand {
493            condition,
494            body,
495            span: self.span_from(start),
496        }))
497    }
498
499    fn parse_until(&mut self) -> Result<Command, ParseError> {
500        let start = self.current.span.start;
501        self.expect_word("until")?;
502        let condition = self.parse_compound_list()?;
503        self.expect_word("do")?;
504        let body = self.parse_compound_list()?;
505        self.expect_word("done")?;
506        Ok(Command::Until(UntilCommand {
507            condition,
508            body,
509            span: self.span_from(start),
510        }))
511    }
512
513    fn parse_for(&mut self) -> Result<Command, ParseError> {
514        let start = self.current.span.start;
515        self.expect_word("for")?;
516
517        if self.is_arith_for_start() {
518            return self.parse_arith_for(start);
519        }
520
521        if !self.at_word() {
522            return Err(ParseError {
523                message: "expected variable name after 'for'".into(),
524                offset: self.current.span.start,
525            });
526        }
527        let var_name = self.current_text().into();
528        self.advance()?;
529
530        let words = self.parse_loop_words_clause()?;
531
532        self.skip_newlines()?;
533        self.expect_word("do")?;
534        let body = self.parse_compound_list()?;
535        self.expect_word("done")?;
536
537        Ok(Command::For(ForCommand {
538            var_name,
539            words,
540            body,
541            span: self.span_from(start),
542        }))
543    }
544
545    fn is_arith_for_start(&mut self) -> bool {
546        let current_end = self.current.span.end;
547        self.at(&TokenKind::LParen)
548            && self
549                .peek_nth(0)
550                .is_ok_and(|next| next.kind == TokenKind::LParen && next.span.start == current_end)
551    }
552
553    fn parse_loop_words_clause(&mut self) -> Result<Option<Vec<Word>>, ParseError> {
554        if !self.at_word_eq("in") {
555            self.consume_optional_semi()?;
556            return Ok(None);
557        }
558
559        self.advance()?;
560        let mut words = Vec::new();
561        while self.at_word()
562            && !self.at_word_eq("do")
563            && !TERMINATOR_WORDS.contains(&self.current_text())
564        {
565            words.push(self.parse_word()?);
566        }
567        self.consume_optional_semi()?;
568        Ok(Some(words))
569    }
570
571    fn consume_optional_semi(&mut self) -> Result<(), ParseError> {
572        if self.at(&TokenKind::Semi) {
573            self.advance()?;
574        }
575        Ok(())
576    }
577
578    /// Parse `select name [in word ...]; do body; done`.
579    fn parse_select(&mut self) -> Result<Command, ParseError> {
580        let start = self.current.span.start;
581        self.expect_word("select")?;
582
583        if !self.at_word() {
584            return Err(ParseError {
585                message: "expected variable name after 'select'".into(),
586                offset: self.current.span.start,
587            });
588        }
589        let var_name = self.current_text().into();
590        self.advance()?;
591
592        // Optional `in word...` clause
593        let words = if self.at_word_eq("in") {
594            self.advance()?;
595            let mut words = Vec::new();
596            while self.at_word()
597                && !self.at_word_eq("do")
598                && !TERMINATOR_WORDS.contains(&self.current_text())
599            {
600                words.push(self.parse_word()?);
601            }
602            if self.at(&TokenKind::Semi) {
603                self.advance()?;
604            }
605            Some(words)
606        } else {
607            if self.at(&TokenKind::Semi) {
608                self.advance()?;
609            }
610            None
611        };
612
613        self.skip_newlines()?;
614        self.expect_word("do")?;
615        let body = self.parse_compound_list()?;
616        self.expect_word("done")?;
617
618        // Collect trailing redirections (e.g., `done <<< "input"`)
619        let mut redirections = Vec::new();
620        while self.at_redirection() || self.at_fd_prefix_redirection() {
621            if self.at_fd_prefix_redirection() {
622                let fd_text = self.current_text();
623                let fd: u32 = fd_text.parse().unwrap_or(0);
624                self.advance()?;
625                let mut redir = self.parse_redirection()?;
626                redir.fd = Some(fd);
627                redirections.push(redir);
628            } else {
629                redirections.push(self.parse_redirection()?);
630            }
631        }
632
633        Ok(Command::Select(SelectCommand {
634            var_name,
635            words,
636            body,
637            redirections,
638            span: self.span_from(start),
639        }))
640    }
641
642    /// Parse `(( expr ))` arithmetic command.
643    /// The lexer tokenizes `((` as two `LParen` tokens. We consume them, then
644    /// collect raw source characters until the matching `))`.
645    fn parse_arith_command(&mut self) -> Result<Command, ParseError> {
646        let start = self.current.span.start;
647        self.advance()?; // first (
648        self.advance()?; // second (
649
650        let expr = self.collect_arith_expr()?;
651
652        Ok(Command::ArithCommand(ArithCommandNode {
653            expr: expr.into(),
654            span: self.span_from(start),
655        }))
656    }
657
658    /// Parse C-style for loop: `for (( init; cond; step )) do body done`.
659    /// Called after `for` has been consumed. `start` is the span start of `for`.
660    fn parse_arith_for(&mut self, start: u32) -> Result<Command, ParseError> {
661        self.advance()?; // first (
662        self.advance()?; // second (
663
664        // Collect three semicolon-separated expressions until ))
665        let inner = self.collect_arith_expr()?;
666
667        // Split inner on ';' to get init, cond, step
668        let parts: Vec<&str> = inner.splitn(3, ';').collect();
669        let init = parts.first().map_or("", |s| s.trim());
670        let cond = parts.get(1).map_or("", |s| s.trim());
671        let step = parts.get(2).map_or("", |s| s.trim());
672
673        // Optional ; or newline before `do`
674        if self.at(&TokenKind::Semi) {
675            self.advance()?;
676        }
677        self.skip_newlines()?;
678        self.expect_word("do")?;
679        let body = self.parse_compound_list()?;
680        self.expect_word("done")?;
681
682        Ok(Command::ArithFor(ArithForCommand {
683            init: init.into(),
684            cond: cond.into(),
685            step: step.into(),
686            body,
687            span: self.span_from(start),
688        }))
689    }
690
691    /// Collect raw source text for an arithmetic expression until `))` is found.
692    /// Handles nested parentheses. Consumes the closing `))`.
693    fn collect_arith_expr(&mut self) -> Result<String, ParseError> {
694        // We need to read raw source text until we find ))
695        // The current token is the first token after ((.
696        // Strategy: track byte position in source and scan for )).
697        let expr_start = self.current.span.start as usize;
698        let src = self.source;
699        let bytes = src.as_bytes();
700        let mut pos = expr_start;
701        let mut depth: u32 = 0;
702
703        while pos < bytes.len() {
704            if bytes[pos] == b'(' {
705                depth += 1;
706                pos += 1;
707            } else if bytes[pos] == b')' {
708                if depth > 0 {
709                    depth -= 1;
710                    pos += 1;
711                } else if pos + 1 < bytes.len() && bytes[pos + 1] == b')' {
712                    // Found ))
713                    let expr = src[expr_start..pos].trim().to_string();
714                    let end_pos = pos + 2;
715                    // Reposition lexer past ))
716                    self.lexer.set_position(end_pos);
717                    self.peeked.clear();
718                    self.prev_end = end_pos as u32;
719                    self.current = self.lexer.next_token().map_err(lex_err)?;
720                    return Ok(expr);
721                } else {
722                    return Err(ParseError {
723                        message: "expected '))' to close arithmetic expression".into(),
724                        offset: pos as u32,
725                    });
726                }
727            } else {
728                pos += 1;
729            }
730        }
731
732        Err(ParseError {
733            message: "unterminated arithmetic expression, expected '))'".into(),
734            offset: expr_start as u32,
735        })
736    }
737
738    /// Parse `case word in pattern) body ;; ... esac`.
739    fn parse_case(&mut self) -> Result<Command, ParseError> {
740        let start = self.current.span.start;
741        self.expect_word("case")?;
742
743        if !self.at_word() {
744            return Err(ParseError {
745                message: "expected word after 'case'".into(),
746                offset: self.current.span.start,
747            });
748        }
749        let word = self.parse_word()?;
750        self.skip_newlines()?;
751        self.expect_word("in")?;
752        self.skip_newlines()?;
753
754        let mut items = Vec::new();
755        while !self.at_word_eq("esac") && !self.at(&TokenKind::Eof) {
756            let patterns = self.parse_case_patterns()?;
757            self.skip_newlines()?;
758
759            let body = self.parse_case_body()?;
760            let terminator = self.parse_case_terminator()?;
761            self.skip_newlines()?;
762
763            items.push(CaseItem {
764                patterns,
765                body,
766                terminator,
767            });
768        }
769
770        self.expect_word("esac")?;
771        Ok(Command::Case(CaseCommand {
772            word,
773            items,
774            span: self.span_from(start),
775        }))
776    }
777
778    fn parse_case_patterns(&mut self) -> Result<Vec<Word>, ParseError> {
779        if self.at(&TokenKind::LParen) {
780            self.advance()?;
781        }
782        let mut patterns = vec![self.parse_word()?];
783        while self.at(&TokenKind::Pipe) {
784            self.advance()?;
785            patterns.push(self.parse_word()?);
786        }
787        if !self.at(&TokenKind::RParen) {
788            return Err(ParseError {
789                message: "expected ')' after case pattern".into(),
790                offset: self.current.span.start,
791            });
792        }
793        self.advance()?;
794        Ok(patterns)
795    }
796
797    fn parse_case_body(&mut self) -> Result<Vec<CompleteCommand>, ParseError> {
798        let mut body = Vec::new();
799        while self.at_command_start() && !self.is_double_semi() && !self.is_case_fallthrough() {
800            body.push(self.parse_complete_command()?);
801            self.skip_newlines()?;
802        }
803        Ok(body)
804    }
805
806    fn parse_case_terminator(&mut self) -> Result<CaseTerminator, ParseError> {
807        if self.is_case_continue_testing() {
808            self.advance()?;
809            self.advance()?;
810            self.advance()?;
811            return Ok(CaseTerminator::ContinueTesting);
812        }
813        if self.is_double_semi() {
814            self.advance()?;
815            self.advance()?;
816            return Ok(CaseTerminator::Break);
817        }
818        if self.is_case_fallthrough() {
819            self.advance()?;
820            self.advance()?;
821            return Ok(CaseTerminator::Fallthrough);
822        }
823        Ok(CaseTerminator::Break)
824    }
825
826    /// Parse `[[ expression ]]` extended test command.
827    fn parse_double_bracket(&mut self) -> Result<Command, ParseError> {
828        let start = self.current.span.start;
829        self.advance()?; // consume [[
830
831        let mut words = Vec::new();
832        loop {
833            if self.at(&TokenKind::DblRBracket) {
834                self.advance()?; // consume ]]
835                break;
836            }
837            if self.at(&TokenKind::Eof) {
838                return Err(ParseError {
839                    message: "expected ']]' to close extended test".into(),
840                    offset: self.current.span.start,
841                });
842            }
843            // Collect tokens as words (operators inside [[ ]] are expression tokens)
844            if self.at_word() {
845                words.push(self.parse_word()?);
846            } else {
847                // Operator tokens (&&, ||, <, >, (, )) become literal words
848                let tok = self.advance()?;
849                let text = tok.text(self.source);
850                words.push(Word {
851                    parts: vec![WordPart::Literal(text.into())],
852                    span: tok.span,
853                });
854            }
855        }
856
857        Ok(Command::DoubleBracket(DoubleBracketCommand {
858            words,
859            span: self.span_from(start),
860        }))
861    }
862
863    /// Parse POSIX-style function: `name ( ) compound_command`
864    fn parse_function_posix(&mut self) -> Result<Command, ParseError> {
865        let start = self.current.span.start;
866        let name = self.current_text().into();
867        self.advance()?; // name
868        self.advance()?; // (
869        self.advance()?; // )
870        self.skip_newlines()?;
871        let body = Box::new(self.parse_command()?);
872        Ok(Command::FunctionDef(FunctionDef {
873            name,
874            body,
875            span: self.span_from(start),
876        }))
877    }
878
879    /// Parse bash-style function: `function name [( )] compound_command`
880    fn parse_function_bash(&mut self) -> Result<Command, ParseError> {
881        let start = self.current.span.start;
882        self.expect_word("function")?;
883
884        if !self.at_word() {
885            return Err(ParseError {
886                message: "expected function name after 'function'".into(),
887                offset: self.current.span.start,
888            });
889        }
890        let name = self.current_text().into();
891        self.advance()?;
892
893        // Optional ( )
894        if self.at(&TokenKind::LParen) {
895            self.advance()?;
896            if !self.at(&TokenKind::RParen) {
897                return Err(ParseError {
898                    message: "expected ')' after '(' in function definition".into(),
899                    offset: self.current.span.start,
900                });
901            }
902            self.advance()?;
903        }
904
905        self.skip_newlines()?;
906        let body = Box::new(self.parse_command()?);
907        Ok(Command::FunctionDef(FunctionDef {
908            name,
909            body,
910            span: self.span_from(start),
911        }))
912    }
913
914    // ---- Simple commands ----
915
916    fn parse_simple_command(&mut self) -> Result<SimpleCommand, ParseError> {
917        let start = self.current.span.start;
918        let mut assignments = Vec::new();
919        let mut words = Vec::new();
920        let mut redirections = Vec::new();
921        let mut past_assignments = false;
922
923        loop {
924            if !self.parse_simple_command_part(
925                &mut assignments,
926                &mut words,
927                &mut redirections,
928                &mut past_assignments,
929            )? {
930                break;
931            }
932        }
933
934        if assignments.is_empty() && words.is_empty() && redirections.is_empty() {
935            return Err(ParseError {
936                message: "expected a command".into(),
937                offset: self.current.span.start,
938            });
939        }
940
941        Ok(SimpleCommand {
942            assignments,
943            words,
944            redirections,
945            span: self.span_from(start),
946        })
947    }
948
949    fn parse_simple_command_part(
950        &mut self,
951        assignments: &mut Vec<Assignment>,
952        words: &mut Vec<Word>,
953        redirections: &mut Vec<Redirection>,
954        past_assignments: &mut bool,
955    ) -> Result<bool, ParseError> {
956        if self.at_fd_prefix_redirection() {
957            redirections.push(self.parse_fd_prefixed_redirection()?);
958            return Ok(true);
959        }
960        if self.at_redirection() {
961            redirections.push(self.parse_redirection()?);
962            return Ok(true);
963        }
964        if self.at_word() {
965            self.parse_simple_word_part(assignments, words, past_assignments)?;
966            return Ok(true);
967        }
968        Ok(false)
969    }
970
971    fn parse_fd_prefixed_redirection(&mut self) -> Result<Redirection, ParseError> {
972        let fd_text = self.current_text();
973        let fd: u32 = fd_text.parse().unwrap_or(0);
974        self.advance()?;
975        let mut redir = self.parse_redirection()?;
976        redir.fd = Some(fd);
977        Ok(redir)
978    }
979
980    fn parse_simple_word_part(
981        &mut self,
982        assignments: &mut Vec<Assignment>,
983        words: &mut Vec<Word>,
984        past_assignments: &mut bool,
985    ) -> Result<(), ParseError> {
986        let text = self.current_text();
987        if !*past_assignments && is_assignment_text(text) {
988            assignments.push(self.parse_assignment()?);
989            return Ok(());
990        }
991        *past_assignments = true;
992        if text.ends_with('=') && self.peek_is_lparen() {
993            words.push(self.parse_compound_assign_word()?);
994        } else {
995            words.push(self.parse_word()?);
996        }
997        Ok(())
998    }
999
1000    fn parse_word(&mut self) -> Result<Word, ParseError> {
1001        let tok = self.advance()?;
1002        let text = tok.text(self.source);
1003        let parts = word_parser::parse_word_parts(text);
1004        Ok(Word {
1005            parts,
1006            span: tok.span,
1007        })
1008    }
1009
1010    fn parse_assignment(&mut self) -> Result<Assignment, ParseError> {
1011        let tok = self.advance()?;
1012        let text = tok.text(self.source);
1013        let eq_pos = text.find('=').expect("assignment must contain '='");
1014        let name = &text[..eq_pos];
1015        let val_str = &text[eq_pos + 1..];
1016
1017        let value = if val_str.is_empty() {
1018            self.parse_assignment_compound_value(&tok, eq_pos)?
1019        } else {
1020            Some(self.make_assignment_word(&tok, eq_pos, val_str))
1021        };
1022
1023        Ok(Assignment {
1024            name: name.into(),
1025            value,
1026            span: tok.span,
1027        })
1028    }
1029
1030    fn parse_assignment_compound_value(
1031        &mut self,
1032        tok: &Token,
1033        eq_pos: usize,
1034    ) -> Result<Option<Word>, ParseError> {
1035        if !self.at(&TokenKind::LParen) {
1036            return Ok(None);
1037        }
1038        self.advance()?;
1039        let paren_start = tok.span.start + eq_pos as u32 + 1;
1040        let mut elements = Vec::new();
1041        while !self.at(&TokenKind::RParen) && !self.at(&TokenKind::Eof) {
1042            if self.at(&TokenKind::Newline) {
1043                self.advance()?;
1044                continue;
1045            }
1046            if !self.at_word() {
1047                break;
1048            }
1049            let word = self.advance()?;
1050            elements.push(word.text(self.source).to_string());
1051        }
1052        let end_span = if self.at(&TokenKind::RParen) {
1053            self.advance()?.span.end
1054        } else {
1055            self.current.span.end
1056        };
1057        Ok(Some(Word {
1058            parts: vec![WordPart::Literal(
1059                format!("({})", elements.join(" ")).into(),
1060            )],
1061            span: Span {
1062                start: paren_start,
1063                end: end_span,
1064            },
1065        }))
1066    }
1067
1068    #[allow(clippy::unused_self)]
1069    fn make_assignment_word(&self, tok: &Token, eq_pos: usize, val_str: &str) -> Word {
1070        let val_start = tok.span.start + eq_pos as u32 + 1;
1071        Word {
1072            parts: word_parser::parse_word_parts(val_str),
1073            span: Span {
1074                start: val_start,
1075                end: tok.span.end,
1076            },
1077        }
1078    }
1079
1080    fn parse_redirection(&mut self) -> Result<Redirection, ParseError> {
1081        let op_tok = self.advance()?;
1082        let (op, is_heredoc) = match op_tok.kind {
1083            TokenKind::Less => (RedirectionOp::Input, false),
1084            TokenKind::Greater | TokenKind::AmpGreater => (RedirectionOp::Output, false),
1085            TokenKind::GreaterGreater => (RedirectionOp::Append, false),
1086            TokenKind::LessGreater => (RedirectionOp::ReadWrite, false),
1087            TokenKind::LessLess => (RedirectionOp::HereDoc, true),
1088            TokenKind::LessLessDash => (RedirectionOp::HereDocStrip, true),
1089            TokenKind::LessLessLess => (RedirectionOp::HereString, false),
1090            _ => {
1091                return Err(ParseError {
1092                    message: format!("unexpected redirection operator: {:?}", op_tok.kind),
1093                    offset: op_tok.span.start,
1094                });
1095            }
1096        };
1097
1098        // For &>, we treat it as both stdout and stderr to file
1099        let is_amp_greater = op_tok.kind == TokenKind::AmpGreater;
1100
1101        // Check for >&N or <&N (fd duplication): > followed by & followed by digit word
1102        if self.at(&TokenKind::Amp) && matches!(op_tok.kind, TokenKind::Greater | TokenKind::Less) {
1103            self.advance()?; // consume &
1104            if self.at_word() {
1105                let target = self.parse_word()?;
1106                let span = Span {
1107                    start: op_tok.span.start,
1108                    end: target.span.end,
1109                };
1110                let dup_op = if op_tok.kind == TokenKind::Greater {
1111                    RedirectionOp::DupOutput
1112                } else {
1113                    RedirectionOp::DupInput
1114                };
1115                return Ok(Redirection {
1116                    fd: None,
1117                    op: dup_op,
1118                    target,
1119                    here_doc_body: None,
1120                    span,
1121                });
1122            }
1123            return Err(ParseError {
1124                message: "expected fd number after >&".into(),
1125                offset: self.current.span.start,
1126            });
1127        }
1128
1129        if !self.at_word() {
1130            return Err(ParseError {
1131                message: "expected word after redirection operator".into(),
1132                offset: self.current.span.start,
1133            });
1134        }
1135        let target = self.parse_word()?;
1136        let span = Span {
1137            start: op_tok.span.start,
1138            end: target.span.end,
1139        };
1140
1141        if is_heredoc {
1142            // Extract delimiter text (strip quotes from the delimiter word)
1143            let delim = heredoc_delimiter(&target);
1144            self.pending_heredocs.push(PendingHereDoc {
1145                delimiter: delim,
1146                strip_tabs: op == RedirectionOp::HereDocStrip,
1147            });
1148        }
1149
1150        // For &>, produce a fd=None redirection (handled specially in runtime)
1151        // We encode this by using fd = Some(u32::MAX) as a sentinel for &>
1152        let fd = if is_amp_greater {
1153            Some(u32::MAX) // sentinel: redirect both stdout and stderr
1154        } else {
1155            None
1156        };
1157
1158        Ok(Redirection {
1159            fd,
1160            op,
1161            target,
1162            here_doc_body: None,
1163            span,
1164        })
1165    }
1166
1167    /// Read here-doc bodies from source and attach them to the AST.
1168    fn resolve_heredocs(&mut self, cc: &mut CompleteCommand) -> Result<(), ParseError> {
1169        let newline_end = self.current.span.end as usize;
1170        let mut scan_pos = newline_end;
1171
1172        let mut bodies = Vec::new();
1173        for hd in &self.pending_heredocs {
1174            let body_start = scan_pos;
1175            loop {
1176                let (line_start, line_end, line) = self.heredoc_line(scan_pos);
1177                let check_line = self.heredoc_check_line(line, hd.strip_tabs);
1178                if check_line == hd.delimiter {
1179                    bodies.push(self.build_heredoc_body(body_start, line_start, hd.strip_tabs));
1180                    scan_pos = self.advance_heredoc_scan(line_end);
1181                    break;
1182                }
1183
1184                if line_end >= self.source.len() {
1185                    return Err(ParseError {
1186                        message: format!("unterminated here-doc, expected '{}'", hd.delimiter),
1187                        offset: body_start as u32,
1188                    });
1189                }
1190                scan_pos = line_end + 1;
1191            }
1192        }
1193
1194        // Walk the AST and assign bodies in order
1195        let mut body_iter = bodies.into_iter();
1196        assign_heredoc_bodies_cc(cc, &mut body_iter);
1197
1198        self.pending_heredocs.clear();
1199
1200        self.lexer.set_position(scan_pos);
1201        self.peeked.clear();
1202        self.current = self.lexer.next_token().map_err(lex_err)?;
1203
1204        Ok(())
1205    }
1206
1207    fn heredoc_line(&self, scan_pos: usize) -> (usize, usize, &str) {
1208        let line_start = scan_pos;
1209        let line_end = self.source[scan_pos..]
1210            .find('\n')
1211            .map_or(self.source.len(), |i| scan_pos + i);
1212        (line_start, line_end, &self.source[line_start..line_end])
1213    }
1214
1215    #[allow(clippy::unused_self)]
1216    fn heredoc_check_line<'a>(&self, line: &'a str, strip_tabs: bool) -> &'a str {
1217        if strip_tabs {
1218            line.trim_start_matches('\t')
1219        } else {
1220            line
1221        }
1222    }
1223
1224    fn build_heredoc_body(
1225        &self,
1226        body_start: usize,
1227        line_start: usize,
1228        strip_tabs: bool,
1229    ) -> HereDocBody {
1230        let raw_body = &self.source[body_start..line_start];
1231        let content = if strip_tabs {
1232            raw_body
1233                .lines()
1234                .map(|line| line.trim_start_matches('\t'))
1235                .collect::<Vec<_>>()
1236                .join("\n")
1237                + if raw_body.ends_with('\n') { "\n" } else { "" }
1238        } else {
1239            raw_body.to_string()
1240        };
1241        HereDocBody {
1242            content: content.into(),
1243            span: Span {
1244                start: body_start as u32,
1245                end: line_start as u32,
1246            },
1247        }
1248    }
1249
1250    fn advance_heredoc_scan(&self, line_end: usize) -> usize {
1251        if line_end < self.source.len() {
1252            line_end + 1
1253        } else {
1254            line_end
1255        }
1256    }
1257}
1258
1259/// Walk a `CompleteCommand` and assign here-doc bodies to here-doc redirections in source order.
1260fn assign_heredoc_bodies_cc(
1261    cc: &mut CompleteCommand,
1262    bodies: &mut impl Iterator<Item = HereDocBody>,
1263) {
1264    for and_or in &mut cc.list {
1265        assign_heredoc_bodies_pipeline(&mut and_or.first, bodies);
1266        for (_, pipeline) in &mut and_or.rest {
1267            assign_heredoc_bodies_pipeline(pipeline, bodies);
1268        }
1269    }
1270}
1271
1272fn assign_heredoc_bodies_pipeline(
1273    pipeline: &mut Pipeline,
1274    bodies: &mut impl Iterator<Item = HereDocBody>,
1275) {
1276    for cmd in &mut pipeline.commands {
1277        assign_heredoc_bodies_cmd(cmd, bodies);
1278    }
1279}
1280
1281fn assign_heredoc_bodies_cmd(cmd: &mut Command, bodies: &mut impl Iterator<Item = HereDocBody>) {
1282    if let Command::Simple(sc) = cmd {
1283        for redir in &mut sc.redirections {
1284            if matches!(
1285                redir.op,
1286                RedirectionOp::HereDoc | RedirectionOp::HereDocStrip
1287            ) && redir.here_doc_body.is_none()
1288            {
1289                redir.here_doc_body = bodies.next();
1290            }
1291        }
1292    }
1293}
1294
1295/// Extract the delimiter string from a here-doc target word, stripping quotes.
1296fn heredoc_delimiter(word: &Word) -> String {
1297    let mut result = String::new();
1298    for part in &word.parts {
1299        match part {
1300            WordPart::Literal(s) | WordPart::SingleQuoted(s) => result.push_str(s),
1301            WordPart::DoubleQuoted(parts) => {
1302                for p in parts {
1303                    if let WordPart::Literal(s) = p {
1304                        result.push_str(s);
1305                    }
1306                }
1307            }
1308            _ => {}
1309        }
1310    }
1311    result
1312}
1313
/// Reports whether `text` has the shape of a shell assignment word.
///
/// The part before the first `=` must be a name made of ASCII letters,
/// digits and `_` that does not start with a digit. It may be followed by
/// a `[subscript]` and/or a single trailing `+` (for `+=`).
fn is_assignment_text(text: &str) -> bool {
    let Some((lhs, _value)) = text.split_once('=') else {
        return false;
    };
    // `NAME+=value`: drop the `+` that belongs to the operator.
    let lhs = lhs.strip_suffix('+').unwrap_or(lhs);
    // `NAME[subscript]=value`: keep only what precedes the first `[`, and
    // reject a `[` that is not closed at the very end.
    let ident = match lhs.find('[') {
        Some(open) if lhs.ends_with(']') => &lhs[..open],
        Some(_) => return false,
        None => lhs,
    };

    let mut rest = ident.chars();
    match rest.next() {
        Some(first) if first == '_' || first.is_ascii_alphabetic() => {
            rest.all(|c| c == '_' || c.is_ascii_alphanumeric())
        }
        // Empty name, or a first character that cannot start a name.
        _ => false,
    }
}
1347
1348fn lex_err(e: wasmsh_lex::LexerError) -> ParseError {
1349    ParseError {
1350        message: e.message,
1351        offset: e.span.start,
1352    }
1353}
1354
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source`, panicking with the parse error when it is rejected.
    fn parse_ok(source: &str) -> Program {
        match parse(source) {
            Ok(program) => program,
            Err(e) => panic!("parse failed for {source:?}: {e}"),
        }
    }

    /// Returns the first command of `source`, which must be a simple command.
    fn first_simple(source: &str) -> SimpleCommand {
        match first_command(source) {
            Command::Simple(sc) => sc,
            other => panic!("expected simple command, got {other:?}"),
        }
    }

    /// Returns a clone of the first command of the first pipeline.
    fn first_command(source: &str) -> Command {
        let program = parse_ok(source);
        let first = &program.commands[0].list[0].first.commands[0];
        first.clone()
    }

    /// Collects the leading literal part of every word of `sc`.
    fn word_texts(sc: &SimpleCommand) -> Vec<&str> {
        let mut texts = Vec::with_capacity(sc.words.len());
        for word in &sc.words {
            let WordPart::Literal(text) = &word.parts[0] else {
                panic!("expected literal word part");
            };
            texts.push(text.as_str());
        }
        texts
    }

    // ---- Simple commands (from prompt 02) ----

    #[test]
    fn parse_empty_input() {
        assert!(parse_ok("").commands.is_empty());
    }

    #[test]
    fn parse_echo_hi() {
        let simple = first_simple("echo hi");
        assert_eq!(word_texts(&simple), ["echo", "hi"]);
    }

    #[test]
    fn parse_assignment_prefix() {
        let simple = first_simple("FOO=1 BAR=2 env");
        assert_eq!(simple.assignments.len(), 2);
        assert_eq!(word_texts(&simple), ["env"]);
    }

    #[test]
    fn parse_redirections() {
        let simple = first_simple("cat < in > out");
        assert_eq!(word_texts(&simple), ["cat"]);
        assert_eq!(simple.redirections.len(), 2);
    }

    #[test]
    fn parse_pipeline() {
        let program = parse_ok("a | b | c");
        let pipeline = &program.commands[0].list[0].first;
        assert_eq!(pipeline.commands.len(), 3);
    }

    #[test]
    fn parse_and_or_semicolons() {
        let program = parse_ok("false && echo x; true || echo y");
        assert_eq!(program.commands[0].list.len(), 2);
    }

    #[test]
    fn parse_spans_preserved() {
        let simple = first_simple("echo hello");
        assert_eq!(simple.span, Span { start: 0, end: 10 });
    }

    // ---- Subshell ----

    #[test]
    fn parse_subshell() {
        let Command::Subshell(sub) = first_command("(echo hi)") else {
            panic!("expected subshell");
        };
        assert_eq!(sub.body.len(), 1);
    }

    #[test]
    fn parse_subshell_with_semicolons() {
        let Command::Subshell(sub) = first_command("(echo a; echo b)") else {
            panic!("expected subshell");
        };
        // A single complete command holding two and-or entries.
        assert_eq!(sub.body[0].list.len(), 2);
    }

    // ---- Group ----

    #[test]
    fn parse_group() {
        let Command::Group(group) = first_command("{ echo hi; }") else {
            panic!("expected group");
        };
        assert_eq!(group.body.len(), 1);
    }

    // ---- If ----

    #[test]
    fn parse_if_then_fi() {
        let Command::If(if_cmd) = first_command("if true; then echo yes; fi") else {
            panic!("expected if");
        };
        assert_eq!(if_cmd.condition.len(), 1);
        assert_eq!(if_cmd.then_body.len(), 1);
        assert!(if_cmd.elifs.is_empty());
        assert!(if_cmd.else_body.is_none());
    }

    #[test]
    fn parse_if_else() {
        let Command::If(if_cmd) = first_command("if true; then echo yes; else echo no; fi")
        else {
            panic!("expected if");
        };
        assert!(if_cmd.else_body.is_some());
    }

    #[test]
    fn parse_if_elif_else() {
        let source = "if a; then b; elif c; then d; elif e; then f; else g; fi";
        let Command::If(if_cmd) = first_command(source) else {
            panic!("expected if");
        };
        assert_eq!(if_cmd.elifs.len(), 2);
        assert!(if_cmd.else_body.is_some());
    }

    // ---- While ----

    #[test]
    fn parse_while() {
        let Command::While(while_cmd) = first_command("while true; do echo loop; done") else {
            panic!("expected while");
        };
        assert_eq!(while_cmd.condition.len(), 1);
        assert_eq!(while_cmd.body.len(), 1);
    }

    // ---- Until ----

    #[test]
    fn parse_until() {
        let Command::Until(until_cmd) = first_command("until false; do echo loop; done") else {
            panic!("expected until");
        };
        assert_eq!(until_cmd.condition.len(), 1);
        assert_eq!(until_cmd.body.len(), 1);
    }

    // ---- For ----

    #[test]
    fn parse_for_in() {
        let Command::For(for_cmd) = first_command("for x in a b c; do echo x; done") else {
            panic!("expected for");
        };
        assert_eq!(for_cmd.var_name.as_str(), "x");
        let items = for_cmd.words.as_ref().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(for_cmd.body.len(), 1);
    }

    #[test]
    fn parse_for_no_in() {
        let Command::For(for_cmd) = first_command("for x; do echo x; done") else {
            panic!("expected for");
        };
        assert!(for_cmd.words.is_none());
    }

    #[test]
    fn parse_for_newline_before_do() {
        let Command::For(for_cmd) = first_command("for x in a b c\ndo\necho x\ndone") else {
            panic!("expected for");
        };
        assert_eq!(for_cmd.words.as_ref().unwrap().len(), 3);
    }

    // ---- Case ----

    #[test]
    fn parse_case() {
        let source = "case x in\na) echo a;;\nb) echo b;;\nesac";
        let Command::Case(case_cmd) = first_command(source) else {
            panic!("expected case");
        };
        assert_eq!(case_cmd.items.len(), 2);
    }

    #[test]
    fn parse_case_wildcard() {
        let source = "case x in\n*) echo default;;\nesac";
        let Command::Case(case_cmd) = first_command(source) else {
            panic!("expected case");
        };
        assert_eq!(case_cmd.items.len(), 1);
    }

    // ---- Function definitions ----

    #[test]
    fn parse_function_posix() {
        let Command::FunctionDef(def) = first_command("greet() { echo hello; }") else {
            panic!("expected function def");
        };
        assert_eq!(def.name.as_str(), "greet");
        assert!(matches!(*def.body, Command::Group(_)));
    }

    #[test]
    fn parse_function_bash() {
        let Command::FunctionDef(def) = first_command("function greet { echo hello; }") else {
            panic!("expected function def");
        };
        assert_eq!(def.name.as_str(), "greet");
    }

    #[test]
    fn parse_function_bash_with_parens() {
        let Command::FunctionDef(def) = first_command("function greet() { echo hello; }")
        else {
            panic!("expected function def");
        };
        assert_eq!(def.name.as_str(), "greet");
    }

    // ---- Reserved words are context-sensitive ----

    #[test]
    fn reserved_word_as_argument() {
        // Here `if`, `then` and `else` are arguments, not keywords.
        let simple = first_simple("echo if then else");
        assert_eq!(word_texts(&simple), ["echo", "if", "then", "else"]);
    }

    // ---- Here-docs ----

    #[test]
    fn parse_heredoc() {
        let simple = first_simple("cat <<EOF\nhello world\nEOF\n");
        assert_eq!(simple.redirections.len(), 1);
        let redir = &simple.redirections[0];
        assert_eq!(redir.op, RedirectionOp::HereDoc);
        let body = redir.here_doc_body.as_ref().unwrap();
        assert_eq!(body.content.as_str(), "hello world\n");
    }

    #[test]
    fn parse_heredoc_multiline() {
        let simple = first_simple("cat <<EOF\nline1\nline2\nline3\nEOF\n");
        let body = simple.redirections[0].here_doc_body.as_ref().unwrap();
        assert_eq!(body.content.as_str(), "line1\nline2\nline3\n");
    }

    #[test]
    fn parse_heredoc_strip_tabs() {
        let simple = first_simple("cat <<-EOF\n\thello\n\tworld\n\tEOF\n");
        let redir = &simple.redirections[0];
        assert_eq!(redir.op, RedirectionOp::HereDocStrip);
        let body = redir.here_doc_body.as_ref().unwrap();
        assert_eq!(body.content.as_str(), "hello\nworld\n");
    }

    #[test]
    fn parse_multiple_heredocs() {
        let simple = first_simple("cmd <<A <<B\nbody_a\nA\nbody_b\nB\n");
        assert_eq!(simple.redirections.len(), 2);
        let first_body = simple.redirections[0].here_doc_body.as_ref().unwrap();
        assert_eq!(first_body.content.as_str(), "body_a\n");
        let second_body = simple.redirections[1].here_doc_body.as_ref().unwrap();
        assert_eq!(second_body.content.as_str(), "body_b\n");
    }

    #[test]
    fn parse_heredoc_in_pipeline() {
        let program = parse_ok("cat <<EOF | wc -l\nhello\nEOF\n");
        let pipeline = &program.commands[0].list[0].first;
        assert_eq!(pipeline.commands.len(), 2);
        let Command::Simple(simple) = &pipeline.commands[0] else {
            panic!("expected simple command");
        };
        let body = simple.redirections[0].here_doc_body.as_ref().unwrap();
        assert_eq!(body.content.as_str(), "hello\n");
    }

    #[test]
    fn parse_heredoc_followed_by_command() {
        let program = parse_ok("cat <<EOF\nhello\nEOF\necho done");
        assert_eq!(program.commands.len(), 2);
    }

    #[test]
    fn parse_error_unterminated_heredoc() {
        let result = parse("cat <<EOF\nhello\n");
        assert!(result.is_err());
    }

    // ---- Error cases ----

    #[test]
    fn parse_error_on_lone_pipe() {
        let result = parse("|");
        assert!(result.is_err());
    }

    #[test]
    fn parse_error_on_lone_semicolon() {
        let result = parse(";");
        assert!(result.is_err());
    }

    #[test]
    fn parse_error_redirect_no_target() {
        let result = parse("echo >");
        assert!(result.is_err());
    }

    #[test]
    fn parse_error_unclosed_if() {
        let result = parse("if true; then echo x");
        assert!(result.is_err());
    }

    #[test]
    fn parse_error_unclosed_group() {
        let result = parse("{ echo x");
        assert!(result.is_err());
    }

    #[test]
    fn parse_error_unclosed_subshell() {
        let result = parse("(echo x");
        assert!(result.is_err());
    }

    // ---- Here-string ----

    #[test]
    fn parse_here_string() {
        let simple = first_simple("cat <<< hello");
        assert_eq!(word_texts(&simple), ["cat"]);
        assert_eq!(simple.redirections.len(), 1);
        assert_eq!(simple.redirections[0].op, RedirectionOp::HereString);
    }

    // ---- Fd-prefix redirection ----

    #[test]
    fn parse_stderr_redirect() {
        let simple = first_simple("cmd 2> file");
        assert_eq!(simple.redirections.len(), 1);
        let redir = &simple.redirections[0];
        assert_eq!(redir.fd, Some(2));
        assert_eq!(redir.op, RedirectionOp::Output);
    }

    #[test]
    fn parse_fd_dup_output() {
        let simple = first_simple("cmd 2>&1");
        assert_eq!(simple.redirections.len(), 1);
        let redir = &simple.redirections[0];
        assert_eq!(redir.fd, Some(2));
        assert_eq!(redir.op, RedirectionOp::DupOutput);
    }

    #[test]
    fn parse_amp_greater() {
        let simple = first_simple("cmd &> file");
        assert_eq!(simple.redirections.len(), 1);
        // `&>` is encoded as fd = u32::MAX with op = Output.
        let redir = &simple.redirections[0];
        assert_eq!(redir.fd, Some(u32::MAX));
        assert_eq!(redir.op, RedirectionOp::Output);
    }

    // ---- Double bracket ----

    #[test]
    fn parse_double_bracket_basic() {
        let cmd = first_command("[[ hello == world ]]");
        let db = match &cmd {
            Command::DoubleBracket(db) => db,
            _ => panic!("expected DoubleBracket, got {cmd:?}"),
        };
        assert_eq!(db.words.len(), 3);
    }

    #[test]
    fn parse_double_bracket_with_var() {
        let Command::DoubleBracket(db) = first_command("[[ $x == hello ]]") else {
            panic!("expected DoubleBracket");
        };
        assert_eq!(db.words.len(), 3);
    }

    #[test]
    fn parse_double_bracket_logical_ops() {
        // `&&` and `||` inside [[ ]] are captured as expression tokens.
        let Command::DoubleBracket(db) = first_command("[[ a == a && b == b ]]") else {
            panic!("expected DoubleBracket");
        };
        // a, ==, a, &&, b, ==, b
        assert_eq!(db.words.len(), 7);
    }

    #[test]
    fn parse_double_bracket_in_if() {
        let program = parse_ok("if [[ x == x ]]; then echo yes; fi");
        let first = &program.commands[0].list[0].first.commands[0];
        assert!(matches!(first, Command::If(_)));
    }

    #[test]
    fn parse_double_bracket_in_pipeline() {
        let program = parse_ok("[[ x == x ]] && echo yes");
        assert!(!program.commands.is_empty());
    }

    // ---- Arithmetic command (( )) ----

    #[test]
    fn parse_arith_command_basic() {
        let cmd = first_command("((1+2))");
        let arith = match &cmd {
            Command::ArithCommand(arith) => arith,
            _ => panic!("expected ArithCommand, got {cmd:?}"),
        };
        assert_eq!(arith.expr.as_str(), "1+2");
    }

    #[test]
    fn parse_arith_command_with_spaces() {
        let cmd = first_command("(( x = 1 + 2 ))");
        let arith = match &cmd {
            Command::ArithCommand(arith) => arith,
            _ => panic!("expected ArithCommand, got {cmd:?}"),
        };
        assert_eq!(arith.expr.as_str(), "x = 1 + 2");
    }

    #[test]
    fn parse_arith_command_with_parens() {
        let cmd = first_command("(( (1+2) * 3 ))");
        let arith = match &cmd {
            Command::ArithCommand(arith) => arith,
            _ => panic!("expected ArithCommand, got {cmd:?}"),
        };
        assert_eq!(arith.expr.as_str(), "(1+2) * 3");
    }

    #[test]
    fn parse_arith_command_in_if() {
        let program = parse_ok("if (( x > 0 )); then echo yes; fi");
        let first = &program.commands[0].list[0].first.commands[0];
        assert!(matches!(first, Command::If(_)));
    }

    #[test]
    fn parse_arith_command_in_and_or() {
        let program = parse_ok("(( x > 0 )) && echo yes");
        assert!(!program.commands.is_empty());
    }

    // ---- C-style for (( )) ----

    #[test]
    fn parse_arith_for_basic() {
        let cmd = first_command("for ((i=0; i<10; i++)) do echo $i; done");
        let arith_for = match &cmd {
            Command::ArithFor(arith_for) => arith_for,
            _ => panic!("expected ArithFor, got {cmd:?}"),
        };
        assert_eq!(arith_for.init.as_str(), "i=0");
        assert_eq!(arith_for.cond.as_str(), "i<10");
        assert_eq!(arith_for.step.as_str(), "i++");
        assert_eq!(arith_for.body.len(), 1);
    }

    #[test]
    fn parse_arith_for_with_spaces() {
        let cmd = first_command("for (( i = 0; i < 5; i++ )) do echo $i; done");
        let arith_for = match &cmd {
            Command::ArithFor(arith_for) => arith_for,
            _ => panic!("expected ArithFor, got {cmd:?}"),
        };
        assert_eq!(arith_for.init.as_str(), "i = 0");
        assert_eq!(arith_for.cond.as_str(), "i < 5");
        assert_eq!(arith_for.step.as_str(), "i++");
    }

    #[test]
    fn parse_arith_for_with_semicolon_before_do() {
        let cmd = first_command("for ((i=0; i<3; i++)); do echo $i; done");
        let arith_for = match &cmd {
            Command::ArithFor(arith_for) => arith_for,
            _ => panic!("expected ArithFor, got {cmd:?}"),
        };
        assert_eq!(arith_for.init.as_str(), "i=0");
        assert_eq!(arith_for.cond.as_str(), "i<3");
        assert_eq!(arith_for.step.as_str(), "i++");
    }

    #[test]
    fn parse_arith_for_newline_before_do() {
        let cmd = first_command("for ((i=0; i<3; i++))\ndo\necho $i\ndone");
        let arith_for = match &cmd {
            Command::ArithFor(arith_for) => arith_for,
            _ => panic!("expected ArithFor, got {cmd:?}"),
        };
        assert_eq!(arith_for.init.as_str(), "i=0");
    }

    #[test]
    fn parse_subshell_not_confused_with_arith() {
        // A subshell `(echo hi)` must not be taken for `(( ))`.
        let cmd = first_command("(echo hi)");
        assert!(matches!(cmd, Command::Subshell(_)));
    }
}