kaish_kernel/
parser.rs

1//! Parser for kaish source code.
2//!
3//! Transforms a token stream from the lexer into an Abstract Syntax Tree.
4//! Uses chumsky for parser combinators with good error recovery.
5
6use crate::ast::{
7    Arg, Assignment, BinaryOp, CaseBranch, CaseStmt, Command, Expr, FileTestOp, ForLoop, IfStmt,
8    Pipeline, Program, Redirect, RedirectKind, Stmt, StringPart, StringTestOp, TestCmpOp, TestExpr,
9    ToolDef, Value, VarPath, VarSegment, WhileLoop,
10};
11use crate::lexer::{self, HereDocData, Token};
12use chumsky::{input::ValueInput, prelude::*};
13
14/// Span type used throughout the parser: an alias for `SimpleSpan`, attached to tokens and carried by `ParseError`.
15pub type Span = SimpleSpan;
16
17/// Parse a raw `${...}` string into an Expr.
18///
19/// Handles:
20/// - Special variables: `${?}` → LastExitCode, `${$}` → CurrentPid
21/// - Simple paths: `${VAR}`, `${VAR.field}`, `${VAR[0]}`, `${?.ok}` → VarRef
22/// - Default values: `${VAR:-default}` → VarWithDefault (with nested expansion support)
23fn parse_var_expr(raw: &str) -> Expr {
24    // Special case: ${?} is the last exit code (same as $?)
25    if raw == "${?}" {
26        return Expr::LastExitCode;
27    }
28
29    // Special case: ${$} is the current PID (same as $$)
30    if raw == "${$}" {
31        return Expr::CurrentPid;
32    }
33
34    // Check for default value syntax: ${VAR:-default}
35    // Need to find :- that's not inside a nested ${...}
36    if let Some(colon_idx) = find_default_separator(raw) {
37        // Extract variable name (between ${ and :-)
38        let name = raw[2..colon_idx].to_string();
39        // Extract default value (between :- and }) and recursively parse it
40        let default_str = &raw[colon_idx + 2..raw.len() - 1];
41        let default = parse_interpolated_string(default_str);
42        return Expr::VarWithDefault { name, default };
43    }
44
45    // Regular variable path
46    Expr::VarRef(parse_varpath(raw))
47}
48
/// Find the byte position of `:-` in a full `${VAR:-default}` expression.
///
/// Only a separator that sits directly inside the outer `${...}` (nesting depth 1)
/// is reported; a `:-` belonging to a nested `${...}` is skipped. Returns `None`
/// when there is no such separator.
fn find_default_separator(raw: &str) -> Option<usize> {
    find_default_separator_at_depth(raw, 1)
}

/// Find the byte position of `:-` in variable content (the text between the outer
/// braces, without them).
///
/// Only a separator outside every nested `${...}` (nesting depth 0) is reported.
/// Returns `None` when there is no such separator.
fn find_default_separator_in_content(content: &str) -> Option<usize> {
    find_default_separator_at_depth(content, 0)
}

/// Shared scanner behind the two `find_default_separator*` functions.
///
/// Walks `text` byte by byte while tracking how many `${` openers are currently
/// unclosed, and returns the index of the first `:-` seen while that count equals
/// `target_depth`. A `}` only closes an opener when one is open, so stray closing
/// braces are treated as ordinary bytes.
fn find_default_separator_at_depth(text: &str, target_depth: usize) -> Option<usize> {
    let bytes = text.as_bytes();
    let mut depth = 0usize;
    let mut i = 0;

    while i < bytes.len() {
        match (bytes[i], bytes.get(i + 1).copied()) {
            // Nested expansion opens: skip both bytes of `${`.
            (b'$', Some(b'{')) => {
                depth += 1;
                i += 2;
            }
            // Innermost open expansion closes.
            (b'}', _) if depth > 0 => {
                depth -= 1;
                i += 1;
            }
            // Separator at the level the caller asked for.
            (b':', Some(b'-')) if depth == target_depth => return Some(i),
            _ => i += 1,
        }
    }
    None
}
100
101/// Parse a raw `${...}` string into a VarPath.
102///
103/// Handles paths like `${VAR}`, `${?.ok}`. Array indexing is not supported.
104fn parse_varpath(raw: &str) -> VarPath {
105    let segments_strs = lexer::parse_var_ref(raw).unwrap_or_default();
106    let segments = segments_strs
107        .into_iter()
108        .filter(|s| !s.starts_with('['))  // Skip index segments
109        .map(VarSegment::Field)
110        .collect();
111    VarPath { segments }
112}
113
114/// Parse an interpolated string like "Hello ${NAME}!" or "Hello $NAME!" into parts.
115/// Extract a pipeline from a statement if possible.
116fn stmt_to_pipeline(stmt: Stmt) -> Option<Pipeline> {
117    match stmt {
118        Stmt::Pipeline(p) => Some(p),
119        Stmt::Command(cmd) => Some(Pipeline {
120            commands: vec![cmd],
121            background: false,
122        }),
123        _ => None,
124    }
125}
126
127fn parse_interpolated_string(s: &str) -> Vec<StringPart> {
128    // First, replace escaped dollar markers with a temporary placeholder
129    // The lexer uses __KAISH_ESCAPED_DOLLAR__ for \$ to prevent re-interpretation
130    let s = s.replace("__KAISH_ESCAPED_DOLLAR__", "\x00DOLLAR\x00");
131
132    let mut parts = Vec::new();
133    let mut current_text = String::new();
134    let mut chars = s.chars().peekable();
135
136    while let Some(ch) = chars.next() {
137        if ch == '\x00' {
138            // This is our escaped dollar marker - skip "DOLLAR" and the closing \x00
139            let mut marker = String::new();
140            while let Some(&c) = chars.peek() {
141                if c == '\x00' {
142                    chars.next(); // consume closing marker
143                    break;
144                }
145                if let Some(c) = chars.next() {
146                    marker.push(c);
147                }
148            }
149            if marker == "DOLLAR" {
150                current_text.push('$');
151            }
152        } else if ch == '$' {
153            // Check for command substitution $(...)
154            if chars.peek() == Some(&'(') {
155                // Command substitution $(...)
156                if !current_text.is_empty() {
157                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
158                }
159
160                // Consume the '('
161                chars.next();
162
163                // Collect until matching ')' accounting for nested parens
164                let mut cmd_content = String::new();
165                let mut paren_depth = 1;
166                for c in chars.by_ref() {
167                    if c == '(' {
168                        paren_depth += 1;
169                        cmd_content.push(c);
170                    } else if c == ')' {
171                        paren_depth -= 1;
172                        if paren_depth == 0 {
173                            break;
174                        }
175                        cmd_content.push(c);
176                    } else {
177                        cmd_content.push(c);
178                    }
179                }
180
181                // Parse the command content as a pipeline
182                // We need to use the main parser for this
183                if let Ok(program) = parse(&cmd_content) {
184                    // Extract the pipeline from the parsed result
185                    if let Some(stmt) = program.statements.first() {
186                        if let Some(pipeline) = stmt_to_pipeline(stmt.clone()) {
187                            parts.push(StringPart::CommandSubst(pipeline));
188                        } else {
189                            // If we can't extract a pipeline, treat as literal
190                            current_text.push_str("$(");
191                            current_text.push_str(&cmd_content);
192                            current_text.push(')');
193                        }
194                    }
195                } else {
196                    // Parse failed - treat as literal
197                    current_text.push_str("$(");
198                    current_text.push_str(&cmd_content);
199                    current_text.push(')');
200                }
201            } else if chars.peek() == Some(&'{') {
202                // Braced variable reference ${...}
203                if !current_text.is_empty() {
204                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
205                }
206
207                // Consume the '{'
208                chars.next();
209
210                // Collect until matching '}', tracking nesting depth
211                let mut var_content = String::new();
212                let mut depth = 1;
213                for c in chars.by_ref() {
214                    if c == '{' && var_content.ends_with('$') {
215                        depth += 1;
216                        var_content.push(c);
217                    } else if c == '}' {
218                        depth -= 1;
219                        if depth == 0 {
220                            break;
221                        }
222                        var_content.push(c);
223                    } else {
224                        var_content.push(c);
225                    }
226                }
227
228                // Parse the content for special syntax
229                let part = if let Some(name) = var_content.strip_prefix('#') {
230                    // Variable length: ${#VAR}
231                    StringPart::VarLength(name.to_string())
232                } else if var_content.starts_with("__ARITH:") && var_content.ends_with("__") {
233                    // Arithmetic expression: ${__ARITH:expr__}
234                    let expr = var_content
235                        .strip_prefix("__ARITH:")
236                        .and_then(|s| s.strip_suffix("__"))
237                        .unwrap_or("");
238                    StringPart::Arithmetic(expr.to_string())
239                } else if let Some(colon_idx) = find_default_separator_in_content(&var_content) {
240                    // Variable with default: ${VAR:-default} - recursively parse the default
241                    let name = var_content[..colon_idx].to_string();
242                    let default_str = &var_content[colon_idx + 2..];
243                    let default = parse_interpolated_string(default_str);
244                    StringPart::VarWithDefault { name, default }
245                } else {
246                    // Regular variable: ${VAR} or ${VAR.field}
247                    StringPart::Var(parse_varpath(&format!("${{{}}}", var_content)))
248                };
249                parts.push(part);
250            } else if chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
251                // Positional parameter $0-$9
252                if !current_text.is_empty() {
253                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
254                }
255                if let Some(digit) = chars.next() {
256                    let n = digit.to_digit(10).unwrap_or(0) as usize;
257                    parts.push(StringPart::Positional(n));
258                }
259            } else if chars.peek() == Some(&'@') {
260                // All arguments $@
261                if !current_text.is_empty() {
262                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
263                }
264                chars.next(); // consume '@'
265                parts.push(StringPart::AllArgs);
266            } else if chars.peek() == Some(&'#') {
267                // Argument count $#
268                if !current_text.is_empty() {
269                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
270                }
271                chars.next(); // consume '#'
272                parts.push(StringPart::ArgCount);
273            } else if chars.peek() == Some(&'?') {
274                // Last exit code $?
275                if !current_text.is_empty() {
276                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
277                }
278                chars.next(); // consume '?'
279                parts.push(StringPart::LastExitCode);
280            } else if chars.peek() == Some(&'$') {
281                // Current PID $$
282                if !current_text.is_empty() {
283                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
284                }
285                chars.next(); // consume second '$'
286                parts.push(StringPart::CurrentPid);
287            } else if chars.peek().map(|c| c.is_ascii_alphabetic() || *c == '_').unwrap_or(false) {
288                // Simple variable reference $NAME
289                if !current_text.is_empty() {
290                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
291                }
292
293                // Collect identifier characters
294                let mut var_name = String::new();
295                while let Some(&c) = chars.peek() {
296                    if c.is_ascii_alphanumeric() || c == '_' {
297                        if let Some(c) = chars.next() {
298                            var_name.push(c);
299                        }
300                    } else {
301                        break;
302                    }
303                }
304
305                parts.push(StringPart::Var(VarPath::simple(var_name)));
306            } else {
307                // Literal $ (not followed by { or identifier start)
308                current_text.push(ch);
309            }
310        } else {
311            current_text.push(ch);
312        }
313    }
314
315    if !current_text.is_empty() {
316        parts.push(StringPart::Literal(current_text));
317    }
318
319    parts
320}
321
322/// Parse error with location and context.
323#[derive(Debug, Clone)]
324pub struct ParseError {
325    pub span: Span,
326    pub message: String,
327}
328
329impl std::fmt::Display for ParseError {
330    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331        write!(f, "{} at {:?}", self.message, self.span)
332    }
333}
334
335impl std::error::Error for ParseError {}
336
337/// Parse kaish source code into a Program AST.
338pub fn parse(source: &str) -> Result<Program, Vec<ParseError>> {
339    // Tokenize with logos
340    let tokens = lexer::tokenize(source).map_err(|errs| {
341        errs.into_iter()
342            .map(|e| ParseError {
343                span: (e.span.start..e.span.end).into(),
344                message: format!("lexer error: {}", e.token),
345            })
346            .collect::<Vec<_>>()
347    })?;
348
349    // Convert tokens to (Token, SimpleSpan) pairs
350    let tokens: Vec<(Token, Span)> = tokens
351        .into_iter()
352        .map(|spanned| (spanned.token, (spanned.span.start..spanned.span.end).into()))
353        .collect();
354
355    // End-of-input span
356    let end_span: Span = (source.len()..source.len()).into();
357
358    // Parse using slice-based input (like nano_rust example)
359    let parser = program_parser();
360    let result = parser.parse(tokens.as_slice().map(end_span, |(t, s)| (t, s)));
361
362    result.into_result().map_err(|errs| {
363        errs.into_iter()
364            .map(|e| ParseError {
365                span: *e.span(),
366                message: e.to_string(),
367            })
368            .collect()
369    })
370}
371
372/// Parse a single statement (useful for REPL).
373pub fn parse_statement(source: &str) -> Result<Stmt, Vec<ParseError>> {
374    let program = parse(source)?;
375    program
376        .statements
377        .into_iter()
378        .find(|s| !matches!(s, Stmt::Empty))
379        .ok_or_else(|| {
380            vec![ParseError {
381                span: (0..source.len()).into(),
382                message: "empty input".to_string(),
383            }]
384        })
385}
386
387// ═══════════════════════════════════════════════════════════════════════════
388// Parser Combinators - generic over input type
389// ═══════════════════════════════════════════════════════════════════════════
390
391/// Top-level program parser.
392fn program_parser<'tokens, 'src: 'tokens, I>(
393) -> impl Parser<'tokens, I, Program, extra::Err<Rich<'tokens, Token, Span>>>
394where
395    I: ValueInput<'tokens, Token = Token, Span = Span>,
396{
397    statement_parser()
398        .repeated()
399        .collect::<Vec<_>>()
400        .map(|statements| Program { statements })
401}
402
403/// Statement parser - dispatches based on leading token.
404/// Supports statement-level chaining with && and ||.
405fn statement_parser<'tokens, I>(
406) -> impl Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
407where
408    I: ValueInput<'tokens, Token = Token, Span = Span>,
409{
410    recursive(|stmt| {
411        let terminator = choice((just(Token::Newline), just(Token::Semi))).repeated();
412
413        // break [N] - break out of N levels of loops (default 1)
414        let break_stmt = just(Token::Break)
415            .ignore_then(
416                select! { Token::Int(n) => n as usize }.or_not()
417            )
418            .map(Stmt::Break);
419
420        // continue [N] - continue to next iteration, skipping N levels (default 1)
421        let continue_stmt = just(Token::Continue)
422            .ignore_then(
423                select! { Token::Int(n) => n as usize }.or_not()
424            )
425            .map(Stmt::Continue);
426
427        // return [expr] - return from a tool
428        let return_stmt = just(Token::Return)
429            .ignore_then(primary_expr_parser().or_not())
430            .map(|e| Stmt::Return(e.map(Box::new)));
431
432        // exit [code] - exit the script
433        let exit_stmt = just(Token::Exit)
434            .ignore_then(primary_expr_parser().or_not())
435            .map(|e| Stmt::Exit(e.map(Box::new)));
436
437        // set command: `set -e`, `set +e`, `set` (no args), `set -o pipefail`
438        // This must come BEFORE assignment_parser to handle `set -e` vs `X=value`
439        //
440        // Strategy: Use lookahead to check what follows `set`:
441        // - If followed by a flag (-e, --long, +e): parse as set command
442        // - If followed by identifier NOT followed by =: parse as set command (e.g., `set pipefail`)
443        // - If followed by nothing (end/newline/semi): parse as set command
444        // - If followed by identifier then =: let assignment_parser handle it
445        let set_flag_arg = choice((
446            select! { Token::ShortFlag(f) => Arg::ShortFlag(f) },
447            select! { Token::LongFlag(f) => Arg::LongFlag(f) },
448            // PlusFlag for +e, +x etc. - convert to positional arg with + prefix
449            select! { Token::PlusFlag(f) => Arg::Positional(Expr::Literal(Value::String(format!("+{}", f)))) },
450        ));
451
452        // set with flags: `set -e`, `set -e -u -o pipefail`
453        let set_with_flags = just(Token::Set)
454            .then(set_flag_arg)
455            .then(
456                choice((
457                    set_flag_arg,
458                    // Identifiers like 'pipefail' after -o
459                    ident_parser().map(|name| Arg::Positional(Expr::Literal(Value::String(name)))),
460                ))
461                .repeated()
462                .collect::<Vec<_>>(),
463            )
464            .map(|((_, first_arg), mut rest_args)| {
465                let mut args = vec![first_arg];
466                args.append(&mut rest_args);
467                Stmt::Command(Command {
468                    name: "set".to_string(),
469                    args,
470                    redirects: vec![],
471                })
472            });
473
474        // set with no args: `set` alone (shows settings)
475        // Must be followed by newline, semicolon, end of input, or a chaining operator (&&, ||)
476        let set_no_args = just(Token::Set)
477            .then(
478                choice((
479                    just(Token::Newline).to(()),
480                    just(Token::Semi).to(()),
481                    just(Token::And).to(()),
482                    just(Token::Or).to(()),
483                    end(),
484                ))
485                .rewind(),
486            )
487            .map(|_| Stmt::Command(Command {
488                name: "set".to_string(),
489                args: vec![],
490                redirects: vec![],
491            }));
492
493        // Try set_with_flags first (requires at least one flag)
494        // Then try set_no_args (no args, followed by terminator)
495        // If neither matches, fall through to assignment_parser
496        let set_command = set_with_flags.or(set_no_args);
497
498        // Base statement (without chaining)
499        let base_statement = choice((
500            just(Token::Newline).to(Stmt::Empty),
501            set_command,
502            assignment_parser().map(Stmt::Assignment),
503            // Shell-style functions (use $1, $2 positional params)
504            posix_function_parser(stmt.clone()).map(Stmt::ToolDef),  // name() { }
505            bash_function_parser(stmt.clone()).map(Stmt::ToolDef),   // function name { }
506            if_parser(stmt.clone()).map(Stmt::If),
507            for_parser(stmt.clone()).map(Stmt::For),
508            while_parser(stmt.clone()).map(Stmt::While),
509            case_parser(stmt.clone()).map(Stmt::Case),
510            break_stmt,
511            continue_stmt,
512            return_stmt,
513            exit_stmt,
514            test_expr_stmt_parser().map(Stmt::Test),
515            // Note: 'true' and 'false' are handled by command_parser/pipeline_parser
516            pipeline_parser().map(|p| {
517                // Unwrap single-command pipelines without background and without redirects
518                if p.commands.len() == 1 && !p.background {
519                    // Only unwrap if no redirects - redirects require pipeline processing
520                    if p.commands[0].redirects.is_empty() {
521                        // Safe: we just checked len == 1
522                        match p.commands.into_iter().next() {
523                            Some(cmd) => Stmt::Command(cmd),
524                            None => Stmt::Empty, // unreachable but safe
525                        }
526                    } else {
527                        Stmt::Pipeline(p)
528                    }
529                } else {
530                    Stmt::Pipeline(p)
531                }
532            }),
533        ))
534        .boxed();
535
536        // Statement chaining with precedence: && binds tighter than ||
537        // and_chain = base_stmt { "&&" base_stmt }
538        // or_chain  = and_chain { "||" and_chain }
539        let and_chain = base_statement
540            .clone()
541            .foldl(
542                just(Token::And).ignore_then(base_statement).repeated(),
543                |left, right| Stmt::AndChain {
544                    left: Box::new(left),
545                    right: Box::new(right),
546                },
547            );
548
549        and_chain
550            .clone()
551            .foldl(
552                just(Token::Or).ignore_then(and_chain).repeated(),
553                |left, right| Stmt::OrChain {
554                    left: Box::new(left),
555                    right: Box::new(right),
556                },
557            )
558            .then_ignore(terminator)
559    })
560}
561
562/// Assignment: `NAME=value` (bash-style) or `local NAME = value` (scoped)
563fn assignment_parser<'tokens, I>(
564) -> impl Parser<'tokens, I, Assignment, extra::Err<Rich<'tokens, Token, Span>>> + Clone
565where
566    I: ValueInput<'tokens, Token = Token, Span = Span>,
567{
568    // local NAME = value (with spaces around =)
569    let local_assignment = just(Token::Local)
570        .ignore_then(ident_parser())
571        .then_ignore(just(Token::Eq))
572        .then(expr_parser())
573        .map(|(name, value)| Assignment {
574            name,
575            value,
576            local: true,
577        });
578
579    // Bash-style: NAME=value (no spaces around =)
580    // The lexer produces IDENT EQ EXPR, so we parse it here
581    let bash_assignment = ident_parser()
582        .then_ignore(just(Token::Eq))
583        .then(expr_parser())
584        .map(|(name, value)| Assignment {
585            name,
586            value,
587            local: false,
588        });
589
590    choice((local_assignment, bash_assignment))
591        .labelled("assignment")
592        .boxed()
593}
594
595/// POSIX-style function: `name() { body }`
596///
597/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
598fn posix_function_parser<'tokens, I, S>(
599    stmt: S,
600) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
601where
602    I: ValueInput<'tokens, Token = Token, Span = Span>,
603    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
604{
605    ident_parser()
606        .then_ignore(just(Token::LParen))
607        .then_ignore(just(Token::RParen))
608        .then_ignore(just(Token::LBrace))
609        .then_ignore(just(Token::Newline).repeated())
610        .then(
611            stmt.repeated()
612                .collect::<Vec<_>>()
613                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
614        )
615        .then_ignore(just(Token::Newline).repeated())
616        .then_ignore(just(Token::RBrace))
617        .map(|(name, body)| ToolDef { name, params: vec![], body })
618        .labelled("POSIX function")
619        .boxed()
620}
621
622/// Bash-style function: `function name { body }` (without parens)
623///
624/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
625fn bash_function_parser<'tokens, I, S>(
626    stmt: S,
627) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
628where
629    I: ValueInput<'tokens, Token = Token, Span = Span>,
630    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
631{
632    just(Token::Function)
633        .ignore_then(ident_parser())
634        .then_ignore(just(Token::LBrace))
635        .then_ignore(just(Token::Newline).repeated())
636        .then(
637            stmt.repeated()
638                .collect::<Vec<_>>()
639                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
640        )
641        .then_ignore(just(Token::Newline).repeated())
642        .then_ignore(just(Token::RBrace))
643        .map(|(name, body)| ToolDef { name, params: vec![], body })
644        .labelled("bash function")
645        .boxed()
646}
647
648/// If statement: `if COND; then STMTS [elif COND; then STMTS]* [else STMTS] fi`
649///
650/// elif clauses are desugared to nested if/else:
651///   `if A; then X elif B; then Y else Z fi`
652/// becomes:
653///   `if A; then X else { if B; then Y else Z fi } fi`
654fn if_parser<'tokens, I, S>(
655    stmt: S,
656) -> impl Parser<'tokens, I, IfStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
657where
658    I: ValueInput<'tokens, Token = Token, Span = Span>,
659    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
660{
661    // Parse a single branch: condition + then statements
662    let branch = condition_parser()
663        .then_ignore(just(Token::Semi).or_not())
664        .then_ignore(just(Token::Newline).repeated())
665        .then_ignore(just(Token::Then))
666        .then_ignore(just(Token::Newline).repeated())
667        .then(
668            stmt.clone()
669                .repeated()
670                .collect::<Vec<_>>()
671                .map(|stmts: Vec<Stmt>| {
672                    stmts
673                        .into_iter()
674                        .filter(|s| !matches!(s, Stmt::Empty))
675                        .collect::<Vec<_>>()
676                }),
677        );
678
679    // Parse elif branches: `elif COND; then STMTS`
680    let elif_branch = just(Token::Elif)
681        .ignore_then(condition_parser())
682        .then_ignore(just(Token::Semi).or_not())
683        .then_ignore(just(Token::Newline).repeated())
684        .then_ignore(just(Token::Then))
685        .then_ignore(just(Token::Newline).repeated())
686        .then(
687            stmt.clone()
688                .repeated()
689                .collect::<Vec<_>>()
690                .map(|stmts: Vec<Stmt>| {
691                    stmts
692                        .into_iter()
693                        .filter(|s| !matches!(s, Stmt::Empty))
694                        .collect::<Vec<_>>()
695                }),
696        );
697
698    // Parse else branch: `else STMTS`
699    let else_branch = just(Token::Else)
700        .ignore_then(just(Token::Newline).repeated())
701        .ignore_then(stmt.repeated().collect::<Vec<_>>())
702        .map(|stmts: Vec<Stmt>| {
703            stmts
704                .into_iter()
705                .filter(|s| !matches!(s, Stmt::Empty))
706                .collect::<Vec<_>>()
707        });
708
709    just(Token::If)
710        .ignore_then(branch)
711        .then(elif_branch.repeated().collect::<Vec<_>>())
712        .then(else_branch.or_not())
713        .then_ignore(just(Token::Fi))
714        .map(|(((condition, then_branch), elif_branches), else_branch)| {
715            // Build nested if/else structure from elif branches
716            build_if_chain(condition, then_branch, elif_branches, else_branch)
717        })
718        .labelled("if statement")
719        .boxed()
720}
721
722/// Build a nested IfStmt chain from elif branches.
723///
724/// Transforms:
725///   if A then X elif B then Y elif C then Z else W fi
726/// Into:
727///   IfStmt { cond: A, then: X, else: Some([IfStmt { cond: B, then: Y, else: Some([IfStmt { cond: C, then: Z, else: Some(W) }]) }]) }
728fn build_if_chain(
729    condition: Expr,
730    then_branch: Vec<Stmt>,
731    mut elif_branches: Vec<(Expr, Vec<Stmt>)>,
732    else_branch: Option<Vec<Stmt>>,
733) -> IfStmt {
734    if elif_branches.is_empty() {
735        // No elif, just if/else
736        IfStmt {
737            condition: Box::new(condition),
738            then_branch,
739            else_branch,
740        }
741    } else {
742        // Pop the first elif and recursively build the rest
743        let (elif_cond, elif_then) = elif_branches.remove(0);
744        let nested_if = build_if_chain(elif_cond, elif_then, elif_branches, else_branch);
745        IfStmt {
746            condition: Box::new(condition),
747            then_branch,
748            else_branch: Some(vec![Stmt::If(nested_if)]),
749        }
750    }
751}
752
753/// For loop: `for VAR in ITEMS; do STMTS done`
754fn for_parser<'tokens, I, S>(
755    stmt: S,
756) -> impl Parser<'tokens, I, ForLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
757where
758    I: ValueInput<'tokens, Token = Token, Span = Span>,
759    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
760{
761    just(Token::For)
762        .ignore_then(ident_parser())
763        .then_ignore(just(Token::In))
764        .then(expr_parser().repeated().at_least(1).collect::<Vec<_>>())
765        .then_ignore(just(Token::Semi).or_not())
766        .then_ignore(just(Token::Newline).repeated())
767        .then_ignore(just(Token::Do))
768        .then_ignore(just(Token::Newline).repeated())
769        .then(
770            stmt.repeated()
771                .collect::<Vec<_>>()
772                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
773        )
774        .then_ignore(just(Token::Done))
775        .map(|((variable, items), body)| ForLoop {
776            variable,
777            items,
778            body,
779        })
780        .labelled("for loop")
781        .boxed()
782}
783
784/// While loop: `while condition; do ...; done`
785fn while_parser<'tokens, I, S>(
786    stmt: S,
787) -> impl Parser<'tokens, I, WhileLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
788where
789    I: ValueInput<'tokens, Token = Token, Span = Span>,
790    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
791{
792    just(Token::While)
793        .ignore_then(condition_parser())
794        .then_ignore(just(Token::Semi).or_not())
795        .then_ignore(just(Token::Newline).repeated())
796        .then_ignore(just(Token::Do))
797        .then_ignore(just(Token::Newline).repeated())
798        .then(
799            stmt.repeated()
800                .collect::<Vec<_>>()
801                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
802        )
803        .then_ignore(just(Token::Done))
804        .map(|(condition, body)| WhileLoop {
805            condition: Box::new(condition),
806            body,
807        })
808        .labelled("while loop")
809        .boxed()
810}
811
812/// Case statement: `case expr in pattern) commands ;; esac`
813///
814/// Supports:
815/// - Single patterns: `pattern) commands ;;`
816/// - Multiple patterns: `pattern1|pattern2) commands ;;`
817/// - Optional leading `(` before patterns: `(pattern) commands ;;`
818fn case_parser<'tokens, I, S>(
819    stmt: S,
820) -> impl Parser<'tokens, I, CaseStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
821where
822    I: ValueInput<'tokens, Token = Token, Span = Span>,
823    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
824{
825    // Pattern part: individual tokens that make up a glob pattern
826    // e.g., "*.rs" is Star + Dot + Ident("rs")
827    let pattern_part = choice((
828        select! { Token::GlobWord(s) => s },
829        select! { Token::Ident(s) => s },
830        select! { Token::String(s) => s },
831        select! { Token::SingleString(s) => s },
832        select! { Token::Int(n) => n.to_string() },
833        select! { Token::Star => "*".to_string() },
834        select! { Token::Question => "?".to_string() },
835        select! { Token::Dot => ".".to_string() },
836        select! { Token::DotDot => "..".to_string() },
837        select! { Token::Tilde => "~".to_string() },
838        select! { Token::TildePath(s) => s },
839        select! { Token::RelativePath(s) => s },
840        select! { Token::DotSlashPath(s) => s },
841        select! { Token::Path(p) => p },
842        select! { Token::VarRef(v) => v },
843        select! { Token::SimpleVarRef(v) => format!("${}", v) },
844        // Character class: [a-z], [!abc], [^abc], etc.
845        just(Token::LBracket)
846            .ignore_then(
847                choice((
848                    select! { Token::Ident(s) => s },
849                    select! { Token::Int(n) => n.to_string() },
850                    just(Token::Colon).to(":".to_string()),
851                    // Negation: ! or ^ at start of char class
852                    just(Token::Bang).to("!".to_string()),
853                    // Range like a-z
854                    select! { Token::ShortFlag(s) => format!("-{}", s) },
855                ))
856                .repeated()
857                .at_least(1)
858                .collect::<Vec<String>>()
859            )
860            .then_ignore(just(Token::RBracket))
861            .map(|parts| format!("[{}]", parts.join(""))),
862        // Brace expansion: {a,b,c} or {js,ts}
863        just(Token::LBrace)
864            .ignore_then(
865                choice((
866                    select! { Token::Ident(s) => s },
867                    select! { Token::Int(n) => n.to_string() },
868                ))
869                .separated_by(just(Token::Comma))
870                .at_least(1)
871                .collect::<Vec<String>>()
872            )
873            .then_ignore(just(Token::RBrace))
874            .map(|parts| format!("{{{}}}", parts.join(","))),
875    ));
876
877    // A complete pattern is one or more pattern parts joined together
878    // e.g., "*.rs" = Star + Dot + Ident
879    let pattern = pattern_part
880        .repeated()
881        .at_least(1)
882        .collect::<Vec<String>>()
883        .map(|parts| parts.join(""))
884        .labelled("case pattern");
885
886    // Multiple patterns separated by pipe: `pattern1 | pattern2`
887    let patterns = pattern
888        .separated_by(just(Token::Pipe))
889        .at_least(1)
890        .collect::<Vec<String>>()
891        .labelled("case patterns");
892
893    // Branch: `[( ] patterns ) commands ;;`
894    let branch = just(Token::LParen)
895        .or_not()
896        .ignore_then(just(Token::Newline).repeated())
897        .ignore_then(patterns)
898        .then_ignore(just(Token::RParen))
899        .then_ignore(just(Token::Newline).repeated())
900        .then(
901            stmt.clone()
902                .repeated()
903                .collect::<Vec<_>>()
904                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
905        )
906        .then_ignore(just(Token::DoubleSemi))
907        .then_ignore(just(Token::Newline).repeated())
908        .map(|(patterns, body)| CaseBranch { patterns, body })
909        .labelled("case branch");
910
911    just(Token::Case)
912        .ignore_then(expr_parser())
913        .then_ignore(just(Token::In))
914        .then_ignore(just(Token::Newline).repeated())
915        .then(branch.repeated().collect::<Vec<_>>())
916        .then_ignore(just(Token::Esac))
917        .map(|(expr, branches)| CaseStmt { expr, branches })
918        .labelled("case statement")
919        .boxed()
920}
921
922/// Pipeline: `cmd | cmd | cmd [&]`
923fn pipeline_parser<'tokens, I>(
924) -> impl Parser<'tokens, I, Pipeline, extra::Err<Rich<'tokens, Token, Span>>> + Clone
925where
926    I: ValueInput<'tokens, Token = Token, Span = Span>,
927{
928    command_parser()
929        .separated_by(just(Token::Pipe))
930        .at_least(1)
931        .collect::<Vec<_>>()
932        .then(just(Token::Amp).or_not())
933        .map(|(commands, bg)| Pipeline {
934            commands,
935            background: bg.is_some(),
936        })
937        .labelled("pipeline")
938        .boxed()
939}
940
941/// Command: `name args... [redirects...]`
942/// Command names can be identifiers, 'true', 'false', or '.' (source alias).
943fn command_parser<'tokens, I>(
944) -> impl Parser<'tokens, I, Command, extra::Err<Rich<'tokens, Token, Span>>> + Clone
945where
946    I: ValueInput<'tokens, Token = Token, Span = Span>,
947{
948    // Command name can be an identifier, path, 'true', 'false', '.' (source alias), or ./path
949    let command_name = choice((
950        ident_parser(),
951        path_parser(),
952        select! { Token::DotSlashPath(s) => s },
953        just(Token::True).to("true".to_string()),
954        just(Token::False).to("false".to_string()),
955        just(Token::Dot).to(".".to_string()),
956    ));
957
958    command_name
959        .then(args_list_parser())
960        .then(redirect_parser().repeated().collect::<Vec<_>>())
961        .map(|((name, args), redirects)| Command {
962            name,
963            args,
964            redirects,
965        })
966        .labelled("command")
967        .boxed()
968}
969
970/// Arguments list parser that handles `--` flag terminator.
971///
972/// After `--`, all subsequent flags are converted to positional string arguments.
973fn args_list_parser<'tokens, I>(
974) -> impl Parser<'tokens, I, Vec<Arg>, extra::Err<Rich<'tokens, Token, Span>>> + Clone
975where
976    I: ValueInput<'tokens, Token = Token, Span = Span>,
977{
978    // Arguments before `--` (normal parsing)
979    let pre_dash = arg_before_double_dash_parser()
980        .repeated()
981        .collect::<Vec<_>>();
982
983    // The `--` marker itself
984    let double_dash = select! {
985        Token::DoubleDash => Arg::DoubleDash,
986    };
987
988    // Arguments after `--` (flags become positional strings)
989    let post_dash_arg = choice((
990        // Flags become positional strings
991        select! {
992            Token::ShortFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("-{}", name)))),
993            Token::LongFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("--{}", name)))),
994        },
995        // Everything else stays the same
996        primary_expr_parser().map(Arg::Positional),
997    ));
998
999    let post_dash = post_dash_arg.repeated().collect::<Vec<_>>();
1000
1001    // Combine: args_before ++ [--] ++ args_after
1002    pre_dash
1003        .then(double_dash.then(post_dash).or_not())
1004        .map(|(mut args, maybe_dd)| {
1005            if let Some((dd, post)) = maybe_dd {
1006                args.push(dd);
1007                args.extend(post);
1008            }
1009            args
1010        })
1011}
1012
1013/// Argument parser for arguments before `--` (normal flag handling).
1014fn arg_before_double_dash_parser<'tokens, I>(
1015) -> impl Parser<'tokens, I, Arg, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1016where
1017    I: ValueInput<'tokens, Token = Token, Span = Span>,
1018{
1019    // Long flag with value: --name=value
1020    let long_flag_with_value = select! {
1021        Token::LongFlag(name) => name,
1022    }
1023    .then_ignore(just(Token::Eq))
1024    .then(primary_expr_parser())
1025    .map(|(key, value)| Arg::Named { key, value });
1026
1027    // Boolean long flag: --name
1028    let long_flag = select! {
1029        Token::LongFlag(name) => Arg::LongFlag(name),
1030    };
1031
1032    // Boolean short flag: -x
1033    let short_flag = select! {
1034        Token::ShortFlag(name) => Arg::ShortFlag(name),
1035    };
1036
1037    // Named argument: name=value (must not have spaces around =)
1038    // We use map_with to capture spans and validate adjacency
1039    let named = select! {
1040        Token::Ident(s) => s,
1041    }
1042    .map_with(|s, e| -> (String, Span) { (s, e.span()) })
1043    .then(just(Token::Eq).map_with(|_, e| -> Span { e.span() }))
1044    .then(primary_expr_parser().map_with(|expr, e| -> (Expr, Span) { (expr, e.span()) }))
1045    .try_map(|(((key, key_span), eq_span), (value, value_span)): (((String, Span), Span), (Expr, Span)), span| {
1046        // Check that key ends where = starts and = ends where value starts
1047        if key_span.end != eq_span.start || eq_span.end != value_span.start {
1048            Err(Rich::custom(
1049                span,
1050                "named argument must not have spaces around '=' (use 'key=value' not 'key = value')",
1051            ))
1052        } else {
1053            Ok(Arg::Named { key, value })
1054        }
1055    });
1056
1057    // Positional argument
1058    let positional = primary_expr_parser().map(Arg::Positional);
1059
1060    // Order matters: try more specific patterns first
1061    // Note: DoubleDash is NOT included here - it's handled by args_list_parser
1062    choice((
1063        long_flag_with_value,
1064        long_flag,
1065        short_flag,
1066        named,
1067        positional,
1068    ))
1069    .boxed()
1070}
1071
1072/// Redirect: `> file`, `>> file`, `< file`, `<< heredoc`, `2> file`, `&> file`, `2>&1`
1073fn redirect_parser<'tokens, I>(
1074) -> impl Parser<'tokens, I, Redirect, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1075where
1076    I: ValueInput<'tokens, Token = Token, Span = Span>,
1077{
1078    // Regular redirects: >, >>, <, 2>, &>
1079    let regular_redirect = select! {
1080        Token::GtGt => RedirectKind::StdoutAppend,
1081        Token::Gt => RedirectKind::StdoutOverwrite,
1082        Token::Lt => RedirectKind::Stdin,
1083        Token::Stderr => RedirectKind::Stderr,
1084        Token::Both => RedirectKind::Both,
1085    }
1086    .then(primary_expr_parser())
1087    .map(|(kind, target)| Redirect { kind, target });
1088
1089    // Here-doc redirect: << content
1090    // Quoted delimiters (<<'EOF' or <<"EOF") produce literal heredocs (no expansion).
1091    // Unquoted delimiters produce interpolated heredocs (variables are expanded).
1092    let heredoc_redirect = just(Token::HereDocStart)
1093        .ignore_then(select! { Token::HereDoc(data) => data })
1094        .map(|data: HereDocData| {
1095            let target = if data.literal {
1096                Expr::Literal(Value::String(data.content))
1097            } else {
1098                let parts = parse_interpolated_string(&data.content);
1099                // If there's only one literal part, simplify to Expr::Literal
1100                if parts.len() == 1 {
1101                    if let StringPart::Literal(text) = &parts[0] {
1102                        return Redirect {
1103                            kind: RedirectKind::HereDoc,
1104                            target: Expr::Literal(Value::String(text.clone())),
1105                        };
1106                    }
1107                }
1108                Expr::Interpolated(parts)
1109            };
1110            Redirect {
1111                kind: RedirectKind::HereDoc,
1112                target,
1113            }
1114        });
1115
1116    // Merge stderr to stdout: 2>&1 (no target needed - implicit)
1117    let merge_stderr_redirect = just(Token::StderrToStdout)
1118        .map(|_| Redirect {
1119            kind: RedirectKind::MergeStderr,
1120            // Target is unused for MergeStderr, but we need something
1121            target: Expr::Literal(Value::Null),
1122        });
1123
1124    // Merge stdout to stderr: 1>&2 or >&2 (no target needed - implicit)
1125    let merge_stdout_redirect = choice((
1126        just(Token::StdoutToStderr),
1127        just(Token::StdoutToStderr2),
1128    ))
1129    .map(|_| Redirect {
1130        kind: RedirectKind::MergeStdout,
1131        // Target is unused for MergeStdout, but we need something
1132        target: Expr::Literal(Value::Null),
1133    });
1134
1135    choice((heredoc_redirect, merge_stderr_redirect, merge_stdout_redirect, regular_redirect))
1136        .labelled("redirect")
1137        .boxed()
1138}
1139
1140/// Test expression parser for `[[ ... ]]` syntax.
1141///
1142/// Supports:
1143/// - File tests: `[[ -f path ]]`, `[[ -d path ]]`, etc.
1144/// - String tests: `[[ -z str ]]`, `[[ -n str ]]`
1145/// - Comparisons: `[[ $X == "value" ]]`, `[[ $NUM -gt 5 ]]`
1146/// - Compound: `[[ -f a && -d b ]]`, `[[ -z x || -n y ]]`, `[[ ! -f file ]]`
1147///
1148/// Precedence (highest to lowest): `!` > `&&` > `||`
1149fn test_expr_stmt_parser<'tokens, I>(
1150) -> impl Parser<'tokens, I, TestExpr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1151where
1152    I: ValueInput<'tokens, Token = Token, Span = Span>,
1153{
1154    // File test operators: -e, -f, -d, -r, -w, -x
1155    let file_test_op = select! {
1156        Token::ShortFlag(s) if s == "e" => FileTestOp::Exists,
1157        Token::ShortFlag(s) if s == "f" => FileTestOp::IsFile,
1158        Token::ShortFlag(s) if s == "d" => FileTestOp::IsDir,
1159        Token::ShortFlag(s) if s == "r" => FileTestOp::Readable,
1160        Token::ShortFlag(s) if s == "w" => FileTestOp::Writable,
1161        Token::ShortFlag(s) if s == "x" => FileTestOp::Executable,
1162    };
1163
1164    // String test operators: -z, -n
1165    let string_test_op = select! {
1166        Token::ShortFlag(s) if s == "z" => StringTestOp::IsEmpty,
1167        Token::ShortFlag(s) if s == "n" => StringTestOp::IsNonEmpty,
1168    };
1169
1170    // Comparison operators: =, ==, !=, =~, !~, >, <, >=, <=, -gt, -lt, -ge, -le, -eq, -ne
1171    // Note: = and == are equivalent inside [[ ]] (matching bash behavior)
1172    let cmp_op = choice((
1173        just(Token::EqEq).to(TestCmpOp::Eq),
1174        just(Token::Eq).to(TestCmpOp::Eq),
1175        just(Token::NotEq).to(TestCmpOp::NotEq),
1176        just(Token::Match).to(TestCmpOp::Match),
1177        just(Token::NotMatch).to(TestCmpOp::NotMatch),
1178        just(Token::Gt).to(TestCmpOp::Gt),
1179        just(Token::Lt).to(TestCmpOp::Lt),
1180        just(Token::GtEq).to(TestCmpOp::GtEq),
1181        just(Token::LtEq).to(TestCmpOp::LtEq),
1182        select! { Token::ShortFlag(s) if s == "eq" => TestCmpOp::Eq },
1183        select! { Token::ShortFlag(s) if s == "ne" => TestCmpOp::NotEq },
1184        select! { Token::ShortFlag(s) if s == "gt" => TestCmpOp::Gt },
1185        select! { Token::ShortFlag(s) if s == "lt" => TestCmpOp::Lt },
1186        select! { Token::ShortFlag(s) if s == "ge" => TestCmpOp::GtEq },
1187        select! { Token::ShortFlag(s) if s == "le" => TestCmpOp::LtEq },
1188    ));
1189
1190    // File test: -f path
1191    let file_test = file_test_op
1192        .then(primary_expr_parser())
1193        .map(|(op, path)| TestExpr::FileTest {
1194            op,
1195            path: Box::new(path),
1196        });
1197
1198    // String test: -z str
1199    let string_test = string_test_op
1200        .then(primary_expr_parser())
1201        .map(|(op, value)| TestExpr::StringTest {
1202            op,
1203            value: Box::new(value),
1204        });
1205
1206    // Comparison: $X == "value" or $NUM -gt 5
1207    let comparison = primary_expr_parser()
1208        .then(cmp_op)
1209        .then(primary_expr_parser())
1210        .map(|((left, op), right)| TestExpr::Comparison {
1211            left: Box::new(left),
1212            op,
1213            right: Box::new(right),
1214        });
1215
1216    // Primary test expression (atomic - no compound operators)
1217    let primary_test = choice((file_test, string_test, comparison));
1218
1219    // Build compound expressions with proper precedence:
1220    // Grammar:
1221    //   test_expr = or_expr
1222    //   or_expr   = and_expr { "||" and_expr }
1223    //   and_expr  = unary_expr { "&&" unary_expr }
1224    //   unary_expr = "!" unary_expr | primary_test
1225    //
1226    // Precedence: ! (highest) > && > ||
1227
1228    // Use recursive for the unary NOT operator
1229    let compound_test = recursive(|compound| {
1230        // Unary NOT: ! expr (can be chained: ! ! expr)
1231        let not_expr = just(Token::Bang)
1232            .ignore_then(compound.clone())
1233            .map(|expr| TestExpr::Not { expr: Box::new(expr) });
1234
1235        // Unary level: ! or primary
1236        let unary = choice((not_expr, primary_test.clone()));
1237
1238        // AND level: unary && unary && ...
1239        let and_expr = unary.clone().foldl(
1240            just(Token::And).ignore_then(unary).repeated(),
1241            |left, right| TestExpr::And {
1242                left: Box::new(left),
1243                right: Box::new(right),
1244            },
1245        );
1246
1247        // OR level: and_expr || and_expr || ...
1248        and_expr.clone().foldl(
1249            just(Token::Or).ignore_then(and_expr).repeated(),
1250            |left, right| TestExpr::Or {
1251                left: Box::new(left),
1252                right: Box::new(right),
1253            },
1254        )
1255    });
1256
1257    // [[ ]] is two consecutive bracket tokens (not a single TestStart token)
1258    // to avoid conflicts with nested array syntax like [[1, 2], [3, 4]]
1259    just(Token::LBracket)
1260        .then(just(Token::LBracket))
1261        .ignore_then(compound_test)
1262        .then_ignore(just(Token::RBracket).then(just(Token::RBracket)))
1263        .labelled("test expression")
1264        .boxed()
1265}
1266
1267/// Condition parser: supports [[ ]] test expressions and commands with && / || chaining.
1268///
1269/// Shell semantics: conditions are commands whose exit codes determine truthiness.
1270/// - `if true; then` → runs `true` builtin, exit code 0 = truthy
1271/// - `if grep -q pattern file; then` → runs command, checks exit code
1272/// - `if a && b; then` → runs `a`, if exit 0, runs `b`
1273///
1274/// Use `[[ ]]` for comparisons: `if [[ $X -gt 5 ]]; then`
1275///
1276/// Grammar (with precedence - && binds tighter than ||):
1277///   condition = or_expr
1278///   or_expr   = and_expr { "||" and_expr }
1279///   and_expr  = base { "&&" base }
1280///   base      = test_expr | command
1281fn condition_parser<'tokens, I>(
1282) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1283where
1284    I: ValueInput<'tokens, Token = Token, Span = Span>,
1285{
1286    // [[ ]] test expression - wrap as Expr::Test
1287    let test_expr_condition = test_expr_stmt_parser().map(|test| Expr::Test(Box::new(test)));
1288
1289    // Command as condition (includes true/false as command names)
1290    // The command's exit code determines truthiness (0 = true, non-zero = false)
1291    let command_condition = command_parser().map(Expr::Command);
1292
1293    // Base: test expr OR command
1294    let base = choice((test_expr_condition, command_condition));
1295
1296    // && has higher precedence than ||
1297    // First chain with && (higher precedence)
1298    let and_expr = base.clone().foldl(
1299        just(Token::And).ignore_then(base).repeated(),
1300        |left, right| Expr::BinaryOp {
1301            left: Box::new(left),
1302            op: BinaryOp::And,
1303            right: Box::new(right),
1304        },
1305    );
1306
1307    // Then chain with || (lower precedence)
1308    and_expr
1309        .clone()
1310        .foldl(
1311            just(Token::Or).ignore_then(and_expr).repeated(),
1312            |left, right| Expr::BinaryOp {
1313                left: Box::new(left),
1314                op: BinaryOp::Or,
1315                right: Box::new(right),
1316            },
1317        )
1318        .labelled("condition")
1319        .boxed()
1320}
1321
/// Expression parser.
///
/// Currently a thin wrapper that returns `primary_expr_parser()` unchanged:
/// it does not parse `&&` / `||` binary operators itself.
fn expr_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = Span>,
{
    // For now, just primary expressions. Can extend for && / || later if needed.
    primary_expr_parser()
}
1331
1332/// Primary expression: literal, variable reference, command substitution, or bare identifier.
1333///
1334/// Uses `recursive` to support nested command substitution like `$(echo $(date))`.
1335fn primary_expr_parser<'tokens, I>(
1336) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1337where
1338    I: ValueInput<'tokens, Token = Token, Span = Span>,
1339{
1340    // Positional parameters: $0-$9, $@, $#, ${#VAR}, $?, $$
1341    let positional = select! {
1342        Token::Positional(n) => Expr::Positional(n),
1343        Token::AllArgs => Expr::AllArgs,
1344        Token::ArgCount => Expr::ArgCount,
1345        Token::VarLength(name) => Expr::VarLength(name),
1346        Token::LastExitCode => Expr::LastExitCode,
1347        Token::CurrentPid => Expr::CurrentPid,
1348    };
1349
1350    // Arithmetic expression: $((expr)) - preprocessed into Arithmetic token
1351    let arithmetic = select! {
1352        Token::Arithmetic(expr_str) => Expr::Arithmetic(expr_str),
1353    };
1354
1355    // Keywords that can also be used as barewords in argument position
1356    // (e.g., `echo done` should work even though `done` is a keyword)
1357    let keyword_as_bareword = select! {
1358        Token::Done => "done",
1359        Token::Fi => "fi",
1360        Token::Then => "then",
1361        Token::Else => "else",
1362        Token::Elif => "elif",
1363        Token::In => "in",
1364        Token::Do => "do",
1365        Token::Esac => "esac",
1366    }
1367    .map(|s| Expr::Literal(Value::String(s.to_string())));
1368
1369    // Bare words starting with + or - (e.g., date +%s, cat -)
1370    let plus_minus_bare = select! {
1371        Token::PlusBare(s) => Expr::Literal(Value::String(s)),
1372        Token::MinusBare(s) => Expr::Literal(Value::String(s)),
1373        Token::MinusAlone => Expr::Literal(Value::String("-".to_string())),
1374    };
1375
1376    // Glob patterns: merged GlobWord tokens and bare Star/Question
1377    let glob_pattern = select! {
1378        Token::GlobWord(s) => Expr::GlobPattern(s),
1379        Token::Star => Expr::GlobPattern("*".to_string()),
1380        Token::Question => Expr::GlobPattern("?".to_string()),
1381    };
1382
1383    recursive(|expr| {
1384        choice((
1385            positional,
1386            arithmetic,
1387            cmd_subst_parser(expr.clone()),
1388            var_expr_parser(),
1389            interpolated_string_parser(),
1390            literal_parser().map(Expr::Literal),
1391            // Glob patterns before ident (GlobWord is more specific)
1392            glob_pattern,
1393            // Bare identifiers become string literals (shell barewords)
1394            ident_parser().map(|s| Expr::Literal(Value::String(s))),
1395            // Absolute paths become string literals
1396            path_parser().map(|s| Expr::Literal(Value::String(s))),
1397            // Bare words starting with + or - (date +%s, cat -)
1398            // Shell navigation tokens
1399            select! {
1400                Token::DotDot => Expr::Literal(Value::String("..".into())),
1401                Token::Tilde => Expr::Literal(Value::String("~".into())),
1402                Token::TildePath(s) => Expr::Literal(Value::String(s)),
1403                Token::RelativePath(s) => Expr::Literal(Value::String(s)),
1404                Token::DotSlashPath(s) => Expr::Literal(Value::String(s)),
1405            },
1406            plus_minus_bare,
1407            // Keywords can be used as barewords in argument position
1408            keyword_as_bareword,
1409        ))
1410        .labelled("expression")
1411    })
1412    .boxed()
1413}
1414
1415/// Variable reference: `${VAR}`, `${VAR.field}`, `${VAR:-default}`, or `$VAR` (simple form).
1416/// Returns Expr directly to support both VarRef and VarWithDefault.
1417fn var_expr_parser<'tokens, I>(
1418) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1419where
1420    I: ValueInput<'tokens, Token = Token, Span = Span>,
1421{
1422    select! {
1423        Token::VarRef(raw) => parse_var_expr(&raw),
1424        Token::SimpleVarRef(name) => Expr::VarRef(VarPath::simple(name)),
1425    }
1426    .labelled("variable reference")
1427}
1428
1429/// Command substitution: `$(pipeline)` - runs a pipeline and returns its result.
1430///
1431/// Accepts a recursive expression parser to support nested command substitution.
1432fn cmd_subst_parser<'tokens, I, E>(
1433    expr: E,
1434) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1435where
1436    I: ValueInput<'tokens, Token = Token, Span = Span>,
1437    E: Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone,
1438{
1439    // Argument parser using the recursive expression parser
1440    // Long flag with value: --name=value
1441    let long_flag_with_value = select! {
1442        Token::LongFlag(name) => name,
1443    }
1444    .then_ignore(just(Token::Eq))
1445    .then(expr.clone())
1446    .map(|(key, value)| Arg::Named { key, value });
1447
1448    // Boolean long flag: --name
1449    let long_flag = select! {
1450        Token::LongFlag(name) => Arg::LongFlag(name),
1451    };
1452
1453    // Boolean short flag: -x
1454    let short_flag = select! {
1455        Token::ShortFlag(name) => Arg::ShortFlag(name),
1456    };
1457
1458    // Named argument: name=value
1459    let named = ident_parser()
1460        .then_ignore(just(Token::Eq))
1461        .then(expr.clone())
1462        .map(|(key, value)| Arg::Named { key, value });
1463
1464    // Positional argument
1465    let positional = expr.map(Arg::Positional);
1466
1467    let arg = choice((
1468        long_flag_with_value,
1469        long_flag,
1470        short_flag,
1471        named,
1472        positional,
1473    ));
1474
1475    // Command name parser - accepts identifiers and boolean keywords (true/false are builtins)
1476    let command_name = choice((
1477        ident_parser(),
1478        just(Token::True).to("true".to_string()),
1479        just(Token::False).to("false".to_string()),
1480    ));
1481
1482    // Command parser
1483    let command = command_name
1484        .then(arg.repeated().collect::<Vec<_>>())
1485        .map(|(name, args)| Command {
1486            name,
1487            args,
1488            redirects: vec![],
1489        });
1490
1491    // Pipeline parser
1492    let pipeline = command
1493        .separated_by(just(Token::Pipe))
1494        .at_least(1)
1495        .collect::<Vec<_>>()
1496        .map(|commands| Pipeline {
1497            commands,
1498            background: false,
1499        });
1500
1501    just(Token::CmdSubstStart)
1502        .ignore_then(pipeline)
1503        .then_ignore(just(Token::RParen))
1504        .map(|pipeline| Expr::CommandSubst(Box::new(pipeline)))
1505        .labelled("command substitution")
1506}
1507
1508/// String parser - handles double-quoted strings (with interpolation) and single-quoted (literal).
1509fn interpolated_string_parser<'tokens, I>(
1510) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1511where
1512    I: ValueInput<'tokens, Token = Token, Span = Span>,
1513{
1514    // Double-quoted string: may contain $VAR or ${VAR} interpolation
1515    let double_quoted = select! {
1516        Token::String(s) => s,
1517    }
1518    .map(|s| {
1519        // Check if string contains interpolation markers (${} or $NAME) or escaped dollars
1520        if s.contains('$') || s.contains("__KAISH_ESCAPED_DOLLAR__") {
1521            // Parse interpolated parts
1522            let parts = parse_interpolated_string(&s);
1523            if parts.len() == 1
1524                && let StringPart::Literal(text) = &parts[0] {
1525                    return Expr::Literal(Value::String(text.clone()));
1526                }
1527            Expr::Interpolated(parts)
1528        } else {
1529            Expr::Literal(Value::String(s))
1530        }
1531    });
1532
1533    // Single-quoted string: literal, no interpolation
1534    let single_quoted = select! {
1535        Token::SingleString(s) => Expr::Literal(Value::String(s)),
1536    };
1537
1538    choice((single_quoted, double_quoted)).labelled("string")
1539}
1540
1541/// Literal value parser (excluding strings, which are handled by interpolated_string_parser).
1542fn literal_parser<'tokens, I>(
1543) -> impl Parser<'tokens, I, Value, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1544where
1545    I: ValueInput<'tokens, Token = Token, Span = Span>,
1546{
1547    choice((
1548        select! {
1549            Token::True => Value::Bool(true),
1550            Token::False => Value::Bool(false),
1551        },
1552        select! {
1553            Token::Int(n) => Value::Int(n),
1554            Token::Float(f) => Value::Float(f),
1555        },
1556    ))
1557    .labelled("literal")
1558    .boxed()
1559}
1560
1561/// Identifier parser.
1562fn ident_parser<'tokens, I>(
1563) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1564where
1565    I: ValueInput<'tokens, Token = Token, Span = Span>,
1566{
1567    select! {
1568        Token::Ident(s) => s,
1569    }
1570    .labelled("identifier")
1571}
1572
1573/// Path parser: matches absolute paths like `/tmp/out`, `/etc/hosts`.
1574fn path_parser<'tokens, I>(
1575) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1576where
1577    I: ValueInput<'tokens, Token = Token, Span = Span>,
1578{
1579    select! {
1580        Token::Path(s) => s,
1581    }
1582    .labelled("path")
1583}
1584
1585#[cfg(test)]
1586mod tests {
1587    use super::*;
1588
1589    #[test]
1590    fn parse_empty() {
1591        let result = parse("");
1592        assert!(result.is_ok());
1593        assert_eq!(result.expect("ok").statements.len(), 0);
1594    }
1595
1596    #[test]
1597    fn parse_newlines_only() {
1598        let result = parse("\n\n\n");
1599        assert!(result.is_ok());
1600    }
1601
1602    #[test]
1603    fn parse_simple_command() {
1604        let result = parse("echo");
1605        assert!(result.is_ok());
1606        let program = result.expect("ok");
1607        assert_eq!(program.statements.len(), 1);
1608        assert!(matches!(&program.statements[0], Stmt::Command(_)));
1609    }
1610
1611    #[test]
1612    fn parse_command_with_string_arg() {
1613        let result = parse(r#"echo "hello""#);
1614        assert!(result.is_ok());
1615        let program = result.expect("ok");
1616        match &program.statements[0] {
1617            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 1),
1618            _ => panic!("expected Command"),
1619        }
1620    }
1621
1622    #[test]
1623    fn parse_assignment() {
1624        let result = parse("X=5");
1625        assert!(result.is_ok());
1626        let program = result.expect("ok");
1627        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
1628    }
1629
1630    #[test]
1631    fn parse_pipeline() {
1632        let result = parse("a | b | c");
1633        assert!(result.is_ok());
1634        let program = result.expect("ok");
1635        match &program.statements[0] {
1636            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
1637            _ => panic!("expected Pipeline"),
1638        }
1639    }
1640
1641    #[test]
1642    fn parse_background_job() {
1643        let result = parse("cmd &");
1644        assert!(result.is_ok());
1645        let program = result.expect("ok");
1646        match &program.statements[0] {
1647            Stmt::Pipeline(p) => assert!(p.background),
1648            _ => panic!("expected Pipeline with background"),
1649        }
1650    }
1651
1652    #[test]
1653    fn parse_if_simple() {
1654        let result = parse("if true; then echo; fi");
1655        assert!(result.is_ok());
1656        let program = result.expect("ok");
1657        assert!(matches!(&program.statements[0], Stmt::If(_)));
1658    }
1659
1660    #[test]
1661    fn parse_if_else() {
1662        let result = parse("if true; then echo; else echo; fi");
1663        assert!(result.is_ok());
1664        let program = result.expect("ok");
1665        match &program.statements[0] {
1666            Stmt::If(if_stmt) => assert!(if_stmt.else_branch.is_some()),
1667            _ => panic!("expected If"),
1668        }
1669    }
1670
1671    #[test]
1672    fn parse_elif_simple() {
1673        let result = parse("if true; then echo a; elif false; then echo b; fi");
1674        assert!(result.is_ok(), "parse failed: {:?}", result);
1675        let program = result.expect("ok");
1676        match &program.statements[0] {
1677            Stmt::If(if_stmt) => {
1678                // elif is desugared to nested if in else
1679                assert!(if_stmt.else_branch.is_some());
1680                let else_branch = if_stmt.else_branch.as_ref().unwrap();
1681                assert_eq!(else_branch.len(), 1);
1682                assert!(matches!(&else_branch[0], Stmt::If(_)));
1683            }
1684            _ => panic!("expected If"),
1685        }
1686    }
1687
1688    #[test]
1689    fn parse_elif_with_else() {
1690        let result = parse("if true; then echo a; elif false; then echo b; else echo c; fi");
1691        assert!(result.is_ok(), "parse failed: {:?}", result);
1692        let program = result.expect("ok");
1693        match &program.statements[0] {
1694            Stmt::If(outer_if) => {
1695                // Check nested structure: if -> elif -> else
1696                let else_branch = outer_if.else_branch.as_ref().expect("outer else");
1697                assert_eq!(else_branch.len(), 1);
1698                match &else_branch[0] {
1699                    Stmt::If(inner_if) => {
1700                        // The inner if (from elif) should have the final else
1701                        assert!(inner_if.else_branch.is_some());
1702                    }
1703                    _ => panic!("expected nested If from elif"),
1704                }
1705            }
1706            _ => panic!("expected If"),
1707        }
1708    }
1709
1710    #[test]
1711    fn parse_multiple_elif() {
1712        // Shell-compatible: use [[ ]] for comparisons
1713        let result = parse(
1714            "if [[ ${X} == 1 ]]; then echo one; elif [[ ${X} == 2 ]]; then echo two; elif [[ ${X} == 3 ]]; then echo three; else echo other; fi",
1715        );
1716        assert!(result.is_ok(), "parse failed: {:?}", result);
1717    }
1718
1719    #[test]
1720    fn parse_for_loop() {
1721        let result = parse("for X in items; do echo; done");
1722        assert!(result.is_ok());
1723        let program = result.expect("ok");
1724        assert!(matches!(&program.statements[0], Stmt::For(_)));
1725    }
1726
1727    #[test]
1728    fn parse_brackets_not_array_literal() {
1729        // Array literals are no longer supported, [ is just a regular char
1730        let result = parse("cmd [1");
1731        // This should fail or parse unexpectedly - arrays are removed
1732        // Just verify we don't crash
1733        let _ = result;
1734    }
1735
1736    #[test]
1737    fn parse_named_arg() {
1738        let result = parse("cmd foo=5");
1739        assert!(result.is_ok());
1740        let program = result.expect("ok");
1741        match &program.statements[0] {
1742            Stmt::Command(cmd) => {
1743                assert_eq!(cmd.args.len(), 1);
1744                assert!(matches!(&cmd.args[0], Arg::Named { .. }));
1745            }
1746            _ => panic!("expected Command"),
1747        }
1748    }
1749
1750    #[test]
1751    fn parse_short_flag() {
1752        let result = parse("ls -l");
1753        assert!(result.is_ok());
1754        let program = result.expect("ok");
1755        match &program.statements[0] {
1756            Stmt::Command(cmd) => {
1757                assert_eq!(cmd.name, "ls");
1758                assert_eq!(cmd.args.len(), 1);
1759                match &cmd.args[0] {
1760                    Arg::ShortFlag(name) => assert_eq!(name, "l"),
1761                    _ => panic!("expected ShortFlag"),
1762                }
1763            }
1764            _ => panic!("expected Command"),
1765        }
1766    }
1767
1768    #[test]
1769    fn parse_long_flag() {
1770        let result = parse("git push --force");
1771        assert!(result.is_ok());
1772        let program = result.expect("ok");
1773        match &program.statements[0] {
1774            Stmt::Command(cmd) => {
1775                assert_eq!(cmd.name, "git");
1776                assert_eq!(cmd.args.len(), 2);
1777                match &cmd.args[0] {
1778                    Arg::Positional(Expr::Literal(Value::String(s))) => assert_eq!(s, "push"),
1779                    _ => panic!("expected Positional push"),
1780                }
1781                match &cmd.args[1] {
1782                    Arg::LongFlag(name) => assert_eq!(name, "force"),
1783                    _ => panic!("expected LongFlag"),
1784                }
1785            }
1786            _ => panic!("expected Command"),
1787        }
1788    }
1789
1790    #[test]
1791    fn parse_long_flag_with_value() {
1792        let result = parse(r#"git commit --message="hello""#);
1793        assert!(result.is_ok());
1794        let program = result.expect("ok");
1795        match &program.statements[0] {
1796            Stmt::Command(cmd) => {
1797                assert_eq!(cmd.name, "git");
1798                assert_eq!(cmd.args.len(), 2);
1799                match &cmd.args[1] {
1800                    Arg::Named { key, value } => {
1801                        assert_eq!(key, "message");
1802                        match value {
1803                            Expr::Literal(Value::String(s)) => assert_eq!(s, "hello"),
1804                            _ => panic!("expected String value"),
1805                        }
1806                    }
1807                    _ => panic!("expected Named from --flag=value"),
1808                }
1809            }
1810            _ => panic!("expected Command"),
1811        }
1812    }
1813
1814    #[test]
1815    fn parse_mixed_flags_and_args() {
1816        let result = parse(r#"git commit -m "message" --amend"#);
1817        assert!(result.is_ok());
1818        let program = result.expect("ok");
1819        match &program.statements[0] {
1820            Stmt::Command(cmd) => {
1821                assert_eq!(cmd.name, "git");
1822                assert_eq!(cmd.args.len(), 4);
1823                // commit (positional)
1824                assert!(matches!(&cmd.args[0], Arg::Positional(_)));
1825                // -m (short flag)
1826                match &cmd.args[1] {
1827                    Arg::ShortFlag(name) => assert_eq!(name, "m"),
1828                    _ => panic!("expected ShortFlag -m"),
1829                }
1830                // "message" (positional)
1831                assert!(matches!(&cmd.args[2], Arg::Positional(_)));
1832                // --amend (long flag)
1833                match &cmd.args[3] {
1834                    Arg::LongFlag(name) => assert_eq!(name, "amend"),
1835                    _ => panic!("expected LongFlag --amend"),
1836                }
1837            }
1838            _ => panic!("expected Command"),
1839        }
1840    }
1841
1842    #[test]
1843    fn parse_redirect_stdout() {
1844        let result = parse("cmd > file");
1845        assert!(result.is_ok());
1846        let program = result.expect("ok");
1847        // Commands with redirects stay as Pipeline, not Command
1848        match &program.statements[0] {
1849            Stmt::Pipeline(p) => {
1850                assert_eq!(p.commands.len(), 1);
1851                let cmd = &p.commands[0];
1852                assert_eq!(cmd.redirects.len(), 1);
1853                assert!(matches!(cmd.redirects[0].kind, RedirectKind::StdoutOverwrite));
1854            }
1855            _ => panic!("expected Pipeline"),
1856        }
1857    }
1858
1859    #[test]
1860    fn parse_var_ref() {
1861        let result = parse("echo ${VAR}");
1862        assert!(result.is_ok());
1863        let program = result.expect("ok");
1864        match &program.statements[0] {
1865            Stmt::Command(cmd) => {
1866                assert_eq!(cmd.args.len(), 1);
1867                assert!(matches!(&cmd.args[0], Arg::Positional(Expr::VarRef(_))));
1868            }
1869            _ => panic!("expected Command"),
1870        }
1871    }
1872
1873    #[test]
1874    fn parse_multiple_statements() {
1875        let result = parse("a\nb\nc");
1876        assert!(result.is_ok());
1877        let program = result.expect("ok");
1878        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
1879        assert_eq!(non_empty.len(), 3);
1880    }
1881
1882    #[test]
1883    fn parse_semicolon_separated() {
1884        let result = parse("a; b; c");
1885        assert!(result.is_ok());
1886        let program = result.expect("ok");
1887        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
1888        assert_eq!(non_empty.len(), 3);
1889    }
1890
1891    #[test]
1892    fn parse_complex_pipeline() {
1893        let result = parse(r#"cat file | grep pattern="foo" | head count=10"#);
1894        assert!(result.is_ok());
1895        let program = result.expect("ok");
1896        match &program.statements[0] {
1897            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
1898            _ => panic!("expected Pipeline"),
1899        }
1900    }
1901
1902    #[test]
1903    fn parse_json_as_string_arg() {
1904        // JSON arrays/objects should be passed as string arguments
1905        let result = parse(r#"cmd '[[1, 2], [3, 4]]'"#);
1906        assert!(result.is_ok());
1907    }
1908
1909    #[test]
1910    fn parse_mixed_args() {
1911        let result = parse(r#"cmd pos1 key="val" pos2 num=42"#);
1912        assert!(result.is_ok());
1913        let program = result.expect("ok");
1914        match &program.statements[0] {
1915            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 4),
1916            _ => panic!("expected Command"),
1917        }
1918    }
1919
1920    #[test]
1921    fn error_unterminated_string() {
1922        let result = parse(r#"echo "hello"#);
1923        assert!(result.is_err());
1924    }
1925
1926    #[test]
1927    fn error_unterminated_var_ref() {
1928        let result = parse("echo ${VAR");
1929        assert!(result.is_err());
1930    }
1931
1932    #[test]
1933    fn error_missing_fi() {
1934        let result = parse("if true; then echo");
1935        assert!(result.is_err());
1936    }
1937
1938    #[test]
1939    fn error_missing_done() {
1940        let result = parse("for X in items; do echo");
1941        assert!(result.is_err());
1942    }
1943
1944    #[test]
1945    fn parse_nested_cmd_subst() {
1946        // Nested command substitution is supported
1947        let result = parse("X=$(echo $(date))").unwrap();
1948        match &result.statements[0] {
1949            Stmt::Assignment(a) => {
1950                assert_eq!(a.name, "X");
1951                match &a.value {
1952                    Expr::CommandSubst(outer) => {
1953                        assert_eq!(outer.commands[0].name, "echo");
1954                        // The argument should be another command substitution
1955                        match &outer.commands[0].args[0] {
1956                            Arg::Positional(Expr::CommandSubst(inner)) => {
1957                                assert_eq!(inner.commands[0].name, "date");
1958                            }
1959                            other => panic!("expected nested cmd subst, got {:?}", other),
1960                        }
1961                    }
1962                    other => panic!("expected cmd subst, got {:?}", other),
1963                }
1964            }
1965            other => panic!("expected assignment, got {:?}", other),
1966        }
1967    }
1968
1969    #[test]
1970    fn parse_deeply_nested_cmd_subst() {
1971        // Three levels deep
1972        let result = parse("X=$(a $(b $(c)))").unwrap();
1973        match &result.statements[0] {
1974            Stmt::Assignment(a) => match &a.value {
1975                Expr::CommandSubst(level1) => {
1976                    assert_eq!(level1.commands[0].name, "a");
1977                    match &level1.commands[0].args[0] {
1978                        Arg::Positional(Expr::CommandSubst(level2)) => {
1979                            assert_eq!(level2.commands[0].name, "b");
1980                            match &level2.commands[0].args[0] {
1981                                Arg::Positional(Expr::CommandSubst(level3)) => {
1982                                    assert_eq!(level3.commands[0].name, "c");
1983                                }
1984                                other => panic!("expected level3 cmd subst, got {:?}", other),
1985                            }
1986                        }
1987                        other => panic!("expected level2 cmd subst, got {:?}", other),
1988                    }
1989                }
1990                other => panic!("expected cmd subst, got {:?}", other),
1991            },
1992            other => panic!("expected assignment, got {:?}", other),
1993        }
1994    }
1995
1996    // ═══════════════════════════════════════════════════════════════════════════
1997    // Value Preservation Tests - These test that actual values are captured
1998    // ═══════════════════════════════════════════════════════════════════════════
1999
2000    #[test]
2001    fn value_int_preserved() {
2002        let result = parse("X=42").unwrap();
2003        match &result.statements[0] {
2004            Stmt::Assignment(a) => {
2005                assert_eq!(a.name, "X");
2006                match &a.value {
2007                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2008                    other => panic!("expected int literal, got {:?}", other),
2009                }
2010            }
2011            other => panic!("expected assignment, got {:?}", other),
2012        }
2013    }
2014
2015    #[test]
2016    fn value_negative_int_preserved() {
2017        let result = parse("X=-99").unwrap();
2018        match &result.statements[0] {
2019            Stmt::Assignment(a) => match &a.value {
2020                Expr::Literal(Value::Int(n)) => assert_eq!(*n, -99),
2021                other => panic!("expected int, got {:?}", other),
2022            },
2023            other => panic!("expected assignment, got {:?}", other),
2024        }
2025    }
2026
2027    #[test]
2028    fn value_float_preserved() {
2029        let result = parse("PI=3.14").unwrap();
2030        match &result.statements[0] {
2031            Stmt::Assignment(a) => match &a.value {
2032                Expr::Literal(Value::Float(f)) => assert!((*f - 3.14).abs() < 0.001),
2033                other => panic!("expected float, got {:?}", other),
2034            },
2035            other => panic!("expected assignment, got {:?}", other),
2036        }
2037    }
2038
2039    #[test]
2040    fn value_string_preserved() {
2041        let result = parse(r#"echo "hello world""#).unwrap();
2042        match &result.statements[0] {
2043            Stmt::Command(cmd) => {
2044                assert_eq!(cmd.name, "echo");
2045                match &cmd.args[0] {
2046                    Arg::Positional(Expr::Literal(Value::String(s))) => {
2047                        assert_eq!(s, "hello world");
2048                    }
2049                    other => panic!("expected string arg, got {:?}", other),
2050                }
2051            }
2052            other => panic!("expected command, got {:?}", other),
2053        }
2054    }
2055
2056    #[test]
2057    fn value_string_with_escapes_preserved() {
2058        let result = parse(r#"echo "line1\nline2""#).unwrap();
2059        match &result.statements[0] {
2060            Stmt::Command(cmd) => match &cmd.args[0] {
2061                Arg::Positional(Expr::Literal(Value::String(s))) => {
2062                    assert_eq!(s, "line1\nline2");
2063                }
2064                other => panic!("expected string, got {:?}", other),
2065            },
2066            other => panic!("expected command, got {:?}", other),
2067        }
2068    }
2069
2070    #[test]
2071    fn value_command_name_preserved() {
2072        let result = parse("my-command").unwrap();
2073        match &result.statements[0] {
2074            Stmt::Command(cmd) => assert_eq!(cmd.name, "my-command"),
2075            other => panic!("expected command, got {:?}", other),
2076        }
2077    }
2078
2079    #[test]
2080    fn value_assignment_name_preserved() {
2081        let result = parse("MY_VAR=1").unwrap();
2082        match &result.statements[0] {
2083            Stmt::Assignment(a) => assert_eq!(a.name, "MY_VAR"),
2084            other => panic!("expected assignment, got {:?}", other),
2085        }
2086    }
2087
2088    #[test]
2089    fn value_for_variable_preserved() {
2090        let result = parse("for ITEM in items; do echo; done").unwrap();
2091        match &result.statements[0] {
2092            Stmt::For(f) => assert_eq!(f.variable, "ITEM"),
2093            other => panic!("expected for, got {:?}", other),
2094        }
2095    }
2096
2097    #[test]
2098    fn value_varref_name_preserved() {
2099        let result = parse("echo ${MESSAGE}").unwrap();
2100        match &result.statements[0] {
2101            Stmt::Command(cmd) => match &cmd.args[0] {
2102                Arg::Positional(Expr::VarRef(path)) => {
2103                    assert_eq!(path.segments.len(), 1);
2104                    let VarSegment::Field(name) = &path.segments[0];
2105                    assert_eq!(name, "MESSAGE");
2106                }
2107                other => panic!("expected varref, got {:?}", other),
2108            },
2109            other => panic!("expected command, got {:?}", other),
2110        }
2111    }
2112
2113    #[test]
2114    fn value_varref_field_access_preserved() {
2115        let result = parse("echo ${RESULT.data}").unwrap();
2116        match &result.statements[0] {
2117            Stmt::Command(cmd) => match &cmd.args[0] {
2118                Arg::Positional(Expr::VarRef(path)) => {
2119                    assert_eq!(path.segments.len(), 2);
2120                    let VarSegment::Field(a) = &path.segments[0];
2121                    let VarSegment::Field(b) = &path.segments[1];
2122                    assert_eq!(a, "RESULT");
2123                    assert_eq!(b, "data");
2124                }
2125                other => panic!("expected varref, got {:?}", other),
2126            },
2127            other => panic!("expected command, got {:?}", other),
2128        }
2129    }
2130
2131    #[test]
2132    fn value_varref_index_ignored() {
2133        // Index segments are no longer supported - they're filtered out by parse_varpath
2134        let result = parse("echo ${ITEMS[0]}").unwrap();
2135        match &result.statements[0] {
2136            Stmt::Command(cmd) => match &cmd.args[0] {
2137                Arg::Positional(Expr::VarRef(path)) => {
2138                    // Index segment [0] is skipped, only ITEMS remains
2139                    assert_eq!(path.segments.len(), 1);
2140                    let VarSegment::Field(name) = &path.segments[0];
2141                    assert_eq!(name, "ITEMS");
2142                }
2143                other => panic!("expected varref, got {:?}", other),
2144            },
2145            other => panic!("expected command, got {:?}", other),
2146        }
2147    }
2148
2149    #[test]
2150    fn value_last_result_ref_preserved() {
2151        let result = parse("echo ${?.ok}").unwrap();
2152        match &result.statements[0] {
2153            Stmt::Command(cmd) => match &cmd.args[0] {
2154                Arg::Positional(Expr::VarRef(path)) => {
2155                    assert_eq!(path.segments.len(), 2);
2156                    let VarSegment::Field(name) = &path.segments[0];
2157                    assert_eq!(name, "?");
2158                }
2159                other => panic!("expected varref, got {:?}", other),
2160            },
2161            other => panic!("expected command, got {:?}", other),
2162        }
2163    }
2164
2165    #[test]
2166    fn value_named_arg_preserved() {
2167        let result = parse("cmd count=42").unwrap();
2168        match &result.statements[0] {
2169            Stmt::Command(cmd) => {
2170                assert_eq!(cmd.name, "cmd");
2171                match &cmd.args[0] {
2172                    Arg::Named { key, value } => {
2173                        assert_eq!(key, "count");
2174                        match value {
2175                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2176                            other => panic!("expected int, got {:?}", other),
2177                        }
2178                    }
2179                    other => panic!("expected named arg, got {:?}", other),
2180                }
2181            }
2182            other => panic!("expected command, got {:?}", other),
2183        }
2184    }
2185
2186    #[test]
2187    fn value_function_def_name_preserved() {
2188        let result = parse("greet() { echo }").unwrap();
2189        match &result.statements[0] {
2190            Stmt::ToolDef(t) => {
2191                assert_eq!(t.name, "greet");
2192                assert!(t.params.is_empty());
2193            }
2194            other => panic!("expected function def, got {:?}", other),
2195        }
2196    }
2197
2198    // ═══════════════════════════════════════════════════════════════════════════
2199    // New Feature Tests - Comparisons, Interpolation, Nested Structures
2200    // ═══════════════════════════════════════════════════════════════════════════
2201
2202    #[test]
2203    fn parse_comparison_equals() {
2204        // Shell-compatible: use [[ ]] for comparisons
2205        let result = parse("if [[ ${X} == 5 ]]; then echo; fi").unwrap();
2206        match &result.statements[0] {
2207            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2208                Expr::Test(test) => match test.as_ref() {
2209                    TestExpr::Comparison { left, op, right } => {
2210                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
2211                        assert_eq!(*op, TestCmpOp::Eq);
2212                        match right.as_ref() {
2213                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 5),
2214                            other => panic!("expected int, got {:?}", other),
2215                        }
2216                    }
2217                    other => panic!("expected comparison, got {:?}", other),
2218                },
2219                other => panic!("expected test expr, got {:?}", other),
2220            },
2221            other => panic!("expected if, got {:?}", other),
2222        }
2223    }
2224
2225    #[test]
2226    fn parse_comparison_not_equals() {
2227        let result = parse("if [[ ${X} != 0 ]]; then echo; fi").unwrap();
2228        match &result.statements[0] {
2229            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2230                Expr::Test(test) => match test.as_ref() {
2231                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotEq),
2232                    other => panic!("expected comparison, got {:?}", other),
2233                },
2234                other => panic!("expected test expr, got {:?}", other),
2235            },
2236            other => panic!("expected if, got {:?}", other),
2237        }
2238    }
2239
2240    #[test]
2241    fn parse_comparison_less_than() {
2242        let result = parse("if [[ ${COUNT} -lt 10 ]]; then echo; fi").unwrap();
2243        match &result.statements[0] {
2244            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2245                Expr::Test(test) => match test.as_ref() {
2246                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::Lt),
2247                    other => panic!("expected comparison, got {:?}", other),
2248                },
2249                other => panic!("expected test expr, got {:?}", other),
2250            },
2251            other => panic!("expected if, got {:?}", other),
2252        }
2253    }
2254
2255    #[test]
2256    fn parse_comparison_greater_than() {
2257        let result = parse("if [[ ${COUNT} -gt 0 ]]; then echo; fi").unwrap();
2258        match &result.statements[0] {
2259            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2260                Expr::Test(test) => match test.as_ref() {
2261                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::Gt),
2262                    other => panic!("expected comparison, got {:?}", other),
2263                },
2264                other => panic!("expected test expr, got {:?}", other),
2265            },
2266            other => panic!("expected if, got {:?}", other),
2267        }
2268    }
2269
2270    #[test]
2271    fn parse_comparison_less_equal() {
2272        let result = parse("if [[ ${X} -le 100 ]]; then echo; fi").unwrap();
2273        match &result.statements[0] {
2274            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2275                Expr::Test(test) => match test.as_ref() {
2276                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::LtEq),
2277                    other => panic!("expected comparison, got {:?}", other),
2278                },
2279                other => panic!("expected test expr, got {:?}", other),
2280            },
2281            other => panic!("expected if, got {:?}", other),
2282        }
2283    }
2284
2285    #[test]
2286    fn parse_comparison_greater_equal() {
2287        let result = parse("if [[ ${X} -ge 1 ]]; then echo; fi").unwrap();
2288        match &result.statements[0] {
2289            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2290                Expr::Test(test) => match test.as_ref() {
2291                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::GtEq),
2292                    other => panic!("expected comparison, got {:?}", other),
2293                },
2294                other => panic!("expected test expr, got {:?}", other),
2295            },
2296            other => panic!("expected if, got {:?}", other),
2297        }
2298    }
2299
2300    #[test]
2301    fn parse_regex_match() {
2302        let result = parse(r#"if [[ ${NAME} =~ "^test" ]]; then echo; fi"#).unwrap();
2303        match &result.statements[0] {
2304            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2305                Expr::Test(test) => match test.as_ref() {
2306                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::Match),
2307                    other => panic!("expected comparison, got {:?}", other),
2308                },
2309                other => panic!("expected test expr, got {:?}", other),
2310            },
2311            other => panic!("expected if, got {:?}", other),
2312        }
2313    }
2314
2315    #[test]
2316    fn parse_regex_not_match() {
2317        let result = parse(r#"if [[ ${NAME} !~ "^test" ]]; then echo; fi"#).unwrap();
2318        match &result.statements[0] {
2319            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2320                Expr::Test(test) => match test.as_ref() {
2321                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotMatch),
2322                    other => panic!("expected comparison, got {:?}", other),
2323                },
2324                other => panic!("expected test expr, got {:?}", other),
2325            },
2326            other => panic!("expected if, got {:?}", other),
2327        }
2328    }
2329
2330    #[test]
2331    fn parse_string_interpolation() {
2332        let result = parse(r#"echo "Hello ${NAME}!""#).unwrap();
2333        match &result.statements[0] {
2334            Stmt::Command(cmd) => match &cmd.args[0] {
2335                Arg::Positional(Expr::Interpolated(parts)) => {
2336                    assert_eq!(parts.len(), 3);
2337                    match &parts[0] {
2338                        StringPart::Literal(s) => assert_eq!(s, "Hello "),
2339                        other => panic!("expected literal, got {:?}", other),
2340                    }
2341                    match &parts[1] {
2342                        StringPart::Var(path) => {
2343                            assert_eq!(path.segments.len(), 1);
2344                            let VarSegment::Field(name) = &path.segments[0];
2345                            assert_eq!(name, "NAME");
2346                        }
2347                        other => panic!("expected var, got {:?}", other),
2348                    }
2349                    match &parts[2] {
2350                        StringPart::Literal(s) => assert_eq!(s, "!"),
2351                        other => panic!("expected literal, got {:?}", other),
2352                    }
2353                }
2354                other => panic!("expected interpolated, got {:?}", other),
2355            },
2356            other => panic!("expected command, got {:?}", other),
2357        }
2358    }
2359
2360    #[test]
2361    fn parse_string_interpolation_multiple_vars() {
2362        let result = parse(r#"echo "${FIRST} and ${SECOND}""#).unwrap();
2363        match &result.statements[0] {
2364            Stmt::Command(cmd) => match &cmd.args[0] {
2365                Arg::Positional(Expr::Interpolated(parts)) => {
2366                    // ${FIRST} + " and " + ${SECOND} = 3 parts
2367                    assert_eq!(parts.len(), 3);
2368                    assert!(matches!(&parts[0], StringPart::Var(_)));
2369                    assert!(matches!(&parts[1], StringPart::Literal(_)));
2370                    assert!(matches!(&parts[2], StringPart::Var(_)));
2371                }
2372                other => panic!("expected interpolated, got {:?}", other),
2373            },
2374            other => panic!("expected command, got {:?}", other),
2375        }
2376    }
2377
2378    #[test]
2379    fn parse_empty_function_body() {
2380        let result = parse("empty() { }").unwrap();
2381        match &result.statements[0] {
2382            Stmt::ToolDef(t) => {
2383                assert_eq!(t.name, "empty");
2384                assert!(t.params.is_empty());
2385                assert!(t.body.is_empty());
2386            }
2387            other => panic!("expected function def, got {:?}", other),
2388        }
2389    }
2390
2391    #[test]
2392    fn parse_bash_style_function() {
2393        let result = parse("function greet { echo hello }").unwrap();
2394        match &result.statements[0] {
2395            Stmt::ToolDef(t) => {
2396                assert_eq!(t.name, "greet");
2397                assert!(t.params.is_empty());
2398                assert_eq!(t.body.len(), 1);
2399            }
2400            other => panic!("expected function def, got {:?}", other),
2401        }
2402    }
2403
2404    #[test]
2405    fn parse_comparison_string_values() {
2406        let result = parse(r#"if [[ ${STATUS} == "ok" ]]; then echo; fi"#).unwrap();
2407        match &result.statements[0] {
2408            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2409                Expr::Test(test) => match test.as_ref() {
2410                    TestExpr::Comparison { left, op, right } => {
2411                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
2412                        assert_eq!(*op, TestCmpOp::Eq);
2413                        match right.as_ref() {
2414                            Expr::Literal(Value::String(s)) => assert_eq!(s, "ok"),
2415                            other => panic!("expected string, got {:?}", other),
2416                        }
2417                    }
2418                    other => panic!("expected comparison, got {:?}", other),
2419                },
2420                other => panic!("expected test expr, got {:?}", other),
2421            },
2422            other => panic!("expected if, got {:?}", other),
2423        }
2424    }
2425
2426    // ═══════════════════════════════════════════════════════════════════════════
2427    // Command Substitution Tests
2428    // ═══════════════════════════════════════════════════════════════════════════
2429
2430    #[test]
2431    fn parse_cmd_subst_simple() {
2432        let result = parse("X=$(echo)").unwrap();
2433        match &result.statements[0] {
2434            Stmt::Assignment(a) => {
2435                assert_eq!(a.name, "X");
2436                match &a.value {
2437                    Expr::CommandSubst(pipeline) => {
2438                        assert_eq!(pipeline.commands.len(), 1);
2439                        assert_eq!(pipeline.commands[0].name, "echo");
2440                    }
2441                    other => panic!("expected command subst, got {:?}", other),
2442                }
2443            }
2444            other => panic!("expected assignment, got {:?}", other),
2445        }
2446    }
2447
2448    #[test]
2449    fn parse_cmd_subst_with_args() {
2450        let result = parse(r#"X=$(fetch url="http://example.com")"#).unwrap();
2451        match &result.statements[0] {
2452            Stmt::Assignment(a) => match &a.value {
2453                Expr::CommandSubst(pipeline) => {
2454                    assert_eq!(pipeline.commands[0].name, "fetch");
2455                    assert_eq!(pipeline.commands[0].args.len(), 1);
2456                    match &pipeline.commands[0].args[0] {
2457                        Arg::Named { key, .. } => assert_eq!(key, "url"),
2458                        other => panic!("expected named arg, got {:?}", other),
2459                    }
2460                }
2461                other => panic!("expected command subst, got {:?}", other),
2462            },
2463            other => panic!("expected assignment, got {:?}", other),
2464        }
2465    }
2466
2467    #[test]
2468    fn parse_cmd_subst_pipeline() {
2469        let result = parse("X=$(cat file | grep pattern)").unwrap();
2470        match &result.statements[0] {
2471            Stmt::Assignment(a) => match &a.value {
2472                Expr::CommandSubst(pipeline) => {
2473                    assert_eq!(pipeline.commands.len(), 2);
2474                    assert_eq!(pipeline.commands[0].name, "cat");
2475                    assert_eq!(pipeline.commands[1].name, "grep");
2476                }
2477                other => panic!("expected command subst, got {:?}", other),
2478            },
2479            other => panic!("expected assignment, got {:?}", other),
2480        }
2481    }
2482
2483    #[test]
2484    fn parse_cmd_subst_in_condition() {
2485        // Shell-compatible: conditions are commands, not command substitutions
2486        let result = parse("if kaish-validate; then echo; fi").unwrap();
2487        match &result.statements[0] {
2488            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2489                Expr::Command(cmd) => {
2490                    assert_eq!(cmd.name, "kaish-validate");
2491                }
2492                other => panic!("expected command, got {:?}", other),
2493            },
2494            other => panic!("expected if, got {:?}", other),
2495        }
2496    }
2497
2498    #[test]
2499    fn parse_cmd_subst_in_command_arg() {
2500        let result = parse("echo $(whoami)").unwrap();
2501        match &result.statements[0] {
2502            Stmt::Command(cmd) => {
2503                assert_eq!(cmd.name, "echo");
2504                match &cmd.args[0] {
2505                    Arg::Positional(Expr::CommandSubst(pipeline)) => {
2506                        assert_eq!(pipeline.commands[0].name, "whoami");
2507                    }
2508                    other => panic!("expected command subst, got {:?}", other),
2509                }
2510            }
2511            other => panic!("expected command, got {:?}", other),
2512        }
2513    }
2514
2515    // ═══════════════════════════════════════════════════════════════════════════
2516    // Logical Operator Tests (&&, ||)
2517    // ═══════════════════════════════════════════════════════════════════════════
2518
2519    #[test]
2520    fn parse_condition_and() {
2521        // Shell-compatible: commands chained with &&
2522        let result = parse("if check-a && check-b; then echo; fi").unwrap();
2523        match &result.statements[0] {
2524            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2525                Expr::BinaryOp { left, op, right } => {
2526                    assert_eq!(*op, BinaryOp::And);
2527                    assert!(matches!(left.as_ref(), Expr::Command(_)));
2528                    assert!(matches!(right.as_ref(), Expr::Command(_)));
2529                }
2530                other => panic!("expected binary op, got {:?}", other),
2531            },
2532            other => panic!("expected if, got {:?}", other),
2533        }
2534    }
2535
2536    #[test]
2537    fn parse_condition_or() {
2538        let result = parse("if try-a || try-b; then echo; fi").unwrap();
2539        match &result.statements[0] {
2540            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2541                Expr::BinaryOp { left, op, right } => {
2542                    assert_eq!(*op, BinaryOp::Or);
2543                    assert!(matches!(left.as_ref(), Expr::Command(_)));
2544                    assert!(matches!(right.as_ref(), Expr::Command(_)));
2545                }
2546                other => panic!("expected binary op, got {:?}", other),
2547            },
2548            other => panic!("expected if, got {:?}", other),
2549        }
2550    }
2551
2552    #[test]
2553    fn parse_condition_and_or_precedence() {
2554        // a && b || c should parse as (a && b) || c
2555        let result = parse("if cmd-a && cmd-b || cmd-c; then echo; fi").unwrap();
2556        match &result.statements[0] {
2557            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2558                Expr::BinaryOp { left, op, right } => {
2559                    // Top level should be ||
2560                    assert_eq!(*op, BinaryOp::Or);
2561                    // Left side should be && expression
2562                    match left.as_ref() {
2563                        Expr::BinaryOp { op: inner_op, .. } => {
2564                            assert_eq!(*inner_op, BinaryOp::And);
2565                        }
2566                        other => panic!("expected binary op (&&), got {:?}", other),
2567                    }
2568                    // Right side should be command
2569                    assert!(matches!(right.as_ref(), Expr::Command(_)));
2570                }
2571                other => panic!("expected binary op, got {:?}", other),
2572            },
2573            other => panic!("expected if, got {:?}", other),
2574        }
2575    }
2576
2577    #[test]
2578    fn parse_condition_multiple_and() {
2579        let result = parse("if cmd-a && cmd-b && cmd-c; then echo; fi").unwrap();
2580        match &result.statements[0] {
2581            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2582                Expr::BinaryOp { left, op, .. } => {
2583                    assert_eq!(*op, BinaryOp::And);
2584                    // Left side should also be &&
2585                    match left.as_ref() {
2586                        Expr::BinaryOp { op: inner_op, .. } => {
2587                            assert_eq!(*inner_op, BinaryOp::And);
2588                        }
2589                        other => panic!("expected binary op, got {:?}", other),
2590                    }
2591                }
2592                other => panic!("expected binary op, got {:?}", other),
2593            },
2594            other => panic!("expected if, got {:?}", other),
2595        }
2596    }
2597
2598    #[test]
2599    fn parse_condition_mixed_comparison_and_logical() {
2600        // Shell-compatible: use [[ ]] for comparisons, && to chain them
2601        let result = parse("if [[ ${X} == 5 ]] && [[ ${Y} -gt 0 ]]; then echo; fi").unwrap();
2602        match &result.statements[0] {
2603            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2604                Expr::BinaryOp { left, op, right } => {
2605                    assert_eq!(*op, BinaryOp::And);
2606                    // Left: [[ ${X} == 5 ]]
2607                    match left.as_ref() {
2608                        Expr::Test(test) => match test.as_ref() {
2609                            TestExpr::Comparison { op: left_op, .. } => {
2610                                assert_eq!(*left_op, TestCmpOp::Eq);
2611                            }
2612                            other => panic!("expected comparison, got {:?}", other),
2613                        },
2614                        other => panic!("expected test, got {:?}", other),
2615                    }
2616                    // Right: [[ ${Y} -gt 0 ]]
2617                    match right.as_ref() {
2618                        Expr::Test(test) => match test.as_ref() {
2619                            TestExpr::Comparison { op: right_op, .. } => {
2620                                assert_eq!(*right_op, TestCmpOp::Gt);
2621                            }
2622                            other => panic!("expected comparison, got {:?}", other),
2623                        },
2624                        other => panic!("expected test, got {:?}", other),
2625                    }
2626                }
2627                other => panic!("expected binary op, got {:?}", other),
2628            },
2629            other => panic!("expected if, got {:?}", other),
2630        }
2631    }
2632
2633    // ═══════════════════════════════════════════════════════════════════════════
2634    // Integration Tests - Complete Scripts
2635    // ═══════════════════════════════════════════════════════════════════════════
2636
2637    /// Level 1: Linear script using core features
2638    #[test]
2639    fn script_level1_linear() {
2640        let script = r#"
2641NAME="kaish"
2642VERSION=1
2643TIMEOUT=30
2644ITEMS="alpha beta gamma"
2645
2646echo "Starting ${NAME} v${VERSION}"
2647cat "README.md" | grep pattern="install" | head count=5
2648fetch url="https://api.example.com/status" timeout=${TIMEOUT} > "/tmp/status.json"
2649echo "Items: ${ITEMS}"
2650"#;
2651        let result = parse(script).unwrap();
2652        let stmts: Vec<_> = result.statements.iter()
2653            .filter(|s| !matches!(s, Stmt::Empty))
2654            .collect();
2655
2656        assert_eq!(stmts.len(), 8);
2657        assert!(matches!(stmts[0], Stmt::Assignment(_)));  // set NAME
2658        assert!(matches!(stmts[1], Stmt::Assignment(_)));  // set VERSION
2659        assert!(matches!(stmts[2], Stmt::Assignment(_)));  // set TIMEOUT
2660        assert!(matches!(stmts[3], Stmt::Assignment(_)));  // set ITEMS
2661        assert!(matches!(stmts[4], Stmt::Command(_)));     // echo "Starting..."
2662        assert!(matches!(stmts[5], Stmt::Pipeline(_)));    // cat | grep | head
2663        assert!(matches!(stmts[6], Stmt::Pipeline(_)));    // fetch (with redirect - Pipeline since it has redirects)
2664        assert!(matches!(stmts[7], Stmt::Command(_)));     // echo "Items: ${ITEMS}"
2665    }
2666
2667    /// Level 2: Script with conditionals (shell-compatible syntax)
2668    #[test]
2669    fn script_level2_branching() {
2670        let script = r#"
2671RESULT=$(kaish-validate "input.json")
2672
2673if [[ ${RESULT.ok} == true ]]; then
2674    echo "Validation passed"
2675    process "input.json" > "output.json"
2676else
2677    echo "Validation failed: ${RESULT.err}"
2678fi
2679
2680if [[ ${COUNT} -gt 0 ]] && [[ ${COUNT} -le 100 ]]; then
2681    echo "Count in valid range"
2682fi
2683
2684if check-network || check-cache; then
2685    fetch url=${URL}
2686fi
2687"#;
2688        let result = parse(script).unwrap();
2689        let stmts: Vec<_> = result.statements.iter()
2690            .filter(|s| !matches!(s, Stmt::Empty))
2691            .collect();
2692
2693        assert_eq!(stmts.len(), 4);
2694
2695        // First: assignment with command substitution
2696        match stmts[0] {
2697            Stmt::Assignment(a) => {
2698                assert_eq!(a.name, "RESULT");
2699                assert!(matches!(&a.value, Expr::CommandSubst(_)));
2700            }
2701            other => panic!("expected assignment, got {:?}", other),
2702        }
2703
2704        // Second: if/else
2705        match stmts[1] {
2706            Stmt::If(if_stmt) => {
2707                assert_eq!(if_stmt.then_branch.len(), 2);
2708                assert!(if_stmt.else_branch.is_some());
2709                assert_eq!(if_stmt.else_branch.as_ref().unwrap().len(), 1);
2710            }
2711            other => panic!("expected if, got {:?}", other),
2712        }
2713
2714        // Third: if with && condition
2715        match stmts[2] {
2716            Stmt::If(if_stmt) => {
2717                match if_stmt.condition.as_ref() {
2718                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
2719                    other => panic!("expected && condition, got {:?}", other),
2720                }
2721            }
2722            other => panic!("expected if, got {:?}", other),
2723        }
2724
2725        // Fourth: if with || of commands
2726        match stmts[3] {
2727            Stmt::If(if_stmt) => {
2728                match if_stmt.condition.as_ref() {
2729                    Expr::BinaryOp { op, left, right } => {
2730                        assert_eq!(*op, BinaryOp::Or);
2731                        assert!(matches!(left.as_ref(), Expr::Command(_)));
2732                        assert!(matches!(right.as_ref(), Expr::Command(_)));
2733                    }
2734                    other => panic!("expected || condition, got {:?}", other),
2735                }
2736            }
2737            other => panic!("expected if, got {:?}", other),
2738        }
2739    }
2740
2741    /// Level 3: Script with loops and function definitions
2742    #[test]
2743    fn script_level3_loops_and_functions() {
2744        let script = r#"
2745greet() {
2746    echo "Hello, $1!"
2747}
2748
2749fetch_all() {
2750    for URL in $@; do
2751        fetch url=${URL}
2752    done
2753}
2754
2755USERS="alice bob charlie"
2756
2757for USER in ${USERS}; do
2758    greet ${USER}
2759    if [[ ${USER} == "bob" ]]; then
2760        echo "Found Bob!"
2761    fi
2762done
2763
2764long-running-task &
2765"#;
2766        let result = parse(script).unwrap();
2767        let stmts: Vec<_> = result.statements.iter()
2768            .filter(|s| !matches!(s, Stmt::Empty))
2769            .collect();
2770
2771        assert_eq!(stmts.len(), 5);
2772
2773        // First function def
2774        match stmts[0] {
2775            Stmt::ToolDef(t) => {
2776                assert_eq!(t.name, "greet");
2777                assert!(t.params.is_empty());
2778            }
2779            other => panic!("expected function def, got {:?}", other),
2780        }
2781
2782        // Second function def with nested for loop
2783        match stmts[1] {
2784            Stmt::ToolDef(t) => {
2785                assert_eq!(t.name, "fetch_all");
2786                assert_eq!(t.body.len(), 1);
2787                assert!(matches!(&t.body[0], Stmt::For(_)));
2788            }
2789            other => panic!("expected function def, got {:?}", other),
2790        }
2791
2792        // Assignment
2793        assert!(matches!(stmts[2], Stmt::Assignment(_)));
2794
2795        // For loop with nested if
2796        match stmts[3] {
2797            Stmt::For(f) => {
2798                assert_eq!(f.variable, "USER");
2799                assert_eq!(f.body.len(), 2);
2800                assert!(matches!(&f.body[0], Stmt::Command(_)));
2801                assert!(matches!(&f.body[1], Stmt::If(_)));
2802            }
2803            other => panic!("expected for loop, got {:?}", other),
2804        }
2805
2806        // Background job
2807        match stmts[4] {
2808            Stmt::Pipeline(p) => {
2809                assert!(p.background);
2810                assert_eq!(p.commands[0].name, "long-running-task");
2811            }
2812            other => panic!("expected pipeline (background), got {:?}", other),
2813        }
2814    }
2815
2816    /// Level 4: Complex nested control flow (shell-compatible syntax)
2817    #[test]
2818    fn script_level4_complex_nesting() {
2819        let script = r#"
2820RESULT=$(cat "config.json" | jq query=".servers" | kaish-validate schema="server-schema.json")
2821
2822if ping host=${HOST} && [[ ${RESULT} == true ]]; then
2823    for SERVER in "prod-1 prod-2"; do
2824        deploy target=${SERVER} port=8080
2825        if [[ ${?.code} != 0 ]]; then
2826            notify channel="ops" message="Deploy failed"
2827        fi
2828    done
2829fi
2830"#;
2831        let result = parse(script).unwrap();
2832        let stmts: Vec<_> = result.statements.iter()
2833            .filter(|s| !matches!(s, Stmt::Empty))
2834            .collect();
2835
2836        assert_eq!(stmts.len(), 2);
2837
2838        // Command substitution with pipeline
2839        match stmts[0] {
2840            Stmt::Assignment(a) => {
2841                assert_eq!(a.name, "RESULT");
2842                match &a.value {
2843                    Expr::CommandSubst(pipeline) => {
2844                        assert_eq!(pipeline.commands.len(), 3);
2845                    }
2846                    other => panic!("expected command subst, got {:?}", other),
2847                }
2848            }
2849            other => panic!("expected assignment, got {:?}", other),
2850        }
2851
2852        // If with && condition, containing for loop with nested if
2853        match stmts[1] {
2854            Stmt::If(if_stmt) => {
2855                match if_stmt.condition.as_ref() {
2856                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
2857                    other => panic!("expected && condition, got {:?}", other),
2858                }
2859                assert_eq!(if_stmt.then_branch.len(), 1);
2860                match &if_stmt.then_branch[0] {
2861                    Stmt::For(f) => {
2862                        assert_eq!(f.body.len(), 2);
2863                        assert!(matches!(&f.body[1], Stmt::If(_)));
2864                    }
2865                    other => panic!("expected for in if body, got {:?}", other),
2866                }
2867            }
2868            other => panic!("expected if, got {:?}", other),
2869        }
2870    }
2871
2872    /// Level 5: Edge cases and parser stress test
2873    #[test]
2874    fn script_level5_edge_cases() {
2875        let script = r#"
2876echo ""
2877echo "quotes: \"nested\" here"
2878echo "escapes: \n\t\r\\"
2879echo "unicode: \u2764"
2880
2881X=-99999
2882Y=3.14159265358979
2883Z=-0.001
2884
2885cmd a=1 b="two" c=true d=false e=null
2886
2887if true; then
2888    if false; then
2889        echo "inner"
2890    else
2891        echo "else"
2892    fi
2893fi
2894
2895for I in "a b c"; do
2896    echo ${I}
2897done
2898
2899no_params() {
2900    echo "no params"
2901}
2902
2903function all_args {
2904    echo "args: $@"
2905}
2906
2907a | b | c | d | e &
2908cmd 2> "errors.log"
2909cmd &> "all.log"
2910cmd >> "append.log"
2911cmd < "input.txt"
2912"#;
2913        let result = parse(script).unwrap();
2914        let stmts: Vec<_> = result.statements.iter()
2915            .filter(|s| !matches!(s, Stmt::Empty))
2916            .collect();
2917
2918        // Verify it parses without error
2919        assert!(stmts.len() >= 10, "expected many statements, got {}", stmts.len());
2920
2921        // Background pipeline
2922        let bg_stmt = stmts.iter().find(|s| matches!(s, Stmt::Pipeline(p) if p.background));
2923        assert!(bg_stmt.is_some(), "expected background pipeline");
2924
2925        match bg_stmt.unwrap() {
2926            Stmt::Pipeline(p) => {
2927                assert_eq!(p.commands.len(), 5);
2928                assert!(p.background);
2929            }
2930            _ => unreachable!(),
2931        }
2932    }
2933
2934    // ═══════════════════════════════════════════════════════════════════════════
2935    // Edge Case Tests: Ambiguity Resolution
2936    // ═══════════════════════════════════════════════════════════════════════════
2937
2938    #[test]
2939    fn parse_keyword_as_variable_rejected() {
2940        // Keywords CANNOT be used as variable names - this is intentional
2941        // to avoid ambiguity. Use different names instead.
2942        let result = parse(r#"if="value""#);
2943        assert!(result.is_err(), "if= should fail - 'if' is a keyword");
2944
2945        let result = parse("while=true");
2946        assert!(result.is_err(), "while= should fail - 'while' is a keyword");
2947
2948        let result = parse(r#"then="next""#);
2949        assert!(result.is_err(), "then= should fail - 'then' is a keyword");
2950    }
2951
2952    #[test]
2953    fn parse_set_command_with_flag() {
2954        let result = parse("set -e");
2955        assert!(result.is_ok(), "failed to parse set -e: {:?}", result);
2956        let program = result.unwrap();
2957        match &program.statements[0] {
2958            Stmt::Command(cmd) => {
2959                assert_eq!(cmd.name, "set");
2960                assert_eq!(cmd.args.len(), 1);
2961                match &cmd.args[0] {
2962                    Arg::ShortFlag(f) => assert_eq!(f, "e"),
2963                    other => panic!("expected ShortFlag, got {:?}", other),
2964                }
2965            }
2966            other => panic!("expected Command, got {:?}", other),
2967        }
2968    }
2969
2970    #[test]
2971    fn parse_set_command_no_args() {
2972        let result = parse("set");
2973        assert!(result.is_ok(), "failed to parse set: {:?}", result);
2974        let program = result.unwrap();
2975        match &program.statements[0] {
2976            Stmt::Command(cmd) => {
2977                assert_eq!(cmd.name, "set");
2978                assert_eq!(cmd.args.len(), 0);
2979            }
2980            other => panic!("expected Command, got {:?}", other),
2981        }
2982    }
2983
2984    #[test]
2985    fn parse_set_assignment_vs_command() {
2986        // X=5 should be assignment
2987        let result = parse("X=5");
2988        assert!(result.is_ok());
2989        let program = result.unwrap();
2990        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
2991
2992        // set -e should be command
2993        let result = parse("set -e");
2994        assert!(result.is_ok());
2995        let program = result.unwrap();
2996        assert!(matches!(&program.statements[0], Stmt::Command(_)));
2997    }
2998
2999    #[test]
3000    fn parse_true_as_command() {
3001        let result = parse("true");
3002        assert!(result.is_ok());
3003        let program = result.unwrap();
3004        match &program.statements[0] {
3005            Stmt::Command(cmd) => assert_eq!(cmd.name, "true"),
3006            other => panic!("expected Command(true), got {:?}", other),
3007        }
3008    }
3009
3010    #[test]
3011    fn parse_false_as_command() {
3012        let result = parse("false");
3013        assert!(result.is_ok());
3014        let program = result.unwrap();
3015        match &program.statements[0] {
3016            Stmt::Command(cmd) => assert_eq!(cmd.name, "false"),
3017            other => panic!("expected Command(false), got {:?}", other),
3018        }
3019    }
3020
3021    #[test]
3022    fn parse_dot_as_source_alias() {
3023        let result = parse(". script.kai");
3024        assert!(result.is_ok(), "failed to parse . script.kai: {:?}", result);
3025        let program = result.unwrap();
3026        match &program.statements[0] {
3027            Stmt::Command(cmd) => {
3028                assert_eq!(cmd.name, ".");
3029                assert_eq!(cmd.args.len(), 1);
3030            }
3031            other => panic!("expected Command(.), got {:?}", other),
3032        }
3033    }
3034
3035    #[test]
3036    fn parse_source_command() {
3037        let result = parse("source utils.kai");
3038        assert!(result.is_ok(), "failed to parse source: {:?}", result);
3039        let program = result.unwrap();
3040        match &program.statements[0] {
3041            Stmt::Command(cmd) => {
3042                assert_eq!(cmd.name, "source");
3043                assert_eq!(cmd.args.len(), 1);
3044            }
3045            other => panic!("expected Command(source), got {:?}", other),
3046        }
3047    }
3048
3049    #[test]
3050    fn parse_test_expr_file_test() {
3051        // Paths must be quoted strings in test expressions
3052        let result = parse(r#"[[ -f "/path/file" ]]"#);
3053        assert!(result.is_ok(), "failed to parse file test: {:?}", result);
3054    }
3055
3056    #[test]
3057    fn parse_test_expr_comparison() {
3058        let result = parse(r#"[[ $X == "value" ]]"#);
3059        assert!(result.is_ok(), "failed to parse comparison test: {:?}", result);
3060    }
3061
3062    #[test]
3063    fn parse_test_expr_single_eq() {
3064        // = and == are equivalent inside [[ ]] (matching bash behavior)
3065        let result = parse(r#"[[ $X = "value" ]]"#);
3066        assert!(result.is_ok(), "failed to parse single-= comparison: {:?}", result);
3067        let program = result.unwrap();
3068        match &program.statements[0] {
3069            Stmt::Test(TestExpr::Comparison { op, .. }) => {
3070                assert_eq!(op, &TestCmpOp::Eq);
3071            }
3072            other => panic!("expected Test(Comparison), got {:?}", other),
3073        }
3074    }
3075
3076    #[test]
3077    fn parse_while_loop() {
3078        let result = parse("while true; do echo; done");
3079        assert!(result.is_ok(), "failed to parse while loop: {:?}", result);
3080        let program = result.unwrap();
3081        assert!(matches!(&program.statements[0], Stmt::While(_)));
3082    }
3083
3084    #[test]
3085    fn parse_break_with_level() {
3086        let result = parse("break 2");
3087        assert!(result.is_ok());
3088        let program = result.unwrap();
3089        match &program.statements[0] {
3090            Stmt::Break(Some(n)) => assert_eq!(*n, 2),
3091            other => panic!("expected Break(2), got {:?}", other),
3092        }
3093    }
3094
3095    #[test]
3096    fn parse_continue_with_level() {
3097        let result = parse("continue 3");
3098        assert!(result.is_ok());
3099        let program = result.unwrap();
3100        match &program.statements[0] {
3101            Stmt::Continue(Some(n)) => assert_eq!(*n, 3),
3102            other => panic!("expected Continue(3), got {:?}", other),
3103        }
3104    }
3105
3106    #[test]
3107    fn parse_exit_with_code() {
3108        let result = parse("exit 1");
3109        assert!(result.is_ok());
3110        let program = result.unwrap();
3111        match &program.statements[0] {
3112            Stmt::Exit(Some(expr)) => {
3113                match expr.as_ref() {
3114                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 1),
3115                    other => panic!("expected Int(1), got {:?}", other),
3116                }
3117            }
3118            other => panic!("expected Exit(1), got {:?}", other),
3119        }
3120    }
3121}
kaish_kernel/parser.rs

kaish_kernel/
parser.rs