kaish_kernel/
parser.rs

1//! Parser for kaish source code.
2//!
3//! Transforms a token stream from the lexer into an Abstract Syntax Tree.
4//! Uses chumsky for parser combinators with good error recovery.
5
6use crate::ast::{
7    Arg, Assignment, BinaryOp, CaseBranch, CaseStmt, Command, Expr, FileTestOp, ForLoop, IfStmt,
8    Pipeline, Program, Redirect, RedirectKind, SpannedPart, Stmt, StringPart, StringTestOp,
9    TestCmpOp, TestExpr, ToolDef, Value, VarPath, VarSegment, WhileLoop,
10};
11use crate::lexer::{self, HereDocData, Token};
12use chumsky::{input::ValueInput, prelude::*};
13
/// Span type used throughout the parser.
///
/// Alias for chumsky's `SimpleSpan`. Within this file spans are built from
/// byte ranges over the source text (for example `source.len()..source.len()`
/// for end-of-input).
pub type Span = SimpleSpan;
16
17/// Parse a raw `${...}` string into an Expr.
18///
19/// Handles:
20/// - Special variables: `${?}` → LastExitCode, `${$}` → CurrentPid
21/// - Simple paths: `${VAR}`, `${VAR.field}`, `${VAR[0]}` → VarRef
22/// - Default values: `${VAR:-default}` → VarWithDefault (with nested expansion support)
23fn parse_var_expr(raw: &str) -> Expr {
24    // Special case: ${?} is the last exit code (same as $?)
25    if raw == "${?}" {
26        return Expr::LastExitCode;
27    }
28
29    // Special case: ${$} is the current PID (same as $$)
30    if raw == "${$}" {
31        return Expr::CurrentPid;
32    }
33
34    // Check for default value syntax: ${VAR:-default}
35    // Need to find :- that's not inside a nested ${...}
36    if let Some(colon_idx) = find_default_separator(raw) {
37        // Extract variable name (between ${ and :-)
38        let name = raw[2..colon_idx].to_string();
39        // Extract default value (between :- and }) and recursively parse it
40        let default_str = &raw[colon_idx + 2..raw.len() - 1];
41        let default = parse_interpolated_string(default_str);
42        return Expr::VarWithDefault { name, default };
43    }
44
45    // Regular variable path
46    Expr::VarRef(parse_varpath(raw))
47}
48
/// Locate the `:-` separator of a `${VAR:-default}` expression.
///
/// `raw` includes the outer `${` / `}`. The scan counts `${` openers and `}`
/// closers so that a `:-` belonging to a nested `${...}` is ignored; only a
/// `:-` seen while exactly one `${` is open (the outer one) is reported.
///
/// Returns the byte index of the `:` or `None` when there is no such separator.
fn find_default_separator(raw: &str) -> Option<usize> {
    let bytes = raw.as_bytes();
    let mut nesting = 0usize;
    let mut idx = 0;

    while let Some(&byte) = bytes.get(idx) {
        let following = bytes.get(idx + 1).copied();
        match (byte, following) {
            // "${" opens one more level; skip both bytes.
            (b'$', Some(b'{')) => {
                nesting += 1;
                idx += 2;
            }
            // '}' closes a level, but only if one is actually open.
            (b'}', _) if nesting > 0 => {
                nesting -= 1;
                idx += 1;
            }
            // ":-" directly inside the outer "${...}" is the separator.
            (b':', Some(b'-')) if nesting == 1 => return Some(idx),
            _ => idx += 1,
        }
    }
    None
}
74
/// Locate the `:-` separator inside variable content that has already had its
/// outer `${` / `}` removed (e.g. `VAR:-default`).
///
/// Nested `${...}` groups are skipped by counting `${` openers and `}`
/// closers; only a `:-` seen while no nested group is open is reported.
///
/// Returns the byte index of the `:` or `None` when there is no such separator.
fn find_default_separator_in_content(content: &str) -> Option<usize> {
    let bytes = content.as_bytes();
    let mut nesting = 0usize;
    let mut idx = 0;

    while let Some(&byte) = bytes.get(idx) {
        let following = bytes.get(idx + 1).copied();
        match (byte, following) {
            // "${" opens a nested group; skip both bytes.
            (b'$', Some(b'{')) => {
                nesting += 1;
                idx += 2;
            }
            // '}' closes a nested group, if one is open.
            (b'}', _) if nesting > 0 => {
                nesting -= 1;
                idx += 1;
            }
            // ":-" outside every nested group is the separator.
            (b':', Some(b'-')) if nesting == 0 => return Some(idx),
            _ => idx += 1,
        }
    }
    None
}
100
101/// Parse a raw `${...}` string into a VarPath.
102///
103/// Handles paths like `${VAR}` and `${VAR.field}`. Array indexing is not supported.
104fn parse_varpath(raw: &str) -> VarPath {
105    let segments_strs = lexer::parse_var_ref(raw).unwrap_or_default();
106    let segments = segments_strs
107        .into_iter()
108        .filter(|s| !s.starts_with('['))  // Skip index segments
109        .map(VarSegment::Field)
110        .collect();
111    VarPath { segments }
112}
113
114/// Parse an interpolated string like "Hello ${NAME}!" or "Hello $NAME!" into parts.
115/// Extract a pipeline from a statement if possible.
116fn stmt_to_pipeline(stmt: Stmt) -> Option<Pipeline> {
117    match stmt {
118        Stmt::Pipeline(p) => Some(p),
119        Stmt::Command(cmd) => Some(Pipeline {
120            commands: vec![cmd],
121            background: false,
122        }),
123        _ => None,
124    }
125}
126
/// Parse an unquoted heredoc body's interpolation while tracking each part's
/// byte offset in the source.
///
/// `base_offset` is added to every part's offset so callers can attribute
/// positions to a larger source (e.g., heredoc body inside the original
/// script). Returns parts in source order with offset+len populated.
///
/// **Heredoc-specific behaviour**: per POSIX, unquoted heredoc bodies process
/// three backslash escapes — `\$` (suppress expansion), `\\` (literal
/// backslash), and `\<newline>` (line continuation). All other backslashes
/// are kept verbatim. This differs from [`parse_interpolated_string`], which
/// is called on double-quoted string content where the lexer has already
/// processed escapes via `__KAISH_ESCAPED_DOLLAR__`.
///
/// This sibling of [`parse_interpolated_string`] duplicates parsing logic
/// for now; unifying them behind a position-tracking core is a follow-up
/// cleanup. Behaviour MUST stay aligned for the non-escape paths — bug fixes
/// for the shared interpolation logic here should land there as well.
///
/// # Parameters
/// - `s`: the text to split into literal and expansion parts.
/// - `base_offset`: constant added to every emitted `offset`.
///
/// # Returns
/// One [`SpannedPart`] per literal run or expansion, in the order they occur.
/// `offset`/`len` are byte counts; a literal's `len` covers the source bytes
/// it was built from, so it can be larger than the literal text when escapes
/// or line continuations were consumed inside the run.
fn parse_interpolated_string_spanned(s: &str, base_offset: usize) -> Vec<SpannedPart> {
    // NOTE(review): `pos` below is measured on this post-replacement string.
    // The marker shrinks from 24 bytes to 8, so offsets after a marker are not
    // in the coordinates of the incoming `s` — confirm whether heredoc bodies
    // can ever contain the marker.
    let s = s.replace("__KAISH_ESCAPED_DOLLAR__", "\x00DOLLAR\x00");

    // `i` indexes chars, `pos` counts bytes; both advance together.
    let chars_vec: Vec<char> = s.chars().collect();
    let mut i = 0;
    let mut pos: usize = 0;

    let mut parts: Vec<SpannedPart> = Vec::new();
    // Pending literal run and the byte position where it started.
    let mut current_text = String::new();
    let mut current_text_start: usize = pos;

    // Flush the pending literal run (if any) as a part spanning `start..end`,
    // then move `start` to `end`. A no-op when the run is empty.
    let push_literal =
        |current_text: &mut String, start: &mut usize, end: usize, parts: &mut Vec<SpannedPart>| {
            if !current_text.is_empty() {
                parts.push(SpannedPart {
                    part: StringPart::Literal(std::mem::take(current_text)),
                    offset: base_offset + *start,
                    len: end - *start,
                });
                *start = end;
            }
        };

    while i < chars_vec.len() {
        let ch = chars_vec[i];

        if ch == '\x00' {
            // Escaped-dollar marker: \x00 DOLLAR \x00 → literal '$'
            let start = pos;
            i += 1;
            pos += 1;
            // Collect everything up to the closing NUL (or end of input).
            let mut marker = String::new();
            while let Some(&c) = chars_vec.get(i) {
                if c == '\x00' {
                    i += 1;
                    pos += 1;
                    break;
                }
                marker.push(c);
                i += 1;
                pos += c.len_utf8();
            }
            // Any NUL-delimited text other than "DOLLAR" is consumed and
            // produces no output.
            if marker == "DOLLAR" {
                if current_text.is_empty() {
                    current_text_start = start;
                }
                current_text.push('$');
            }
        } else if ch == '\\' {
            // POSIX heredoc-body escape processing for unquoted heredocs.
            // Only `\$`, `\\`, and `\<newline>` are escapes; everything else
            // keeps the backslash verbatim. Each case advances `pos` by the
            // bytes consumed from the source so subsequent part offsets stay
            // anchored to original-source coordinates.
            let next = chars_vec.get(i + 1).copied();
            match next {
                Some('$') => {
                    if current_text.is_empty() {
                        current_text_start = pos;
                    }
                    current_text.push('$');
                    i += 2;
                    pos += 2;
                }
                Some('\\') => {
                    if current_text.is_empty() {
                        current_text_start = pos;
                    }
                    current_text.push('\\');
                    i += 2;
                    pos += 2;
                }
                Some('\n') => {
                    // Line continuation: consume both bytes, emit nothing.
                    // The literal run resumes on the next line.
                    i += 2;
                    pos += 2;
                    // With no run in progress, the next run starts after the
                    // continuation; an in-progress run keeps its start and
                    // its span simply covers the skipped bytes.
                    if current_text.is_empty() {
                        current_text_start = pos;
                    }
                }
                Some('\r') => {
                    // \<CR> or \<CR><LF>: line continuation
                    i += 2;
                    pos += 2;
                    if chars_vec.get(i) == Some(&'\n') {
                        i += 1;
                        pos += 1;
                    }
                    if current_text.is_empty() {
                        current_text_start = pos;
                    }
                }
                _ => {
                    // Other backslash sequences: keep `\` literally,
                    // consume only the backslash. The next iteration will
                    // process the following char on its own merits.
                    if current_text.is_empty() {
                        current_text_start = pos;
                    }
                    current_text.push('\\');
                    i += 1;
                    pos += 1;
                }
            }
        } else if ch == '$' {
            // Possible expansion. Remember where it starts so the emitted
            // part's span begins at the '$'.
            let part_start = pos;
            let next = chars_vec.get(i + 1).copied();

            // `$((` is excluded here; no later branch matches '(' either, so
            // it ends up in the bare-`$` branch at the bottom.
            if next == Some('(') && chars_vec.get(i + 2) != Some(&'(') {
                // $(...) command substitution
                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
                i += 2; // consume "$("
                pos += 2;
                // Collect up to the matching ')' (balanced on parens only);
                // the closing ')' is consumed but not stored.
                let mut cmd_content = String::new();
                let mut depth = 1;
                while let Some(&c) = chars_vec.get(i) {
                    i += 1;
                    pos += c.len_utf8();
                    if c == '(' {
                        depth += 1;
                        cmd_content.push(c);
                    } else if c == ')' {
                        depth -= 1;
                        if depth == 0 {
                            break;
                        }
                        cmd_content.push(c);
                    } else {
                        cmd_content.push(c);
                    }
                }
                // Only the first parsed statement is considered, and only if
                // it can be viewed as a pipeline.
                let inserted = if let Ok(program) = parse(&cmd_content) {
                    if let Some(stmt) = program.statements.first() {
                        if let Some(pipeline) = stmt_to_pipeline(stmt.clone()) {
                            parts.push(SpannedPart {
                                part: StringPart::CommandSubst(pipeline),
                                offset: base_offset + part_start,
                                len: pos - part_start,
                            });
                            true
                        } else {
                            false
                        }
                    } else {
                        false
                    }
                } else {
                    false
                };
                if inserted {
                    // Successfully pushed a CommandSubst; the next literal
                    // run will start after the closing ')'.
                    current_text_start = pos;
                } else {
                    // Fall back to literal text. `push_literal` above already
                    // flushed any pending run, so `current_text` is empty at
                    // this point and the new run starts at the leading '$'.
                    // The emptiness check is kept as a guard.
                    if current_text.is_empty() {
                        current_text_start = part_start;
                    }
                    // The ')' is appended even if the input ended before a
                    // closing paren was seen.
                    current_text.push_str("$(");
                    current_text.push_str(&cmd_content);
                    current_text.push(')');
                }
            } else if next == Some('{') {
                // ${...} braced expansion
                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
                i += 2; // consume "${"
                pos += 2;
                // Collect up to the matching '}'. A '{' only opens a nested
                // level when it directly follows '$'.
                let mut var_content = String::new();
                let mut depth = 1;
                while let Some(&c) = chars_vec.get(i) {
                    i += 1;
                    pos += c.len_utf8();
                    if c == '{' && var_content.ends_with('$') {
                        depth += 1;
                        var_content.push(c);
                    } else if c == '}' {
                        depth -= 1;
                        if depth == 0 {
                            break;
                        }
                        var_content.push(c);
                    } else {
                        var_content.push(c);
                    }
                }
                // Classify the content: ${#VAR}, ${__ARITH:expr__},
                // ${VAR:-default}, or a plain variable path.
                let part = if let Some(name) = var_content.strip_prefix('#') {
                    StringPart::VarLength(name.to_string())
                } else if var_content.starts_with("__ARITH:") && var_content.ends_with("__") {
                    let expr = var_content
                        .strip_prefix("__ARITH:")
                        .and_then(|s| s.strip_suffix("__"))
                        .unwrap_or("");
                    StringPart::Arithmetic(expr.to_string())
                } else if let Some(colon_idx) = find_default_separator_in_content(&var_content) {
                    let name = var_content[..colon_idx].to_string();
                    let default_str = &var_content[colon_idx + 2..];
                    // The default is parsed with the non-spanned sibling, so
                    // parts inside the default carry no offsets of their own;
                    // only the enclosing `${...}` gets a span.
                    let default = parse_interpolated_string(default_str);
                    StringPart::VarWithDefault { name, default }
                } else {
                    StringPart::Var(parse_varpath(&format!("${{{}}}", var_content)))
                };
                parts.push(SpannedPart {
                    part,
                    offset: base_offset + part_start,
                    len: pos - part_start,
                });
                current_text_start = pos;
            } else if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
                // Positional parameter: exactly one digit is consumed.
                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
                i += 1; // consume '$'
                pos += 1;
                if let Some(&digit) = chars_vec.get(i) {
                    let n = digit.to_digit(10).unwrap_or(0) as usize;
                    i += 1;
                    pos += digit.len_utf8();
                    parts.push(SpannedPart {
                        part: StringPart::Positional(n),
                        offset: base_offset + part_start,
                        len: pos - part_start,
                    });
                }
                current_text_start = pos;
            } else if next == Some('@') {
                // $@ — all arguments
                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
                i += 2; // consume "$@"
                pos += 2;
                parts.push(SpannedPart {
                    part: StringPart::AllArgs,
                    offset: base_offset + part_start,
                    len: pos - part_start,
                });
                current_text_start = pos;
            } else if next == Some('#') {
                // $# — argument count
                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
                i += 2; // consume "$#"
                pos += 2;
                parts.push(SpannedPart {
                    part: StringPart::ArgCount,
                    offset: base_offset + part_start,
                    len: pos - part_start,
                });
                current_text_start = pos;
            } else if next == Some('?') {
                // $? — last exit code
                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
                i += 2; // consume "$?"
                pos += 2;
                parts.push(SpannedPart {
                    part: StringPart::LastExitCode,
                    offset: base_offset + part_start,
                    len: pos - part_start,
                });
                current_text_start = pos;
            } else if next == Some('$') {
                // $$ — current PID
                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
                i += 2; // consume "$$"
                pos += 2;
                parts.push(SpannedPart {
                    part: StringPart::CurrentPid,
                    offset: base_offset + part_start,
                    len: pos - part_start,
                });
                current_text_start = pos;
            } else if next.map(|c| c.is_ascii_alphabetic() || c == '_').unwrap_or(false) {
                // $NAME — simple variable: ASCII letters, digits, underscore.
                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
                i += 1; // consume '$'
                pos += 1;
                let mut var_name = String::new();
                while let Some(&c) = chars_vec.get(i) {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        var_name.push(c);
                        i += 1;
                        pos += c.len_utf8();
                    } else {
                        break;
                    }
                }
                parts.push(SpannedPart {
                    part: StringPart::Var(VarPath::simple(var_name)),
                    offset: base_offset + part_start,
                    len: pos - part_start,
                });
                current_text_start = pos;
            } else {
                // Bare $ — treat as literal
                if current_text.is_empty() {
                    current_text_start = pos;
                }
                current_text.push(ch);
                i += 1;
                pos += 1;
            }
        } else {
            // Ordinary character: extend (or start) the literal run.
            if current_text.is_empty() {
                current_text_start = pos;
            }
            current_text.push(ch);
            i += 1;
            pos += ch.len_utf8();
        }
    }

    // Flush whatever literal text is still pending at end of input.
    push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);

    parts
}
458
459fn parse_interpolated_string(s: &str) -> Vec<StringPart> {
460    // First, replace escaped dollar markers with a temporary placeholder
461    // The lexer uses __KAISH_ESCAPED_DOLLAR__ for \$ to prevent re-interpretation
462    let s = s.replace("__KAISH_ESCAPED_DOLLAR__", "\x00DOLLAR\x00");
463
464    let mut parts = Vec::new();
465    let mut current_text = String::new();
466    let mut chars = s.chars().peekable();
467
468    while let Some(ch) = chars.next() {
469        if ch == '\x00' {
470            // This is our escaped dollar marker - skip "DOLLAR" and the closing \x00
471            let mut marker = String::new();
472            while let Some(&c) = chars.peek() {
473                if c == '\x00' {
474                    chars.next(); // consume closing marker
475                    break;
476                }
477                if let Some(c) = chars.next() {
478                    marker.push(c);
479                }
480            }
481            if marker == "DOLLAR" {
482                current_text.push('$');
483            }
484        } else if ch == '$' {
485            // Check for command substitution $(...)
486            if chars.peek() == Some(&'(') {
487                // Command substitution $(...)
488                if !current_text.is_empty() {
489                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
490                }
491
492                // Consume the '('
493                chars.next();
494
495                // Collect until matching ')' accounting for nested parens
496                let mut cmd_content = String::new();
497                let mut paren_depth = 1;
498                for c in chars.by_ref() {
499                    if c == '(' {
500                        paren_depth += 1;
501                        cmd_content.push(c);
502                    } else if c == ')' {
503                        paren_depth -= 1;
504                        if paren_depth == 0 {
505                            break;
506                        }
507                        cmd_content.push(c);
508                    } else {
509                        cmd_content.push(c);
510                    }
511                }
512
513                // Parse the command content as a pipeline
514                // We need to use the main parser for this
515                if let Ok(program) = parse(&cmd_content) {
516                    // Extract the pipeline from the parsed result
517                    if let Some(stmt) = program.statements.first() {
518                        if let Some(pipeline) = stmt_to_pipeline(stmt.clone()) {
519                            parts.push(StringPart::CommandSubst(pipeline));
520                        } else {
521                            // If we can't extract a pipeline, treat as literal
522                            current_text.push_str("$(");
523                            current_text.push_str(&cmd_content);
524                            current_text.push(')');
525                        }
526                    }
527                } else {
528                    // Parse failed - treat as literal
529                    current_text.push_str("$(");
530                    current_text.push_str(&cmd_content);
531                    current_text.push(')');
532                }
533            } else if chars.peek() == Some(&'{') {
534                // Braced variable reference ${...}
535                if !current_text.is_empty() {
536                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
537                }
538
539                // Consume the '{'
540                chars.next();
541
542                // Collect until matching '}', tracking nesting depth
543                let mut var_content = String::new();
544                let mut depth = 1;
545                for c in chars.by_ref() {
546                    if c == '{' && var_content.ends_with('$') {
547                        depth += 1;
548                        var_content.push(c);
549                    } else if c == '}' {
550                        depth -= 1;
551                        if depth == 0 {
552                            break;
553                        }
554                        var_content.push(c);
555                    } else {
556                        var_content.push(c);
557                    }
558                }
559
560                // Parse the content for special syntax
561                let part = if let Some(name) = var_content.strip_prefix('#') {
562                    // Variable length: ${#VAR}
563                    StringPart::VarLength(name.to_string())
564                } else if var_content.starts_with("__ARITH:") && var_content.ends_with("__") {
565                    // Arithmetic expression: ${__ARITH:expr__}
566                    let expr = var_content
567                        .strip_prefix("__ARITH:")
568                        .and_then(|s| s.strip_suffix("__"))
569                        .unwrap_or("");
570                    StringPart::Arithmetic(expr.to_string())
571                } else if let Some(colon_idx) = find_default_separator_in_content(&var_content) {
572                    // Variable with default: ${VAR:-default} - recursively parse the default
573                    let name = var_content[..colon_idx].to_string();
574                    let default_str = &var_content[colon_idx + 2..];
575                    let default = parse_interpolated_string(default_str);
576                    StringPart::VarWithDefault { name, default }
577                } else {
578                    // Regular variable: ${VAR} or ${VAR.field}
579                    StringPart::Var(parse_varpath(&format!("${{{}}}", var_content)))
580                };
581                parts.push(part);
582            } else if chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
583                // Positional parameter $0-$9
584                if !current_text.is_empty() {
585                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
586                }
587                if let Some(digit) = chars.next() {
588                    let n = digit.to_digit(10).unwrap_or(0) as usize;
589                    parts.push(StringPart::Positional(n));
590                }
591            } else if chars.peek() == Some(&'@') {
592                // All arguments $@
593                if !current_text.is_empty() {
594                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
595                }
596                chars.next(); // consume '@'
597                parts.push(StringPart::AllArgs);
598            } else if chars.peek() == Some(&'#') {
599                // Argument count $#
600                if !current_text.is_empty() {
601                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
602                }
603                chars.next(); // consume '#'
604                parts.push(StringPart::ArgCount);
605            } else if chars.peek() == Some(&'?') {
606                // Last exit code $?
607                if !current_text.is_empty() {
608                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
609                }
610                chars.next(); // consume '?'
611                parts.push(StringPart::LastExitCode);
612            } else if chars.peek() == Some(&'$') {
613                // Current PID $$
614                if !current_text.is_empty() {
615                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
616                }
617                chars.next(); // consume second '$'
618                parts.push(StringPart::CurrentPid);
619            } else if chars.peek().map(|c| c.is_ascii_alphabetic() || *c == '_').unwrap_or(false) {
620                // Simple variable reference $NAME
621                if !current_text.is_empty() {
622                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
623                }
624
625                // Collect identifier characters
626                let mut var_name = String::new();
627                while let Some(&c) = chars.peek() {
628                    if c.is_ascii_alphanumeric() || c == '_' {
629                        if let Some(c) = chars.next() {
630                            var_name.push(c);
631                        }
632                    } else {
633                        break;
634                    }
635                }
636
637                parts.push(StringPart::Var(VarPath::simple(var_name)));
638            } else {
639                // Literal $ (not followed by { or identifier start)
640                current_text.push(ch);
641            }
642        } else {
643            current_text.push(ch);
644        }
645    }
646
647    if !current_text.is_empty() {
648        parts.push(StringPart::Literal(current_text));
649    }
650
651    parts
652}
653
/// Parse error with location and context.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Source span the error is attached to. `ParseError::format` treats
    /// `span.start` as a byte offset into the source when computing line and
    /// column.
    pub span: Span,
    /// Human-readable description of what went wrong.
    pub message: String,
}
660
661impl ParseError {
662    /// Format the error against the original source, emitting a 1-indexed
663    /// `line:col [parse]: <message>` prefix and a snippet of the offending
664    /// line. Mirrors `ValidationIssue::format` so error reporting feels
665    /// consistent across pipeline phases.
666    pub fn format(&self, source: &str) -> String {
667        let start = self.span.start;
668        let mut line = 1usize;
669        let mut col = 1usize;
670        for (i, ch) in source.char_indices() {
671            if i >= start {
672                break;
673            }
674            if ch == '\n' {
675                line += 1;
676                col = 1;
677            } else {
678                col += 1;
679            }
680        }
681        let line_content = {
682            let line_start = source[..start.min(source.len())]
683                .rfind('\n')
684                .map_or(0, |i| i + 1);
685            let line_end = source[start.min(source.len())..]
686                .find('\n')
687                .map_or(source.len(), |i| start + i);
688            source.get(line_start..line_end).unwrap_or("")
689        };
690        if line_content.is_empty() {
691            format!("{}:{} [parse]: {}", line, col, self.message)
692        } else {
693            format!(
694                "{}:{} [parse]: {}\n  | {}",
695                line, col, self.message, line_content
696            )
697        }
698    }
699}
700
701impl std::fmt::Display for ParseError {
702    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
703        write!(f, "{} at {:?}", self.message, self.span)
704    }
705}
706
// Empty impl: `ParseError` relies on the trait's default methods, so it can
// be used wherever a `std::error::Error` is expected.
impl std::error::Error for ParseError {}
708
709/// Parse kaish source code into a Program AST.
710pub fn parse(source: &str) -> Result<Program, Vec<ParseError>> {
711    // Tokenize with logos
712    let tokens = lexer::tokenize(source).map_err(|errs| {
713        errs.into_iter()
714            .map(|e| ParseError {
715                span: (e.span.start..e.span.end).into(),
716                message: format!("lexer error: {}", e.token),
717            })
718            .collect::<Vec<_>>()
719    })?;
720
721    // Convert tokens to (Token, SimpleSpan) pairs
722    let tokens: Vec<(Token, Span)> = tokens
723        .into_iter()
724        .map(|spanned| (spanned.token, (spanned.span.start..spanned.span.end).into()))
725        .collect();
726
727    // End-of-input span
728    let end_span: Span = (source.len()..source.len()).into();
729
730    // Parse using slice-based input (like nano_rust example)
731    let parser = program_parser();
732    let result = parser.parse(tokens.as_slice().map(end_span, |(t, s)| (t, s)));
733
734    let program = result.into_result().map_err(|errs| {
735        errs.into_iter()
736            .map(|e| ParseError {
737                span: *e.span(),
738                message: e.to_string(),
739            })
740            .collect::<Vec<_>>()
741    })?;
742
743    // Structural well-formedness checks that chumsky's grammar can't surface a
744    // clean message for. A command with two stdin sources (`<`/`<<`/`<<<`)
745    // would silently depend on redirect ordering at execution time, so reject
746    // it here — at parse time, which (unlike validation) can never be skipped.
747    if first_ambiguous_stdin(&program.statements) {
748        return Err(vec![ParseError {
749            // Redirects carry no AST span, so anchor at the start of the
750            // source; the message is the actionable part. Precise columns
751            // would require spanning `Redirect` (deferred — see docs/issues.md).
752            span: (0..0).into(),
753            message: "multiple stdin redirects on one command are ambiguous; \
754                      use exactly one of `<`, `<<`, or `<<<`"
755                .to_string(),
756        }]);
757    }
758
759    Ok(program)
760}
761
762/// Parse a single statement (useful for REPL).
763pub fn parse_statement(source: &str) -> Result<Stmt, Vec<ParseError>> {
764    let program = parse(source)?;
765    program
766        .statements
767        .into_iter()
768        .find(|s| !matches!(s, Stmt::Empty))
769        .ok_or_else(|| {
770            vec![ParseError {
771                span: (0..source.len()).into(),
772                message: "empty input".to_string(),
773            }]
774        })
775}
776
777// ═══════════════════════════════════════════════════════════════════════════
778// Parser Combinators - generic over input type
779// ═══════════════════════════════════════════════════════════════════════════
780
781/// Top-level program parser.
782fn program_parser<'tokens, 'src: 'tokens, I>(
783) -> impl Parser<'tokens, I, Program, extra::Err<Rich<'tokens, Token, Span>>>
784where
785    I: ValueInput<'tokens, Token = Token, Span = Span>,
786{
787    statement_parser()
788        .repeated()
789        .collect::<Vec<_>>()
790        .map(|statements| Program { statements })
791}
792
/// Statement parser - dispatches based on leading token.
/// Supports statement-level chaining with && and ||.
///
/// Grammar implemented below:
///   statement = or_chain { NEWLINE | ";" }
///   or_chain  = and_chain { "||" and_chain }
///   and_chain = base_stmt { "&&" base_stmt }
///
/// The parser is recursive: compound constructs (functions, if/for/while/case)
/// receive a handle to this same parser so their bodies can hold any statement.
fn statement_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = Span>,
{
    recursive(|stmt| {
        // Zero or more newline/semicolon tokens; consumed after each full chain.
        let terminator = choice((just(Token::Newline), just(Token::Semi))).repeated();

        // break [N] - break out of N levels of loops (default 1)
        // NOTE(review): `n as usize` is an unchecked cast; if the lexer can emit a
        // negative `Token::Int` here the value would wrap — confirm against the lexer.
        let break_stmt = just(Token::Break)
            .ignore_then(
                select! { Token::Int(n) => n as usize }.or_not()
            )
            .map(Stmt::Break);

        // continue [N] - continue to next iteration, skipping N levels (default 1)
        // NOTE(review): same unchecked `as usize` cast as `break` above.
        let continue_stmt = just(Token::Continue)
            .ignore_then(
                select! { Token::Int(n) => n as usize }.or_not()
            )
            .map(Stmt::Continue);

        // return [expr] - return from a tool
        // Only a primary expression is accepted as the optional value.
        let return_stmt = just(Token::Return)
            .ignore_then(primary_expr_parser().or_not())
            .map(|e| Stmt::Return(e.map(Box::new)));

        // exit [code] - exit the script
        // Only a primary expression is accepted as the optional code.
        let exit_stmt = just(Token::Exit)
            .ignore_then(primary_expr_parser().or_not())
            .map(|e| Stmt::Exit(e.map(Box::new)));

        // set command: `set -e`, `set +e`, `set` (no args), `set -o pipefail`
        // This is listed BEFORE assignment_parser in `base_statement` so that
        // `set -e` is tried as a command first.
        //
        // Two shapes are recognised here:
        // - `set` followed by at least one flag (-e, --long, +e), then any mix of
        //   further flags and identifiers (e.g. the `pipefail` in `set -o pipefail`)
        // - `set` followed by nothing (newline / `;` / `&&` / `||` / end of input)
        // Any other token after `set` (for example an identifier directly after it)
        // is NOT matched by `set_command`; the later `base_statement` alternatives
        // get to try instead.
        // NOTE(review): whether those alternatives accept a leading `Token::Set`
        // depends on `ident_parser`, which is not visible here — confirm.
        let set_flag_arg = choice((
            select! { Token::ShortFlag(f) => Arg::ShortFlag(f) },
            select! { Token::LongFlag(f) => Arg::LongFlag(f) },
            // PlusFlag for +e, +x etc. - convert to positional arg with + prefix
            select! { Token::PlusFlag(f) => Arg::Positional(Expr::Literal(Value::String(format!("+{}", f)))) },
        ));

        // set with flags: `set -e`, `set -e -u -o pipefail`
        // The first argument must be a flag; later ones may be flags or identifiers.
        let set_with_flags = just(Token::Set)
            .then(set_flag_arg)
            .then(
                choice((
                    set_flag_arg,
                    // Identifiers like 'pipefail' after -o
                    ident_parser().map(|name| Arg::Positional(Expr::Literal(Value::String(name)))),
                ))
                .repeated()
                .collect::<Vec<_>>(),
            )
            .map(|((_, first_arg), mut rest_args)| {
                // Re-assemble the mandatory first flag and the remaining args in order.
                let mut args = vec![first_arg];
                args.append(&mut rest_args);
                Stmt::Command(Command {
                    name: "set".to_string(),
                    args,
                    redirects: vec![],
                })
            });

        // set with no args: `set` alone (shows settings)
        // Must be followed by newline, semicolon, end of input, or a chaining operator (&&, ||)
        // `.rewind()` makes this a pure lookahead: the following token is checked
        // but left in the input for the chain/terminator parsers.
        let set_no_args = just(Token::Set)
            .then(
                choice((
                    just(Token::Newline).to(()),
                    just(Token::Semi).to(()),
                    just(Token::And).to(()),
                    just(Token::Or).to(()),
                    end(),
                ))
                .rewind(),
            )
            .map(|_| Stmt::Command(Command {
                name: "set".to_string(),
                args: vec![],
                redirects: vec![],
            }));

        // Try set_with_flags first (requires at least one flag)
        // Then try set_no_args (no args, followed by terminator)
        // If neither matches, `base_statement` moves on to its next alternatives
        let set_command = set_with_flags.or(set_no_args);

        // Base statement (without chaining)
        // Alternatives are tried top to bottom, so the order below is significant.
        let base_statement = choice((
            // A bare newline is an empty statement.
            just(Token::Newline).to(Stmt::Empty),
            set_command,
            assignment_parser().map(Stmt::Assignment),
            // Shell-style functions (use $1, $2 positional params)
            posix_function_parser(stmt.clone()).map(Stmt::ToolDef),  // name() { }
            bash_function_parser(stmt.clone()).map(Stmt::ToolDef),   // function name { }
            if_parser(stmt.clone()).map(Stmt::If),
            for_parser(stmt.clone()).map(Stmt::For),
            while_parser(stmt.clone()).map(Stmt::While),
            case_parser(stmt.clone()).map(Stmt::Case),
            break_stmt,
            continue_stmt,
            return_stmt,
            exit_stmt,
            test_expr_stmt_parser().map(Stmt::Test),
            // Note: 'true' and 'false' are handled by command_parser/pipeline_parser
            pipeline_parser().map(|p| {
                // Unwrap single-command pipelines without background and without redirects
                if p.commands.len() == 1 && !p.background {
                    // Only unwrap if no redirects - redirects require pipeline processing
                    if p.commands[0].redirects.is_empty() {
                        // Safe: we just checked len == 1
                        match p.commands.into_iter().next() {
                            Some(cmd) => Stmt::Command(cmd),
                            None => Stmt::Empty, // unreachable but safe
                        }
                    } else {
                        Stmt::Pipeline(p)
                    }
                } else {
                    Stmt::Pipeline(p)
                }
            }),
        ))
        .boxed();

        // Statement chaining with precedence: && binds tighter than ||
        // and_chain = base_stmt { "&&" base_stmt }
        // or_chain  = and_chain { "||" and_chain }
        // Both folds are left-associative: `a && b && c` nests as `(a && b) && c`.
        let and_chain = base_statement
            .clone()
            .foldl(
                just(Token::And).ignore_then(base_statement).repeated(),
                |left, right| Stmt::AndChain {
                    left: Box::new(left),
                    right: Box::new(right),
                },
            );

        and_chain
            .clone()
            .foldl(
                just(Token::Or).ignore_then(and_chain).repeated(),
                |left, right| Stmt::OrChain {
                    left: Box::new(left),
                    right: Box::new(right),
                },
            )
            // Swallow any trailing newlines/semicolons after the whole chain.
            .then_ignore(terminator)
    })
}
951
952/// Assignment: `NAME=value` (bash-style) or `local NAME = value` (scoped)
953fn assignment_parser<'tokens, I>(
954) -> impl Parser<'tokens, I, Assignment, extra::Err<Rich<'tokens, Token, Span>>> + Clone
955where
956    I: ValueInput<'tokens, Token = Token, Span = Span>,
957{
958    // local NAME = value (with spaces around =)
959    let local_assignment = just(Token::Local)
960        .ignore_then(ident_parser())
961        .then_ignore(just(Token::Eq))
962        .then(expr_parser())
963        .map(|(name, value)| Assignment {
964            name,
965            value,
966            local: true,
967        });
968
969    // Bash-style: NAME=value (no spaces around =)
970    // The lexer produces IDENT EQ EXPR, so we parse it here
971    let bash_assignment = ident_parser()
972        .then_ignore(just(Token::Eq))
973        .then(expr_parser())
974        .map(|(name, value)| Assignment {
975            name,
976            value,
977            local: false,
978        });
979
980    choice((local_assignment, bash_assignment))
981        .labelled("assignment")
982        .boxed()
983}
984
985/// POSIX-style function: `name() { body }`
986///
987/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
988fn posix_function_parser<'tokens, I, S>(
989    stmt: S,
990) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
991where
992    I: ValueInput<'tokens, Token = Token, Span = Span>,
993    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
994{
995    ident_parser()
996        .then_ignore(just(Token::LParen))
997        .then_ignore(just(Token::RParen))
998        .then_ignore(just(Token::LBrace))
999        .then_ignore(just(Token::Newline).repeated())
1000        .then(
1001            stmt.repeated()
1002                .collect::<Vec<_>>()
1003                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1004        )
1005        .then_ignore(just(Token::Newline).repeated())
1006        .then_ignore(just(Token::RBrace))
1007        .map(|(name, body)| ToolDef { name, params: vec![], body })
1008        .labelled("POSIX function")
1009        .boxed()
1010}
1011
1012/// Bash-style function: `function name { body }` (without parens)
1013///
1014/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
1015fn bash_function_parser<'tokens, I, S>(
1016    stmt: S,
1017) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1018where
1019    I: ValueInput<'tokens, Token = Token, Span = Span>,
1020    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1021{
1022    just(Token::Function)
1023        .ignore_then(ident_parser())
1024        .then_ignore(just(Token::LBrace))
1025        .then_ignore(just(Token::Newline).repeated())
1026        .then(
1027            stmt.repeated()
1028                .collect::<Vec<_>>()
1029                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1030        )
1031        .then_ignore(just(Token::Newline).repeated())
1032        .then_ignore(just(Token::RBrace))
1033        .map(|(name, body)| ToolDef { name, params: vec![], body })
1034        .labelled("bash function")
1035        .boxed()
1036}
1037
1038/// If statement: `if COND; then STMTS [elif COND; then STMTS]* [else STMTS] fi`
1039///
1040/// elif clauses are desugared to nested if/else:
1041///   `if A; then X elif B; then Y else Z fi`
1042/// becomes:
1043///   `if A; then X else { if B; then Y else Z fi } fi`
1044fn if_parser<'tokens, I, S>(
1045    stmt: S,
1046) -> impl Parser<'tokens, I, IfStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1047where
1048    I: ValueInput<'tokens, Token = Token, Span = Span>,
1049    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1050{
1051    // Parse a single branch: condition + then statements
1052    let branch = condition_parser()
1053        .then_ignore(just(Token::Semi).or_not())
1054        .then_ignore(just(Token::Newline).repeated())
1055        .then_ignore(just(Token::Then))
1056        .then_ignore(just(Token::Newline).repeated())
1057        .then(
1058            stmt.clone()
1059                .repeated()
1060                .collect::<Vec<_>>()
1061                .map(|stmts: Vec<Stmt>| {
1062                    stmts
1063                        .into_iter()
1064                        .filter(|s| !matches!(s, Stmt::Empty))
1065                        .collect::<Vec<_>>()
1066                }),
1067        );
1068
1069    // Parse elif branches: `elif COND; then STMTS`
1070    let elif_branch = just(Token::Elif)
1071        .ignore_then(condition_parser())
1072        .then_ignore(just(Token::Semi).or_not())
1073        .then_ignore(just(Token::Newline).repeated())
1074        .then_ignore(just(Token::Then))
1075        .then_ignore(just(Token::Newline).repeated())
1076        .then(
1077            stmt.clone()
1078                .repeated()
1079                .collect::<Vec<_>>()
1080                .map(|stmts: Vec<Stmt>| {
1081                    stmts
1082                        .into_iter()
1083                        .filter(|s| !matches!(s, Stmt::Empty))
1084                        .collect::<Vec<_>>()
1085                }),
1086        );
1087
1088    // Parse else branch: `else STMTS`
1089    let else_branch = just(Token::Else)
1090        .ignore_then(just(Token::Newline).repeated())
1091        .ignore_then(stmt.repeated().collect::<Vec<_>>())
1092        .map(|stmts: Vec<Stmt>| {
1093            stmts
1094                .into_iter()
1095                .filter(|s| !matches!(s, Stmt::Empty))
1096                .collect::<Vec<_>>()
1097        });
1098
1099    just(Token::If)
1100        .ignore_then(branch)
1101        .then(elif_branch.repeated().collect::<Vec<_>>())
1102        .then(else_branch.or_not())
1103        .then_ignore(just(Token::Fi))
1104        .map(|(((condition, then_branch), elif_branches), else_branch)| {
1105            // Build nested if/else structure from elif branches
1106            build_if_chain(condition, then_branch, elif_branches, else_branch)
1107        })
1108        .labelled("if statement")
1109        .boxed()
1110}
1111
1112/// Build a nested IfStmt chain from elif branches.
1113///
1114/// Transforms:
1115///   if A then X elif B then Y elif C then Z else W fi
1116/// Into:
1117///   IfStmt { cond: A, then: X, else: Some([IfStmt { cond: B, then: Y, else: Some([IfStmt { cond: C, then: Z, else: Some(W) }]) }]) }
1118fn build_if_chain(
1119    condition: Expr,
1120    then_branch: Vec<Stmt>,
1121    mut elif_branches: Vec<(Expr, Vec<Stmt>)>,
1122    else_branch: Option<Vec<Stmt>>,
1123) -> IfStmt {
1124    if elif_branches.is_empty() {
1125        // No elif, just if/else
1126        IfStmt {
1127            condition: Box::new(condition),
1128            then_branch,
1129            else_branch,
1130        }
1131    } else {
1132        // Pop the first elif and recursively build the rest
1133        let (elif_cond, elif_then) = elif_branches.remove(0);
1134        let nested_if = build_if_chain(elif_cond, elif_then, elif_branches, else_branch);
1135        IfStmt {
1136            condition: Box::new(condition),
1137            then_branch,
1138            else_branch: Some(vec![Stmt::If(nested_if)]),
1139        }
1140    }
1141}
1142
1143/// For loop: `for VAR in ITEMS; do STMTS done`
1144fn for_parser<'tokens, I, S>(
1145    stmt: S,
1146) -> impl Parser<'tokens, I, ForLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1147where
1148    I: ValueInput<'tokens, Token = Token, Span = Span>,
1149    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1150{
1151    just(Token::For)
1152        .ignore_then(ident_parser())
1153        .then_ignore(just(Token::In))
1154        .then(expr_parser().repeated().at_least(1).collect::<Vec<_>>())
1155        .then_ignore(just(Token::Semi).or_not())
1156        .then_ignore(just(Token::Newline).repeated())
1157        .then_ignore(just(Token::Do))
1158        .then_ignore(just(Token::Newline).repeated())
1159        .then(
1160            stmt.repeated()
1161                .collect::<Vec<_>>()
1162                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1163        )
1164        .then_ignore(just(Token::Done))
1165        .map(|((variable, items), body)| ForLoop {
1166            variable,
1167            items,
1168            body,
1169        })
1170        .labelled("for loop")
1171        .boxed()
1172}
1173
1174/// While loop: `while condition; do ...; done`
1175fn while_parser<'tokens, I, S>(
1176    stmt: S,
1177) -> impl Parser<'tokens, I, WhileLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1178where
1179    I: ValueInput<'tokens, Token = Token, Span = Span>,
1180    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1181{
1182    just(Token::While)
1183        .ignore_then(condition_parser())
1184        .then_ignore(just(Token::Semi).or_not())
1185        .then_ignore(just(Token::Newline).repeated())
1186        .then_ignore(just(Token::Do))
1187        .then_ignore(just(Token::Newline).repeated())
1188        .then(
1189            stmt.repeated()
1190                .collect::<Vec<_>>()
1191                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1192        )
1193        .then_ignore(just(Token::Done))
1194        .map(|(condition, body)| WhileLoop {
1195            condition: Box::new(condition),
1196            body,
1197        })
1198        .labelled("while loop")
1199        .boxed()
1200}
1201
/// Case statement: `case expr in pattern) commands ;; esac`
///
/// Supports:
/// - Single patterns: `pattern) commands ;;`
/// - Multiple patterns: `pattern1|pattern2) commands ;;`
/// - Optional leading `(` before patterns: `(pattern) commands ;;`
///
/// Patterns are not kept as tokens: each pattern is rebuilt into a single
/// `String` by concatenating the text of its tokens. Every branch must end with
/// `;;`, and `Stmt::Empty` entries are dropped from branch bodies.
/// `stmt` is the recursive statement parser used for branch bodies.
fn case_parser<'tokens, I, S>(
    stmt: S,
) -> impl Parser<'tokens, I, CaseStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = Span>,
    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
{
    // Pattern part: individual tokens that make up a glob pattern
    // e.g., "*.rs" is Star + Dot + Ident("rs")
    // Each alternative maps one token (or one bracket/brace group) to its text.
    let pattern_part = choice((
        select! { Token::GlobWord(s) => s },
        select! { Token::Ident(s) => s },
        select! { Token::NumberIdent(s) => s },
        select! { Token::DottedIdent(s) => s },
        select! { Token::String(s) => s },
        select! { Token::SingleString(s) => s },
        select! { Token::Int(n) => n.to_string() },
        select! { Token::Star => "*".to_string() },
        select! { Token::Question => "?".to_string() },
        select! { Token::Dot => ".".to_string() },
        select! { Token::DotDot => "..".to_string() },
        select! { Token::Tilde => "~".to_string() },
        select! { Token::TildePath(s) => s },
        select! { Token::RelativePath(s) => s },
        select! { Token::DotSlashPath(s) => s },
        select! { Token::Path(p) => p },
        // Variable references are kept as text; the simple form gets its `$` put back.
        select! { Token::VarRef(v) => v },
        select! { Token::SimpleVarRef(v) => format!("${}", v) },
        // Character class: [a-z], [!abc], etc.
        // Rebuilt as `[` + concatenated parts + `]`; at least one part is required.
        just(Token::LBracket)
            .ignore_then(
                choice((
                    select! { Token::Ident(s) => s },
                    select! { Token::Int(n) => n.to_string() },
                    just(Token::Colon).to(":".to_string()),
                    // Negation: `!` only. No `^` token is accepted in this list.
                    just(Token::Bang).to("!".to_string()),
                    // Range like a-z
                    // NOTE(review): presumably the `-z` half lexes as ShortFlag("z") — confirm.
                    select! { Token::ShortFlag(s) => format!("-{}", s) },
                ))
                .repeated()
                .at_least(1)
                .collect::<Vec<String>>()
            )
            .then_ignore(just(Token::RBracket))
            .map(|parts| format!("[{}]", parts.join(""))),
        // Brace expansion: {a,b,c} or {js,ts}
        // Items may only be identifiers or integers, separated by commas.
        just(Token::LBrace)
            .ignore_then(
                choice((
                    select! { Token::Ident(s) => s },
                    select! { Token::Int(n) => n.to_string() },
                ))
                .separated_by(just(Token::Comma))
                .at_least(1)
                .collect::<Vec<String>>()
            )
            .then_ignore(just(Token::RBrace))
            .map(|parts| format!("{{{}}}", parts.join(","))),
    ));

    // A complete pattern is one or more pattern parts joined together
    // e.g., "*.rs" = Star + Dot + Ident
    let pattern = pattern_part
        .repeated()
        .at_least(1)
        .collect::<Vec<String>>()
        .map(|parts| parts.join(""))
        .labelled("case pattern");

    // Multiple patterns separated by pipe: `pattern1 | pattern2`
    let patterns = pattern
        .separated_by(just(Token::Pipe))
        .at_least(1)
        .collect::<Vec<String>>()
        .labelled("case patterns");

    // Branch: `[( ] patterns ) commands ;;`
    // Newlines are allowed before the patterns, after `)`, and after `;;`.
    let branch = just(Token::LParen)
        .or_not()
        .ignore_then(just(Token::Newline).repeated())
        .ignore_then(patterns)
        .then_ignore(just(Token::RParen))
        .then_ignore(just(Token::Newline).repeated())
        .then(
            stmt.clone()
                .repeated()
                .collect::<Vec<_>>()
                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
        )
        .then_ignore(just(Token::DoubleSemi))
        .then_ignore(just(Token::Newline).repeated())
        .map(|(patterns, body)| CaseBranch { patterns, body })
        .labelled("case branch");

    // `case EXPR in` + zero or more branches + `esac`.
    just(Token::Case)
        .ignore_then(expr_parser())
        .then_ignore(just(Token::In))
        .then_ignore(just(Token::Newline).repeated())
        .then(branch.repeated().collect::<Vec<_>>())
        .then_ignore(just(Token::Esac))
        .map(|(expr, branches)| CaseStmt { expr, branches })
        .labelled("case statement")
        .boxed()
}
1313
1314/// Pipeline: `cmd | cmd | cmd [&]`
1315fn pipeline_parser<'tokens, I>(
1316) -> impl Parser<'tokens, I, Pipeline, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1317where
1318    I: ValueInput<'tokens, Token = Token, Span = Span>,
1319{
1320    command_parser()
1321        .separated_by(just(Token::Pipe))
1322        .at_least(1)
1323        .collect::<Vec<_>>()
1324        .then(just(Token::Amp).or_not())
1325        .map(|(commands, bg)| Pipeline {
1326            commands,
1327            background: bg.is_some(),
1328        })
1329        .labelled("pipeline")
1330        .boxed()
1331}
1332
1333/// Command: `name args... [redirects...]`
1334/// Command names can be identifiers, 'true', 'false', or '.' (source alias).
1335fn command_parser<'tokens, I>(
1336) -> impl Parser<'tokens, I, Command, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1337where
1338    I: ValueInput<'tokens, Token = Token, Span = Span>,
1339{
1340    // Command name can be an identifier, path, 'true', 'false', '.' (source alias), or ./path
1341    let command_name = choice((
1342        ident_parser(),
1343        path_parser(),
1344        select! { Token::DotSlashPath(s) => s },
1345        just(Token::True).to("true".to_string()),
1346        just(Token::False).to("false".to_string()),
1347        just(Token::Dot).to(".".to_string()),
1348    ));
1349
1350    // NB: the "at most one stdin source per command" rule is enforced by a
1351    // post-parse scan in `parse()` (see `first_ambiguous_stdin`), NOT here.
1352    // A `try_map` rejection at this level cannot surface its own message: a
1353    // command like `cat <<< a <<< b` also fails the competing statement-level
1354    // assignment/function alternative ("expected '=', or '('"), and chumsky's
1355    // `choice` merge keeps that alternative's error regardless of which span
1356    // our custom error carries. So we accept the command here and reject it
1357    // structurally after parsing, where the message is fully under our control
1358    // (verified empirically 2026-06-07; see docs/issues.md).
1359    command_name
1360        .then(args_list_parser())
1361        .then(redirect_parser().repeated().collect::<Vec<_>>())
1362        .map(|((name, args), redirects)| Command {
1363            name,
1364            args,
1365            redirects,
1366        })
1367        .labelled("command")
1368        .boxed()
1369}
1370
1371/// True if `cmd` has more than one stdin source (`<`, `<<`, `<<<`). Such a
1372/// command would silently depend on redirect ordering at execution time
1373/// (`setup_stdin_redirects` is last-wins), so `parse()` rejects it loudly.
1374fn command_has_ambiguous_stdin(cmd: &Command) -> bool {
1375    cmd.redirects
1376        .iter()
1377        .filter(|r| {
1378            matches!(
1379                r.kind,
1380                RedirectKind::Stdin | RedirectKind::HereDoc | RedirectKind::HereString
1381            )
1382        })
1383        .count()
1384        > 1
1385}
1386
1387/// Find the first command anywhere in `stmts` (recursing into pipelines,
1388/// control-flow bodies, chains, and tool definitions) that has more than one
1389/// stdin source. Used by `parse()` to reject the ambiguity after parsing.
1390fn first_ambiguous_stdin(stmts: &[Stmt]) -> bool {
1391    stmts.iter().any(stmt_has_ambiguous_stdin)
1392}
1393
1394fn stmt_has_ambiguous_stdin(stmt: &Stmt) -> bool {
1395    match stmt {
1396        Stmt::Command(c) => command_has_ambiguous_stdin(c),
1397        Stmt::Pipeline(p) => p.commands.iter().any(command_has_ambiguous_stdin),
1398        Stmt::If(i) => {
1399            first_ambiguous_stdin(&i.then_branch)
1400                || i.else_branch
1401                    .as_deref()
1402                    .is_some_and(first_ambiguous_stdin)
1403        }
1404        Stmt::For(f) => first_ambiguous_stdin(&f.body),
1405        Stmt::While(w) => first_ambiguous_stdin(&w.body),
1406        Stmt::Case(c) => c.branches.iter().any(|b| first_ambiguous_stdin(&b.body)),
1407        Stmt::ToolDef(t) => first_ambiguous_stdin(&t.body),
1408        Stmt::AndChain { left, right } | Stmt::OrChain { left, right } => {
1409            stmt_has_ambiguous_stdin(left) || stmt_has_ambiguous_stdin(right)
1410        }
1411        Stmt::Assignment(_)
1412        | Stmt::Break(_)
1413        | Stmt::Continue(_)
1414        | Stmt::Return(_)
1415        | Stmt::Exit(_)
1416        | Stmt::Test(_)
1417        | Stmt::Empty => false,
1418    }
1419}
1420
1421/// Arguments list parser that handles `--` flag terminator.
1422///
1423/// After `--`, all subsequent flags are converted to positional string arguments.
1424fn args_list_parser<'tokens, I>(
1425) -> impl Parser<'tokens, I, Vec<Arg>, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1426where
1427    I: ValueInput<'tokens, Token = Token, Span = Span>,
1428{
1429    // Arguments before `--` (normal parsing)
1430    let pre_dash = arg_before_double_dash_parser()
1431        .repeated()
1432        .collect::<Vec<_>>();
1433
1434    // The `--` marker itself
1435    let double_dash = select! {
1436        Token::DoubleDash => Arg::DoubleDash,
1437    };
1438
1439    // Arguments after `--` (flags become positional strings)
1440    let post_dash_arg = choice((
1441        // Flags become positional strings
1442        select! {
1443            Token::ShortFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("-{}", name)))),
1444            Token::LongFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("--{}", name)))),
1445        },
1446        // Everything else stays the same
1447        primary_expr_parser().map(Arg::Positional),
1448    ));
1449
1450    let post_dash = post_dash_arg.repeated().collect::<Vec<_>>();
1451
1452    // Combine: args_before ++ [--] ++ args_after
1453    pre_dash
1454        .then(double_dash.then(post_dash).or_not())
1455        .map(|(mut args, maybe_dd)| {
1456            if let Some((dd, post)) = maybe_dd {
1457                args.push(dd);
1458                args.extend(post);
1459            }
1460            args
1461        })
1462}
1463
1464/// Argument parser for arguments before `--` (normal flag handling).
1465fn arg_before_double_dash_parser<'tokens, I>(
1466) -> impl Parser<'tokens, I, Arg, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1467where
1468    I: ValueInput<'tokens, Token = Token, Span = Span>,
1469{
1470    // Long flag with value: --name=value
1471    let long_flag_with_value = select! {
1472        Token::LongFlag(name) => name,
1473    }
1474    .then_ignore(just(Token::Eq))
1475    .then(primary_expr_parser())
1476    .map(|(key, value)| Arg::Named { key, value });
1477
1478    // Boolean long flag: --name
1479    let long_flag = select! {
1480        Token::LongFlag(name) => Arg::LongFlag(name),
1481    };
1482
1483    // Boolean short flag: -x
1484    let short_flag = select! {
1485        Token::ShortFlag(name) => Arg::ShortFlag(name),
1486    };
1487
1488    // Shell assignment in argv position: name=value (must not have spaces around =).
1489    // Produces Arg::WordAssign; the kernel routes it through tool_args.named
1490    // only for shell-assignment-accepting builtins (export, alias). For every
1491    // other command it materialises as a `"name=value"` positional, matching
1492    // bash semantics (`cat foo=bar` opens a file named `foo=bar`).
1493    let named = select! {
1494        Token::Ident(s) => s,
1495    }
1496    .map_with(|s, e| -> (String, Span) { (s, e.span()) })
1497    .then(just(Token::Eq).map_with(|_, e| -> Span { e.span() }))
1498    .then(primary_expr_parser().map_with(|expr, e| -> (Expr, Span) { (expr, e.span()) }))
1499    .try_map(|(((key, key_span), eq_span), (value, value_span)): (((String, Span), Span), (Expr, Span)), span| {
1500        // Check that key ends where = starts and = ends where value starts
1501        if key_span.end != eq_span.start || eq_span.end != value_span.start {
1502            Err(Rich::custom(
1503                span,
1504                "shell assignment must not have spaces around '=' (use 'key=value' not 'key = value')",
1505            ))
1506        } else {
1507            Ok(Arg::WordAssign { key, value })
1508        }
1509    });
1510
1511    // Positional argument
1512    let positional = primary_expr_parser().map(Arg::Positional);
1513
1514    // Order matters: try more specific patterns first
1515    // Note: DoubleDash is NOT included here - it's handled by args_list_parser
1516    choice((
1517        long_flag_with_value,
1518        long_flag,
1519        short_flag,
1520        named,
1521        positional,
1522    ))
1523    .boxed()
1524}
1525
1526/// Redirect: `> file`, `>> file`, `< file`, `<< heredoc`, `2> file`, `&> file`, `2>&1`
1527fn redirect_parser<'tokens, I>(
1528) -> impl Parser<'tokens, I, Redirect, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1529where
1530    I: ValueInput<'tokens, Token = Token, Span = Span>,
1531{
1532    // Regular redirects: >, >>, <, 2>, &>
1533    let regular_redirect = select! {
1534        Token::GtGt => RedirectKind::StdoutAppend,
1535        Token::Gt => RedirectKind::StdoutOverwrite,
1536        Token::Lt => RedirectKind::Stdin,
1537        Token::Stderr => RedirectKind::Stderr,
1538        Token::Both => RedirectKind::Both,
1539    }
1540    .then(primary_expr_parser())
1541    .map(|(kind, target)| Redirect { kind, target });
1542
1543    // Here-doc redirect: << content
1544    // Quoted delimiters (<<'EOF' or <<"EOF") produce literal heredocs (no expansion).
1545    // Unquoted delimiters produce interpolated heredocs (variables are expanded).
1546    // For literal heredocs the `<<-EOF` tab stripping is applied here at parse
1547    // time (the body is fully known); for interpolated heredocs the stripping
1548    // is deferred to the interpreter so source byte offsets in `parts` stay
1549    // aligned with the original source for span reporting.
1550    let heredoc_redirect = just(Token::HereDocStart)
1551        .ignore_then(select! { Token::HereDoc(data) => data })
1552        .map(|data: HereDocData| {
1553            let target = if data.literal {
1554                let body = if data.strip_tabs {
1555                    crate::interpreter::strip_leading_tabs(&data.content)
1556                } else {
1557                    data.content
1558                };
1559                Expr::Literal(Value::String(body))
1560            } else {
1561                let parts = parse_interpolated_string_spanned(
1562                    &data.content,
1563                    data.body_start_offset,
1564                );
1565                // If there's only one literal part and no tab stripping is
1566                // needed, simplify to Expr::Literal — keeps the AST shape
1567                // identical to the pre-spans path for trivial bodies.
1568                if parts.len() == 1 && !data.strip_tabs {
1569                    if let StringPart::Literal(text) = &parts[0].part {
1570                        return Redirect {
1571                            kind: RedirectKind::HereDoc,
1572                            target: Expr::Literal(Value::String(text.clone())),
1573                        };
1574                    }
1575                }
1576                Expr::HereDocBody {
1577                    parts,
1578                    strip_tabs: data.strip_tabs,
1579                }
1580            };
1581            Redirect {
1582                kind: RedirectKind::HereDoc,
1583                target,
1584            }
1585        });
1586
1587    // Here-string redirect: <<< word
1588    // The target is any single expression; kaish's existing Expr machinery
1589    // handles interpolation, single-quoted literals, and command substitution.
1590    let herestring_redirect = just(Token::HereString)
1591        .ignore_then(primary_expr_parser())
1592        .map(|target| Redirect {
1593            kind: RedirectKind::HereString,
1594            target,
1595        });
1596
1597    // Merge stderr to stdout: 2>&1 (no target needed - implicit)
1598    let merge_stderr_redirect = just(Token::StderrToStdout)
1599        .map(|_| Redirect {
1600            kind: RedirectKind::MergeStderr,
1601            // Target is unused for MergeStderr, but we need something
1602            target: Expr::Literal(Value::Null),
1603        });
1604
1605    // Merge stdout to stderr: 1>&2 or >&2 (no target needed - implicit)
1606    let merge_stdout_redirect = choice((
1607        just(Token::StdoutToStderr),
1608        just(Token::StdoutToStderr2),
1609    ))
1610    .map(|_| Redirect {
1611        kind: RedirectKind::MergeStdout,
1612        // Target is unused for MergeStdout, but we need something
1613        target: Expr::Literal(Value::Null),
1614    });
1615
1616    choice((
1617        heredoc_redirect,
1618        herestring_redirect,
1619        merge_stderr_redirect,
1620        merge_stdout_redirect,
1621        regular_redirect,
1622    ))
1623    .labelled("redirect")
1624    .boxed()
1625}
1626
1627/// Test expression parser for `[[ ... ]]` syntax.
1628///
1629/// Supports:
1630/// - File tests: `[[ -f path ]]`, `[[ -d path ]]`, etc.
1631/// - String tests: `[[ -z str ]]`, `[[ -n str ]]`
1632/// - Comparisons: `[[ $X == "value" ]]`, `[[ $NUM -gt 5 ]]`
1633/// - Compound: `[[ -f a && -d b ]]`, `[[ -z x || -n y ]]`, `[[ ! -f file ]]`
1634///
1635/// Precedence (highest to lowest): `!` > `&&` > `||`
1636fn test_expr_stmt_parser<'tokens, I>(
1637) -> impl Parser<'tokens, I, TestExpr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1638where
1639    I: ValueInput<'tokens, Token = Token, Span = Span>,
1640{
1641    // File test operators: -e, -f, -d, -r, -w, -x
1642    let file_test_op = select! {
1643        Token::ShortFlag(s) if s == "e" => FileTestOp::Exists,
1644        Token::ShortFlag(s) if s == "f" => FileTestOp::IsFile,
1645        Token::ShortFlag(s) if s == "d" => FileTestOp::IsDir,
1646        Token::ShortFlag(s) if s == "r" => FileTestOp::Readable,
1647        Token::ShortFlag(s) if s == "w" => FileTestOp::Writable,
1648        Token::ShortFlag(s) if s == "x" => FileTestOp::Executable,
1649    };
1650
1651    // String test operators: -z, -n
1652    let string_test_op = select! {
1653        Token::ShortFlag(s) if s == "z" => StringTestOp::IsEmpty,
1654        Token::ShortFlag(s) if s == "n" => StringTestOp::IsNonEmpty,
1655    };
1656
1657    // Comparison operators: =, ==, !=, =~, !~, >, <, >=, <=, -gt, -lt, -ge, -le, -eq, -ne
1658    // Note: = and == are equivalent inside [[ ]] (matching bash behavior)
1659    let cmp_op = choice((
1660        just(Token::EqEq).to(TestCmpOp::Eq),
1661        just(Token::Eq).to(TestCmpOp::Eq),
1662        just(Token::NotEq).to(TestCmpOp::NotEq),
1663        just(Token::Match).to(TestCmpOp::Match),
1664        just(Token::NotMatch).to(TestCmpOp::NotMatch),
1665        just(Token::Gt).to(TestCmpOp::Gt),
1666        just(Token::Lt).to(TestCmpOp::Lt),
1667        just(Token::GtEq).to(TestCmpOp::GtEq),
1668        just(Token::LtEq).to(TestCmpOp::LtEq),
1669        select! { Token::ShortFlag(s) if s == "eq" => TestCmpOp::NumEq },
1670        select! { Token::ShortFlag(s) if s == "ne" => TestCmpOp::NumNotEq },
1671        select! { Token::ShortFlag(s) if s == "gt" => TestCmpOp::NumGt },
1672        select! { Token::ShortFlag(s) if s == "lt" => TestCmpOp::NumLt },
1673        select! { Token::ShortFlag(s) if s == "ge" => TestCmpOp::NumGtEq },
1674        select! { Token::ShortFlag(s) if s == "le" => TestCmpOp::NumLtEq },
1675    ));
1676
1677    // File test: -f path
1678    let file_test = file_test_op
1679        .then(primary_expr_parser())
1680        .map(|(op, path)| TestExpr::FileTest {
1681            op,
1682            path: Box::new(path),
1683        });
1684
1685    // String test: -z str
1686    let string_test = string_test_op
1687        .then(primary_expr_parser())
1688        .map(|(op, value)| TestExpr::StringTest {
1689            op,
1690            value: Box::new(value),
1691        });
1692
1693    // Comparison: $X == "value" or $NUM -gt 5
1694    let comparison = primary_expr_parser()
1695        .then(cmp_op)
1696        .then(primary_expr_parser())
1697        .map(|((left, op), right)| TestExpr::Comparison {
1698            left: Box::new(left),
1699            op,
1700            right: Box::new(right),
1701        });
1702
1703    // Primary test expression (atomic - no compound operators)
1704    let primary_test = choice((file_test, string_test, comparison));
1705
1706    // Build compound expressions with proper precedence:
1707    // Grammar:
1708    //   test_expr = or_expr
1709    //   or_expr   = and_expr { "||" and_expr }
1710    //   and_expr  = unary_expr { "&&" unary_expr }
1711    //   unary_expr = "!" unary_expr | primary_test
1712    //
1713    // Precedence: ! (highest) > && > ||
1714
1715    // Use recursive for the unary NOT operator
1716    let compound_test = recursive(|compound| {
1717        // Unary NOT: ! expr (can be chained: ! ! expr)
1718        let not_expr = just(Token::Bang)
1719            .ignore_then(compound.clone())
1720            .map(|expr| TestExpr::Not { expr: Box::new(expr) });
1721
1722        // Unary level: ! or primary
1723        let unary = choice((not_expr, primary_test.clone()));
1724
1725        // AND level: unary && unary && ...
1726        let and_expr = unary.clone().foldl(
1727            just(Token::And).ignore_then(unary).repeated(),
1728            |left, right| TestExpr::And {
1729                left: Box::new(left),
1730                right: Box::new(right),
1731            },
1732        );
1733
1734        // OR level: and_expr || and_expr || ...
1735        and_expr.clone().foldl(
1736            just(Token::Or).ignore_then(and_expr).repeated(),
1737            |left, right| TestExpr::Or {
1738                left: Box::new(left),
1739                right: Box::new(right),
1740            },
1741        )
1742    });
1743
1744    // [[ ]] is two consecutive bracket tokens (not a single TestStart token)
1745    // to avoid conflicts with nested array syntax like [[1, 2], [3, 4]]
1746    just(Token::LBracket)
1747        .then(just(Token::LBracket))
1748        .ignore_then(compound_test)
1749        .then_ignore(just(Token::RBracket).then(just(Token::RBracket)))
1750        .labelled("test expression")
1751        .boxed()
1752}
1753
1754/// Condition parser: supports [[ ]] test expressions and commands with && / || chaining.
1755///
1756/// Shell semantics: conditions are commands whose exit codes determine truthiness.
1757/// - `if true; then` → runs `true` builtin, exit code 0 = truthy
1758/// - `if grep -q pattern file; then` → runs command, checks exit code
1759/// - `if a && b; then` → runs `a`, if exit 0, runs `b`
1760///
1761/// Use `[[ ]]` for comparisons: `if [[ $X -gt 5 ]]; then`
1762///
1763/// Grammar (with precedence - && binds tighter than ||):
1764///   condition = or_expr
1765///   or_expr   = and_expr { "||" and_expr }
1766///   and_expr  = base { "&&" base }
1767///   base      = test_expr | command
1768fn condition_parser<'tokens, I>(
1769) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1770where
1771    I: ValueInput<'tokens, Token = Token, Span = Span>,
1772{
1773    // [[ ]] test expression - wrap as Expr::Test
1774    let test_expr_condition = test_expr_stmt_parser().map(|test| Expr::Test(Box::new(test)));
1775
1776    // Command as condition (includes true/false as command names)
1777    // The command's exit code determines truthiness (0 = true, non-zero = false)
1778    let command_condition = command_parser().map(Expr::Command);
1779
1780    // Base: test expr OR command
1781    let base = choice((test_expr_condition, command_condition));
1782
1783    // && has higher precedence than ||
1784    // First chain with && (higher precedence)
1785    let and_expr = base.clone().foldl(
1786        just(Token::And).ignore_then(base).repeated(),
1787        |left, right| Expr::BinaryOp {
1788            left: Box::new(left),
1789            op: BinaryOp::And,
1790            right: Box::new(right),
1791        },
1792    );
1793
1794    // Then chain with || (lower precedence)
1795    and_expr
1796        .clone()
1797        .foldl(
1798            just(Token::Or).ignore_then(and_expr).repeated(),
1799            |left, right| Expr::BinaryOp {
1800                left: Box::new(left),
1801                op: BinaryOp::Or,
1802                right: Box::new(right),
1803            },
1804        )
1805        .labelled("condition")
1806        .boxed()
1807}
1808
/// Expression parser.
///
/// Currently this simply returns [`primary_expr_parser`]; `&&` / `||` binary
/// operators are NOT parsed at this level (conditions handle them in
/// `condition_parser`).
fn expr_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = Span>,
{
    // For now, just primary expressions. Can extend for && / || later if needed.
    primary_expr_parser()
}
1818
1819/// Primary expression: literal, variable reference, command substitution, or bare identifier.
1820///
1821/// Uses `recursive` to support nested command substitution like `$(echo $(date))`.
1822fn primary_expr_parser<'tokens, I>(
1823) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1824where
1825    I: ValueInput<'tokens, Token = Token, Span = Span>,
1826{
1827    // Positional parameters: $0-$9, $@, $#, ${#VAR}, $?, $$
1828    let positional = select! {
1829        Token::Positional(n) => Expr::Positional(n),
1830        Token::AllArgs => Expr::AllArgs,
1831        Token::ArgCount => Expr::ArgCount,
1832        Token::VarLength(name) => Expr::VarLength(name),
1833        Token::LastExitCode => Expr::LastExitCode,
1834        Token::CurrentPid => Expr::CurrentPid,
1835    };
1836
1837    // Arithmetic expression: $((expr)) - preprocessed into Arithmetic token
1838    let arithmetic = select! {
1839        Token::Arithmetic(expr_str) => Expr::Arithmetic(expr_str),
1840    };
1841
1842    // Keywords that can also be used as barewords in argument position
1843    // (e.g., `echo done` should work even though `done` is a keyword)
1844    let keyword_as_bareword = select! {
1845        Token::Done => "done",
1846        Token::Fi => "fi",
1847        Token::Then => "then",
1848        Token::Else => "else",
1849        Token::Elif => "elif",
1850        Token::In => "in",
1851        Token::Do => "do",
1852        Token::Esac => "esac",
1853    }
1854    .map(|s| Expr::Literal(Value::String(s.to_string())));
1855
1856    // Bare words starting with + or - (e.g., date +%s, cat -)
1857    let plus_minus_bare = select! {
1858        Token::PlusBare(s) => Expr::Literal(Value::String(s)),
1859        Token::MinusBare(s) => Expr::Literal(Value::String(s)),
1860        Token::MinusAlone => Expr::Literal(Value::String("-".to_string())),
1861    };
1862
1863    // Glob patterns: merged GlobWord tokens and bare Star/Question
1864    let glob_pattern = select! {
1865        Token::GlobWord(s) => Expr::GlobPattern(s),
1866        Token::Star => Expr::GlobPattern("*".to_string()),
1867        Token::Question => Expr::GlobPattern("?".to_string()),
1868    };
1869
1870    recursive(|expr| {
1871        choice((
1872            positional,
1873            arithmetic,
1874            cmd_subst_parser(expr.clone()),
1875            var_expr_parser(),
1876            interpolated_string_parser(),
1877            literal_parser().map(Expr::Literal),
1878            // Glob patterns before ident (GlobWord is more specific)
1879            glob_pattern,
1880            // Bare identifiers become string literals (shell barewords)
1881            ident_parser().map(|s| Expr::Literal(Value::String(s))),
1882            // Absolute paths become string literals
1883            path_parser().map(|s| Expr::Literal(Value::String(s))),
1884            // Bare words starting with + or - (date +%s, cat -)
1885            // Shell navigation tokens
1886            select! {
1887                // Bare `.` in argument/expression position is the literal
1888                // current-directory path (`find .`, `ls .`, `echo .`). The
1889                // `source` alias is unaffected: `command_parser` consumes a
1890                // *leading* `.` as the command name before args are parsed,
1891                // so only a `.` that follows a command reaches here.
1892                Token::Dot => Expr::Literal(Value::String(".".into())),
1893                Token::DotDot => Expr::Literal(Value::String("..".into())),
1894                Token::Tilde => Expr::Literal(Value::String("~".into())),
1895                Token::TildePath(s) => Expr::Literal(Value::String(s)),
1896                Token::RelativePath(s) => Expr::Literal(Value::String(s)),
1897                Token::DotSlashPath(s) => Expr::Literal(Value::String(s)),
1898                // Digit-leading bareword (SHA prefix `019dda1c`, UUIDs).
1899                Token::NumberIdent(s) => Expr::Literal(Value::String(s)),
1900                // Dot-prefixed bareword (`.gitignore`, `.parent`, `.parent.parent`).
1901                // Distinct from `Token::Dot` (the source alias), which only
1902                // matches a bare `.` and requires whitespace before its file
1903                // argument.
1904                Token::DottedIdent(s) => Expr::Literal(Value::String(s)),
1905            },
1906            plus_minus_bare,
1907            // Keywords can be used as barewords in argument position
1908            keyword_as_bareword,
1909        ))
1910        .labelled("expression")
1911    })
1912    .boxed()
1913}
1914
1915/// Variable reference: `${VAR}`, `${VAR.field}`, `${VAR:-default}`, or `$VAR` (simple form).
1916/// Returns Expr directly to support both VarRef and VarWithDefault.
1917fn var_expr_parser<'tokens, I>(
1918) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1919where
1920    I: ValueInput<'tokens, Token = Token, Span = Span>,
1921{
1922    select! {
1923        Token::VarRef(raw) => parse_var_expr(&raw),
1924        Token::SimpleVarRef(name) => Expr::VarRef(VarPath::simple(name)),
1925    }
1926    .labelled("variable reference")
1927}
1928
1929/// Command substitution: `$(pipeline)` - runs a pipeline and returns its result.
1930///
1931/// Accepts a recursive expression parser to support nested command substitution.
1932fn cmd_subst_parser<'tokens, I, E>(
1933    expr: E,
1934) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1935where
1936    I: ValueInput<'tokens, Token = Token, Span = Span>,
1937    E: Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone,
1938{
1939    // Argument parser using the recursive expression parser
1940    // Long flag with value: --name=value
1941    let long_flag_with_value = select! {
1942        Token::LongFlag(name) => name,
1943    }
1944    .then_ignore(just(Token::Eq))
1945    .then(expr.clone())
1946    .map(|(key, value)| Arg::Named { key, value });
1947
1948    // Boolean long flag: --name
1949    let long_flag = select! {
1950        Token::LongFlag(name) => Arg::LongFlag(name),
1951    };
1952
1953    // Boolean short flag: -x
1954    let short_flag = select! {
1955        Token::ShortFlag(name) => Arg::ShortFlag(name),
1956    };
1957
1958    // Shell assignment in argv position: name=value (see arg_before_double_dash_parser).
1959    let named = ident_parser()
1960        .then_ignore(just(Token::Eq))
1961        .then(expr.clone())
1962        .map(|(key, value)| Arg::WordAssign { key, value });
1963
1964    // Positional argument
1965    let positional = expr.map(Arg::Positional);
1966
1967    let arg = choice((
1968        long_flag_with_value,
1969        long_flag,
1970        short_flag,
1971        named,
1972        positional,
1973    ));
1974
1975    // Command name parser - accepts identifiers and boolean keywords (true/false are builtins)
1976    let command_name = choice((
1977        ident_parser(),
1978        just(Token::True).to("true".to_string()),
1979        just(Token::False).to("false".to_string()),
1980    ));
1981
1982    // Command parser
1983    let command = command_name
1984        .then(arg.repeated().collect::<Vec<_>>())
1985        .map(|(name, args)| Command {
1986            name,
1987            args,
1988            redirects: vec![],
1989        });
1990
1991    // Pipeline parser
1992    let pipeline = command
1993        .separated_by(just(Token::Pipe))
1994        .at_least(1)
1995        .collect::<Vec<_>>()
1996        .map(|commands| Pipeline {
1997            commands,
1998            background: false,
1999        });
2000
2001    just(Token::CmdSubstStart)
2002        .ignore_then(pipeline)
2003        .then_ignore(just(Token::RParen))
2004        .map(|pipeline| Expr::CommandSubst(Box::new(pipeline)))
2005        .labelled("command substitution")
2006}
2007
2008/// String parser - handles double-quoted strings (with interpolation) and single-quoted (literal).
2009fn interpolated_string_parser<'tokens, I>(
2010) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2011where
2012    I: ValueInput<'tokens, Token = Token, Span = Span>,
2013{
2014    // Double-quoted string: may contain $VAR or ${VAR} interpolation
2015    let double_quoted = select! {
2016        Token::String(s) => s,
2017    }
2018    .map(|s| {
2019        // Check if string contains interpolation markers (${} or $NAME) or escaped dollars
2020        if s.contains('$') || s.contains("__KAISH_ESCAPED_DOLLAR__") {
2021            // Parse interpolated parts
2022            let parts = parse_interpolated_string(&s);
2023            if parts.len() == 1
2024                && let StringPart::Literal(text) = &parts[0] {
2025                    return Expr::Literal(Value::String(text.clone()));
2026                }
2027            Expr::Interpolated(parts)
2028        } else {
2029            Expr::Literal(Value::String(s))
2030        }
2031    });
2032
2033    // Single-quoted string: literal, no interpolation
2034    let single_quoted = select! {
2035        Token::SingleString(s) => Expr::Literal(Value::String(s)),
2036    };
2037
2038    choice((single_quoted, double_quoted)).labelled("string")
2039}
2040
2041/// Literal value parser (excluding strings, which are handled by interpolated_string_parser).
2042fn literal_parser<'tokens, I>(
2043) -> impl Parser<'tokens, I, Value, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2044where
2045    I: ValueInput<'tokens, Token = Token, Span = Span>,
2046{
2047    choice((
2048        select! {
2049            Token::True => Value::Bool(true),
2050            Token::False => Value::Bool(false),
2051        },
2052        select! {
2053            Token::Int(n) => Value::Int(n),
2054            Token::Float(f) => Value::Float(f),
2055        },
2056    ))
2057    .labelled("literal")
2058    .boxed()
2059}
2060
2061/// Identifier parser.
2062fn ident_parser<'tokens, I>(
2063) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2064where
2065    I: ValueInput<'tokens, Token = Token, Span = Span>,
2066{
2067    select! {
2068        Token::Ident(s) => s,
2069    }
2070    .labelled("identifier")
2071}
2072
2073/// Path parser: matches absolute paths like `/tmp/out`, `/etc/hosts`.
2074fn path_parser<'tokens, I>(
2075) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2076where
2077    I: ValueInput<'tokens, Token = Token, Span = Span>,
2078{
2079    select! {
2080        Token::Path(s) => s,
2081    }
2082    .labelled("path")
2083}
2084
2085#[cfg(test)]
2086mod tests {
2087    use super::*;
2088
2089    #[test]
2090    fn parse_empty() {
2091        let result = parse("");
2092        assert!(result.is_ok());
2093        assert_eq!(result.expect("ok").statements.len(), 0);
2094    }
2095
2096    #[test]
2097    fn parse_newlines_only() {
2098        let result = parse("\n\n\n");
2099        assert!(result.is_ok());
2100    }
2101
2102    #[test]
2103    fn parse_simple_command() {
2104        let result = parse("echo");
2105        assert!(result.is_ok());
2106        let program = result.expect("ok");
2107        assert_eq!(program.statements.len(), 1);
2108        assert!(matches!(&program.statements[0], Stmt::Command(_)));
2109    }
2110
2111    #[test]
2112    fn parse_command_with_string_arg() {
2113        let result = parse(r#"echo "hello""#);
2114        assert!(result.is_ok());
2115        let program = result.expect("ok");
2116        match &program.statements[0] {
2117            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 1),
2118            _ => panic!("expected Command"),
2119        }
2120    }
2121
2122    #[test]
2123    fn parse_assignment() {
2124        let result = parse("X=5");
2125        assert!(result.is_ok());
2126        let program = result.expect("ok");
2127        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
2128    }
2129
2130    #[test]
2131    fn parse_pipeline() {
2132        let result = parse("a | b | c");
2133        assert!(result.is_ok());
2134        let program = result.expect("ok");
2135        match &program.statements[0] {
2136            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
2137            _ => panic!("expected Pipeline"),
2138        }
2139    }
2140
2141    #[test]
2142    fn parse_background_job() {
2143        let result = parse("cmd &");
2144        assert!(result.is_ok());
2145        let program = result.expect("ok");
2146        match &program.statements[0] {
2147            Stmt::Pipeline(p) => assert!(p.background),
2148            _ => panic!("expected Pipeline with background"),
2149        }
2150    }
2151
2152    #[test]
2153    fn parse_if_simple() {
2154        let result = parse("if true; then echo; fi");
2155        assert!(result.is_ok());
2156        let program = result.expect("ok");
2157        assert!(matches!(&program.statements[0], Stmt::If(_)));
2158    }
2159
2160    #[test]
2161    fn parse_if_else() {
2162        let result = parse("if true; then echo; else echo; fi");
2163        assert!(result.is_ok());
2164        let program = result.expect("ok");
2165        match &program.statements[0] {
2166            Stmt::If(if_stmt) => assert!(if_stmt.else_branch.is_some()),
2167            _ => panic!("expected If"),
2168        }
2169    }
2170
2171    #[test]
2172    fn parse_elif_simple() {
2173        let result = parse("if true; then echo a; elif false; then echo b; fi");
2174        assert!(result.is_ok(), "parse failed: {:?}", result);
2175        let program = result.expect("ok");
2176        match &program.statements[0] {
2177            Stmt::If(if_stmt) => {
2178                // elif is desugared to nested if in else
2179                assert!(if_stmt.else_branch.is_some());
2180                let else_branch = if_stmt.else_branch.as_ref().unwrap();
2181                assert_eq!(else_branch.len(), 1);
2182                assert!(matches!(&else_branch[0], Stmt::If(_)));
2183            }
2184            _ => panic!("expected If"),
2185        }
2186    }
2187
2188    #[test]
2189    fn parse_elif_with_else() {
2190        let result = parse("if true; then echo a; elif false; then echo b; else echo c; fi");
2191        assert!(result.is_ok(), "parse failed: {:?}", result);
2192        let program = result.expect("ok");
2193        match &program.statements[0] {
2194            Stmt::If(outer_if) => {
2195                // Check nested structure: if -> elif -> else
2196                let else_branch = outer_if.else_branch.as_ref().expect("outer else");
2197                assert_eq!(else_branch.len(), 1);
2198                match &else_branch[0] {
2199                    Stmt::If(inner_if) => {
2200                        // The inner if (from elif) should have the final else
2201                        assert!(inner_if.else_branch.is_some());
2202                    }
2203                    _ => panic!("expected nested If from elif"),
2204                }
2205            }
2206            _ => panic!("expected If"),
2207        }
2208    }
2209
2210    #[test]
2211    fn parse_multiple_elif() {
2212        // Shell-compatible: use [[ ]] for comparisons
2213        let result = parse(
2214            "if [[ ${X} == 1 ]]; then echo one; elif [[ ${X} == 2 ]]; then echo two; elif [[ ${X} == 3 ]]; then echo three; else echo other; fi",
2215        );
2216        assert!(result.is_ok(), "parse failed: {:?}", result);
2217    }
2218
2219    #[test]
2220    fn parse_for_loop() {
2221        let result = parse("for X in items; do echo; done");
2222        assert!(result.is_ok());
2223        let program = result.expect("ok");
2224        assert!(matches!(&program.statements[0], Stmt::For(_)));
2225    }
2226
2227    #[test]
2228    fn parse_brackets_not_array_literal() {
2229        // Array literals are no longer supported, [ is just a regular char
2230        let result = parse("cmd [1");
2231        // This should fail or parse unexpectedly - arrays are removed
2232        // Just verify we don't crash
2233        let _ = result;
2234    }
2235
2236    #[test]
2237    fn parse_named_arg() {
2238        // Bareword key=value parses as WordAssign — the kernel decides per
2239        // command whether to route it to tool_args.named (export/alias) or
2240        // stringify to a positional (every other builtin).
2241        let result = parse("cmd foo=5");
2242        assert!(result.is_ok());
2243        let program = result.expect("ok");
2244        match &program.statements[0] {
2245            Stmt::Command(cmd) => {
2246                assert_eq!(cmd.args.len(), 1);
2247                assert!(matches!(&cmd.args[0], Arg::WordAssign { .. }));
2248            }
2249            _ => panic!("expected Command"),
2250        }
2251    }
2252
2253    #[test]
2254    fn parse_short_flag() {
2255        let result = parse("ls -l");
2256        assert!(result.is_ok());
2257        let program = result.expect("ok");
2258        match &program.statements[0] {
2259            Stmt::Command(cmd) => {
2260                assert_eq!(cmd.name, "ls");
2261                assert_eq!(cmd.args.len(), 1);
2262                match &cmd.args[0] {
2263                    Arg::ShortFlag(name) => assert_eq!(name, "l"),
2264                    _ => panic!("expected ShortFlag"),
2265                }
2266            }
2267            _ => panic!("expected Command"),
2268        }
2269    }
2270
2271    #[test]
2272    fn parse_long_flag() {
2273        let result = parse("git push --force");
2274        assert!(result.is_ok());
2275        let program = result.expect("ok");
2276        match &program.statements[0] {
2277            Stmt::Command(cmd) => {
2278                assert_eq!(cmd.name, "git");
2279                assert_eq!(cmd.args.len(), 2);
2280                match &cmd.args[0] {
2281                    Arg::Positional(Expr::Literal(Value::String(s))) => assert_eq!(s, "push"),
2282                    _ => panic!("expected Positional push"),
2283                }
2284                match &cmd.args[1] {
2285                    Arg::LongFlag(name) => assert_eq!(name, "force"),
2286                    _ => panic!("expected LongFlag"),
2287                }
2288            }
2289            _ => panic!("expected Command"),
2290        }
2291    }
2292
2293    #[test]
2294    fn parse_long_flag_with_value() {
2295        let result = parse(r#"git commit --message="hello""#);
2296        assert!(result.is_ok());
2297        let program = result.expect("ok");
2298        match &program.statements[0] {
2299            Stmt::Command(cmd) => {
2300                assert_eq!(cmd.name, "git");
2301                assert_eq!(cmd.args.len(), 2);
2302                match &cmd.args[1] {
2303                    Arg::Named { key, value } => {
2304                        assert_eq!(key, "message");
2305                        match value {
2306                            Expr::Literal(Value::String(s)) => assert_eq!(s, "hello"),
2307                            _ => panic!("expected String value"),
2308                        }
2309                    }
2310                    _ => panic!("expected Named from --flag=value"),
2311                }
2312            }
2313            _ => panic!("expected Command"),
2314        }
2315    }
2316
2317    #[test]
2318    fn parse_mixed_flags_and_args() {
2319        let result = parse(r#"git commit -m "message" --amend"#);
2320        assert!(result.is_ok());
2321        let program = result.expect("ok");
2322        match &program.statements[0] {
2323            Stmt::Command(cmd) => {
2324                assert_eq!(cmd.name, "git");
2325                assert_eq!(cmd.args.len(), 4);
2326                // commit (positional)
2327                assert!(matches!(&cmd.args[0], Arg::Positional(_)));
2328                // -m (short flag)
2329                match &cmd.args[1] {
2330                    Arg::ShortFlag(name) => assert_eq!(name, "m"),
2331                    _ => panic!("expected ShortFlag -m"),
2332                }
2333                // "message" (positional)
2334                assert!(matches!(&cmd.args[2], Arg::Positional(_)));
2335                // --amend (long flag)
2336                match &cmd.args[3] {
2337                    Arg::LongFlag(name) => assert_eq!(name, "amend"),
2338                    _ => panic!("expected LongFlag --amend"),
2339                }
2340            }
2341            _ => panic!("expected Command"),
2342        }
2343    }
2344
2345    #[test]
2346    fn parse_redirect_stdout() {
2347        let result = parse("cmd > file");
2348        assert!(result.is_ok());
2349        let program = result.expect("ok");
2350        // Commands with redirects stay as Pipeline, not Command
2351        match &program.statements[0] {
2352            Stmt::Pipeline(p) => {
2353                assert_eq!(p.commands.len(), 1);
2354                let cmd = &p.commands[0];
2355                assert_eq!(cmd.redirects.len(), 1);
2356                assert!(matches!(cmd.redirects[0].kind, RedirectKind::StdoutOverwrite));
2357            }
2358            _ => panic!("expected Pipeline"),
2359        }
2360    }
2361
2362    #[test]
2363    fn parse_var_ref() {
2364        let result = parse("echo ${VAR}");
2365        assert!(result.is_ok());
2366        let program = result.expect("ok");
2367        match &program.statements[0] {
2368            Stmt::Command(cmd) => {
2369                assert_eq!(cmd.args.len(), 1);
2370                assert!(matches!(&cmd.args[0], Arg::Positional(Expr::VarRef(_))));
2371            }
2372            _ => panic!("expected Command"),
2373        }
2374    }
2375
2376    #[test]
2377    fn parse_multiple_statements() {
2378        let result = parse("a\nb\nc");
2379        assert!(result.is_ok());
2380        let program = result.expect("ok");
2381        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
2382        assert_eq!(non_empty.len(), 3);
2383    }
2384
2385    #[test]
2386    fn parse_semicolon_separated() {
2387        let result = parse("a; b; c");
2388        assert!(result.is_ok());
2389        let program = result.expect("ok");
2390        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
2391        assert_eq!(non_empty.len(), 3);
2392    }
2393
2394    #[test]
2395    fn parse_complex_pipeline() {
2396        let result = parse(r#"cat file | grep pattern="foo" | head count=10"#);
2397        assert!(result.is_ok());
2398        let program = result.expect("ok");
2399        match &program.statements[0] {
2400            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
2401            _ => panic!("expected Pipeline"),
2402        }
2403    }
2404
2405    #[test]
2406    fn parse_json_as_string_arg() {
2407        // JSON arrays/objects should be passed as string arguments
2408        let result = parse(r#"cmd '[[1, 2], [3, 4]]'"#);
2409        assert!(result.is_ok());
2410    }
2411
2412    #[test]
2413    fn parse_mixed_args() {
2414        let result = parse(r#"cmd pos1 key="val" pos2 num=42"#);
2415        assert!(result.is_ok());
2416        let program = result.expect("ok");
2417        match &program.statements[0] {
2418            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 4),
2419            _ => panic!("expected Command"),
2420        }
2421    }
2422
2423    #[test]
2424    fn error_unterminated_string() {
2425        let result = parse(r#"echo "hello"#);
2426        assert!(result.is_err());
2427    }
2428
2429    #[test]
2430    fn error_unterminated_var_ref() {
2431        let result = parse("echo ${VAR");
2432        assert!(result.is_err());
2433    }
2434
2435    #[test]
2436    fn error_missing_fi() {
2437        let result = parse("if true; then echo");
2438        assert!(result.is_err());
2439    }
2440
2441    #[test]
2442    fn error_missing_done() {
2443        let result = parse("for X in items; do echo");
2444        assert!(result.is_err());
2445    }
2446
2447    #[test]
2448    fn parse_nested_cmd_subst() {
2449        // Nested command substitution is supported
2450        let result = parse("X=$(echo $(date))").unwrap();
2451        match &result.statements[0] {
2452            Stmt::Assignment(a) => {
2453                assert_eq!(a.name, "X");
2454                match &a.value {
2455                    Expr::CommandSubst(outer) => {
2456                        assert_eq!(outer.commands[0].name, "echo");
2457                        // The argument should be another command substitution
2458                        match &outer.commands[0].args[0] {
2459                            Arg::Positional(Expr::CommandSubst(inner)) => {
2460                                assert_eq!(inner.commands[0].name, "date");
2461                            }
2462                            other => panic!("expected nested cmd subst, got {:?}", other),
2463                        }
2464                    }
2465                    other => panic!("expected cmd subst, got {:?}", other),
2466                }
2467            }
2468            other => panic!("expected assignment, got {:?}", other),
2469        }
2470    }
2471
2472    #[test]
2473    fn parse_deeply_nested_cmd_subst() {
2474        // Three levels deep
2475        let result = parse("X=$(a $(b $(c)))").unwrap();
2476        match &result.statements[0] {
2477            Stmt::Assignment(a) => match &a.value {
2478                Expr::CommandSubst(level1) => {
2479                    assert_eq!(level1.commands[0].name, "a");
2480                    match &level1.commands[0].args[0] {
2481                        Arg::Positional(Expr::CommandSubst(level2)) => {
2482                            assert_eq!(level2.commands[0].name, "b");
2483                            match &level2.commands[0].args[0] {
2484                                Arg::Positional(Expr::CommandSubst(level3)) => {
2485                                    assert_eq!(level3.commands[0].name, "c");
2486                                }
2487                                other => panic!("expected level3 cmd subst, got {:?}", other),
2488                            }
2489                        }
2490                        other => panic!("expected level2 cmd subst, got {:?}", other),
2491                    }
2492                }
2493                other => panic!("expected cmd subst, got {:?}", other),
2494            },
2495            other => panic!("expected assignment, got {:?}", other),
2496        }
2497    }
2498
2499    // ═══════════════════════════════════════════════════════════════════════════
2500    // Value Preservation Tests - These test that actual values are captured
2501    // ═══════════════════════════════════════════════════════════════════════════
2502
2503    #[test]
2504    fn value_int_preserved() {
2505        let result = parse("X=42").unwrap();
2506        match &result.statements[0] {
2507            Stmt::Assignment(a) => {
2508                assert_eq!(a.name, "X");
2509                match &a.value {
2510                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2511                    other => panic!("expected int literal, got {:?}", other),
2512                }
2513            }
2514            other => panic!("expected assignment, got {:?}", other),
2515        }
2516    }
2517
2518    #[test]
2519    fn value_negative_int_preserved() {
2520        let result = parse("X=-99").unwrap();
2521        match &result.statements[0] {
2522            Stmt::Assignment(a) => match &a.value {
2523                Expr::Literal(Value::Int(n)) => assert_eq!(*n, -99),
2524                other => panic!("expected int, got {:?}", other),
2525            },
2526            other => panic!("expected assignment, got {:?}", other),
2527        }
2528    }
2529
2530    #[test]
2531    fn value_float_preserved() {
2532        let result = parse("PI=3.14").unwrap();
2533        match &result.statements[0] {
2534            Stmt::Assignment(a) => match &a.value {
2535                Expr::Literal(Value::Float(f)) => assert!((*f - 3.14).abs() < 0.001),
2536                other => panic!("expected float, got {:?}", other),
2537            },
2538            other => panic!("expected assignment, got {:?}", other),
2539        }
2540    }
2541
2542    #[test]
2543    fn value_string_preserved() {
2544        let result = parse(r#"echo "hello world""#).unwrap();
2545        match &result.statements[0] {
2546            Stmt::Command(cmd) => {
2547                assert_eq!(cmd.name, "echo");
2548                match &cmd.args[0] {
2549                    Arg::Positional(Expr::Literal(Value::String(s))) => {
2550                        assert_eq!(s, "hello world");
2551                    }
2552                    other => panic!("expected string arg, got {:?}", other),
2553                }
2554            }
2555            other => panic!("expected command, got {:?}", other),
2556        }
2557    }
2558
2559    #[test]
2560    fn value_string_with_escapes_preserved() {
2561        let result = parse(r#"echo "line1\nline2""#).unwrap();
2562        match &result.statements[0] {
2563            Stmt::Command(cmd) => match &cmd.args[0] {
2564                Arg::Positional(Expr::Literal(Value::String(s))) => {
2565                    assert_eq!(s, "line1\nline2");
2566                }
2567                other => panic!("expected string, got {:?}", other),
2568            },
2569            other => panic!("expected command, got {:?}", other),
2570        }
2571    }
2572
2573    #[test]
2574    fn value_command_name_preserved() {
2575        let result = parse("my-command").unwrap();
2576        match &result.statements[0] {
2577            Stmt::Command(cmd) => assert_eq!(cmd.name, "my-command"),
2578            other => panic!("expected command, got {:?}", other),
2579        }
2580    }
2581
2582    #[test]
2583    fn value_assignment_name_preserved() {
2584        let result = parse("MY_VAR=1").unwrap();
2585        match &result.statements[0] {
2586            Stmt::Assignment(a) => assert_eq!(a.name, "MY_VAR"),
2587            other => panic!("expected assignment, got {:?}", other),
2588        }
2589    }
2590
2591    #[test]
2592    fn value_for_variable_preserved() {
2593        let result = parse("for ITEM in items; do echo; done").unwrap();
2594        match &result.statements[0] {
2595            Stmt::For(f) => assert_eq!(f.variable, "ITEM"),
2596            other => panic!("expected for, got {:?}", other),
2597        }
2598    }
2599
2600    #[test]
2601    fn value_varref_name_preserved() {
2602        let result = parse("echo ${MESSAGE}").unwrap();
2603        match &result.statements[0] {
2604            Stmt::Command(cmd) => match &cmd.args[0] {
2605                Arg::Positional(Expr::VarRef(path)) => {
2606                    assert_eq!(path.segments.len(), 1);
2607                    let VarSegment::Field(name) = &path.segments[0];
2608                    assert_eq!(name, "MESSAGE");
2609                }
2610                other => panic!("expected varref, got {:?}", other),
2611            },
2612            other => panic!("expected command, got {:?}", other),
2613        }
2614    }
2615
2616    #[test]
2617    fn value_varref_field_access_preserved() {
2618        let result = parse("echo ${RESULT.data}").unwrap();
2619        match &result.statements[0] {
2620            Stmt::Command(cmd) => match &cmd.args[0] {
2621                Arg::Positional(Expr::VarRef(path)) => {
2622                    assert_eq!(path.segments.len(), 2);
2623                    let VarSegment::Field(a) = &path.segments[0];
2624                    let VarSegment::Field(b) = &path.segments[1];
2625                    assert_eq!(a, "RESULT");
2626                    assert_eq!(b, "data");
2627                }
2628                other => panic!("expected varref, got {:?}", other),
2629            },
2630            other => panic!("expected command, got {:?}", other),
2631        }
2632    }
2633
2634    #[test]
2635    fn value_varref_index_ignored() {
2636        // Index segments are no longer supported - they're filtered out by parse_varpath
2637        let result = parse("echo ${ITEMS[0]}").unwrap();
2638        match &result.statements[0] {
2639            Stmt::Command(cmd) => match &cmd.args[0] {
2640                Arg::Positional(Expr::VarRef(path)) => {
2641                    // Index segment [0] is skipped, only ITEMS remains
2642                    assert_eq!(path.segments.len(), 1);
2643                    let VarSegment::Field(name) = &path.segments[0];
2644                    assert_eq!(name, "ITEMS");
2645                }
2646                other => panic!("expected varref, got {:?}", other),
2647            },
2648            other => panic!("expected command, got {:?}", other),
2649        }
2650    }
2651
2652    #[test]
2653    fn value_named_arg_preserved() {
2654        // Bareword key=value parses as WordAssign — the kernel decides per
2655        // command whether to route into args.named (export/alias) or
2656        // stringify as a positional.
2657        let result = parse("cmd count=42").unwrap();
2658        match &result.statements[0] {
2659            Stmt::Command(cmd) => {
2660                assert_eq!(cmd.name, "cmd");
2661                match &cmd.args[0] {
2662                    Arg::WordAssign { key, value } => {
2663                        assert_eq!(key, "count");
2664                        match value {
2665                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2666                            other => panic!("expected int, got {:?}", other),
2667                        }
2668                    }
2669                    other => panic!("expected WordAssign arg, got {:?}", other),
2670                }
2671            }
2672            other => panic!("expected command, got {:?}", other),
2673        }
2674    }
2675
2676    #[test]
2677    fn value_function_def_name_preserved() {
2678        let result = parse("greet() { echo }").unwrap();
2679        match &result.statements[0] {
2680            Stmt::ToolDef(t) => {
2681                assert_eq!(t.name, "greet");
2682                assert!(t.params.is_empty());
2683            }
2684            other => panic!("expected function def, got {:?}", other),
2685        }
2686    }
2687
2688    // ═══════════════════════════════════════════════════════════════════════════
2689    // New Feature Tests - Comparisons, Interpolation, Nested Structures
2690    // ═══════════════════════════════════════════════════════════════════════════
2691
2692    #[test]
2693    fn parse_comparison_equals() {
2694        // Shell-compatible: use [[ ]] for comparisons
2695        let result = parse("if [[ ${X} == 5 ]]; then echo; fi").unwrap();
2696        match &result.statements[0] {
2697            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2698                Expr::Test(test) => match test.as_ref() {
2699                    TestExpr::Comparison { left, op, right } => {
2700                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
2701                        assert_eq!(*op, TestCmpOp::Eq);
2702                        match right.as_ref() {
2703                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 5),
2704                            other => panic!("expected int, got {:?}", other),
2705                        }
2706                    }
2707                    other => panic!("expected comparison, got {:?}", other),
2708                },
2709                other => panic!("expected test expr, got {:?}", other),
2710            },
2711            other => panic!("expected if, got {:?}", other),
2712        }
2713    }
2714
2715    #[test]
2716    fn parse_comparison_not_equals() {
2717        let result = parse("if [[ ${X} != 0 ]]; then echo; fi").unwrap();
2718        match &result.statements[0] {
2719            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2720                Expr::Test(test) => match test.as_ref() {
2721                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotEq),
2722                    other => panic!("expected comparison, got {:?}", other),
2723                },
2724                other => panic!("expected test expr, got {:?}", other),
2725            },
2726            other => panic!("expected if, got {:?}", other),
2727        }
2728    }
2729
2730    #[test]
2731    fn parse_comparison_less_than() {
2732        let result = parse("if [[ ${COUNT} -lt 10 ]]; then echo; fi").unwrap();
2733        match &result.statements[0] {
2734            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2735                Expr::Test(test) => match test.as_ref() {
2736                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumLt),
2737                    other => panic!("expected comparison, got {:?}", other),
2738                },
2739                other => panic!("expected test expr, got {:?}", other),
2740            },
2741            other => panic!("expected if, got {:?}", other),
2742        }
2743    }
2744
2745    #[test]
2746    fn parse_comparison_greater_than() {
2747        let result = parse("if [[ ${COUNT} -gt 0 ]]; then echo; fi").unwrap();
2748        match &result.statements[0] {
2749            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2750                Expr::Test(test) => match test.as_ref() {
2751                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumGt),
2752                    other => panic!("expected comparison, got {:?}", other),
2753                },
2754                other => panic!("expected test expr, got {:?}", other),
2755            },
2756            other => panic!("expected if, got {:?}", other),
2757        }
2758    }
2759
2760    #[test]
2761    fn parse_comparison_less_equal() {
2762        let result = parse("if [[ ${X} -le 100 ]]; then echo; fi").unwrap();
2763        match &result.statements[0] {
2764            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2765                Expr::Test(test) => match test.as_ref() {
2766                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumLtEq),
2767                    other => panic!("expected comparison, got {:?}", other),
2768                },
2769                other => panic!("expected test expr, got {:?}", other),
2770            },
2771            other => panic!("expected if, got {:?}", other),
2772        }
2773    }
2774
2775    #[test]
2776    fn parse_comparison_greater_equal() {
2777        let result = parse("if [[ ${X} -ge 1 ]]; then echo; fi").unwrap();
2778        match &result.statements[0] {
2779            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2780                Expr::Test(test) => match test.as_ref() {
2781                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumGtEq),
2782                    other => panic!("expected comparison, got {:?}", other),
2783                },
2784                other => panic!("expected test expr, got {:?}", other),
2785            },
2786            other => panic!("expected if, got {:?}", other),
2787        }
2788    }
2789
2790    #[test]
2791    fn parse_regex_match() {
2792        let result = parse(r#"if [[ ${NAME} =~ "^test" ]]; then echo; fi"#).unwrap();
2793        match &result.statements[0] {
2794            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2795                Expr::Test(test) => match test.as_ref() {
2796                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::Match),
2797                    other => panic!("expected comparison, got {:?}", other),
2798                },
2799                other => panic!("expected test expr, got {:?}", other),
2800            },
2801            other => panic!("expected if, got {:?}", other),
2802        }
2803    }
2804
2805    #[test]
2806    fn parse_regex_not_match() {
2807        let result = parse(r#"if [[ ${NAME} !~ "^test" ]]; then echo; fi"#).unwrap();
2808        match &result.statements[0] {
2809            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2810                Expr::Test(test) => match test.as_ref() {
2811                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotMatch),
2812                    other => panic!("expected comparison, got {:?}", other),
2813                },
2814                other => panic!("expected test expr, got {:?}", other),
2815            },
2816            other => panic!("expected if, got {:?}", other),
2817        }
2818    }
2819
2820    #[test]
2821    fn parse_string_interpolation() {
2822        let result = parse(r#"echo "Hello ${NAME}!""#).unwrap();
2823        match &result.statements[0] {
2824            Stmt::Command(cmd) => match &cmd.args[0] {
2825                Arg::Positional(Expr::Interpolated(parts)) => {
2826                    assert_eq!(parts.len(), 3);
2827                    match &parts[0] {
2828                        StringPart::Literal(s) => assert_eq!(s, "Hello "),
2829                        other => panic!("expected literal, got {:?}", other),
2830                    }
2831                    match &parts[1] {
2832                        StringPart::Var(path) => {
2833                            assert_eq!(path.segments.len(), 1);
2834                            let VarSegment::Field(name) = &path.segments[0];
2835                            assert_eq!(name, "NAME");
2836                        }
2837                        other => panic!("expected var, got {:?}", other),
2838                    }
2839                    match &parts[2] {
2840                        StringPart::Literal(s) => assert_eq!(s, "!"),
2841                        other => panic!("expected literal, got {:?}", other),
2842                    }
2843                }
2844                other => panic!("expected interpolated, got {:?}", other),
2845            },
2846            other => panic!("expected command, got {:?}", other),
2847        }
2848    }
2849
2850    #[test]
2851    fn parse_string_interpolation_multiple_vars() {
2852        let result = parse(r#"echo "${FIRST} and ${SECOND}""#).unwrap();
2853        match &result.statements[0] {
2854            Stmt::Command(cmd) => match &cmd.args[0] {
2855                Arg::Positional(Expr::Interpolated(parts)) => {
2856                    // ${FIRST} + " and " + ${SECOND} = 3 parts
2857                    assert_eq!(parts.len(), 3);
2858                    assert!(matches!(&parts[0], StringPart::Var(_)));
2859                    assert!(matches!(&parts[1], StringPart::Literal(_)));
2860                    assert!(matches!(&parts[2], StringPart::Var(_)));
2861                }
2862                other => panic!("expected interpolated, got {:?}", other),
2863            },
2864            other => panic!("expected command, got {:?}", other),
2865        }
2866    }
2867
2868    #[test]
2869    fn parse_empty_function_body() {
2870        let result = parse("empty() { }").unwrap();
2871        match &result.statements[0] {
2872            Stmt::ToolDef(t) => {
2873                assert_eq!(t.name, "empty");
2874                assert!(t.params.is_empty());
2875                assert!(t.body.is_empty());
2876            }
2877            other => panic!("expected function def, got {:?}", other),
2878        }
2879    }
2880
2881    #[test]
2882    fn parse_bash_style_function() {
2883        let result = parse("function greet { echo hello }").unwrap();
2884        match &result.statements[0] {
2885            Stmt::ToolDef(t) => {
2886                assert_eq!(t.name, "greet");
2887                assert!(t.params.is_empty());
2888                assert_eq!(t.body.len(), 1);
2889            }
2890            other => panic!("expected function def, got {:?}", other),
2891        }
2892    }
2893
2894    #[test]
2895    fn parse_comparison_string_values() {
2896        let result = parse(r#"if [[ ${STATUS} == "ok" ]]; then echo; fi"#).unwrap();
2897        match &result.statements[0] {
2898            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2899                Expr::Test(test) => match test.as_ref() {
2900                    TestExpr::Comparison { left, op, right } => {
2901                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
2902                        assert_eq!(*op, TestCmpOp::Eq);
2903                        match right.as_ref() {
2904                            Expr::Literal(Value::String(s)) => assert_eq!(s, "ok"),
2905                            other => panic!("expected string, got {:?}", other),
2906                        }
2907                    }
2908                    other => panic!("expected comparison, got {:?}", other),
2909                },
2910                other => panic!("expected test expr, got {:?}", other),
2911            },
2912            other => panic!("expected if, got {:?}", other),
2913        }
2914    }
2915
2916    // ═══════════════════════════════════════════════════════════════════════════
2917    // Command Substitution Tests
2918    // ═══════════════════════════════════════════════════════════════════════════
2919
2920    #[test]
2921    fn parse_cmd_subst_simple() {
2922        let result = parse("X=$(echo)").unwrap();
2923        match &result.statements[0] {
2924            Stmt::Assignment(a) => {
2925                assert_eq!(a.name, "X");
2926                match &a.value {
2927                    Expr::CommandSubst(pipeline) => {
2928                        assert_eq!(pipeline.commands.len(), 1);
2929                        assert_eq!(pipeline.commands[0].name, "echo");
2930                    }
2931                    other => panic!("expected command subst, got {:?}", other),
2932                }
2933            }
2934            other => panic!("expected assignment, got {:?}", other),
2935        }
2936    }
2937
2938    #[test]
2939    fn parse_cmd_subst_with_args() {
2940        let result = parse(r#"X=$(fetch url="http://example.com")"#).unwrap();
2941        match &result.statements[0] {
2942            Stmt::Assignment(a) => match &a.value {
2943                Expr::CommandSubst(pipeline) => {
2944                    assert_eq!(pipeline.commands[0].name, "fetch");
2945                    assert_eq!(pipeline.commands[0].args.len(), 1);
2946                    match &pipeline.commands[0].args[0] {
2947                        Arg::WordAssign { key, .. } => assert_eq!(key, "url"),
2948                        other => panic!("expected WordAssign arg, got {:?}", other),
2949                    }
2950                }
2951                other => panic!("expected command subst, got {:?}", other),
2952            },
2953            other => panic!("expected assignment, got {:?}", other),
2954        }
2955    }
2956
2957    #[test]
2958    fn parse_cmd_subst_pipeline() {
2959        let result = parse("X=$(cat file | grep pattern)").unwrap();
2960        match &result.statements[0] {
2961            Stmt::Assignment(a) => match &a.value {
2962                Expr::CommandSubst(pipeline) => {
2963                    assert_eq!(pipeline.commands.len(), 2);
2964                    assert_eq!(pipeline.commands[0].name, "cat");
2965                    assert_eq!(pipeline.commands[1].name, "grep");
2966                }
2967                other => panic!("expected command subst, got {:?}", other),
2968            },
2969            other => panic!("expected assignment, got {:?}", other),
2970        }
2971    }
2972
2973    #[test]
2974    fn parse_cmd_subst_in_condition() {
2975        // Shell-compatible: conditions are commands, not command substitutions
2976        let result = parse("if kaish-validate; then echo; fi").unwrap();
2977        match &result.statements[0] {
2978            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2979                Expr::Command(cmd) => {
2980                    assert_eq!(cmd.name, "kaish-validate");
2981                }
2982                other => panic!("expected command, got {:?}", other),
2983            },
2984            other => panic!("expected if, got {:?}", other),
2985        }
2986    }
2987
2988    #[test]
2989    fn parse_cmd_subst_in_command_arg() {
2990        let result = parse("echo $(whoami)").unwrap();
2991        match &result.statements[0] {
2992            Stmt::Command(cmd) => {
2993                assert_eq!(cmd.name, "echo");
2994                match &cmd.args[0] {
2995                    Arg::Positional(Expr::CommandSubst(pipeline)) => {
2996                        assert_eq!(pipeline.commands[0].name, "whoami");
2997                    }
2998                    other => panic!("expected command subst, got {:?}", other),
2999                }
3000            }
3001            other => panic!("expected command, got {:?}", other),
3002        }
3003    }
3004
3005    // ═══════════════════════════════════════════════════════════════════════════
3006    // Logical Operator Tests (&&, ||)
3007    // ═══════════════════════════════════════════════════════════════════════════
3008
3009    #[test]
3010    fn parse_condition_and() {
3011        // Shell-compatible: commands chained with &&
3012        let result = parse("if check-a && check-b; then echo; fi").unwrap();
3013        match &result.statements[0] {
3014            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3015                Expr::BinaryOp { left, op, right } => {
3016                    assert_eq!(*op, BinaryOp::And);
3017                    assert!(matches!(left.as_ref(), Expr::Command(_)));
3018                    assert!(matches!(right.as_ref(), Expr::Command(_)));
3019                }
3020                other => panic!("expected binary op, got {:?}", other),
3021            },
3022            other => panic!("expected if, got {:?}", other),
3023        }
3024    }
3025
3026    #[test]
3027    fn parse_condition_or() {
3028        let result = parse("if try-a || try-b; then echo; fi").unwrap();
3029        match &result.statements[0] {
3030            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3031                Expr::BinaryOp { left, op, right } => {
3032                    assert_eq!(*op, BinaryOp::Or);
3033                    assert!(matches!(left.as_ref(), Expr::Command(_)));
3034                    assert!(matches!(right.as_ref(), Expr::Command(_)));
3035                }
3036                other => panic!("expected binary op, got {:?}", other),
3037            },
3038            other => panic!("expected if, got {:?}", other),
3039        }
3040    }
3041
3042    #[test]
3043    fn parse_condition_and_or_precedence() {
3044        // a && b || c should parse as (a && b) || c
3045        let result = parse("if cmd-a && cmd-b || cmd-c; then echo; fi").unwrap();
3046        match &result.statements[0] {
3047            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3048                Expr::BinaryOp { left, op, right } => {
3049                    // Top level should be ||
3050                    assert_eq!(*op, BinaryOp::Or);
3051                    // Left side should be && expression
3052                    match left.as_ref() {
3053                        Expr::BinaryOp { op: inner_op, .. } => {
3054                            assert_eq!(*inner_op, BinaryOp::And);
3055                        }
3056                        other => panic!("expected binary op (&&), got {:?}", other),
3057                    }
3058                    // Right side should be command
3059                    assert!(matches!(right.as_ref(), Expr::Command(_)));
3060                }
3061                other => panic!("expected binary op, got {:?}", other),
3062            },
3063            other => panic!("expected if, got {:?}", other),
3064        }
3065    }
3066
3067    #[test]
3068    fn parse_condition_multiple_and() {
3069        let result = parse("if cmd-a && cmd-b && cmd-c; then echo; fi").unwrap();
3070        match &result.statements[0] {
3071            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3072                Expr::BinaryOp { left, op, .. } => {
3073                    assert_eq!(*op, BinaryOp::And);
3074                    // Left side should also be &&
3075                    match left.as_ref() {
3076                        Expr::BinaryOp { op: inner_op, .. } => {
3077                            assert_eq!(*inner_op, BinaryOp::And);
3078                        }
3079                        other => panic!("expected binary op, got {:?}", other),
3080                    }
3081                }
3082                other => panic!("expected binary op, got {:?}", other),
3083            },
3084            other => panic!("expected if, got {:?}", other),
3085        }
3086    }
3087
3088    #[test]
3089    fn parse_condition_mixed_comparison_and_logical() {
3090        // Shell-compatible: use [[ ]] for comparisons, && to chain them
3091        let result = parse("if [[ ${X} == 5 ]] && [[ ${Y} -gt 0 ]]; then echo; fi").unwrap();
3092        match &result.statements[0] {
3093            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3094                Expr::BinaryOp { left, op, right } => {
3095                    assert_eq!(*op, BinaryOp::And);
3096                    // Left: [[ ${X} == 5 ]]
3097                    match left.as_ref() {
3098                        Expr::Test(test) => match test.as_ref() {
3099                            TestExpr::Comparison { op: left_op, .. } => {
3100                                assert_eq!(*left_op, TestCmpOp::Eq);
3101                            }
3102                            other => panic!("expected comparison, got {:?}", other),
3103                        },
3104                        other => panic!("expected test, got {:?}", other),
3105                    }
3106                    // Right: [[ ${Y} -gt 0 ]]
3107                    match right.as_ref() {
3108                        Expr::Test(test) => match test.as_ref() {
3109                            TestExpr::Comparison { op: right_op, .. } => {
3110                                assert_eq!(*right_op, TestCmpOp::NumGt);
3111                            }
3112                            other => panic!("expected comparison, got {:?}", other),
3113                        },
3114                        other => panic!("expected test, got {:?}", other),
3115                    }
3116                }
3117                other => panic!("expected binary op, got {:?}", other),
3118            },
3119            other => panic!("expected if, got {:?}", other),
3120        }
3121    }
3122
3123    // ═══════════════════════════════════════════════════════════════════════════
3124    // Integration Tests - Complete Scripts
3125    // ═══════════════════════════════════════════════════════════════════════════
3126
3127    /// Level 1: Linear script using core features
3128    #[test]
3129    fn script_level1_linear() {
3130        let script = r#"
3131NAME="kaish"
3132VERSION=1
3133TIMEOUT=30
3134ITEMS="alpha beta gamma"
3135
3136echo "Starting ${NAME} v${VERSION}"
3137cat "README.md" | grep pattern="install" | head count=5
3138fetch url="https://api.example.com/status" timeout=${TIMEOUT} > "/tmp/status.json"
3139echo "Items: ${ITEMS}"
3140"#;
3141        let result = parse(script).unwrap();
3142        let stmts: Vec<_> = result.statements.iter()
3143            .filter(|s| !matches!(s, Stmt::Empty))
3144            .collect();
3145
3146        assert_eq!(stmts.len(), 8);
3147        assert!(matches!(stmts[0], Stmt::Assignment(_)));  // set NAME
3148        assert!(matches!(stmts[1], Stmt::Assignment(_)));  // set VERSION
3149        assert!(matches!(stmts[2], Stmt::Assignment(_)));  // set TIMEOUT
3150        assert!(matches!(stmts[3], Stmt::Assignment(_)));  // set ITEMS
3151        assert!(matches!(stmts[4], Stmt::Command(_)));     // echo "Starting..."
3152        assert!(matches!(stmts[5], Stmt::Pipeline(_)));    // cat | grep | head
3153        assert!(matches!(stmts[6], Stmt::Pipeline(_)));    // fetch (with redirect - Pipeline since it has redirects)
3154        assert!(matches!(stmts[7], Stmt::Command(_)));     // echo "Items: ${ITEMS}"
3155    }
3156
3157    /// Level 2: Script with conditionals (shell-compatible syntax)
3158    #[test]
3159    fn script_level2_branching() {
3160        let script = r#"
3161RESULT=$(kaish-validate "input.json")
3162
3163if [[ ${RESULT.ok} == true ]]; then
3164    echo "Validation passed"
3165    process "input.json" > "output.json"
3166else
3167    echo "Validation failed: ${RESULT.err}"
3168fi
3169
3170if [[ ${COUNT} -gt 0 ]] && [[ ${COUNT} -le 100 ]]; then
3171    echo "Count in valid range"
3172fi
3173
3174if check-network || check-cache; then
3175    fetch url=${URL}
3176fi
3177"#;
3178        let result = parse(script).unwrap();
3179        let stmts: Vec<_> = result.statements.iter()
3180            .filter(|s| !matches!(s, Stmt::Empty))
3181            .collect();
3182
3183        assert_eq!(stmts.len(), 4);
3184
3185        // First: assignment with command substitution
3186        match stmts[0] {
3187            Stmt::Assignment(a) => {
3188                assert_eq!(a.name, "RESULT");
3189                assert!(matches!(&a.value, Expr::CommandSubst(_)));
3190            }
3191            other => panic!("expected assignment, got {:?}", other),
3192        }
3193
3194        // Second: if/else
3195        match stmts[1] {
3196            Stmt::If(if_stmt) => {
3197                assert_eq!(if_stmt.then_branch.len(), 2);
3198                assert!(if_stmt.else_branch.is_some());
3199                assert_eq!(if_stmt.else_branch.as_ref().unwrap().len(), 1);
3200            }
3201            other => panic!("expected if, got {:?}", other),
3202        }
3203
3204        // Third: if with && condition
3205        match stmts[2] {
3206            Stmt::If(if_stmt) => {
3207                match if_stmt.condition.as_ref() {
3208                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
3209                    other => panic!("expected && condition, got {:?}", other),
3210                }
3211            }
3212            other => panic!("expected if, got {:?}", other),
3213        }
3214
3215        // Fourth: if with || of commands
3216        match stmts[3] {
3217            Stmt::If(if_stmt) => {
3218                match if_stmt.condition.as_ref() {
3219                    Expr::BinaryOp { op, left, right } => {
3220                        assert_eq!(*op, BinaryOp::Or);
3221                        assert!(matches!(left.as_ref(), Expr::Command(_)));
3222                        assert!(matches!(right.as_ref(), Expr::Command(_)));
3223                    }
3224                    other => panic!("expected || condition, got {:?}", other),
3225                }
3226            }
3227            other => panic!("expected if, got {:?}", other),
3228        }
3229    }
3230
3231    /// Level 3: Script with loops and function definitions
3232    #[test]
3233    fn script_level3_loops_and_functions() {
3234        let script = r#"
3235greet() {
3236    echo "Hello, $1!"
3237}
3238
3239fetch_all() {
3240    for URL in $@; do
3241        fetch url=${URL}
3242    done
3243}
3244
3245USERS="alice bob charlie"
3246
3247for USER in ${USERS}; do
3248    greet ${USER}
3249    if [[ ${USER} == "bob" ]]; then
3250        echo "Found Bob!"
3251    fi
3252done
3253
3254long-running-task &
3255"#;
3256        let result = parse(script).unwrap();
3257        let stmts: Vec<_> = result.statements.iter()
3258            .filter(|s| !matches!(s, Stmt::Empty))
3259            .collect();
3260
3261        assert_eq!(stmts.len(), 5);
3262
3263        // First function def
3264        match stmts[0] {
3265            Stmt::ToolDef(t) => {
3266                assert_eq!(t.name, "greet");
3267                assert!(t.params.is_empty());
3268            }
3269            other => panic!("expected function def, got {:?}", other),
3270        }
3271
3272        // Second function def with nested for loop
3273        match stmts[1] {
3274            Stmt::ToolDef(t) => {
3275                assert_eq!(t.name, "fetch_all");
3276                assert_eq!(t.body.len(), 1);
3277                assert!(matches!(&t.body[0], Stmt::For(_)));
3278            }
3279            other => panic!("expected function def, got {:?}", other),
3280        }
3281
3282        // Assignment
3283        assert!(matches!(stmts[2], Stmt::Assignment(_)));
3284
3285        // For loop with nested if
3286        match stmts[3] {
3287            Stmt::For(f) => {
3288                assert_eq!(f.variable, "USER");
3289                assert_eq!(f.body.len(), 2);
3290                assert!(matches!(&f.body[0], Stmt::Command(_)));
3291                assert!(matches!(&f.body[1], Stmt::If(_)));
3292            }
3293            other => panic!("expected for loop, got {:?}", other),
3294        }
3295
3296        // Background job
3297        match stmts[4] {
3298            Stmt::Pipeline(p) => {
3299                assert!(p.background);
3300                assert_eq!(p.commands[0].name, "long-running-task");
3301            }
3302            other => panic!("expected pipeline (background), got {:?}", other),
3303        }
3304    }
3305
3306    /// Level 4: Complex nested control flow (shell-compatible syntax)
3307    #[test]
3308    fn script_level4_complex_nesting() {
3309        let script = r#"
3310RESULT=$(cat "config.json" | jq query=".servers" | kaish-validate schema="server-schema.json")
3311
3312if ping host=${HOST} && [[ ${RESULT} == true ]]; then
3313    for SERVER in "prod-1 prod-2"; do
3314        deploy target=${SERVER} port=8080
3315        if [[ $? -ne 0 ]]; then
3316            notify channel="ops" message="Deploy failed"
3317        fi
3318    done
3319fi
3320"#;
3321        let result = parse(script).unwrap();
3322        let stmts: Vec<_> = result.statements.iter()
3323            .filter(|s| !matches!(s, Stmt::Empty))
3324            .collect();
3325
3326        assert_eq!(stmts.len(), 2);
3327
3328        // Command substitution with pipeline
3329        match stmts[0] {
3330            Stmt::Assignment(a) => {
3331                assert_eq!(a.name, "RESULT");
3332                match &a.value {
3333                    Expr::CommandSubst(pipeline) => {
3334                        assert_eq!(pipeline.commands.len(), 3);
3335                    }
3336                    other => panic!("expected command subst, got {:?}", other),
3337                }
3338            }
3339            other => panic!("expected assignment, got {:?}", other),
3340        }
3341
3342        // If with && condition, containing for loop with nested if
3343        match stmts[1] {
3344            Stmt::If(if_stmt) => {
3345                match if_stmt.condition.as_ref() {
3346                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
3347                    other => panic!("expected && condition, got {:?}", other),
3348                }
3349                assert_eq!(if_stmt.then_branch.len(), 1);
3350                match &if_stmt.then_branch[0] {
3351                    Stmt::For(f) => {
3352                        assert_eq!(f.body.len(), 2);
3353                        assert!(matches!(&f.body[1], Stmt::If(_)));
3354                    }
3355                    other => panic!("expected for in if body, got {:?}", other),
3356                }
3357            }
3358            other => panic!("expected if, got {:?}", other),
3359        }
3360    }
3361
3362    /// Level 5: Edge cases and parser stress test
3363    #[test]
3364    fn script_level5_edge_cases() {
3365        let script = r#"
3366echo ""
3367echo "quotes: \"nested\" here"
3368echo "escapes: \n\t\r\\"
3369echo "unicode: \u2764"
3370
3371X=-99999
3372Y=3.14159265358979
3373Z=-0.001
3374
3375cmd a=1 b="two" c=true d=false e=null
3376
3377if true; then
3378    if false; then
3379        echo "inner"
3380    else
3381        echo "else"
3382    fi
3383fi
3384
3385for I in "a b c"; do
3386    echo ${I}
3387done
3388
3389no_params() {
3390    echo "no params"
3391}
3392
3393function all_args {
3394    echo "args: $@"
3395}
3396
3397a | b | c | d | e &
3398cmd 2> "errors.log"
3399cmd &> "all.log"
3400cmd >> "append.log"
3401cmd < "input.txt"
3402"#;
3403        let result = parse(script).unwrap();
3404        let stmts: Vec<_> = result.statements.iter()
3405            .filter(|s| !matches!(s, Stmt::Empty))
3406            .collect();
3407
3408        // Verify it parses without error
3409        assert!(stmts.len() >= 10, "expected many statements, got {}", stmts.len());
3410
3411        // Background pipeline
3412        let bg_stmt = stmts.iter().find(|s| matches!(s, Stmt::Pipeline(p) if p.background));
3413        assert!(bg_stmt.is_some(), "expected background pipeline");
3414
3415        match bg_stmt.unwrap() {
3416            Stmt::Pipeline(p) => {
3417                assert_eq!(p.commands.len(), 5);
3418                assert!(p.background);
3419            }
3420            _ => unreachable!(),
3421        }
3422    }
3423
3424    // ═══════════════════════════════════════════════════════════════════════════
3425    // Edge Case Tests: Ambiguity Resolution
3426    // ═══════════════════════════════════════════════════════════════════════════
3427
3428    #[test]
3429    fn parse_keyword_as_variable_rejected() {
3430        // Keywords CANNOT be used as variable names - this is intentional
3431        // to avoid ambiguity. Use different names instead.
3432        let result = parse(r#"if="value""#);
3433        assert!(result.is_err(), "if= should fail - 'if' is a keyword");
3434
3435        let result = parse("while=true");
3436        assert!(result.is_err(), "while= should fail - 'while' is a keyword");
3437
3438        let result = parse(r#"then="next""#);
3439        assert!(result.is_err(), "then= should fail - 'then' is a keyword");
3440    }
3441
3442    #[test]
3443    fn parse_set_command_with_flag() {
3444        let result = parse("set -e");
3445        assert!(result.is_ok(), "failed to parse set -e: {:?}", result);
3446        let program = result.unwrap();
3447        match &program.statements[0] {
3448            Stmt::Command(cmd) => {
3449                assert_eq!(cmd.name, "set");
3450                assert_eq!(cmd.args.len(), 1);
3451                match &cmd.args[0] {
3452                    Arg::ShortFlag(f) => assert_eq!(f, "e"),
3453                    other => panic!("expected ShortFlag, got {:?}", other),
3454                }
3455            }
3456            other => panic!("expected Command, got {:?}", other),
3457        }
3458    }
3459
3460    #[test]
3461    fn parse_set_command_no_args() {
3462        let result = parse("set");
3463        assert!(result.is_ok(), "failed to parse set: {:?}", result);
3464        let program = result.unwrap();
3465        match &program.statements[0] {
3466            Stmt::Command(cmd) => {
3467                assert_eq!(cmd.name, "set");
3468                assert_eq!(cmd.args.len(), 0);
3469            }
3470            other => panic!("expected Command, got {:?}", other),
3471        }
3472    }
3473
3474    #[test]
3475    fn parse_set_assignment_vs_command() {
3476        // X=5 should be assignment
3477        let result = parse("X=5");
3478        assert!(result.is_ok());
3479        let program = result.unwrap();
3480        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
3481
3482        // set -e should be command
3483        let result = parse("set -e");
3484        assert!(result.is_ok());
3485        let program = result.unwrap();
3486        assert!(matches!(&program.statements[0], Stmt::Command(_)));
3487    }
3488
3489    #[test]
3490    fn parse_true_as_command() {
3491        let result = parse("true");
3492        assert!(result.is_ok());
3493        let program = result.unwrap();
3494        match &program.statements[0] {
3495            Stmt::Command(cmd) => assert_eq!(cmd.name, "true"),
3496            other => panic!("expected Command(true), got {:?}", other),
3497        }
3498    }
3499
3500    #[test]
3501    fn parse_false_as_command() {
3502        let result = parse("false");
3503        assert!(result.is_ok());
3504        let program = result.unwrap();
3505        match &program.statements[0] {
3506            Stmt::Command(cmd) => assert_eq!(cmd.name, "false"),
3507            other => panic!("expected Command(false), got {:?}", other),
3508        }
3509    }
3510
3511    #[test]
3512    fn parse_dot_as_source_alias() {
3513        let result = parse(". script.kai");
3514        assert!(result.is_ok(), "failed to parse . script.kai: {:?}", result);
3515        let program = result.unwrap();
3516        match &program.statements[0] {
3517            Stmt::Command(cmd) => {
3518                assert_eq!(cmd.name, ".");
3519                assert_eq!(cmd.args.len(), 1);
3520            }
3521            other => panic!("expected Command(.), got {:?}", other),
3522        }
3523    }
3524
3525    #[test]
3526    fn parse_source_command() {
3527        let result = parse("source utils.kai");
3528        assert!(result.is_ok(), "failed to parse source: {:?}", result);
3529        let program = result.unwrap();
3530        match &program.statements[0] {
3531            Stmt::Command(cmd) => {
3532                assert_eq!(cmd.name, "source");
3533                assert_eq!(cmd.args.len(), 1);
3534            }
3535            other => panic!("expected Command(source), got {:?}", other),
3536        }
3537    }
3538
3539    #[test]
3540    fn parse_test_expr_file_test() {
3541        // Paths must be quoted strings in test expressions
3542        let result = parse(r#"[[ -f "/path/file" ]]"#);
3543        assert!(result.is_ok(), "failed to parse file test: {:?}", result);
3544    }
3545
3546    #[test]
3547    fn parse_test_expr_comparison() {
3548        let result = parse(r#"[[ $X == "value" ]]"#);
3549        assert!(result.is_ok(), "failed to parse comparison test: {:?}", result);
3550    }
3551
3552    #[test]
3553    fn parse_test_expr_single_eq() {
3554        // = and == are equivalent inside [[ ]] (matching bash behavior)
3555        let result = parse(r#"[[ $X = "value" ]]"#);
3556        assert!(result.is_ok(), "failed to parse single-= comparison: {:?}", result);
3557        let program = result.unwrap();
3558        match &program.statements[0] {
3559            Stmt::Test(TestExpr::Comparison { op, .. }) => {
3560                assert_eq!(op, &TestCmpOp::Eq);
3561            }
3562            other => panic!("expected Test(Comparison), got {:?}", other),
3563        }
3564    }
3565
3566    #[test]
3567    fn parse_while_loop() {
3568        let result = parse("while true; do echo; done");
3569        assert!(result.is_ok(), "failed to parse while loop: {:?}", result);
3570        let program = result.unwrap();
3571        assert!(matches!(&program.statements[0], Stmt::While(_)));
3572    }
3573
3574    #[test]
3575    fn parse_break_with_level() {
3576        let result = parse("break 2");
3577        assert!(result.is_ok());
3578        let program = result.unwrap();
3579        match &program.statements[0] {
3580            Stmt::Break(Some(n)) => assert_eq!(*n, 2),
3581            other => panic!("expected Break(2), got {:?}", other),
3582        }
3583    }
3584
3585    #[test]
3586    fn parse_continue_with_level() {
3587        let result = parse("continue 3");
3588        assert!(result.is_ok());
3589        let program = result.unwrap();
3590        match &program.statements[0] {
3591            Stmt::Continue(Some(n)) => assert_eq!(*n, 3),
3592            other => panic!("expected Continue(3), got {:?}", other),
3593        }
3594    }
3595
3596    #[test]
3597    fn parse_exit_with_code() {
3598        let result = parse("exit 1");
3599        assert!(result.is_ok());
3600        let program = result.unwrap();
3601        match &program.statements[0] {
3602            Stmt::Exit(Some(expr)) => {
3603                match expr.as_ref() {
3604                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 1),
3605                    other => panic!("expected Int(1), got {:?}", other),
3606                }
3607            }
3608            other => panic!("expected Exit(1), got {:?}", other),
3609        }
3610    }
3611
3612    // ========================================================================
3613    // parse_interpolated_string_spanned — body-internal span tracking for
3614    // heredoc bodies. The byte offsets these tests pin become validator
3615    // issue spans via the HereDocBody → SpannedPart flow.
3616    // ========================================================================
3617
3618    #[test]
3619    fn spanned_literal_only_records_byte_range() {
3620        let parts = parse_interpolated_string_spanned("hello world", 100);
3621        assert_eq!(parts.len(), 1);
3622        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hello world"));
3623        assert_eq!(parts[0].offset, 100, "base_offset must propagate to literals");
3624        assert_eq!(parts[0].len, 11);
3625    }
3626
3627    #[test]
3628    fn spanned_braced_var_at_zero() {
3629        let parts = parse_interpolated_string_spanned("${X}", 50);
3630        assert_eq!(parts.len(), 1);
3631        assert!(matches!(&parts[0].part, StringPart::Var(_)));
3632        assert_eq!(parts[0].offset, 50);
3633        assert_eq!(parts[0].len, 4); // "${X}"
3634    }
3635
3636    #[test]
3637    fn spanned_simple_var_then_literal() {
3638        let parts = parse_interpolated_string_spanned("$X end", 10);
3639        assert_eq!(parts.len(), 2);
3640        assert!(matches!(&parts[0].part, StringPart::Var(_)));
3641        assert_eq!(parts[0].offset, 10);
3642        assert_eq!(parts[0].len, 2); // "$X"
3643        assert!(matches!(&parts[1].part, StringPart::Literal(s) if s == " end"));
3644        assert_eq!(parts[1].offset, 12);
3645        assert_eq!(parts[1].len, 4);
3646    }
3647
3648    #[test]
3649    fn spanned_mixed_literal_var_literal() {
3650        let parts = parse_interpolated_string_spanned("hi ${X} bye", 0);
3651        assert_eq!(parts.len(), 3);
3652        // "hi "
3653        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hi "));
3654        assert_eq!(parts[0].offset, 0);
3655        assert_eq!(parts[0].len, 3);
3656        // ${X}
3657        assert!(matches!(&parts[1].part, StringPart::Var(_)));
3658        assert_eq!(parts[1].offset, 3);
3659        assert_eq!(parts[1].len, 4);
3660        // " bye"
3661        assert!(matches!(&parts[2].part, StringPart::Literal(s) if s == " bye"));
3662        assert_eq!(parts[2].offset, 7);
3663        assert_eq!(parts[2].len, 4);
3664    }
3665
3666    #[test]
3667    fn spanned_positional_param() {
3668        let parts = parse_interpolated_string_spanned("$1 done", 0);
3669        assert_eq!(parts.len(), 2);
3670        assert!(matches!(&parts[0].part, StringPart::Positional(1)));
3671        assert_eq!(parts[0].offset, 0);
3672        assert_eq!(parts[0].len, 2); // "$1"
3673    }
3674
3675    #[test]
3676    fn spanned_special_dollar_dollar() {
3677        let parts = parse_interpolated_string_spanned("$$", 5);
3678        assert_eq!(parts.len(), 1);
3679        assert!(matches!(&parts[0].part, StringPart::CurrentPid));
3680        assert_eq!(parts[0].offset, 5);
3681        assert_eq!(parts[0].len, 2);
3682    }
3683
3684    #[test]
3685    fn spanned_arithmetic_marker_recognised() {
3686        // The lexer wraps arithmetic markers as ${__ARITH:expr__} for
3687        // interpolated heredocs; the spanned parser must produce
3688        // StringPart::Arithmetic for that shape.
3689        let parts = parse_interpolated_string_spanned("${__ARITH:1+2__}", 0);
3690        assert_eq!(parts.len(), 1);
3691        assert!(matches!(&parts[0].part, StringPart::Arithmetic(e) if e == "1+2"));
3692    }
3693
3694    #[test]
3695    fn spanned_default_separator_yields_var_with_default() {
3696        let parts = parse_interpolated_string_spanned("${X:-fallback}", 0);
3697        assert_eq!(parts.len(), 1);
3698        assert!(matches!(&parts[0].part, StringPart::VarWithDefault { .. }));
3699        assert_eq!(parts[0].offset, 0);
3700        assert_eq!(parts[0].len, 14); // "${X:-fallback}"
3701    }
3702
3703    #[test]
3704    fn spanned_no_dollar_runs_one_literal() {
3705        let parts = parse_interpolated_string_spanned("plain text only", 7);
3706        assert_eq!(parts.len(), 1);
3707        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "plain text only"));
3708        assert_eq!(parts[0].offset, 7);
3709        assert_eq!(parts[0].len, 15);
3710    }
3711
3712    #[test]
3713    fn spanned_matches_unspanned_part_count() {
3714        // Spanned and spanless variants must agree on the part decomposition.
3715        // Bug fixes in one should land in the other.
3716        let cases = [
3717            "hello",
3718            "$X",
3719            "${X}",
3720            "${X:-d}",
3721            "hi $A and $B",
3722            "$0 $1 $2",
3723            "$$ $? $#",
3724        ];
3725        for s in &cases {
3726            let unspanned = parse_interpolated_string(s);
3727            let spanned = parse_interpolated_string_spanned(s, 0);
3728            assert_eq!(
3729                unspanned.len(),
3730                spanned.len(),
3731                "part count differs for {:?}",
3732                s
3733            );
3734        }
3735    }
3736
3737    #[test]
3738    fn spanned_multibyte_utf8_before_var_uses_byte_offsets() {
3739        // 🚀 is 4 bytes in UTF-8 and a space is 1 byte, so the literal
3740        // prefix is 5 bytes total. `${X}` then sits at byte offset 5.
3741        // Right-by-luck for char-vs-byte indexing is precisely what this
3742        // test catches: if someone swaps .len_utf8() for 1, offset becomes 2.
3743        let parts = parse_interpolated_string_spanned("🚀 ${X}", 0);
3744        assert_eq!(parts.len(), 2);
3745
3746        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "🚀 "));
3747        assert_eq!(parts[0].offset, 0);
3748        assert_eq!(parts[0].len, 5, "literal len must be bytes, not chars");
3749
3750        assert!(matches!(&parts[1].part, StringPart::Var(_)));
3751        assert_eq!(parts[1].offset, 5, "var offset must be bytes, not chars");
3752        assert_eq!(parts[1].len, 4);
3753    }
3754
3755    #[test]
3756    fn spanned_multibyte_utf8_pure_literal_is_byte_length() {
3757        // "hello 世界 world": 5 + 1 + 6 (3 per CJK char) + 1 + 5 = 18 bytes,
3758        // 13 chars. The `len` field must report 18, not 13.
3759        let parts = parse_interpolated_string_spanned("hello 世界 world", 0);
3760        assert_eq!(parts.len(), 1);
3761        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hello 世界 world"));
3762        assert_eq!(parts[0].offset, 0);
3763        assert_eq!(parts[0].len, 18);
3764    }
3765
3766    #[test]
3767    fn spanned_escape_dollar_consumes_two_bytes_emits_one_char() {
3768        // `\$` is 2 source bytes and resolves to a single literal `$`.
3769        // The literal part's `len` should reflect the SOURCE length (2).
3770        let parts = parse_interpolated_string_spanned("\\$", 0);
3771        assert_eq!(parts.len(), 1);
3772        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "$"));
3773        assert_eq!(parts[0].offset, 0);
3774        assert_eq!(parts[0].len, 2, "len is source byte length, not rendered length");
3775    }
3776
3777    #[test]
3778    fn spanned_escape_backslash_collapses_pair_to_one() {
3779        let parts = parse_interpolated_string_spanned("\\\\", 0);
3780        assert_eq!(parts.len(), 1);
3781        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "\\"));
3782        assert_eq!(parts[0].len, 2);
3783    }
3784}
kaish_kernel/parser.rs

kaish_kernel/
parser.rs