kaish_kernel/
parser.rs

1//! Parser for kaish source code.
2//!
3//! Transforms a token stream from the lexer into an Abstract Syntax Tree.
4//! Uses chumsky for parser combinators with good error recovery.
5
6use crate::ast::{
7    Arg, Assignment, BinaryOp, CaseBranch, CaseStmt, Command, Expr, FileTestOp, ForLoop, IfStmt,
8    Pipeline, Program, Redirect, RedirectKind, SpannedPart, Stmt, StringPart, StringTestOp,
9    TestCmpOp, TestExpr, ToolDef, Value, VarPath, VarSegment, WhileLoop,
10};
11use crate::lexer::{self, HereDocData, Token};
12use chumsky::{input::ValueInput, prelude::*};
13
/// Span type used throughout the parser.
///
/// Alias for `SimpleSpan`, which reaches this module through the
/// `chumsky::prelude::*` glob import. Spans built in this file are
/// constructed from `start..end` ranges of byte offsets into the source text.
pub type Span = SimpleSpan;
16
17/// Parse a raw `${...}` string into an Expr.
18///
19/// Handles:
20/// - Special variables: `${?}` → LastExitCode, `${$}` → CurrentPid
21/// - Simple paths: `${VAR}`, `${VAR.field}`, `${VAR[0]}` → VarRef
22/// - Default values: `${VAR:-default}` → VarWithDefault (with nested expansion support)
23fn parse_var_expr(raw: &str) -> Expr {
24    // Special case: ${?} is the last exit code (same as $?)
25    if raw == "${?}" {
26        return Expr::LastExitCode;
27    }
28
29    // Special case: ${$} is the current PID (same as $$)
30    if raw == "${$}" {
31        return Expr::CurrentPid;
32    }
33
34    // Check for default value syntax: ${VAR:-default}
35    // Need to find :- that's not inside a nested ${...}
36    if let Some(colon_idx) = find_default_separator(raw) {
37        // Extract variable name (between ${ and :-)
38        let name = raw[2..colon_idx].to_string();
39        // Extract default value (between :- and }) and recursively parse it,
40        // after stripping shell quoting from the word (quotes are syntax).
41        let default_str = &raw[colon_idx + 2..raw.len() - 1];
42        let default = parse_interpolated_string(&unquote_default_word(default_str));
43        return Expr::VarWithDefault { name, default };
44    }
45
46    // Regular variable path
47    Expr::VarRef(parse_varpath(raw))
48}
49
/// Strip bash-style quoting from the WORD of a `${VAR:-WORD}` expansion so the
/// result can be handed to the interpolation parser.
///
/// Quote characters delimit, they are not data: `${X:-"default"}` produces
/// `default`. Inside double quotes the delimiters vanish but `$` keeps its
/// meaning; inside single quotes the delimiters vanish and every `$` is
/// rewritten to the `__KAISH_ESCAPED_DOLLAR__` marker, which
/// `parse_interpolated_string` later turns back into a plain `$` without
/// expanding it. A quote character of the *other* kind inside a quoted
/// region is ordinary text. Unquoted text is copied through untouched.
fn unquote_default_word(word: &str) -> String {
    /// Which quoting context the scanner is currently in.
    #[derive(Clone, Copy)]
    enum Quoting {
        Bare,
        Single,
        Double,
    }

    let mut unquoted = String::with_capacity(word.len());
    let mut state = Quoting::Bare;

    for c in word.chars() {
        state = match (c, state) {
            // Opening / closing delimiters are consumed, never emitted.
            ('\'', Quoting::Bare) => Quoting::Single,
            ('\'', Quoting::Single) => Quoting::Bare,
            ('"', Quoting::Bare) => Quoting::Double,
            ('"', Quoting::Double) => Quoting::Bare,
            // Single quotes suppress interpolation of `$`.
            ('$', Quoting::Single) => {
                unquoted.push_str("__KAISH_ESCAPED_DOLLAR__");
                state
            }
            // Anything else (including the foreign quote kind) is data.
            _ => {
                unquoted.push(c);
                state
            }
        };
    }

    unquoted
}
76
/// Locate the `:-` of a full `${VAR:-default}` token, returning its byte index.
///
/// Only a separator that belongs to the outermost `${...}` counts; `:-`
/// sequences inside nested `${...}` expansions are skipped. Returns `None`
/// when no such separator exists.
fn find_default_separator(raw: &str) -> Option<usize> {
    let bytes = raw.as_bytes();
    let mut nesting = 0usize;
    let mut cursor = 0usize;

    while cursor < bytes.len() {
        match &bytes[cursor..] {
            // `${` opens one more level of expansion.
            [b'$', b'{', ..] => {
                nesting += 1;
                cursor += 2;
            }
            // `}` closes a level, but never drops below the top.
            [b'}', ..] if nesting > 0 => {
                nesting -= 1;
                cursor += 1;
            }
            // Level 1 is the body of the outer `${...}`.
            [b':', b'-', ..] if nesting == 1 => return Some(cursor),
            _ => cursor += 1,
        }
    }

    None
}
102
/// Locate the `:-` separator inside the *content* of a braced expansion (the
/// text between `${` and `}`), returning its byte index.
///
/// Separators that sit inside a nested `${...}` are ignored; only one at the
/// top level of `content` is reported. Returns `None` if there is none.
fn find_default_separator_in_content(content: &str) -> Option<usize> {
    let bytes = content.as_bytes();
    let mut nesting = 0usize;
    let mut cursor = 0usize;

    while cursor < bytes.len() {
        match &bytes[cursor..] {
            // Entering a nested `${...}`.
            [b'$', b'{', ..] => {
                nesting += 1;
                cursor += 2;
            }
            // Leaving a nested expansion; a stray `}` at the top is skipped.
            [b'}', ..] if nesting > 0 => {
                nesting -= 1;
                cursor += 1;
            }
            // The outer braces are already stripped, so the top level is 0.
            [b':', b'-', ..] if nesting == 0 => return Some(cursor),
            _ => cursor += 1,
        }
    }

    None
}
128
129/// Parse a raw `${...}` string into a VarPath.
130///
131/// Handles paths like `${VAR}` and `${VAR.field}`. Array indexing is not supported.
132fn parse_varpath(raw: &str) -> VarPath {
133    let segments_strs = lexer::parse_var_ref(raw).unwrap_or_default();
134    let segments = segments_strs
135        .into_iter()
136        .filter(|s| !s.starts_with('['))  // Skip index segments
137        .map(VarSegment::Field)
138        .collect();
139    VarPath { segments }
140}
141
142/// Drop `Stmt::Empty` (bare newlines/semicolons) from a parsed `$()` body so an
143/// empty or whitespace-only substitution collapses to nothing runnable.
144fn strip_empty_stmts(statements: Vec<Stmt>) -> Vec<Stmt> {
145    statements
146        .into_iter()
147        .filter(|s| !matches!(s, Stmt::Empty))
148        .collect()
149}
150
151/// Parse an unquoted heredoc body's interpolation while tracking each part's
152/// byte offset in the source.
153///
154/// `base_offset` is added to every part's offset so callers can attribute
155/// positions to a larger source (e.g., heredoc body inside the original
156/// script). Returns parts in source order with offset+len populated.
157///
158/// **Heredoc-specific behaviour**: per POSIX, unquoted heredoc bodies process
159/// three backslash escapes — `\$` (suppress expansion), `\\` (literal
160/// backslash), and `\<newline>` (line continuation). All other backslashes
161/// are kept verbatim. This differs from [`parse_interpolated_string`], which
162/// is called on double-quoted string content where the lexer has already
163/// processed escapes via `__KAISH_ESCAPED_DOLLAR__`.
164///
165/// This sibling of [`parse_interpolated_string`] duplicates parsing logic
166/// for now; unifying them behind a position-tracking core is a follow-up
167/// cleanup. Behaviour MUST stay aligned for the non-escape paths — bug fixes
168/// for the shared interpolation logic here should land there as well.
169fn parse_interpolated_string_spanned(s: &str, base_offset: usize) -> Vec<SpannedPart> {
170    let s = s.replace("__KAISH_ESCAPED_DOLLAR__", "\x00DOLLAR\x00");
171
172    let chars_vec: Vec<char> = s.chars().collect();
173    let mut i = 0;
174    let mut pos: usize = 0;
175
176    let mut parts: Vec<SpannedPart> = Vec::new();
177    let mut current_text = String::new();
178    let mut current_text_start: usize = pos;
179
180    let push_literal =
181        |current_text: &mut String, start: &mut usize, end: usize, parts: &mut Vec<SpannedPart>| {
182            if !current_text.is_empty() {
183                parts.push(SpannedPart {
184                    part: StringPart::Literal(std::mem::take(current_text)),
185                    offset: base_offset + *start,
186                    len: end - *start,
187                });
188                *start = end;
189            }
190        };
191
192    while i < chars_vec.len() {
193        let ch = chars_vec[i];
194
195        if ch == '\x00' {
196            // Escaped-dollar marker: \x00 DOLLAR \x00 → literal '$'
197            let start = pos;
198            i += 1;
199            pos += 1;
200            let mut marker = String::new();
201            while let Some(&c) = chars_vec.get(i) {
202                if c == '\x00' {
203                    i += 1;
204                    pos += 1;
205                    break;
206                }
207                marker.push(c);
208                i += 1;
209                pos += c.len_utf8();
210            }
211            if marker == "DOLLAR" {
212                if current_text.is_empty() {
213                    current_text_start = start;
214                }
215                current_text.push('$');
216            }
217        } else if ch == '\\' {
218            // POSIX heredoc-body escape processing for unquoted heredocs.
219            // Only `\$`, `\\`, and `\<newline>` are escapes; everything else
220            // keeps the backslash verbatim. Each case advances `pos` by the
221            // bytes consumed from the source so subsequent part offsets stay
222            // anchored to original-source coordinates.
223            let next = chars_vec.get(i + 1).copied();
224            match next {
225                Some('$') => {
226                    if current_text.is_empty() {
227                        current_text_start = pos;
228                    }
229                    current_text.push('$');
230                    i += 2;
231                    pos += 2;
232                }
233                Some('\\') => {
234                    if current_text.is_empty() {
235                        current_text_start = pos;
236                    }
237                    current_text.push('\\');
238                    i += 2;
239                    pos += 2;
240                }
241                Some('\n') => {
242                    // Line continuation: consume both bytes, emit nothing.
243                    // The literal run resumes on the next line.
244                    i += 2;
245                    pos += 2;
246                    if current_text.is_empty() {
247                        current_text_start = pos;
248                    }
249                }
250                Some('\r') => {
251                    // \<CR> or \<CR><LF>: line continuation
252                    i += 2;
253                    pos += 2;
254                    if chars_vec.get(i) == Some(&'\n') {
255                        i += 1;
256                        pos += 1;
257                    }
258                    if current_text.is_empty() {
259                        current_text_start = pos;
260                    }
261                }
262                _ => {
263                    // Other backslash sequences: keep `\` literally,
264                    // consume only the backslash. The next iteration will
265                    // process the following char on its own merits.
266                    if current_text.is_empty() {
267                        current_text_start = pos;
268                    }
269                    current_text.push('\\');
270                    i += 1;
271                    pos += 1;
272                }
273            }
274        } else if ch == '$' {
275            // Possible expansion. Save current run before peeking ahead.
276            let part_start = pos;
277            let next = chars_vec.get(i + 1).copied();
278
279            if next == Some('(') && chars_vec.get(i + 2) != Some(&'(') {
280                // $(...) command substitution
281                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
282                i += 2; // consume "$("
283                pos += 2;
284                let mut cmd_content = String::new();
285                let mut depth = 1;
286                while let Some(&c) = chars_vec.get(i) {
287                    i += 1;
288                    pos += c.len_utf8();
289                    if c == '(' {
290                        depth += 1;
291                        cmd_content.push(c);
292                    } else if c == ')' {
293                        depth -= 1;
294                        if depth == 0 {
295                            break;
296                        }
297                        cmd_content.push(c);
298                    } else {
299                        cmd_content.push(c);
300                    }
301                }
302                let inserted = if let Ok(program) = parse(&cmd_content) {
303                    // The full statement block runs as the substitution body
304                    // (pipelines, `&&`/`||`, `;`/newline sequences, comments).
305                    let stmts = strip_empty_stmts(program.statements);
306                    if stmts.is_empty() {
307                        false
308                    } else {
309                        parts.push(SpannedPart {
310                            part: StringPart::CommandSubst(stmts),
311                            offset: base_offset + part_start,
312                            len: pos - part_start,
313                        });
314                        true
315                    }
316                } else {
317                    false
318                };
319                if inserted {
320                    // Successfully pushed a CommandSubst; the next literal
321                    // run will start after the closing ')'.
322                    current_text_start = pos;
323                } else {
324                    // Fall back to literal text. The literal run starts at
325                    // the leading '$' (set above only if current_text was
326                    // empty); leave current_text_start alone otherwise so we
327                    // don't lose an in-progress run.
328                    if current_text.is_empty() {
329                        current_text_start = part_start;
330                    }
331                    current_text.push_str("$(");
332                    current_text.push_str(&cmd_content);
333                    current_text.push(')');
334                }
335            } else if next == Some('{') {
336                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
337                i += 2; // consume "${"
338                pos += 2;
339                let mut var_content = String::new();
340                let mut depth = 1;
341                while let Some(&c) = chars_vec.get(i) {
342                    i += 1;
343                    pos += c.len_utf8();
344                    if c == '{' && var_content.ends_with('$') {
345                        depth += 1;
346                        var_content.push(c);
347                    } else if c == '}' {
348                        depth -= 1;
349                        if depth == 0 {
350                            break;
351                        }
352                        var_content.push(c);
353                    } else {
354                        var_content.push(c);
355                    }
356                }
357                let part = if let Some(name) = var_content.strip_prefix('#') {
358                    StringPart::VarLength(name.to_string())
359                } else if var_content.starts_with("__ARITH:") && var_content.ends_with("__") {
360                    let expr = var_content
361                        .strip_prefix("__ARITH:")
362                        .and_then(|s| s.strip_suffix("__"))
363                        .unwrap_or("");
364                    StringPart::Arithmetic(expr.to_string())
365                } else if let Some(colon_idx) = find_default_separator_in_content(&var_content) {
366                    let name = var_content[..colon_idx].to_string();
367                    let default_str = &var_content[colon_idx + 2..];
368                    // Default value spans recursively kept relative to the
369                    // outer body — the inner parts get their own offsets via
370                    // the recursive call when needed. For now, the default's
371                    // parts are stored without spans (default is a Vec<StringPart>).
372                    let default = parse_interpolated_string(&unquote_default_word(default_str));
373                    StringPart::VarWithDefault { name, default }
374                } else {
375                    StringPart::Var(parse_varpath(&format!("${{{}}}", var_content)))
376                };
377                parts.push(SpannedPart {
378                    part,
379                    offset: base_offset + part_start,
380                    len: pos - part_start,
381                });
382                current_text_start = pos;
383            } else if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
384                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
385                i += 1; // consume '$'
386                pos += 1;
387                if let Some(&digit) = chars_vec.get(i) {
388                    let n = digit.to_digit(10).unwrap_or(0) as usize;
389                    i += 1;
390                    pos += digit.len_utf8();
391                    parts.push(SpannedPart {
392                        part: StringPart::Positional(n),
393                        offset: base_offset + part_start,
394                        len: pos - part_start,
395                    });
396                }
397                current_text_start = pos;
398            } else if next == Some('@') {
399                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
400                i += 2; // consume "$@"
401                pos += 2;
402                parts.push(SpannedPart {
403                    part: StringPart::AllArgs,
404                    offset: base_offset + part_start,
405                    len: pos - part_start,
406                });
407                current_text_start = pos;
408            } else if next == Some('#') {
409                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
410                i += 2; // consume "$#"
411                pos += 2;
412                parts.push(SpannedPart {
413                    part: StringPart::ArgCount,
414                    offset: base_offset + part_start,
415                    len: pos - part_start,
416                });
417                current_text_start = pos;
418            } else if next == Some('?') {
419                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
420                i += 2; // consume "$?"
421                pos += 2;
422                parts.push(SpannedPart {
423                    part: StringPart::LastExitCode,
424                    offset: base_offset + part_start,
425                    len: pos - part_start,
426                });
427                current_text_start = pos;
428            } else if next == Some('$') {
429                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
430                i += 2; // consume "$$"
431                pos += 2;
432                parts.push(SpannedPart {
433                    part: StringPart::CurrentPid,
434                    offset: base_offset + part_start,
435                    len: pos - part_start,
436                });
437                current_text_start = pos;
438            } else if next.map(|c| c.is_ascii_alphabetic() || c == '_').unwrap_or(false) {
439                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
440                i += 1; // consume '$'
441                pos += 1;
442                let mut var_name = String::new();
443                while let Some(&c) = chars_vec.get(i) {
444                    if c.is_ascii_alphanumeric() || c == '_' {
445                        var_name.push(c);
446                        i += 1;
447                        pos += c.len_utf8();
448                    } else {
449                        break;
450                    }
451                }
452                parts.push(SpannedPart {
453                    part: StringPart::Var(VarPath::simple(var_name)),
454                    offset: base_offset + part_start,
455                    len: pos - part_start,
456                });
457                current_text_start = pos;
458            } else {
459                // Bare $ — treat as literal
460                if current_text.is_empty() {
461                    current_text_start = pos;
462                }
463                current_text.push(ch);
464                i += 1;
465                pos += 1;
466            }
467        } else {
468            if current_text.is_empty() {
469                current_text_start = pos;
470            }
471            current_text.push(ch);
472            i += 1;
473            pos += ch.len_utf8();
474        }
475    }
476
477    push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
478
479    parts
480}
481
482fn parse_interpolated_string(s: &str) -> Vec<StringPart> {
483    // First, replace escaped dollar markers with a temporary placeholder
484    // The lexer uses __KAISH_ESCAPED_DOLLAR__ for \$ to prevent re-interpretation
485    let s = s.replace("__KAISH_ESCAPED_DOLLAR__", "\x00DOLLAR\x00");
486
487    let mut parts = Vec::new();
488    let mut current_text = String::new();
489    let mut chars = s.chars().peekable();
490
491    while let Some(ch) = chars.next() {
492        if ch == '\x00' {
493            // This is our escaped dollar marker - skip "DOLLAR" and the closing \x00
494            let mut marker = String::new();
495            while let Some(&c) = chars.peek() {
496                if c == '\x00' {
497                    chars.next(); // consume closing marker
498                    break;
499                }
500                if let Some(c) = chars.next() {
501                    marker.push(c);
502                }
503            }
504            if marker == "DOLLAR" {
505                current_text.push('$');
506            }
507        } else if ch == '$' {
508            // Check for command substitution $(...)
509            if chars.peek() == Some(&'(') {
510                // Command substitution $(...)
511                if !current_text.is_empty() {
512                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
513                }
514
515                // Consume the '('
516                chars.next();
517
518                // Collect until matching ')' accounting for nested parens
519                let mut cmd_content = String::new();
520                let mut paren_depth = 1;
521                for c in chars.by_ref() {
522                    if c == '(' {
523                        paren_depth += 1;
524                        cmd_content.push(c);
525                    } else if c == ')' {
526                        paren_depth -= 1;
527                        if paren_depth == 0 {
528                            break;
529                        }
530                        cmd_content.push(c);
531                    } else {
532                        cmd_content.push(c);
533                    }
534                }
535
536                // Parse the command content as a full statement block
537                // (pipelines, `&&`/`||` chains, `;`/newline sequences, comments).
538                if let Ok(program) = parse(&cmd_content) {
539                    let stmts = strip_empty_stmts(program.statements);
540                    if stmts.is_empty() {
541                        // Nothing runnable — treat as literal text.
542                        current_text.push_str("$(");
543                        current_text.push_str(&cmd_content);
544                        current_text.push(')');
545                    } else {
546                        parts.push(StringPart::CommandSubst(stmts));
547                    }
548                } else {
549                    // Parse failed - treat as literal
550                    current_text.push_str("$(");
551                    current_text.push_str(&cmd_content);
552                    current_text.push(')');
553                }
554            } else if chars.peek() == Some(&'{') {
555                // Braced variable reference ${...}
556                if !current_text.is_empty() {
557                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
558                }
559
560                // Consume the '{'
561                chars.next();
562
563                // Collect until matching '}', tracking nesting depth
564                let mut var_content = String::new();
565                let mut depth = 1;
566                for c in chars.by_ref() {
567                    if c == '{' && var_content.ends_with('$') {
568                        depth += 1;
569                        var_content.push(c);
570                    } else if c == '}' {
571                        depth -= 1;
572                        if depth == 0 {
573                            break;
574                        }
575                        var_content.push(c);
576                    } else {
577                        var_content.push(c);
578                    }
579                }
580
581                // Parse the content for special syntax
582                let part = if let Some(name) = var_content.strip_prefix('#') {
583                    // Variable length: ${#VAR}
584                    StringPart::VarLength(name.to_string())
585                } else if var_content.starts_with("__ARITH:") && var_content.ends_with("__") {
586                    // Arithmetic expression: ${__ARITH:expr__}
587                    let expr = var_content
588                        .strip_prefix("__ARITH:")
589                        .and_then(|s| s.strip_suffix("__"))
590                        .unwrap_or("");
591                    StringPart::Arithmetic(expr.to_string())
592                } else if let Some(colon_idx) = find_default_separator_in_content(&var_content) {
593                    // Variable with default: ${VAR:-default} - recursively parse the default
594                    let name = var_content[..colon_idx].to_string();
595                    let default_str = &var_content[colon_idx + 2..];
596                    let default = parse_interpolated_string(&unquote_default_word(default_str));
597                    StringPart::VarWithDefault { name, default }
598                } else {
599                    // Regular variable: ${VAR} or ${VAR.field}
600                    StringPart::Var(parse_varpath(&format!("${{{}}}", var_content)))
601                };
602                parts.push(part);
603            } else if chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
604                // Positional parameter $0-$9
605                if !current_text.is_empty() {
606                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
607                }
608                if let Some(digit) = chars.next() {
609                    let n = digit.to_digit(10).unwrap_or(0) as usize;
610                    parts.push(StringPart::Positional(n));
611                }
612            } else if chars.peek() == Some(&'@') {
613                // All arguments $@
614                if !current_text.is_empty() {
615                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
616                }
617                chars.next(); // consume '@'
618                parts.push(StringPart::AllArgs);
619            } else if chars.peek() == Some(&'#') {
620                // Argument count $#
621                if !current_text.is_empty() {
622                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
623                }
624                chars.next(); // consume '#'
625                parts.push(StringPart::ArgCount);
626            } else if chars.peek() == Some(&'?') {
627                // Last exit code $?
628                if !current_text.is_empty() {
629                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
630                }
631                chars.next(); // consume '?'
632                parts.push(StringPart::LastExitCode);
633            } else if chars.peek() == Some(&'$') {
634                // Current PID $$
635                if !current_text.is_empty() {
636                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
637                }
638                chars.next(); // consume second '$'
639                parts.push(StringPart::CurrentPid);
640            } else if chars.peek().map(|c| c.is_ascii_alphabetic() || *c == '_').unwrap_or(false) {
641                // Simple variable reference $NAME
642                if !current_text.is_empty() {
643                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
644                }
645
646                // Collect identifier characters
647                let mut var_name = String::new();
648                while let Some(&c) = chars.peek() {
649                    if c.is_ascii_alphanumeric() || c == '_' {
650                        if let Some(c) = chars.next() {
651                            var_name.push(c);
652                        }
653                    } else {
654                        break;
655                    }
656                }
657
658                parts.push(StringPart::Var(VarPath::simple(var_name)));
659            } else {
660                // Literal $ (not followed by { or identifier start)
661                current_text.push(ch);
662            }
663        } else {
664            current_text.push(ch);
665        }
666    }
667
668    if !current_text.is_empty() {
669        parts.push(StringPart::Literal(current_text));
670    }
671
672    parts
673}
674
675/// Parse error with location and context.
676#[derive(Debug, Clone)]
677pub struct ParseError {
678    pub span: Span,
679    pub message: String,
680}
681
682impl ParseError {
683    /// Format the error against the original source, emitting a 1-indexed
684    /// `line:col [parse]: <message>` prefix and a snippet of the offending
685    /// line. Mirrors `ValidationIssue::format` so error reporting feels
686    /// consistent across pipeline phases.
687    pub fn format(&self, source: &str) -> String {
688        let start = self.span.start;
689        let mut line = 1usize;
690        let mut col = 1usize;
691        for (i, ch) in source.char_indices() {
692            if i >= start {
693                break;
694            }
695            if ch == '\n' {
696                line += 1;
697                col = 1;
698            } else {
699                col += 1;
700            }
701        }
702        let line_content = {
703            let line_start = source[..start.min(source.len())]
704                .rfind('\n')
705                .map_or(0, |i| i + 1);
706            let line_end = source[start.min(source.len())..]
707                .find('\n')
708                .map_or(source.len(), |i| start + i);
709            source.get(line_start..line_end).unwrap_or("")
710        };
711        if line_content.is_empty() {
712            format!("{}:{} [parse]: {}", line, col, self.message)
713        } else {
714            format!(
715                "{}:{} [parse]: {}\n  | {}",
716                line, col, self.message, line_content
717            )
718        }
719    }
720}
721
722impl std::fmt::Display for ParseError {
723    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
724        write!(f, "{} at {:?}", self.message, self.span)
725    }
726}
727
728impl std::error::Error for ParseError {}
729
730/// Parse kaish source code into a Program AST.
731pub fn parse(source: &str) -> Result<Program, Vec<ParseError>> {
732    // Tokenize with logos
733    let tokens = lexer::tokenize(source).map_err(|errs| {
734        errs.into_iter()
735            .map(|e| ParseError {
736                span: (e.span.start..e.span.end).into(),
737                message: format!("lexer error: {}", e.token),
738            })
739            .collect::<Vec<_>>()
740    })?;
741
742    // Convert tokens to (Token, SimpleSpan) pairs
743    let tokens: Vec<(Token, Span)> = tokens
744        .into_iter()
745        .map(|spanned| (spanned.token, (spanned.span.start..spanned.span.end).into()))
746        .collect();
747
748    // End-of-input span
749    let end_span: Span = (source.len()..source.len()).into();
750
751    // Parse using slice-based input (like nano_rust example)
752    let parser = program_parser();
753    let result = parser.parse(tokens.as_slice().map(end_span, |(t, s)| (t, s)));
754
755    let program = result.into_result().map_err(|errs| {
756        errs.into_iter()
757            .map(|e| ParseError {
758                span: *e.span(),
759                message: e.to_string(),
760            })
761            .collect::<Vec<_>>()
762    })?;
763
764    // Structural well-formedness checks that chumsky's grammar can't surface a
765    // clean message for. A command with two stdin sources (`<`/`<<`/`<<<`)
766    // would silently depend on redirect ordering at execution time, so reject
767    // it here — at parse time, which (unlike validation) can never be skipped.
768    if first_ambiguous_stdin(&program.statements) {
769        return Err(vec![ParseError {
770            // Redirects carry no AST span, so anchor at the start of the
771            // source; the message is the actionable part. Precise columns
772            // would require spanning `Redirect` (deferred — see docs/issues.md).
773            span: (0..0).into(),
774            message: "multiple stdin redirects on one command are ambiguous; \
775                      use exactly one of `<`, `<<`, or `<<<`"
776                .to_string(),
777        }]);
778    }
779
780    Ok(program)
781}
782
783/// Parse a single statement (useful for REPL).
784pub fn parse_statement(source: &str) -> Result<Stmt, Vec<ParseError>> {
785    let program = parse(source)?;
786    program
787        .statements
788        .into_iter()
789        .find(|s| !matches!(s, Stmt::Empty))
790        .ok_or_else(|| {
791            vec![ParseError {
792                span: (0..source.len()).into(),
793                message: "empty input".to_string(),
794            }]
795        })
796}
797
798// ═══════════════════════════════════════════════════════════════════════════
799// Parser Combinators - generic over input type
800// ═══════════════════════════════════════════════════════════════════════════
801
802/// Top-level program parser.
803fn program_parser<'tokens, 'src: 'tokens, I>(
804) -> impl Parser<'tokens, I, Program, extra::Err<Rich<'tokens, Token, Span>>>
805where
806    I: ValueInput<'tokens, Token = Token, Span = Span>,
807{
808    statement_parser()
809        .repeated()
810        .collect::<Vec<_>>()
811        .map(|statements| Program { statements })
812}
813
/// Statement parser - dispatches based on leading token.
/// Supports statement-level chaining with && and ||.
///
/// The parser is built with `recursive` so that compound statements
/// (functions, `if`, `for`, `while`, `case`) can contain nested statements.
/// A bare newline parses as `Stmt::Empty`, and every statement also consumes
/// any run of newline/semicolon terminators that follows it.
fn statement_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = Span>,
{
    recursive(|stmt| {
        // Zero or more newline/semicolon tokens; consumed after each statement.
        let terminator = choice((just(Token::Newline), just(Token::Semi))).repeated();

        // break [N] - break out of N levels of loops (default 1)
        // NOTE(review): `n as usize` is an unchecked cast. If the lexer can
        // ever produce a negative `Token::Int`, it would wrap to a huge level
        // count — confirm the lexer's range.
        let break_stmt = just(Token::Break)
            .ignore_then(
                select! { Token::Int(n) => n as usize }.or_not()
            )
            .map(Stmt::Break);

        // continue [N] - continue to next iteration, skipping N levels (default 1)
        // (same unchecked cast as `break` above)
        let continue_stmt = just(Token::Continue)
            .ignore_then(
                select! { Token::Int(n) => n as usize }.or_not()
            )
            .map(Stmt::Continue);

        // return [expr] - return from a tool
        let return_stmt = just(Token::Return)
            .ignore_then(primary_expr_parser().or_not())
            .map(|e| Stmt::Return(e.map(Box::new)));

        // exit [code] - exit the script
        let exit_stmt = just(Token::Exit)
            .ignore_then(primary_expr_parser().or_not())
            .map(|e| Stmt::Exit(e.map(Box::new)));

        // set command: `set -e`, `set +e`, `set` (no args), `set -o pipefail`
        // This must come BEFORE assignment_parser to handle `set -e` vs `X=value`
        //
        // Strategy: look at what follows `set`:
        // - A flag (-e, --long, +e): `set_with_flags` parses a set command,
        //   along with any further flags, `name=value` options, quoted
        //   strings, or bare identifiers (e.g. `pipefail` after `-o`).
        // - Nothing (end/newline/semi, or a chaining `&&`/`||`): `set_no_args`
        //   parses a set command with no arguments.
        // - Anything else (e.g. an identifier directly after `set`): neither
        //   form matches, because `set_with_flags` requires a leading flag;
        //   the later alternatives of `base_statement` are tried instead.
        let set_flag_arg = choice((
            select! { Token::ShortFlag(f) => Arg::ShortFlag(f) },
            select! { Token::LongFlag(f) => Arg::LongFlag(f) },
            // PlusFlag for +e, +x etc. - convert to positional arg with + prefix
            select! { Token::PlusFlag(f) => Arg::Positional(Expr::Literal(Value::String(format!("+{}", f)))) },
        ));

        // Option value after `-o`/`+o`: a size literal (`8K`, `1M`) or raw
        // byte count. Stringified so `set.rs` can `parse_size` the
        // `output-limit=<value>` it reconstructs.
        let option_value_str = select! {
            Token::NumberIdent(s) => s,
            Token::Int(n) => n.to_string(),
            Token::Ident(s) => s,
        };

        // `-o output-limit=8K`: `name`, `=`, `value` are three tokens; fold
        // them back into a single `name=value` positional (the form `set.rs`
        // and bash both expect). Without this the `=` is a parse error.
        let set_option_assign = ident_parser()
            .then_ignore(just(Token::Eq))
            .then(option_value_str)
            .map(|(name, value)| {
                Arg::Positional(Expr::Literal(Value::String(format!("{name}={value}"))))
            });

        // Quoted option such as `set -o "output-limit=8K"`: the whole thing is
        // one string token. Accept it as a positional so the quoted form works
        // too (agents reach for it after the unquoted form trips a shell lint).
        let set_quoted_arg = select! {
            Token::String(s) => Arg::Positional(Expr::Literal(Value::String(s))),
            Token::SingleString(s) => Arg::Positional(Expr::Literal(Value::String(s))),
        };

        // set with flags: `set -e`, `set -e -u -o pipefail`
        // The first argument must be a flag; the rest may be any mix of the
        // argument forms in the `choice` below. No redirects are parsed.
        let set_with_flags = just(Token::Set)
            .then(set_flag_arg)
            .then(
                choice((
                    set_flag_arg,
                    // `-o name=value` (try before the bare-ident arm).
                    set_option_assign,
                    set_quoted_arg,
                    // Identifiers like 'pipefail' after -o
                    ident_parser().map(|name| Arg::Positional(Expr::Literal(Value::String(name)))),
                ))
                .repeated()
                .collect::<Vec<_>>(),
            )
            .map(|((_, first_arg), mut rest_args)| {
                let mut args = vec![first_arg];
                args.append(&mut rest_args);
                Stmt::Command(Command {
                    name: "set".to_string(),
                    args,
                    redirects: vec![],
                })
            });

        // set with no args: `set` alone (shows settings)
        // Must be followed by newline, semicolon, end of input, or a chaining operator (&&, ||)
        // `.rewind()` makes this a pure lookahead: the following token is
        // checked but left in the input for the chaining/terminator parsers.
        let set_no_args = just(Token::Set)
            .then(
                choice((
                    just(Token::Newline).to(()),
                    just(Token::Semi).to(()),
                    just(Token::And).to(()),
                    just(Token::Or).to(()),
                    end(),
                ))
                .rewind(),
            )
            .map(|_| Stmt::Command(Command {
                name: "set".to_string(),
                args: vec![],
                redirects: vec![],
            }));

        // Try set_with_flags first (requires at least one flag)
        // Then try set_no_args (no args, followed by terminator)
        // If neither matches, fall through to assignment_parser
        let set_command = set_with_flags.or(set_no_args);

        // Inline env prefix: `NAME=value ... command`. One or more bash-style
        // assignments immediately followed by a command/pipeline scopes those
        // assignments to that command only (Stmt::EnvScoped). With no command
        // following, this alternative fails and we fall through to a plain,
        // persistent assignment. Must precede `assignment_parser` so the
        // prefixed-command form wins when a command follows.
        let env_prefix_assign = ident_parser()
            .then_ignore(just(Token::Eq))
            .then(expr_parser())
            .map(|(name, value)| Assignment { name, value, local: false });
        let env_scoped = env_prefix_assign
            .repeated()
            .at_least(1)
            .collect::<Vec<_>>()
            .then(pipeline_parser().map(pipeline_into_stmt))
            .map(|(assignments, body)| Stmt::EnvScoped {
                assignments,
                body: Box::new(body),
            });

        // Base statement (without chaining)
        // Alternatives are tried in the order listed; the ordering constraints
        // are documented on `set_command` and `env_scoped` above.
        let base_statement = choice((
            // A bare newline is an empty statement (block parsers filter these out).
            just(Token::Newline).to(Stmt::Empty),
            set_command,
            env_scoped,
            assignment_parser().map(Stmt::Assignment),
            // Shell-style functions (use $1, $2 positional params)
            posix_function_parser(stmt.clone()).map(Stmt::ToolDef),  // name() { }
            bash_function_parser(stmt.clone()).map(Stmt::ToolDef),   // function name { }
            if_parser(stmt.clone()).map(Stmt::If),
            for_parser(stmt.clone()).map(Stmt::For),
            while_parser(stmt.clone()).map(Stmt::While),
            case_parser(stmt.clone()).map(Stmt::Case),
            break_stmt,
            continue_stmt,
            return_stmt,
            exit_stmt,
            test_expr_stmt_parser().map(Stmt::Test),
            // Note: 'true' and 'false' are handled by command_parser/pipeline_parser
            pipeline_parser().map(pipeline_into_stmt),
        ))
        // Type-erased so it can be cloned into both positions of the fold below.
        .boxed();

        // Statement chaining with precedence: && binds tighter than ||
        // and_chain = base_stmt { "&&" base_stmt }
        // or_chain  = and_chain { "||" and_chain }
        // `foldl` nests to the left: `a && b && c` becomes
        // AndChain(AndChain(a, b), c).
        let and_chain = base_statement
            .clone()
            .foldl(
                just(Token::And).ignore_then(base_statement).repeated(),
                |left, right| Stmt::AndChain {
                    left: Box::new(left),
                    right: Box::new(right),
                },
            );

        and_chain
            .clone()
            .foldl(
                just(Token::Or).ignore_then(and_chain).repeated(),
                |left, right| Stmt::OrChain {
                    left: Box::new(left),
                    right: Box::new(right),
                },
            )
            // Swallow trailing newlines/semicolons after the whole chain.
            .then_ignore(terminator)
    })
}
1007
1008/// Assignment: `NAME=value` (bash-style) or `local NAME = value` (scoped)
1009fn assignment_parser<'tokens, I>(
1010) -> impl Parser<'tokens, I, Assignment, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1011where
1012    I: ValueInput<'tokens, Token = Token, Span = Span>,
1013{
1014    // local NAME = value (with spaces around =)
1015    let local_assignment = just(Token::Local)
1016        .ignore_then(ident_parser())
1017        .then_ignore(just(Token::Eq))
1018        .then(expr_parser())
1019        .map(|(name, value)| Assignment {
1020            name,
1021            value,
1022            local: true,
1023        });
1024
1025    // Bash-style: NAME=value (no spaces around =)
1026    // The lexer produces IDENT EQ EXPR, so we parse it here
1027    let bash_assignment = ident_parser()
1028        .then_ignore(just(Token::Eq))
1029        .then(expr_parser())
1030        .map(|(name, value)| Assignment {
1031            name,
1032            value,
1033            local: false,
1034        });
1035
1036    choice((local_assignment, bash_assignment))
1037        .labelled("assignment")
1038        .boxed()
1039}
1040
1041/// POSIX-style function: `name() { body }`
1042///
1043/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
1044fn posix_function_parser<'tokens, I, S>(
1045    stmt: S,
1046) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1047where
1048    I: ValueInput<'tokens, Token = Token, Span = Span>,
1049    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1050{
1051    ident_parser()
1052        .then_ignore(just(Token::LParen))
1053        .then_ignore(just(Token::RParen))
1054        .then_ignore(just(Token::LBrace))
1055        .then_ignore(just(Token::Newline).repeated())
1056        .then(
1057            stmt.repeated()
1058                .collect::<Vec<_>>()
1059                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1060        )
1061        .then_ignore(just(Token::Newline).repeated())
1062        .then_ignore(just(Token::RBrace))
1063        .map(|(name, body)| ToolDef { name, params: vec![], body })
1064        .labelled("POSIX function")
1065        .boxed()
1066}
1067
1068/// Bash-style function: `function name { body }` (without parens)
1069///
1070/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
1071fn bash_function_parser<'tokens, I, S>(
1072    stmt: S,
1073) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1074where
1075    I: ValueInput<'tokens, Token = Token, Span = Span>,
1076    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1077{
1078    just(Token::Function)
1079        .ignore_then(ident_parser())
1080        .then_ignore(just(Token::LBrace))
1081        .then_ignore(just(Token::Newline).repeated())
1082        .then(
1083            stmt.repeated()
1084                .collect::<Vec<_>>()
1085                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1086        )
1087        .then_ignore(just(Token::Newline).repeated())
1088        .then_ignore(just(Token::RBrace))
1089        .map(|(name, body)| ToolDef { name, params: vec![], body })
1090        .labelled("bash function")
1091        .boxed()
1092}
1093
1094/// If statement: `if COND; then STMTS [elif COND; then STMTS]* [else STMTS] fi`
1095///
1096/// elif clauses are desugared to nested if/else:
1097///   `if A; then X elif B; then Y else Z fi`
1098/// becomes:
1099///   `if A; then X else { if B; then Y else Z fi } fi`
1100fn if_parser<'tokens, I, S>(
1101    stmt: S,
1102) -> impl Parser<'tokens, I, IfStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1103where
1104    I: ValueInput<'tokens, Token = Token, Span = Span>,
1105    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1106{
1107    // Parse a single branch: condition + then statements
1108    let branch = condition_parser()
1109        .then_ignore(just(Token::Semi).or_not())
1110        .then_ignore(just(Token::Newline).repeated())
1111        .then_ignore(just(Token::Then))
1112        .then_ignore(just(Token::Newline).repeated())
1113        .then(
1114            stmt.clone()
1115                .repeated()
1116                .collect::<Vec<_>>()
1117                .map(|stmts: Vec<Stmt>| {
1118                    stmts
1119                        .into_iter()
1120                        .filter(|s| !matches!(s, Stmt::Empty))
1121                        .collect::<Vec<_>>()
1122                }),
1123        );
1124
1125    // Parse elif branches: `elif COND; then STMTS`
1126    let elif_branch = just(Token::Elif)
1127        .ignore_then(condition_parser())
1128        .then_ignore(just(Token::Semi).or_not())
1129        .then_ignore(just(Token::Newline).repeated())
1130        .then_ignore(just(Token::Then))
1131        .then_ignore(just(Token::Newline).repeated())
1132        .then(
1133            stmt.clone()
1134                .repeated()
1135                .collect::<Vec<_>>()
1136                .map(|stmts: Vec<Stmt>| {
1137                    stmts
1138                        .into_iter()
1139                        .filter(|s| !matches!(s, Stmt::Empty))
1140                        .collect::<Vec<_>>()
1141                }),
1142        );
1143
1144    // Parse else branch: `else STMTS`
1145    let else_branch = just(Token::Else)
1146        .ignore_then(just(Token::Newline).repeated())
1147        .ignore_then(stmt.repeated().collect::<Vec<_>>())
1148        .map(|stmts: Vec<Stmt>| {
1149            stmts
1150                .into_iter()
1151                .filter(|s| !matches!(s, Stmt::Empty))
1152                .collect::<Vec<_>>()
1153        });
1154
1155    just(Token::If)
1156        .ignore_then(branch)
1157        .then(elif_branch.repeated().collect::<Vec<_>>())
1158        .then(else_branch.or_not())
1159        .then_ignore(just(Token::Fi))
1160        .map(|(((condition, then_branch), elif_branches), else_branch)| {
1161            // Build nested if/else structure from elif branches
1162            build_if_chain(condition, then_branch, elif_branches, else_branch)
1163        })
1164        .labelled("if statement")
1165        .boxed()
1166}
1167
1168/// Build a nested IfStmt chain from elif branches.
1169///
1170/// Transforms:
1171///   if A then X elif B then Y elif C then Z else W fi
1172/// Into:
1173///   IfStmt { cond: A, then: X, else: Some([IfStmt { cond: B, then: Y, else: Some([IfStmt { cond: C, then: Z, else: Some(W) }]) }]) }
1174fn build_if_chain(
1175    condition: Expr,
1176    then_branch: Vec<Stmt>,
1177    mut elif_branches: Vec<(Expr, Vec<Stmt>)>,
1178    else_branch: Option<Vec<Stmt>>,
1179) -> IfStmt {
1180    if elif_branches.is_empty() {
1181        // No elif, just if/else
1182        IfStmt {
1183            condition: Box::new(condition),
1184            then_branch,
1185            else_branch,
1186        }
1187    } else {
1188        // Pop the first elif and recursively build the rest
1189        let (elif_cond, elif_then) = elif_branches.remove(0);
1190        let nested_if = build_if_chain(elif_cond, elif_then, elif_branches, else_branch);
1191        IfStmt {
1192            condition: Box::new(condition),
1193            then_branch,
1194            else_branch: Some(vec![Stmt::If(nested_if)]),
1195        }
1196    }
1197}
1198
1199/// For loop: `for VAR in ITEMS; do STMTS done`
1200fn for_parser<'tokens, I, S>(
1201    stmt: S,
1202) -> impl Parser<'tokens, I, ForLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1203where
1204    I: ValueInput<'tokens, Token = Token, Span = Span>,
1205    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1206{
1207    just(Token::For)
1208        .ignore_then(ident_parser())
1209        .then_ignore(just(Token::In))
1210        .then(expr_parser().repeated().at_least(1).collect::<Vec<_>>())
1211        .then_ignore(just(Token::Semi).or_not())
1212        .then_ignore(just(Token::Newline).repeated())
1213        .then_ignore(just(Token::Do))
1214        .then_ignore(just(Token::Newline).repeated())
1215        .then(
1216            stmt.repeated()
1217                .collect::<Vec<_>>()
1218                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1219        )
1220        .then_ignore(just(Token::Done))
1221        .map(|((variable, items), body)| ForLoop {
1222            variable,
1223            items,
1224            body,
1225        })
1226        .labelled("for loop")
1227        .boxed()
1228}
1229
1230/// While loop: `while condition; do ...; done`
1231fn while_parser<'tokens, I, S>(
1232    stmt: S,
1233) -> impl Parser<'tokens, I, WhileLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1234where
1235    I: ValueInput<'tokens, Token = Token, Span = Span>,
1236    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1237{
1238    just(Token::While)
1239        .ignore_then(condition_parser())
1240        .then_ignore(just(Token::Semi).or_not())
1241        .then_ignore(just(Token::Newline).repeated())
1242        .then_ignore(just(Token::Do))
1243        .then_ignore(just(Token::Newline).repeated())
1244        .then(
1245            stmt.repeated()
1246                .collect::<Vec<_>>()
1247                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1248        )
1249        .then_ignore(just(Token::Done))
1250        .map(|(condition, body)| WhileLoop {
1251            condition: Box::new(condition),
1252            body,
1253        })
1254        .labelled("while loop")
1255        .boxed()
1256}
1257
/// Case statement: `case expr in pattern) commands ;; esac`
///
/// Supports:
/// - Single patterns: `pattern) commands ;;`
/// - Multiple patterns: `pattern1|pattern2) commands ;;`
/// - Optional leading `(` before patterns: `(pattern) commands ;;`
///
/// Patterns are stored as plain strings, re-assembled from the individual
/// tokens that make them up. Every branch — including the last — must end
/// with `;;`. `Stmt::Empty` entries are removed from branch bodies.
fn case_parser<'tokens, I, S>(
    stmt: S,
) -> impl Parser<'tokens, I, CaseStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = Span>,
    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
{
    // Pattern part: individual tokens that make up a glob pattern
    // e.g., "*.rs" is Star + Dot + Ident("rs")
    // Each arm yields the text that token contributes to the pattern string.
    let pattern_part = choice((
        select! { Token::GlobWord(s) => s },
        select! { Token::Ident(s) => s },
        select! { Token::NumberIdent(s) => s },
        select! { Token::DottedIdent(s) => s },
        select! { Token::String(s) => s },
        select! { Token::SingleString(s) => s },
        select! { Token::Int(n) => n.to_string() },
        select! { Token::Star => "*".to_string() },
        select! { Token::Question => "?".to_string() },
        select! { Token::Dot => ".".to_string() },
        select! { Token::DotDot => "..".to_string() },
        select! { Token::Tilde => "~".to_string() },
        select! { Token::TildePath(s) => s },
        select! { Token::RelativePath(s) => s },
        select! { Token::DotSlashPath(s) => s },
        select! { Token::Path(p) => p },
        // `VarRef` text is used as-is; `SimpleVarRef` gets its `$` put back.
        select! { Token::VarRef(v) => v },
        select! { Token::SimpleVarRef(v) => format!("${}", v) },
        // Character class: [a-z], [!abc], etc. Only the token kinds listed
        // below are accepted between the brackets, in any order.
        just(Token::LBracket)
            .ignore_then(
                choice((
                    select! { Token::Ident(s) => s },
                    select! { Token::Int(n) => n.to_string() },
                    just(Token::Colon).to(":".to_string()),
                    // Negation: `!` inside the char class.
                    // NOTE(review): `[^abc]` is a common negation spelling too,
                    // but no arm here accepts a caret token — confirm how the
                    // lexer tokenizes `^` before relying on it.
                    just(Token::Bang).to("!".to_string()),
                    // Range like a-z: presumably the `-z` part arrives as a
                    // ShortFlag token, so the `-` is re-attached here.
                    select! { Token::ShortFlag(s) => format!("-{}", s) },
                ))
                .repeated()
                .at_least(1)
                .collect::<Vec<String>>()
            )
            .then_ignore(just(Token::RBracket))
            .map(|parts| format!("[{}]", parts.join(""))),
        // Brace expansion: {a,b,c} or {js,ts}
        // Alternatives are identifiers or integers, separated by commas.
        just(Token::LBrace)
            .ignore_then(
                choice((
                    select! { Token::Ident(s) => s },
                    select! { Token::Int(n) => n.to_string() },
                ))
                .separated_by(just(Token::Comma))
                .at_least(1)
                .collect::<Vec<String>>()
            )
            .then_ignore(just(Token::RBrace))
            .map(|parts| format!("{{{}}}", parts.join(","))),
    ));

    // A complete pattern is one or more pattern parts joined together
    // e.g., "*.rs" = Star + Dot + Ident
    let pattern = pattern_part
        .repeated()
        .at_least(1)
        .collect::<Vec<String>>()
        .map(|parts| parts.join(""))
        .labelled("case pattern");

    // Multiple patterns separated by pipe: `pattern1 | pattern2`
    let patterns = pattern
        .separated_by(just(Token::Pipe))
        .at_least(1)
        .collect::<Vec<String>>()
        .labelled("case patterns");

    // Branch: `[( ] patterns ) commands ;;`
    // Newlines are tolerated after the optional `(`, after `)`, and after `;;`.
    let branch = just(Token::LParen)
        .or_not()
        .ignore_then(just(Token::Newline).repeated())
        .ignore_then(patterns)
        .then_ignore(just(Token::RParen))
        .then_ignore(just(Token::Newline).repeated())
        .then(
            stmt.clone()
                .repeated()
                .collect::<Vec<_>>()
                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
        )
        .then_ignore(just(Token::DoubleSemi))
        .then_ignore(just(Token::Newline).repeated())
        .map(|(patterns, body)| CaseBranch { patterns, body })
        .labelled("case branch");

    // `case EXPR in NEWLINE* BRANCH* esac` — zero branches is accepted.
    just(Token::Case)
        .ignore_then(expr_parser())
        .then_ignore(just(Token::In))
        .then_ignore(just(Token::Newline).repeated())
        .then(branch.repeated().collect::<Vec<_>>())
        .then_ignore(just(Token::Esac))
        .map(|(expr, branches)| CaseStmt { expr, branches })
        .labelled("case statement")
        .boxed()
}
1369
1370/// Pipeline: `cmd | cmd | cmd [&]`
1371fn pipeline_parser<'tokens, I>(
1372) -> impl Parser<'tokens, I, Pipeline, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1373where
1374    I: ValueInput<'tokens, Token = Token, Span = Span>,
1375{
1376    command_parser()
1377        .separated_by(just(Token::Pipe))
1378        .at_least(1)
1379        .collect::<Vec<_>>()
1380        .then(just(Token::Amp).or_not())
1381        .map(|(commands, bg)| Pipeline {
1382            commands,
1383            background: bg.is_some(),
1384        })
1385        .labelled("pipeline")
1386        .boxed()
1387}
1388
1389/// Command: `name args... [redirects...]`
1390/// Command names can be identifiers, 'true', 'false', or '.' (source alias).
1391fn command_parser<'tokens, I>(
1392) -> impl Parser<'tokens, I, Command, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1393where
1394    I: ValueInput<'tokens, Token = Token, Span = Span>,
1395{
1396    // Command name can be an identifier, path, 'true', 'false', '.' (source alias), or ./path
1397    let command_name = choice((
1398        ident_parser(),
1399        path_parser(),
1400        select! { Token::DotSlashPath(s) => s },
1401        just(Token::True).to("true".to_string()),
1402        just(Token::False).to("false".to_string()),
1403        just(Token::Dot).to(".".to_string()),
1404    ));
1405
1406    // NB: the "at most one stdin source per command" rule is enforced by a
1407    // post-parse scan in `parse()` (see `first_ambiguous_stdin`), NOT here.
1408    // A `try_map` rejection at this level cannot surface its own message: a
1409    // command like `cat <<< a <<< b` also fails the competing statement-level
1410    // assignment/function alternative ("expected '=', or '('"), and chumsky's
1411    // `choice` merge keeps that alternative's error regardless of which span
1412    // our custom error carries. So we accept the command here and reject it
1413    // structurally after parsing, where the message is fully under our control
1414    // (verified empirically 2026-06-07; see docs/issues.md).
1415    command_name
1416        .then(args_list_parser())
1417        .then(redirect_parser().repeated().collect::<Vec<_>>())
1418        .map(|((name, args), redirects)| Command {
1419            name,
1420            args,
1421            redirects,
1422        })
1423        .labelled("command")
1424        .boxed()
1425}
1426
1427/// Map a parsed `Pipeline` to a statement, unwrapping a single redirect-free
1428/// foreground command to `Stmt::Command` (the canonical shape used throughout
1429/// the parser). Shared by the top-level statement parser, `$()` bodies, and
1430/// inline env-prefix bodies so the unwrap rule lives in one place.
1431fn pipeline_into_stmt(p: Pipeline) -> Stmt {
1432    if p.commands.len() == 1 && !p.background && p.commands[0].redirects.is_empty() {
1433        match p.commands.into_iter().next() {
1434            Some(cmd) => Stmt::Command(cmd),
1435            None => Stmt::Empty, // unreachable (len checked) but safe
1436        }
1437    } else {
1438        Stmt::Pipeline(p)
1439    }
1440}
1441
1442/// True if `cmd` has more than one stdin source (`<`, `<<`, `<<<`). Such a
1443/// command would silently depend on redirect ordering at execution time
1444/// (`setup_stdin_redirects` is last-wins), so `parse()` rejects it loudly.
1445fn command_has_ambiguous_stdin(cmd: &Command) -> bool {
1446    cmd.redirects
1447        .iter()
1448        .filter(|r| {
1449            matches!(
1450                r.kind,
1451                RedirectKind::Stdin | RedirectKind::HereDoc | RedirectKind::HereString
1452            )
1453        })
1454        .count()
1455        > 1
1456}
1457
/// Report whether any command anywhere in `stmts` (recursing into pipelines,
/// control-flow bodies, chains, and tool definitions) has more than one
/// stdin source. Used by `parse()` to reject the ambiguity after parsing.
///
/// Despite the name, this returns only a yes/no answer — it does not say
/// which command is the offending one.
fn first_ambiguous_stdin(stmts: &[Stmt]) -> bool {
    stmts.iter().any(stmt_has_ambiguous_stdin)
}
1464
1465fn stmt_has_ambiguous_stdin(stmt: &Stmt) -> bool {
1466    match stmt {
1467        Stmt::Command(c) => command_has_ambiguous_stdin(c),
1468        Stmt::Pipeline(p) => p.commands.iter().any(command_has_ambiguous_stdin),
1469        Stmt::If(i) => {
1470            first_ambiguous_stdin(&i.then_branch)
1471                || i.else_branch
1472                    .as_deref()
1473                    .is_some_and(first_ambiguous_stdin)
1474        }
1475        Stmt::For(f) => first_ambiguous_stdin(&f.body),
1476        Stmt::While(w) => first_ambiguous_stdin(&w.body),
1477        Stmt::Case(c) => c.branches.iter().any(|b| first_ambiguous_stdin(&b.body)),
1478        Stmt::ToolDef(t) => first_ambiguous_stdin(&t.body),
1479        Stmt::AndChain { left, right } | Stmt::OrChain { left, right } => {
1480            stmt_has_ambiguous_stdin(left) || stmt_has_ambiguous_stdin(right)
1481        }
1482        Stmt::EnvScoped { body, .. } => stmt_has_ambiguous_stdin(body),
1483        Stmt::Assignment(_)
1484        | Stmt::Break(_)
1485        | Stmt::Continue(_)
1486        | Stmt::Return(_)
1487        | Stmt::Exit(_)
1488        | Stmt::Test(_)
1489        | Stmt::Empty => false,
1490    }
1491}
1492
1493/// Arguments list parser that handles `--` flag terminator.
1494///
1495/// After `--`, all subsequent flags are converted to positional string arguments.
1496fn args_list_parser<'tokens, I>(
1497) -> impl Parser<'tokens, I, Vec<Arg>, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1498where
1499    I: ValueInput<'tokens, Token = Token, Span = Span>,
1500{
1501    // Arguments before `--` (normal parsing). Each arg is captured with its
1502    // source span so we can reject the silent argv-splat: two positional words
1503    // with no whitespace between them (`/tmp/$(echo x).txt` → 3 args). kaish does
1504    // no token pasting, so an unquoted interpolated word fragments into separate
1505    // args; the fix is to quote the whole word. Single-token words (`file.txt`,
1506    // `v1.2.3`) are one arg and never trigger this. See docs/issues.md #2.
1507    let pre_dash = arg_before_double_dash_parser()
1508        .map_with(|arg, e| -> (Arg, Span) { (arg, e.span()) })
1509        .repeated()
1510        .collect::<Vec<(Arg, Span)>>()
1511        .try_map(|args, _span| {
1512            for pair in args.windows(2) {
1513                let (prev, prev_span) = &pair[0];
1514                let (next, next_span) = &pair[1];
1515                if matches!(prev, Arg::Positional(_))
1516                    && matches!(next, Arg::Positional(_))
1517                    && prev_span.end == next_span.start
1518                {
1519                    return Err(Rich::custom(
1520                        *next_span,
1521                        "adjacent words with no space between them are not joined into one \
1522                         argument (kaish does no token pasting); quote the whole word, e.g. \
1523                         \"/tmp/$(echo x).txt\" or \"$dir/out.txt\"",
1524                    ));
1525                }
1526            }
1527            Ok(args.into_iter().map(|(arg, _)| arg).collect::<Vec<Arg>>())
1528        });
1529
1530    // The `--` marker itself
1531    let double_dash = select! {
1532        Token::DoubleDash => Arg::DoubleDash,
1533    };
1534
1535    // Arguments after `--` (flags become positional strings)
1536    let post_dash_arg = choice((
1537        // Flags become positional strings
1538        select! {
1539            Token::ShortFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("-{}", name)))),
1540            Token::LongFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("--{}", name)))),
1541        },
1542        // Everything else stays the same
1543        primary_expr_parser().map(Arg::Positional),
1544    ));
1545
1546    let post_dash = post_dash_arg.repeated().collect::<Vec<_>>();
1547
1548    // Combine: args_before ++ [--] ++ args_after
1549    pre_dash
1550        .then(double_dash.then(post_dash).or_not())
1551        .map(|(mut args, maybe_dd)| {
1552            if let Some((dd, post)) = maybe_dd {
1553                args.push(dd);
1554                args.extend(post);
1555            }
1556            args
1557        })
1558}
1559
1560/// A statement keyword used as a plain word — its source spelling.
1561///
1562/// Lets keywords serve as the *key* of a `key=value` argv assignment, so
1563/// `dd if=/dev/urandom` works (`if` is `Token::If`, not an `Ident`). Safe
1564/// because: statement-level `if`/`for`/… are decided before arg parsing (their
1565/// productions precede `pipeline_parser`), `command_name` never accepts these
1566/// tokens, and the `key=value` rule requires the key span-adjacent to `=` — a
1567/// real `if <cond>` has a space and never matches. See docs/binary-data.md.
1568fn keyword_word<'tokens, I>(
1569) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1570where
1571    I: ValueInput<'tokens, Token = Token, Span = Span>,
1572{
1573    select! {
1574        Token::Set => "set",
1575        Token::Local => "local",
1576        Token::If => "if",
1577        Token::Then => "then",
1578        Token::Else => "else",
1579        Token::Elif => "elif",
1580        Token::Fi => "fi",
1581        Token::For => "for",
1582        Token::While => "while",
1583        Token::In => "in",
1584        Token::Do => "do",
1585        Token::Done => "done",
1586        Token::Case => "case",
1587        Token::Esac => "esac",
1588        Token::Function => "function",
1589        Token::Break => "break",
1590        Token::Continue => "continue",
1591        Token::Return => "return",
1592        Token::Exit => "exit",
1593    }
1594    .map(|s| s.to_string())
1595}
1596
1597/// Argument parser for arguments before `--` (normal flag handling).
1598fn arg_before_double_dash_parser<'tokens, I>(
1599) -> impl Parser<'tokens, I, Arg, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1600where
1601    I: ValueInput<'tokens, Token = Token, Span = Span>,
1602{
1603    // Long flag with value: --name=value
1604    let long_flag_with_value = select! {
1605        Token::LongFlag(name) => name,
1606    }
1607    .then_ignore(just(Token::Eq))
1608    .then(primary_expr_parser())
1609    .map(|(key, value)| Arg::Named { key, value });
1610
1611    // Boolean long flag: --name
1612    let long_flag = select! {
1613        Token::LongFlag(name) => Arg::LongFlag(name),
1614    };
1615
1616    // Boolean short flag: -x
1617    let short_flag = select! {
1618        Token::ShortFlag(name) => Arg::ShortFlag(name),
1619    };
1620
1621    // Shell assignment in argv position: name=value (must not have spaces around =).
1622    // Produces Arg::WordAssign; the kernel routes it through tool_args.named
1623    // only for shell-assignment-accepting builtins (export, alias). For every
1624    // other command it materialises as a `"name=value"` positional, matching
1625    // bash semantics (`cat foo=bar` opens a file named `foo=bar`).
1626    let named = choice((
1627        select! { Token::Ident(s) => s },
1628        keyword_word(),
1629    ))
1630    .map_with(|s, e| -> (String, Span) { (s, e.span()) })
1631    .then(just(Token::Eq).map_with(|_, e| -> Span { e.span() }))
1632    .then(primary_expr_parser().map_with(|expr, e| -> (Expr, Span) { (expr, e.span()) }))
1633    .try_map(|(((key, key_span), eq_span), (value, value_span)): (((String, Span), Span), (Expr, Span)), span| {
1634        // Check that key ends where = starts and = ends where value starts
1635        if key_span.end != eq_span.start || eq_span.end != value_span.start {
1636            Err(Rich::custom(
1637                span,
1638                "shell assignment must not have spaces around '=' (use 'key=value' not 'key = value')",
1639            ))
1640        } else {
1641            Ok(Arg::WordAssign { key, value })
1642        }
1643    });
1644
1645    // Positional argument
1646    let positional = primary_expr_parser().map(Arg::Positional);
1647
1648    // Order matters: try more specific patterns first
1649    // Note: DoubleDash is NOT included here - it's handled by args_list_parser
1650    choice((
1651        long_flag_with_value,
1652        long_flag,
1653        short_flag,
1654        named,
1655        positional,
1656    ))
1657    .boxed()
1658}
1659
1660/// Redirect: `> file`, `>> file`, `< file`, `<< heredoc`, `2> file`, `&> file`, `2>&1`
1661fn redirect_parser<'tokens, I>(
1662) -> impl Parser<'tokens, I, Redirect, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1663where
1664    I: ValueInput<'tokens, Token = Token, Span = Span>,
1665{
1666    // Regular redirects: >, >>, <, 2>, &>
1667    let regular_redirect = select! {
1668        Token::GtGt => RedirectKind::StdoutAppend,
1669        Token::Gt => RedirectKind::StdoutOverwrite,
1670        Token::Lt => RedirectKind::Stdin,
1671        Token::Stderr => RedirectKind::Stderr,
1672        Token::Both => RedirectKind::Both,
1673    }
1674    .then(primary_expr_parser())
1675    .map(|(kind, target)| Redirect { kind, target });
1676
1677    // Here-doc redirect: << content
1678    // Quoted delimiters (<<'EOF' or <<"EOF") produce literal heredocs (no expansion).
1679    // Unquoted delimiters produce interpolated heredocs (variables are expanded).
1680    // For literal heredocs the `<<-EOF` tab stripping is applied here at parse
1681    // time (the body is fully known); for interpolated heredocs the stripping
1682    // is deferred to the interpreter so source byte offsets in `parts` stay
1683    // aligned with the original source for span reporting.
1684    let heredoc_redirect = just(Token::HereDocStart)
1685        .ignore_then(select! { Token::HereDoc(data) => data })
1686        .map(|data: HereDocData| {
1687            let target = if data.literal {
1688                let body = if data.strip_tabs {
1689                    crate::interpreter::strip_leading_tabs(&data.content)
1690                } else {
1691                    data.content
1692                };
1693                Expr::Literal(Value::String(body))
1694            } else {
1695                let parts = parse_interpolated_string_spanned(
1696                    &data.content,
1697                    data.body_start_offset,
1698                );
1699                // If there's only one literal part and no tab stripping is
1700                // needed, simplify to Expr::Literal — keeps the AST shape
1701                // identical to the pre-spans path for trivial bodies.
1702                if parts.len() == 1 && !data.strip_tabs {
1703                    if let StringPart::Literal(text) = &parts[0].part {
1704                        return Redirect {
1705                            kind: RedirectKind::HereDoc,
1706                            target: Expr::Literal(Value::String(text.clone())),
1707                        };
1708                    }
1709                }
1710                Expr::HereDocBody {
1711                    parts,
1712                    strip_tabs: data.strip_tabs,
1713                }
1714            };
1715            Redirect {
1716                kind: RedirectKind::HereDoc,
1717                target,
1718            }
1719        });
1720
1721    // Here-string redirect: <<< word
1722    // The target is any single expression; kaish's existing Expr machinery
1723    // handles interpolation, single-quoted literals, and command substitution.
1724    let herestring_redirect = just(Token::HereString)
1725        .ignore_then(primary_expr_parser())
1726        .map(|target| Redirect {
1727            kind: RedirectKind::HereString,
1728            target,
1729        });
1730
1731    // Merge stderr to stdout: 2>&1 (no target needed - implicit)
1732    let merge_stderr_redirect = just(Token::StderrToStdout)
1733        .map(|_| Redirect {
1734            kind: RedirectKind::MergeStderr,
1735            // Target is unused for MergeStderr, but we need something
1736            target: Expr::Literal(Value::Null),
1737        });
1738
1739    // Merge stdout to stderr: 1>&2 or >&2 (no target needed - implicit)
1740    let merge_stdout_redirect = choice((
1741        just(Token::StdoutToStderr),
1742        just(Token::StdoutToStderr2),
1743    ))
1744    .map(|_| Redirect {
1745        kind: RedirectKind::MergeStdout,
1746        // Target is unused for MergeStdout, but we need something
1747        target: Expr::Literal(Value::Null),
1748    });
1749
1750    choice((
1751        heredoc_redirect,
1752        herestring_redirect,
1753        merge_stderr_redirect,
1754        merge_stdout_redirect,
1755        regular_redirect,
1756    ))
1757    .labelled("redirect")
1758    .boxed()
1759}
1760
1761/// Test expression parser for `[[ ... ]]` syntax.
1762///
1763/// Supports:
1764/// - File tests: `[[ -f path ]]`, `[[ -d path ]]`, etc.
1765/// - String tests: `[[ -z str ]]`, `[[ -n str ]]`
1766/// - Comparisons: `[[ $X == "value" ]]`, `[[ $NUM -gt 5 ]]`
1767/// - Compound: `[[ -f a && -d b ]]`, `[[ -z x || -n y ]]`, `[[ ! -f file ]]`
1768///
1769/// Precedence (highest to lowest): `!` > `&&` > `||`
1770fn test_expr_stmt_parser<'tokens, I>(
1771) -> impl Parser<'tokens, I, TestExpr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1772where
1773    I: ValueInput<'tokens, Token = Token, Span = Span>,
1774{
1775    // File test operators: -e, -f, -d, -r, -w, -x
1776    let file_test_op = select! {
1777        Token::ShortFlag(s) if s == "e" => FileTestOp::Exists,
1778        Token::ShortFlag(s) if s == "f" => FileTestOp::IsFile,
1779        Token::ShortFlag(s) if s == "d" => FileTestOp::IsDir,
1780        Token::ShortFlag(s) if s == "r" => FileTestOp::Readable,
1781        Token::ShortFlag(s) if s == "w" => FileTestOp::Writable,
1782        Token::ShortFlag(s) if s == "x" => FileTestOp::Executable,
1783    };
1784
1785    // String test operators: -z, -n
1786    let string_test_op = select! {
1787        Token::ShortFlag(s) if s == "z" => StringTestOp::IsEmpty,
1788        Token::ShortFlag(s) if s == "n" => StringTestOp::IsNonEmpty,
1789    };
1790
1791    // Comparison operators: =, ==, !=, =~, !~, >, <, >=, <=, -gt, -lt, -ge, -le, -eq, -ne
1792    // Note: = and == are equivalent inside [[ ]] (matching bash behavior)
1793    let cmp_op = choice((
1794        just(Token::EqEq).to(TestCmpOp::Eq),
1795        just(Token::Eq).to(TestCmpOp::Eq),
1796        just(Token::NotEq).to(TestCmpOp::NotEq),
1797        just(Token::Match).to(TestCmpOp::Match),
1798        just(Token::NotMatch).to(TestCmpOp::NotMatch),
1799        just(Token::Gt).to(TestCmpOp::Gt),
1800        just(Token::Lt).to(TestCmpOp::Lt),
1801        just(Token::GtEq).to(TestCmpOp::GtEq),
1802        just(Token::LtEq).to(TestCmpOp::LtEq),
1803        select! { Token::ShortFlag(s) if s == "eq" => TestCmpOp::NumEq },
1804        select! { Token::ShortFlag(s) if s == "ne" => TestCmpOp::NumNotEq },
1805        select! { Token::ShortFlag(s) if s == "gt" => TestCmpOp::NumGt },
1806        select! { Token::ShortFlag(s) if s == "lt" => TestCmpOp::NumLt },
1807        select! { Token::ShortFlag(s) if s == "ge" => TestCmpOp::NumGtEq },
1808        select! { Token::ShortFlag(s) if s == "le" => TestCmpOp::NumLtEq },
1809    ));
1810
1811    // File test: -f path
1812    let file_test = file_test_op
1813        .then(primary_expr_parser())
1814        .map(|(op, path)| TestExpr::FileTest {
1815            op,
1816            path: Box::new(path),
1817        });
1818
1819    // String test: -z str
1820    let string_test = string_test_op
1821        .then(primary_expr_parser())
1822        .map(|(op, value)| TestExpr::StringTest {
1823            op,
1824            value: Box::new(value),
1825        });
1826
1827    // Comparison: $X == "value" or $NUM -gt 5
1828    let comparison = primary_expr_parser()
1829        .then(cmp_op)
1830        .then(primary_expr_parser())
1831        .map(|((left, op), right)| TestExpr::Comparison {
1832            left: Box::new(left),
1833            op,
1834            right: Box::new(right),
1835        });
1836
1837    // Primary test expression (atomic - no compound operators)
1838    let primary_test = choice((file_test, string_test, comparison));
1839
1840    // Build compound expressions with proper precedence:
1841    // Grammar:
1842    //   test_expr = or_expr
1843    //   or_expr   = and_expr { "||" and_expr }
1844    //   and_expr  = unary_expr { "&&" unary_expr }
1845    //   unary_expr = "!" unary_expr | primary_test
1846    //
1847    // Precedence: ! (highest) > && > ||
1848
1849    // Unary NOT binds tighter than `&&`/`||`, so it must recurse at the
1850    // unary level — `! A || B` is `(!A) || B`, NOT `!(A || B)`. The inner
1851    // `recursive` lets `!` chain (`! ! expr`) while bottoming out at a
1852    // primary test, so the bang never swallows a following `&&`/`||` operand.
1853    let unary = recursive(|unary| {
1854        let not_expr = just(Token::Bang)
1855            .ignore_then(unary)
1856            .map(|expr| TestExpr::Not { expr: Box::new(expr) });
1857        choice((not_expr, primary_test.clone()))
1858    });
1859
1860    // AND level: unary && unary && ...
1861    let and_expr = unary.clone().foldl(
1862        just(Token::And).ignore_then(unary).repeated(),
1863        |left, right| TestExpr::And {
1864            left: Box::new(left),
1865            right: Box::new(right),
1866        },
1867    );
1868
1869    // OR level: and_expr || and_expr || ...
1870    let compound_test = and_expr.clone().foldl(
1871        just(Token::Or).ignore_then(and_expr).repeated(),
1872        |left, right| TestExpr::Or {
1873            left: Box::new(left),
1874            right: Box::new(right),
1875        },
1876    );
1877
1878    // [[ ]] is two consecutive bracket tokens (not a single TestStart token)
1879    // to avoid conflicts with nested array syntax like [[1, 2], [3, 4]]
1880    just(Token::LBracket)
1881        .then(just(Token::LBracket))
1882        .ignore_then(compound_test)
1883        .then_ignore(just(Token::RBracket).then(just(Token::RBracket)))
1884        .labelled("test expression")
1885        .boxed()
1886}
1887
1888/// Condition parser: supports [[ ]] test expressions and commands with && / || chaining.
1889///
1890/// Shell semantics: conditions are commands whose exit codes determine truthiness.
1891/// - `if true; then` → runs `true` builtin, exit code 0 = truthy
1892/// - `if grep -q pattern file; then` → runs command, checks exit code
1893/// - `if a && b; then` → runs `a`, if exit 0, runs `b`
1894///
1895/// Use `[[ ]]` for comparisons: `if [[ $X -gt 5 ]]; then`
1896///
1897/// Grammar (with precedence - && binds tighter than ||):
1898///   condition = or_expr
1899///   or_expr   = and_expr { "||" and_expr }
1900///   and_expr  = base { "&&" base }
1901///   base      = test_expr | command
1902fn condition_parser<'tokens, I>(
1903) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1904where
1905    I: ValueInput<'tokens, Token = Token, Span = Span>,
1906{
1907    // [[ ]] test expression - wrap as Expr::Test
1908    let test_expr_condition = test_expr_stmt_parser().map(|test| Expr::Test(Box::new(test)));
1909
1910    // Command as condition (includes true/false as command names)
1911    // The command's exit code determines truthiness (0 = true, non-zero = false)
1912    let command_condition = command_parser().map(Expr::Command);
1913
1914    // Base: test expr OR command
1915    let base = choice((test_expr_condition, command_condition));
1916
1917    // && has higher precedence than ||
1918    // First chain with && (higher precedence)
1919    let and_expr = base.clone().foldl(
1920        just(Token::And).ignore_then(base).repeated(),
1921        |left, right| Expr::BinaryOp {
1922            left: Box::new(left),
1923            op: BinaryOp::And,
1924            right: Box::new(right),
1925        },
1926    );
1927
1928    // Then chain with || (lower precedence)
1929    and_expr
1930        .clone()
1931        .foldl(
1932            just(Token::Or).ignore_then(and_expr).repeated(),
1933            |left, right| Expr::BinaryOp {
1934                left: Box::new(left),
1935                op: BinaryOp::Or,
1936                right: Box::new(right),
1937            },
1938        )
1939        .labelled("condition")
1940        .boxed()
1941}
1942
1943/// Expression parser - supports && and || binary operators.
1944fn expr_parser<'tokens, I>(
1945) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1946where
1947    I: ValueInput<'tokens, Token = Token, Span = Span>,
1948{
1949    // For now, just primary expressions. Can extend for && / || later if needed.
1950    primary_expr_parser()
1951}
1952
1953/// Primary expression: literal, variable reference, command substitution, or bare identifier.
1954///
1955/// Uses `recursive` to support nested command substitution like `$(echo $(date))`.
1956fn primary_expr_parser<'tokens, I>(
1957) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1958where
1959    I: ValueInput<'tokens, Token = Token, Span = Span>,
1960{
1961    // Positional parameters: $0-$9, $@, $#, ${#VAR}, $?, $$
1962    let positional = select! {
1963        Token::Positional(n) => Expr::Positional(n),
1964        Token::AllArgs => Expr::AllArgs,
1965        Token::ArgCount => Expr::ArgCount,
1966        Token::VarLength(name) => Expr::VarLength(name),
1967        Token::LastExitCode => Expr::LastExitCode,
1968        Token::CurrentPid => Expr::CurrentPid,
1969    };
1970
1971    // Arithmetic expression: $((expr)) - preprocessed into Arithmetic token
1972    let arithmetic = select! {
1973        Token::Arithmetic(expr_str) => Expr::Arithmetic(expr_str),
1974    };
1975
1976    // Keywords that can also be used as barewords in argument position
1977    // (e.g., `echo done` should work even though `done` is a keyword)
1978    let keyword_as_bareword = select! {
1979        Token::Done => "done",
1980        Token::Fi => "fi",
1981        Token::Then => "then",
1982        Token::Else => "else",
1983        Token::Elif => "elif",
1984        Token::In => "in",
1985        Token::Do => "do",
1986        Token::Esac => "esac",
1987        // `set` in argument position is the literal word (`echo set`,
1988        // `kaish-output-limit set 1K`); the `set` *builtin* is only matched
1989        // when `Token::Set` leads a statement (see `set_command`), so this
1990        // arm never shadows it.
1991        Token::Set => "set",
1992    }
1993    .map(|s| Expr::Literal(Value::String(s.to_string())));
1994
1995    // Bare words starting with + or - (e.g., date +%s, cat -)
1996    let plus_minus_bare = select! {
1997        Token::PlusBare(s) => Expr::Literal(Value::String(s)),
1998        Token::MinusBare(s) => Expr::Literal(Value::String(s)),
1999        Token::MinusAlone => Expr::Literal(Value::String("-".to_string())),
2000    };
2001
2002    // Glob patterns: merged GlobWord tokens and bare Star/Question
2003    let glob_pattern = select! {
2004        Token::GlobWord(s) => Expr::GlobPattern(s),
2005        Token::Star => Expr::GlobPattern("*".to_string()),
2006        Token::Question => Expr::GlobPattern("?".to_string()),
2007    };
2008
2009    recursive(|expr| {
2010        choice((
2011            positional,
2012            arithmetic,
2013            cmd_subst_parser(expr.clone()),
2014            var_expr_parser(),
2015            interpolated_string_parser(),
2016            literal_parser().map(Expr::Literal),
2017            // Glob patterns before ident (GlobWord is more specific)
2018            glob_pattern,
2019            // Bare identifiers become string literals (shell barewords)
2020            ident_parser().map(|s| Expr::Literal(Value::String(s))),
2021            // Absolute paths become string literals
2022            path_parser().map(|s| Expr::Literal(Value::String(s))),
2023            // Bare words starting with + or - (date +%s, cat -)
2024            // Shell navigation tokens
2025            select! {
2026                // Bare `.` in argument/expression position is the literal
2027                // current-directory path (`find .`, `ls .`, `echo .`). The
2028                // `source` alias is unaffected: `command_parser` consumes a
2029                // *leading* `.` as the command name before args are parsed,
2030                // so only a `.` that follows a command reaches here.
2031                Token::Dot => Expr::Literal(Value::String(".".into())),
2032                Token::DotDot => Expr::Literal(Value::String("..".into())),
2033                // Bare comma in argument position is the literal "," — the
2034                // `cut -d, -f2` / `tr -d ,` delimiter idiom. Brace expansion
2035                // consumes its separator commas inside `{…}` before reaching
2036                // here, and a run of comma-touching positionals (`echo 1,2,3`)
2037                // is still caught by the no-token-pasting guard in
2038                // `args_list_parser`. See docs/issues.md.
2039                Token::Comma => Expr::Literal(Value::String(",".into())),
2040                Token::Tilde => Expr::Literal(Value::String("~".into())),
2041                Token::TildePath(s) => Expr::Literal(Value::String(s)),
2042                Token::RelativePath(s) => Expr::Literal(Value::String(s)),
2043                Token::DotSlashPath(s) => Expr::Literal(Value::String(s)),
2044                // Digit-leading bareword (SHA prefix `019dda1c`, UUIDs).
2045                Token::NumberIdent(s) => Expr::Literal(Value::String(s)),
2046                // Dot-prefixed bareword (`.gitignore`, `.parent`, `.parent.parent`).
2047                // Distinct from `Token::Dot` (the source alias), which only
2048                // matches a bare `.` and requires whitespace before its file
2049                // argument.
2050                Token::DottedIdent(s) => Expr::Literal(Value::String(s)),
2051                // Job specifier `%1` for wait/kill — flows as the literal
2052                // string "%1"; the builtins interpret the leading `%`.
2053                Token::JobSpec(s) => Expr::Literal(Value::String(s)),
2054            },
2055            plus_minus_bare,
2056            // Keywords can be used as barewords in argument position
2057            keyword_as_bareword,
2058        ))
2059        .labelled("expression")
2060    })
2061    .boxed()
2062}
2063
2064/// Variable reference: `${VAR}`, `${VAR.field}`, `${VAR:-default}`, or `$VAR` (simple form).
2065/// Returns Expr directly to support both VarRef and VarWithDefault.
2066fn var_expr_parser<'tokens, I>(
2067) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2068where
2069    I: ValueInput<'tokens, Token = Token, Span = Span>,
2070{
2071    select! {
2072        Token::VarRef(raw) => parse_var_expr(&raw),
2073        Token::SimpleVarRef(name) => Expr::VarRef(VarPath::simple(name)),
2074    }
2075    .labelled("variable reference")
2076}
2077
2078/// Command substitution: `$(pipeline)` - runs a pipeline and returns its result.
2079///
2080/// Accepts a recursive expression parser to support nested command substitution.
2081fn cmd_subst_parser<'tokens, I, E>(
2082    expr: E,
2083) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2084where
2085    I: ValueInput<'tokens, Token = Token, Span = Span>,
2086    E: Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone,
2087{
2088    // Argument parser using the recursive expression parser
2089    // Long flag with value: --name=value
2090    let long_flag_with_value = select! {
2091        Token::LongFlag(name) => name,
2092    }
2093    .then_ignore(just(Token::Eq))
2094    .then(expr.clone())
2095    .map(|(key, value)| Arg::Named { key, value });
2096
2097    // Boolean long flag: --name
2098    let long_flag = select! {
2099        Token::LongFlag(name) => Arg::LongFlag(name),
2100    };
2101
2102    // Boolean short flag: -x
2103    let short_flag = select! {
2104        Token::ShortFlag(name) => Arg::ShortFlag(name),
2105    };
2106
2107    // Shell assignment in argv position: name=value (see arg_before_double_dash_parser).
2108    // Keyword keys (`if=`, `in=`, …) are accepted so `$(dd if=x)` parses.
2109    let named = choice((ident_parser(), keyword_word()))
2110        .then_ignore(just(Token::Eq))
2111        .then(expr.clone())
2112        .map(|(key, value)| Arg::WordAssign { key, value });
2113
2114    // Positional argument
2115    let positional = expr.map(Arg::Positional);
2116
2117    let arg = choice((
2118        long_flag_with_value,
2119        long_flag,
2120        short_flag,
2121        named,
2122        positional,
2123    ));
2124
2125    // Command name parser - accepts identifiers and boolean keywords (true/false are builtins)
2126    let command_name = choice((
2127        ident_parser(),
2128        just(Token::True).to("true".to_string()),
2129        just(Token::False).to("false".to_string()),
2130    ));
2131
2132    // Command parser
2133    let command = command_name
2134        .then(arg.repeated().collect::<Vec<_>>())
2135        .map(|(name, args)| Command {
2136            name,
2137            args,
2138            redirects: vec![],
2139        });
2140
2141    // Pipeline parser
2142    let pipeline = command
2143        .separated_by(just(Token::Pipe))
2144        .at_least(1)
2145        .collect::<Vec<_>>()
2146        .map(|commands| Pipeline {
2147            commands,
2148            background: false,
2149        });
2150
2151    // A single pipeline becomes one statement (`$(echo x)` → one `Stmt::Command`),
2152    // keeping the AST shape uniform with the rest of the parser.
2153    let pipeline_stmt = pipeline.map(pipeline_into_stmt);
2154
2155    // Statement chaining inside `$()`, same precedence as the top level
2156    // (`&&` binds tighter than `||`). This is the full statement grammar a
2157    // command substitution body accepts — pipelines, `&&`/`||` chains, and
2158    // (via the sequence below) `;`/newline separators and `#` comments.
2159    // Control structures (`if`/`for`/`while`/`case`) are intentionally out of
2160    // scope here; they require threading the recursive statement parser through
2161    // every expression site (see docs/issues.md). The body grammar otherwise
2162    // mirrors `statement_parser`.
2163    let and_chain = pipeline_stmt.clone().foldl(
2164        just(Token::And).ignore_then(pipeline_stmt.clone()).repeated(),
2165        |left, right| Stmt::AndChain {
2166            left: Box::new(left),
2167            right: Box::new(right),
2168        },
2169    );
2170    let chained = and_chain.clone().foldl(
2171        just(Token::Or).ignore_then(and_chain).repeated(),
2172        |left, right| Stmt::OrChain {
2173            left: Box::new(left),
2174            right: Box::new(right),
2175        },
2176    );
2177
2178    // `;` / newline separated sequence of chained statements, with optional
2179    // leading/trailing/interior separators (so multi-line bodies and a trailing
2180    // `;` or comment-induced newline parse cleanly). `#` comments lex to
2181    // newlines, so they are consumed here as ordinary separators.
2182    let separator = choice((just(Token::Newline), just(Token::Semi)));
2183    let body = separator
2184        .clone()
2185        .repeated()
2186        .ignore_then(
2187            chained
2188                .separated_by(separator.clone().repeated().at_least(1))
2189                .allow_trailing()
2190                .collect::<Vec<_>>(),
2191        )
2192        .then_ignore(separator.repeated());
2193
2194    just(Token::CmdSubstStart)
2195        .ignore_then(body)
2196        .then_ignore(just(Token::RParen))
2197        .map(Expr::CommandSubst)
2198        .labelled("command substitution")
2199}
2200
2201/// String parser - handles double-quoted strings (with interpolation) and single-quoted (literal).
2202fn interpolated_string_parser<'tokens, I>(
2203) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2204where
2205    I: ValueInput<'tokens, Token = Token, Span = Span>,
2206{
2207    // Double-quoted string: may contain $VAR or ${VAR} interpolation
2208    let double_quoted = select! {
2209        Token::String(s) => s,
2210    }
2211    .map(|s| {
2212        // Check if string contains interpolation markers (${} or $NAME) or escaped dollars
2213        if s.contains('$') || s.contains("__KAISH_ESCAPED_DOLLAR__") {
2214            // Parse interpolated parts
2215            let parts = parse_interpolated_string(&s);
2216            if parts.len() == 1
2217                && let StringPart::Literal(text) = &parts[0] {
2218                    return Expr::Literal(Value::String(text.clone()));
2219                }
2220            Expr::Interpolated(parts)
2221        } else {
2222            Expr::Literal(Value::String(s))
2223        }
2224    });
2225
2226    // Single-quoted string: literal, no interpolation
2227    let single_quoted = select! {
2228        Token::SingleString(s) => Expr::Literal(Value::String(s)),
2229    };
2230
2231    choice((single_quoted, double_quoted)).labelled("string")
2232}
2233
2234/// Literal value parser (excluding strings, which are handled by interpolated_string_parser).
2235fn literal_parser<'tokens, I>(
2236) -> impl Parser<'tokens, I, Value, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2237where
2238    I: ValueInput<'tokens, Token = Token, Span = Span>,
2239{
2240    choice((
2241        select! {
2242            Token::True => Value::Bool(true),
2243            Token::False => Value::Bool(false),
2244        },
2245        select! {
2246            Token::Int(n) => Value::Int(n),
2247            Token::Float(f) => Value::Float(f),
2248        },
2249    ))
2250    .labelled("literal")
2251    .boxed()
2252}
2253
2254/// Identifier parser.
2255fn ident_parser<'tokens, I>(
2256) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2257where
2258    I: ValueInput<'tokens, Token = Token, Span = Span>,
2259{
2260    select! {
2261        Token::Ident(s) => s,
2262    }
2263    .labelled("identifier")
2264}
2265
2266/// Path parser: matches absolute paths like `/tmp/out`, `/etc/hosts`.
2267fn path_parser<'tokens, I>(
2268) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2269where
2270    I: ValueInput<'tokens, Token = Token, Span = Span>,
2271{
2272    select! {
2273        Token::Path(s) => s,
2274    }
2275    .labelled("path")
2276}
2277
2278#[cfg(test)]
2279#[allow(clippy::approx_constant)]
2280mod tests {
2281    use super::*;
2282
2283    /// Extract the single `Command` from a one-statement `$(cmd)` body.
2284    fn subst_cmd(expr: &Expr) -> &Command {
2285        match expr {
2286            Expr::CommandSubst(stmts) => match stmts.as_slice() {
2287                [Stmt::Command(cmd)] => cmd,
2288                other => panic!("expected a single command in $(), got {other:?}"),
2289            },
2290            other => panic!("expected command subst, got {other:?}"),
2291        }
2292    }
2293
2294    /// Extract the single `Pipeline` from a one-statement `$(a | b)` body.
2295    fn subst_pipeline(expr: &Expr) -> &Pipeline {
2296        match expr {
2297            Expr::CommandSubst(stmts) => match stmts.as_slice() {
2298                [Stmt::Pipeline(p)] => p,
2299                other => panic!("expected a single pipeline in $(), got {other:?}"),
2300            },
2301            other => panic!("expected command subst, got {other:?}"),
2302        }
2303    }
2304
2305    #[test]
2306    fn parse_empty() {
2307        let result = parse("");
2308        assert!(result.is_ok());
2309        assert_eq!(result.expect("ok").statements.len(), 0);
2310    }
2311
2312    #[test]
2313    fn parse_newlines_only() {
2314        let result = parse("\n\n\n");
2315        assert!(result.is_ok());
2316    }
2317
2318    #[test]
2319    fn parse_simple_command() {
2320        let result = parse("echo");
2321        assert!(result.is_ok());
2322        let program = result.expect("ok");
2323        assert_eq!(program.statements.len(), 1);
2324        assert!(matches!(&program.statements[0], Stmt::Command(_)));
2325    }
2326
2327    #[test]
2328    fn parse_command_with_string_arg() {
2329        let result = parse(r#"echo "hello""#);
2330        assert!(result.is_ok());
2331        let program = result.expect("ok");
2332        match &program.statements[0] {
2333            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 1),
2334            _ => panic!("expected Command"),
2335        }
2336    }
2337
2338    #[test]
2339    fn parse_assignment() {
2340        let result = parse("X=5");
2341        assert!(result.is_ok());
2342        let program = result.expect("ok");
2343        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
2344    }
2345
2346    #[test]
2347    fn parse_pipeline() {
2348        let result = parse("a | b | c");
2349        assert!(result.is_ok());
2350        let program = result.expect("ok");
2351        match &program.statements[0] {
2352            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
2353            _ => panic!("expected Pipeline"),
2354        }
2355    }
2356
2357    #[test]
2358    fn parse_background_job() {
2359        let result = parse("cmd &");
2360        assert!(result.is_ok());
2361        let program = result.expect("ok");
2362        match &program.statements[0] {
2363            Stmt::Pipeline(p) => assert!(p.background),
2364            _ => panic!("expected Pipeline with background"),
2365        }
2366    }
2367
2368    #[test]
2369    fn parse_if_simple() {
2370        let result = parse("if true; then echo; fi");
2371        assert!(result.is_ok());
2372        let program = result.expect("ok");
2373        assert!(matches!(&program.statements[0], Stmt::If(_)));
2374    }
2375
2376    #[test]
2377    fn parse_if_else() {
2378        let result = parse("if true; then echo; else echo; fi");
2379        assert!(result.is_ok());
2380        let program = result.expect("ok");
2381        match &program.statements[0] {
2382            Stmt::If(if_stmt) => assert!(if_stmt.else_branch.is_some()),
2383            _ => panic!("expected If"),
2384        }
2385    }
2386
2387    #[test]
2388    fn parse_elif_simple() {
2389        let result = parse("if true; then echo a; elif false; then echo b; fi");
2390        assert!(result.is_ok(), "parse failed: {:?}", result);
2391        let program = result.expect("ok");
2392        match &program.statements[0] {
2393            Stmt::If(if_stmt) => {
2394                // elif is desugared to nested if in else
2395                assert!(if_stmt.else_branch.is_some());
2396                let else_branch = if_stmt.else_branch.as_ref().unwrap();
2397                assert_eq!(else_branch.len(), 1);
2398                assert!(matches!(&else_branch[0], Stmt::If(_)));
2399            }
2400            _ => panic!("expected If"),
2401        }
2402    }
2403
2404    #[test]
2405    fn parse_elif_with_else() {
2406        let result = parse("if true; then echo a; elif false; then echo b; else echo c; fi");
2407        assert!(result.is_ok(), "parse failed: {:?}", result);
2408        let program = result.expect("ok");
2409        match &program.statements[0] {
2410            Stmt::If(outer_if) => {
2411                // Check nested structure: if -> elif -> else
2412                let else_branch = outer_if.else_branch.as_ref().expect("outer else");
2413                assert_eq!(else_branch.len(), 1);
2414                match &else_branch[0] {
2415                    Stmt::If(inner_if) => {
2416                        // The inner if (from elif) should have the final else
2417                        assert!(inner_if.else_branch.is_some());
2418                    }
2419                    _ => panic!("expected nested If from elif"),
2420                }
2421            }
2422            _ => panic!("expected If"),
2423        }
2424    }
2425
2426    #[test]
2427    fn parse_multiple_elif() {
2428        // Shell-compatible: use [[ ]] for comparisons
2429        let result = parse(
2430            "if [[ ${X} == 1 ]]; then echo one; elif [[ ${X} == 2 ]]; then echo two; elif [[ ${X} == 3 ]]; then echo three; else echo other; fi",
2431        );
2432        assert!(result.is_ok(), "parse failed: {:?}", result);
2433    }
2434
2435    #[test]
2436    fn parse_for_loop() {
2437        let result = parse("for X in items; do echo; done");
2438        assert!(result.is_ok());
2439        let program = result.expect("ok");
2440        assert!(matches!(&program.statements[0], Stmt::For(_)));
2441    }
2442
2443    #[test]
2444    fn parse_brackets_not_array_literal() {
2445        // Array literals are no longer supported, [ is just a regular char
2446        let result = parse("cmd [1");
2447        // This should fail or parse unexpectedly - arrays are removed
2448        // Just verify we don't crash
2449        let _ = result;
2450    }
2451
2452    #[test]
2453    fn parse_named_arg() {
2454        // Bareword key=value parses as WordAssign — the kernel decides per
2455        // command whether to route it to tool_args.named (export/alias) or
2456        // stringify to a positional (every other builtin).
2457        let result = parse("cmd foo=5");
2458        assert!(result.is_ok());
2459        let program = result.expect("ok");
2460        match &program.statements[0] {
2461            Stmt::Command(cmd) => {
2462                assert_eq!(cmd.args.len(), 1);
2463                assert!(matches!(&cmd.args[0], Arg::WordAssign { .. }));
2464            }
2465            _ => panic!("expected Command"),
2466        }
2467    }
2468
2469    #[test]
2470    fn parse_short_flag() {
2471        let result = parse("ls -l");
2472        assert!(result.is_ok());
2473        let program = result.expect("ok");
2474        match &program.statements[0] {
2475            Stmt::Command(cmd) => {
2476                assert_eq!(cmd.name, "ls");
2477                assert_eq!(cmd.args.len(), 1);
2478                match &cmd.args[0] {
2479                    Arg::ShortFlag(name) => assert_eq!(name, "l"),
2480                    _ => panic!("expected ShortFlag"),
2481                }
2482            }
2483            _ => panic!("expected Command"),
2484        }
2485    }
2486
2487    #[test]
2488    fn parse_long_flag() {
2489        let result = parse("git push --force");
2490        assert!(result.is_ok());
2491        let program = result.expect("ok");
2492        match &program.statements[0] {
2493            Stmt::Command(cmd) => {
2494                assert_eq!(cmd.name, "git");
2495                assert_eq!(cmd.args.len(), 2);
2496                match &cmd.args[0] {
2497                    Arg::Positional(Expr::Literal(Value::String(s))) => assert_eq!(s, "push"),
2498                    _ => panic!("expected Positional push"),
2499                }
2500                match &cmd.args[1] {
2501                    Arg::LongFlag(name) => assert_eq!(name, "force"),
2502                    _ => panic!("expected LongFlag"),
2503                }
2504            }
2505            _ => panic!("expected Command"),
2506        }
2507    }
2508
2509    #[test]
2510    fn parse_long_flag_with_value() {
2511        let result = parse(r#"git commit --message="hello""#);
2512        assert!(result.is_ok());
2513        let program = result.expect("ok");
2514        match &program.statements[0] {
2515            Stmt::Command(cmd) => {
2516                assert_eq!(cmd.name, "git");
2517                assert_eq!(cmd.args.len(), 2);
2518                match &cmd.args[1] {
2519                    Arg::Named { key, value } => {
2520                        assert_eq!(key, "message");
2521                        match value {
2522                            Expr::Literal(Value::String(s)) => assert_eq!(s, "hello"),
2523                            _ => panic!("expected String value"),
2524                        }
2525                    }
2526                    _ => panic!("expected Named from --flag=value"),
2527                }
2528            }
2529            _ => panic!("expected Command"),
2530        }
2531    }
2532
2533    #[test]
2534    fn parse_mixed_flags_and_args() {
2535        let result = parse(r#"git commit -m "message" --amend"#);
2536        assert!(result.is_ok());
2537        let program = result.expect("ok");
2538        match &program.statements[0] {
2539            Stmt::Command(cmd) => {
2540                assert_eq!(cmd.name, "git");
2541                assert_eq!(cmd.args.len(), 4);
2542                // commit (positional)
2543                assert!(matches!(&cmd.args[0], Arg::Positional(_)));
2544                // -m (short flag)
2545                match &cmd.args[1] {
2546                    Arg::ShortFlag(name) => assert_eq!(name, "m"),
2547                    _ => panic!("expected ShortFlag -m"),
2548                }
2549                // "message" (positional)
2550                assert!(matches!(&cmd.args[2], Arg::Positional(_)));
2551                // --amend (long flag)
2552                match &cmd.args[3] {
2553                    Arg::LongFlag(name) => assert_eq!(name, "amend"),
2554                    _ => panic!("expected LongFlag --amend"),
2555                }
2556            }
2557            _ => panic!("expected Command"),
2558        }
2559    }
2560
2561    #[test]
2562    fn parse_redirect_stdout() {
2563        let result = parse("cmd > file");
2564        assert!(result.is_ok());
2565        let program = result.expect("ok");
2566        // Commands with redirects stay as Pipeline, not Command
2567        match &program.statements[0] {
2568            Stmt::Pipeline(p) => {
2569                assert_eq!(p.commands.len(), 1);
2570                let cmd = &p.commands[0];
2571                assert_eq!(cmd.redirects.len(), 1);
2572                assert!(matches!(cmd.redirects[0].kind, RedirectKind::StdoutOverwrite));
2573            }
2574            _ => panic!("expected Pipeline"),
2575        }
2576    }
2577
2578    #[test]
2579    fn parse_var_ref() {
2580        let result = parse("echo ${VAR}");
2581        assert!(result.is_ok());
2582        let program = result.expect("ok");
2583        match &program.statements[0] {
2584            Stmt::Command(cmd) => {
2585                assert_eq!(cmd.args.len(), 1);
2586                assert!(matches!(&cmd.args[0], Arg::Positional(Expr::VarRef(_))));
2587            }
2588            _ => panic!("expected Command"),
2589        }
2590    }
2591
2592    #[test]
2593    fn parse_multiple_statements() {
2594        let result = parse("a\nb\nc");
2595        assert!(result.is_ok());
2596        let program = result.expect("ok");
2597        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
2598        assert_eq!(non_empty.len(), 3);
2599    }
2600
2601    #[test]
2602    fn parse_semicolon_separated() {
2603        let result = parse("a; b; c");
2604        assert!(result.is_ok());
2605        let program = result.expect("ok");
2606        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
2607        assert_eq!(non_empty.len(), 3);
2608    }
2609
2610    #[test]
2611    fn parse_complex_pipeline() {
2612        let result = parse(r#"cat file | grep pattern="foo" | head count=10"#);
2613        assert!(result.is_ok());
2614        let program = result.expect("ok");
2615        match &program.statements[0] {
2616            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
2617            _ => panic!("expected Pipeline"),
2618        }
2619    }
2620
2621    #[test]
2622    fn parse_json_as_string_arg() {
2623        // JSON arrays/objects should be passed as string arguments
2624        let result = parse(r#"cmd '[[1, 2], [3, 4]]'"#);
2625        assert!(result.is_ok());
2626    }
2627
2628    #[test]
2629    fn parse_mixed_args() {
2630        let result = parse(r#"cmd pos1 key="val" pos2 num=42"#);
2631        assert!(result.is_ok());
2632        let program = result.expect("ok");
2633        match &program.statements[0] {
2634            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 4),
2635            _ => panic!("expected Command"),
2636        }
2637    }
2638
2639    #[test]
2640    fn error_unterminated_string() {
2641        let result = parse(r#"echo "hello"#);
2642        assert!(result.is_err());
2643    }
2644
2645    #[test]
2646    fn error_unterminated_var_ref() {
2647        let result = parse("echo ${VAR");
2648        assert!(result.is_err());
2649    }
2650
2651    #[test]
2652    fn error_missing_fi() {
2653        let result = parse("if true; then echo");
2654        assert!(result.is_err());
2655    }
2656
2657    #[test]
2658    fn error_missing_done() {
2659        let result = parse("for X in items; do echo");
2660        assert!(result.is_err());
2661    }
2662
2663    #[test]
2664    fn parse_nested_cmd_subst() {
2665        // Nested command substitution is supported
2666        let result = parse("X=$(echo $(date))").unwrap();
2667        match &result.statements[0] {
2668            Stmt::Assignment(a) => {
2669                assert_eq!(a.name, "X");
2670                let outer = subst_cmd(&a.value);
2671                assert_eq!(outer.name, "echo");
2672                // The argument should be another command substitution
2673                match &outer.args[0] {
2674                    Arg::Positional(inner_expr) => {
2675                        assert_eq!(subst_cmd(inner_expr).name, "date");
2676                    }
2677                    other => panic!("expected nested cmd subst arg, got {:?}", other),
2678                }
2679            }
2680            other => panic!("expected assignment, got {:?}", other),
2681        }
2682    }
2683
2684    #[test]
2685    fn parse_deeply_nested_cmd_subst() {
2686        // Three levels deep
2687        let result = parse("X=$(a $(b $(c)))").unwrap();
2688        match &result.statements[0] {
2689            Stmt::Assignment(a) => {
2690                let level1 = subst_cmd(&a.value);
2691                assert_eq!(level1.name, "a");
2692                match &level1.args[0] {
2693                    Arg::Positional(level2_expr) => {
2694                        let level2 = subst_cmd(level2_expr);
2695                        assert_eq!(level2.name, "b");
2696                        match &level2.args[0] {
2697                            Arg::Positional(level3_expr) => {
2698                                assert_eq!(subst_cmd(level3_expr).name, "c");
2699                            }
2700                            other => panic!("expected level3 cmd subst, got {:?}", other),
2701                        }
2702                    }
2703                    other => panic!("expected level2 cmd subst, got {:?}", other),
2704                }
2705            }
2706            other => panic!("expected assignment, got {:?}", other),
2707        }
2708    }
2709
2710    // ═══════════════════════════════════════════════════════════════════════════
2711    // Value Preservation Tests - These test that actual values are captured
2712    // ═══════════════════════════════════════════════════════════════════════════
2713
2714    #[test]
2715    fn value_int_preserved() {
2716        let result = parse("X=42").unwrap();
2717        match &result.statements[0] {
2718            Stmt::Assignment(a) => {
2719                assert_eq!(a.name, "X");
2720                match &a.value {
2721                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2722                    other => panic!("expected int literal, got {:?}", other),
2723                }
2724            }
2725            other => panic!("expected assignment, got {:?}", other),
2726        }
2727    }
2728
2729    #[test]
2730    fn value_negative_int_preserved() {
2731        let result = parse("X=-99").unwrap();
2732        match &result.statements[0] {
2733            Stmt::Assignment(a) => match &a.value {
2734                Expr::Literal(Value::Int(n)) => assert_eq!(*n, -99),
2735                other => panic!("expected int, got {:?}", other),
2736            },
2737            other => panic!("expected assignment, got {:?}", other),
2738        }
2739    }
2740
2741    #[test]
2742    fn value_float_preserved() {
2743        let result = parse("PI=3.14").unwrap();
2744        match &result.statements[0] {
2745            Stmt::Assignment(a) => match &a.value {
2746                Expr::Literal(Value::Float(f)) => assert!((*f - 3.14).abs() < 0.001),
2747                other => panic!("expected float, got {:?}", other),
2748            },
2749            other => panic!("expected assignment, got {:?}", other),
2750        }
2751    }
2752
2753    #[test]
2754    fn value_string_preserved() {
2755        let result = parse(r#"echo "hello world""#).unwrap();
2756        match &result.statements[0] {
2757            Stmt::Command(cmd) => {
2758                assert_eq!(cmd.name, "echo");
2759                match &cmd.args[0] {
2760                    Arg::Positional(Expr::Literal(Value::String(s))) => {
2761                        assert_eq!(s, "hello world");
2762                    }
2763                    other => panic!("expected string arg, got {:?}", other),
2764                }
2765            }
2766            other => panic!("expected command, got {:?}", other),
2767        }
2768    }
2769
2770    #[test]
2771    fn value_string_with_escapes_preserved() {
2772        let result = parse(r#"echo "line1\nline2""#).unwrap();
2773        match &result.statements[0] {
2774            Stmt::Command(cmd) => match &cmd.args[0] {
2775                Arg::Positional(Expr::Literal(Value::String(s))) => {
2776                    assert_eq!(s, "line1\nline2");
2777                }
2778                other => panic!("expected string, got {:?}", other),
2779            },
2780            other => panic!("expected command, got {:?}", other),
2781        }
2782    }
2783
2784    #[test]
2785    fn value_command_name_preserved() {
2786        let result = parse("my-command").unwrap();
2787        match &result.statements[0] {
2788            Stmt::Command(cmd) => assert_eq!(cmd.name, "my-command"),
2789            other => panic!("expected command, got {:?}", other),
2790        }
2791    }
2792
2793    #[test]
2794    fn value_assignment_name_preserved() {
2795        let result = parse("MY_VAR=1").unwrap();
2796        match &result.statements[0] {
2797            Stmt::Assignment(a) => assert_eq!(a.name, "MY_VAR"),
2798            other => panic!("expected assignment, got {:?}", other),
2799        }
2800    }
2801
2802    #[test]
2803    fn value_for_variable_preserved() {
2804        let result = parse("for ITEM in items; do echo; done").unwrap();
2805        match &result.statements[0] {
2806            Stmt::For(f) => assert_eq!(f.variable, "ITEM"),
2807            other => panic!("expected for, got {:?}", other),
2808        }
2809    }
2810
2811    #[test]
2812    fn value_varref_name_preserved() {
2813        let result = parse("echo ${MESSAGE}").unwrap();
2814        match &result.statements[0] {
2815            Stmt::Command(cmd) => match &cmd.args[0] {
2816                Arg::Positional(Expr::VarRef(path)) => {
2817                    assert_eq!(path.segments.len(), 1);
2818                    let VarSegment::Field(name) = &path.segments[0];
2819                    assert_eq!(name, "MESSAGE");
2820                }
2821                other => panic!("expected varref, got {:?}", other),
2822            },
2823            other => panic!("expected command, got {:?}", other),
2824        }
2825    }
2826
2827    #[test]
2828    fn value_varref_field_access_preserved() {
2829        let result = parse("echo ${RESULT.data}").unwrap();
2830        match &result.statements[0] {
2831            Stmt::Command(cmd) => match &cmd.args[0] {
2832                Arg::Positional(Expr::VarRef(path)) => {
2833                    assert_eq!(path.segments.len(), 2);
2834                    let VarSegment::Field(a) = &path.segments[0];
2835                    let VarSegment::Field(b) = &path.segments[1];
2836                    assert_eq!(a, "RESULT");
2837                    assert_eq!(b, "data");
2838                }
2839                other => panic!("expected varref, got {:?}", other),
2840            },
2841            other => panic!("expected command, got {:?}", other),
2842        }
2843    }
2844
2845    #[test]
2846    fn value_varref_index_ignored() {
2847        // Index segments are no longer supported - they're filtered out by parse_varpath
2848        let result = parse("echo ${ITEMS[0]}").unwrap();
2849        match &result.statements[0] {
2850            Stmt::Command(cmd) => match &cmd.args[0] {
2851                Arg::Positional(Expr::VarRef(path)) => {
2852                    // Index segment [0] is skipped, only ITEMS remains
2853                    assert_eq!(path.segments.len(), 1);
2854                    let VarSegment::Field(name) = &path.segments[0];
2855                    assert_eq!(name, "ITEMS");
2856                }
2857                other => panic!("expected varref, got {:?}", other),
2858            },
2859            other => panic!("expected command, got {:?}", other),
2860        }
2861    }
2862
2863    #[test]
2864    fn value_named_arg_preserved() {
2865        // Bareword key=value parses as WordAssign — the kernel decides per
2866        // command whether to route into args.named (export/alias) or
2867        // stringify as a positional.
2868        let result = parse("cmd count=42").unwrap();
2869        match &result.statements[0] {
2870            Stmt::Command(cmd) => {
2871                assert_eq!(cmd.name, "cmd");
2872                match &cmd.args[0] {
2873                    Arg::WordAssign { key, value } => {
2874                        assert_eq!(key, "count");
2875                        match value {
2876                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2877                            other => panic!("expected int, got {:?}", other),
2878                        }
2879                    }
2880                    other => panic!("expected WordAssign arg, got {:?}", other),
2881                }
2882            }
2883            other => panic!("expected command, got {:?}", other),
2884        }
2885    }
2886
2887    #[test]
2888    fn value_function_def_name_preserved() {
2889        let result = parse("greet() { echo }").unwrap();
2890        match &result.statements[0] {
2891            Stmt::ToolDef(t) => {
2892                assert_eq!(t.name, "greet");
2893                assert!(t.params.is_empty());
2894            }
2895            other => panic!("expected function def, got {:?}", other),
2896        }
2897    }
2898
2899    // ═══════════════════════════════════════════════════════════════════════════
2900    // New Feature Tests - Comparisons, Interpolation, Nested Structures
2901    // ═══════════════════════════════════════════════════════════════════════════
2902
2903    #[test]
2904    fn parse_comparison_equals() {
2905        // Shell-compatible: use [[ ]] for comparisons
2906        let result = parse("if [[ ${X} == 5 ]]; then echo; fi").unwrap();
2907        match &result.statements[0] {
2908            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2909                Expr::Test(test) => match test.as_ref() {
2910                    TestExpr::Comparison { left, op, right } => {
2911                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
2912                        assert_eq!(*op, TestCmpOp::Eq);
2913                        match right.as_ref() {
2914                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 5),
2915                            other => panic!("expected int, got {:?}", other),
2916                        }
2917                    }
2918                    other => panic!("expected comparison, got {:?}", other),
2919                },
2920                other => panic!("expected test expr, got {:?}", other),
2921            },
2922            other => panic!("expected if, got {:?}", other),
2923        }
2924    }
2925
2926    #[test]
2927    fn parse_comparison_not_equals() {
2928        let result = parse("if [[ ${X} != 0 ]]; then echo; fi").unwrap();
2929        match &result.statements[0] {
2930            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2931                Expr::Test(test) => match test.as_ref() {
2932                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotEq),
2933                    other => panic!("expected comparison, got {:?}", other),
2934                },
2935                other => panic!("expected test expr, got {:?}", other),
2936            },
2937            other => panic!("expected if, got {:?}", other),
2938        }
2939    }
2940
2941    #[test]
2942    fn parse_comparison_less_than() {
2943        let result = parse("if [[ ${COUNT} -lt 10 ]]; then echo; fi").unwrap();
2944        match &result.statements[0] {
2945            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2946                Expr::Test(test) => match test.as_ref() {
2947                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumLt),
2948                    other => panic!("expected comparison, got {:?}", other),
2949                },
2950                other => panic!("expected test expr, got {:?}", other),
2951            },
2952            other => panic!("expected if, got {:?}", other),
2953        }
2954    }
2955
2956    #[test]
2957    fn parse_comparison_greater_than() {
2958        let result = parse("if [[ ${COUNT} -gt 0 ]]; then echo; fi").unwrap();
2959        match &result.statements[0] {
2960            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2961                Expr::Test(test) => match test.as_ref() {
2962                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumGt),
2963                    other => panic!("expected comparison, got {:?}", other),
2964                },
2965                other => panic!("expected test expr, got {:?}", other),
2966            },
2967            other => panic!("expected if, got {:?}", other),
2968        }
2969    }
2970
2971    #[test]
2972    fn parse_comparison_less_equal() {
2973        let result = parse("if [[ ${X} -le 100 ]]; then echo; fi").unwrap();
2974        match &result.statements[0] {
2975            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2976                Expr::Test(test) => match test.as_ref() {
2977                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumLtEq),
2978                    other => panic!("expected comparison, got {:?}", other),
2979                },
2980                other => panic!("expected test expr, got {:?}", other),
2981            },
2982            other => panic!("expected if, got {:?}", other),
2983        }
2984    }
2985
2986    #[test]
2987    fn parse_comparison_greater_equal() {
2988        let result = parse("if [[ ${X} -ge 1 ]]; then echo; fi").unwrap();
2989        match &result.statements[0] {
2990            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2991                Expr::Test(test) => match test.as_ref() {
2992                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumGtEq),
2993                    other => panic!("expected comparison, got {:?}", other),
2994                },
2995                other => panic!("expected test expr, got {:?}", other),
2996            },
2997            other => panic!("expected if, got {:?}", other),
2998        }
2999    }
3000
3001    #[test]
3002    fn parse_regex_match() {
3003        let result = parse(r#"if [[ ${NAME} =~ "^test" ]]; then echo; fi"#).unwrap();
3004        match &result.statements[0] {
3005            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3006                Expr::Test(test) => match test.as_ref() {
3007                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::Match),
3008                    other => panic!("expected comparison, got {:?}", other),
3009                },
3010                other => panic!("expected test expr, got {:?}", other),
3011            },
3012            other => panic!("expected if, got {:?}", other),
3013        }
3014    }
3015
3016    #[test]
3017    fn parse_regex_not_match() {
3018        let result = parse(r#"if [[ ${NAME} !~ "^test" ]]; then echo; fi"#).unwrap();
3019        match &result.statements[0] {
3020            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3021                Expr::Test(test) => match test.as_ref() {
3022                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotMatch),
3023                    other => panic!("expected comparison, got {:?}", other),
3024                },
3025                other => panic!("expected test expr, got {:?}", other),
3026            },
3027            other => panic!("expected if, got {:?}", other),
3028        }
3029    }
3030
3031    #[test]
3032    fn parse_string_interpolation() {
3033        let result = parse(r#"echo "Hello ${NAME}!""#).unwrap();
3034        match &result.statements[0] {
3035            Stmt::Command(cmd) => match &cmd.args[0] {
3036                Arg::Positional(Expr::Interpolated(parts)) => {
3037                    assert_eq!(parts.len(), 3);
3038                    match &parts[0] {
3039                        StringPart::Literal(s) => assert_eq!(s, "Hello "),
3040                        other => panic!("expected literal, got {:?}", other),
3041                    }
3042                    match &parts[1] {
3043                        StringPart::Var(path) => {
3044                            assert_eq!(path.segments.len(), 1);
3045                            let VarSegment::Field(name) = &path.segments[0];
3046                            assert_eq!(name, "NAME");
3047                        }
3048                        other => panic!("expected var, got {:?}", other),
3049                    }
3050                    match &parts[2] {
3051                        StringPart::Literal(s) => assert_eq!(s, "!"),
3052                        other => panic!("expected literal, got {:?}", other),
3053                    }
3054                }
3055                other => panic!("expected interpolated, got {:?}", other),
3056            },
3057            other => panic!("expected command, got {:?}", other),
3058        }
3059    }
3060
3061    #[test]
3062    fn parse_string_interpolation_multiple_vars() {
3063        let result = parse(r#"echo "${FIRST} and ${SECOND}""#).unwrap();
3064        match &result.statements[0] {
3065            Stmt::Command(cmd) => match &cmd.args[0] {
3066                Arg::Positional(Expr::Interpolated(parts)) => {
3067                    // ${FIRST} + " and " + ${SECOND} = 3 parts
3068                    assert_eq!(parts.len(), 3);
3069                    assert!(matches!(&parts[0], StringPart::Var(_)));
3070                    assert!(matches!(&parts[1], StringPart::Literal(_)));
3071                    assert!(matches!(&parts[2], StringPart::Var(_)));
3072                }
3073                other => panic!("expected interpolated, got {:?}", other),
3074            },
3075            other => panic!("expected command, got {:?}", other),
3076        }
3077    }
3078
3079    #[test]
3080    fn parse_empty_function_body() {
3081        let result = parse("empty() { }").unwrap();
3082        match &result.statements[0] {
3083            Stmt::ToolDef(t) => {
3084                assert_eq!(t.name, "empty");
3085                assert!(t.params.is_empty());
3086                assert!(t.body.is_empty());
3087            }
3088            other => panic!("expected function def, got {:?}", other),
3089        }
3090    }
3091
3092    #[test]
3093    fn parse_bash_style_function() {
3094        let result = parse("function greet { echo hello }").unwrap();
3095        match &result.statements[0] {
3096            Stmt::ToolDef(t) => {
3097                assert_eq!(t.name, "greet");
3098                assert!(t.params.is_empty());
3099                assert_eq!(t.body.len(), 1);
3100            }
3101            other => panic!("expected function def, got {:?}", other),
3102        }
3103    }
3104
3105    #[test]
3106    fn parse_comparison_string_values() {
3107        let result = parse(r#"if [[ ${STATUS} == "ok" ]]; then echo; fi"#).unwrap();
3108        match &result.statements[0] {
3109            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3110                Expr::Test(test) => match test.as_ref() {
3111                    TestExpr::Comparison { left, op, right } => {
3112                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
3113                        assert_eq!(*op, TestCmpOp::Eq);
3114                        match right.as_ref() {
3115                            Expr::Literal(Value::String(s)) => assert_eq!(s, "ok"),
3116                            other => panic!("expected string, got {:?}", other),
3117                        }
3118                    }
3119                    other => panic!("expected comparison, got {:?}", other),
3120                },
3121                other => panic!("expected test expr, got {:?}", other),
3122            },
3123            other => panic!("expected if, got {:?}", other),
3124        }
3125    }
3126
3127    // ═══════════════════════════════════════════════════════════════════════════
3128    // Command Substitution Tests
3129    // ═══════════════════════════════════════════════════════════════════════════
3130
3131    #[test]
3132    fn parse_cmd_subst_simple() {
3133        let result = parse("X=$(echo)").unwrap();
3134        match &result.statements[0] {
3135            Stmt::Assignment(a) => {
3136                assert_eq!(a.name, "X");
3137                assert_eq!(subst_cmd(&a.value).name, "echo");
3138            }
3139            other => panic!("expected assignment, got {:?}", other),
3140        }
3141    }
3142
3143    #[test]
3144    fn parse_cmd_subst_with_args() {
3145        let result = parse(r#"X=$(fetch url="http://example.com")"#).unwrap();
3146        match &result.statements[0] {
3147            Stmt::Assignment(a) => {
3148                let cmd = subst_cmd(&a.value);
3149                assert_eq!(cmd.name, "fetch");
3150                assert_eq!(cmd.args.len(), 1);
3151                match &cmd.args[0] {
3152                    Arg::WordAssign { key, .. } => assert_eq!(key, "url"),
3153                    other => panic!("expected WordAssign arg, got {:?}", other),
3154                }
3155            }
3156            other => panic!("expected assignment, got {:?}", other),
3157        }
3158    }
3159
3160    #[test]
3161    fn parse_cmd_subst_pipeline() {
3162        let result = parse("X=$(cat file | grep pattern)").unwrap();
3163        match &result.statements[0] {
3164            Stmt::Assignment(a) => {
3165                let pipeline = subst_pipeline(&a.value);
3166                assert_eq!(pipeline.commands.len(), 2);
3167                assert_eq!(pipeline.commands[0].name, "cat");
3168                assert_eq!(pipeline.commands[1].name, "grep");
3169            }
3170            other => panic!("expected assignment, got {:?}", other),
3171        }
3172    }
3173
3174    #[test]
3175    fn parse_cmd_subst_in_condition() {
3176        // Shell-compatible: conditions are commands, not command substitutions
3177        let result = parse("if kaish-validate; then echo; fi").unwrap();
3178        match &result.statements[0] {
3179            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3180                Expr::Command(cmd) => {
3181                    assert_eq!(cmd.name, "kaish-validate");
3182                }
3183                other => panic!("expected command, got {:?}", other),
3184            },
3185            other => panic!("expected if, got {:?}", other),
3186        }
3187    }
3188
3189    // ═══════════════════════════════════════════════════════════════════════════
3190    // Inline env-prefix (`NAME=value command`) Tests
3191    // ═══════════════════════════════════════════════════════════════════════════
3192
3193    #[test]
3194    fn parse_env_prefix_single() {
3195        let result = parse("FOO=bar echo hi").unwrap();
3196        match &result.statements[0] {
3197            Stmt::EnvScoped { assignments, body } => {
3198                assert_eq!(assignments.len(), 1);
3199                assert_eq!(assignments[0].name, "FOO");
3200                assert!(!assignments[0].local);
3201                match body.as_ref() {
3202                    Stmt::Command(cmd) => assert_eq!(cmd.name, "echo"),
3203                    other => panic!("expected command body, got {other:?}"),
3204                }
3205            }
3206            other => panic!("expected env-scoped, got {other:?}"),
3207        }
3208    }
3209
3210    #[test]
3211    fn parse_env_prefix_multiple() {
3212        let result = parse("A=1 B=2 run").unwrap();
3213        match &result.statements[0] {
3214            Stmt::EnvScoped { assignments, body } => {
3215                assert_eq!(assignments.len(), 2);
3216                assert_eq!(assignments[0].name, "A");
3217                assert_eq!(assignments[1].name, "B");
3218                assert!(matches!(body.as_ref(), Stmt::Command(c) if c.name == "run"));
3219            }
3220            other => panic!("expected env-scoped, got {other:?}"),
3221        }
3222    }
3223
3224    #[test]
3225    fn parse_bare_assignment_is_not_env_scoped() {
3226        // No command follows — stays a plain (persistent) assignment.
3227        let result = parse("FOO=bar").unwrap();
3228        assert!(
3229            matches!(&result.statements[0], Stmt::Assignment(a) if a.name == "FOO"),
3230            "got {:?}",
3231            result.statements[0]
3232        );
3233    }
3234
3235    #[test]
3236    fn parse_assignment_then_and_chain_does_not_over_capture() {
3237        // `FOO=bar && echo` is a (persistent) assignment chained with `&&`, NOT
3238        // an env-prefixed command — the `&&` is not a command for the prefix.
3239        let result = parse("FOO=bar && echo hi").unwrap();
3240        match &result.statements[0] {
3241            Stmt::AndChain { left, right } => {
3242                assert!(matches!(left.as_ref(), Stmt::Assignment(a) if a.name == "FOO"));
3243                assert!(matches!(right.as_ref(), Stmt::Command(c) if c.name == "echo"));
3244            }
3245            other => panic!("expected and-chain, got {other:?}"),
3246        }
3247    }
3248
3249    #[test]
3250    fn parse_env_prefix_pipeline_body() {
3251        let result = parse("FOO=bar cat | grep x").unwrap();
3252        match &result.statements[0] {
3253            Stmt::EnvScoped { assignments, body } => {
3254                assert_eq!(assignments[0].name, "FOO");
3255                match body.as_ref() {
3256                    Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 2),
3257                    other => panic!("expected pipeline body, got {other:?}"),
3258                }
3259            }
3260            other => panic!("expected env-scoped, got {other:?}"),
3261        }
3262    }
3263
3264    // ═══════════════════════════════════════════════════════════════════════════
3265    // Argv-splat rejection (adjacent unquoted words — docs/issues.md #2)
3266    // ═══════════════════════════════════════════════════════════════════════════
3267
3268    fn parse_err_message(source: &str) -> String {
3269        parse(source)
3270            .expect_err("expected a parse error")
3271            .iter()
3272            .map(|e| e.message.clone())
3273            .collect::<Vec<_>>()
3274            .join(" ")
3275    }
3276
3277    #[test]
3278    fn argv_splat_cmdsubst_glued_to_path_is_rejected() {
3279        // `/tmp/$(echo x).txt` lexes as 3 adjacent tokens; unquoted it would
3280        // silently splat into 3 args. Reject with a quote-it hint.
3281        let msg = parse_err_message("echo /tmp/$(echo x).txt");
3282        assert!(msg.contains("quote"), "expected quote hint, got: {msg}");
3283    }
3284
3285    #[test]
3286    fn argv_splat_var_glued_to_path_is_rejected() {
3287        assert!(parse("echo $dir/out.txt").is_err());
3288    }
3289
3290    #[test]
3291    fn argv_splat_three_way_glue_is_rejected() {
3292        assert!(parse("echo foo$(echo bar)baz").is_err());
3293    }
3294
3295    #[test]
3296    fn argv_splat_quoted_word_is_accepted() {
3297        // The supported idiom: quote the whole interpolated word.
3298        assert!(parse(r#"echo "/tmp/$(echo x).txt""#).is_ok());
3299        assert!(parse(r#"echo "$dir/out.txt""#).is_ok());
3300    }
3301
3302    #[test]
3303    fn argv_single_token_words_are_not_splat() {
3304        // These lex as a single token each — no adjacency, must still parse.
3305        assert!(parse("echo file.txt").is_ok(), "file.txt");
3306        assert!(parse("echo a.b.c").is_ok(), "a.b.c");
3307        assert!(parse("echo v1.2.3").is_ok(), "v1.2.3");
3308    }
3309
3310    #[test]
3311    fn argv_spaced_words_are_not_splat() {
3312        assert!(parse("echo a b c").is_ok());
3313        assert!(parse("echo /tmp/x $(echo y)").is_ok());
3314    }
3315
3316    #[test]
3317    fn parse_cmd_subst_in_command_arg() {
3318        let result = parse("echo $(whoami)").unwrap();
3319        match &result.statements[0] {
3320            Stmt::Command(cmd) => {
3321                assert_eq!(cmd.name, "echo");
3322                match &cmd.args[0] {
3323                    Arg::Positional(expr) => {
3324                        assert_eq!(subst_cmd(expr).name, "whoami");
3325                    }
3326                    other => panic!("expected command subst, got {:?}", other),
3327                }
3328            }
3329            other => panic!("expected command, got {:?}", other),
3330        }
3331    }
3332
3333    // ═══════════════════════════════════════════════════════════════════════════
3334    // Logical Operator Tests (&&, ||)
3335    // ═══════════════════════════════════════════════════════════════════════════
3336
3337    #[test]
3338    fn parse_condition_and() {
3339        // Shell-compatible: commands chained with &&
3340        let result = parse("if check-a && check-b; then echo; fi").unwrap();
3341        match &result.statements[0] {
3342            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3343                Expr::BinaryOp { left, op, right } => {
3344                    assert_eq!(*op, BinaryOp::And);
3345                    assert!(matches!(left.as_ref(), Expr::Command(_)));
3346                    assert!(matches!(right.as_ref(), Expr::Command(_)));
3347                }
3348                other => panic!("expected binary op, got {:?}", other),
3349            },
3350            other => panic!("expected if, got {:?}", other),
3351        }
3352    }
3353
3354    #[test]
3355    fn parse_condition_or() {
3356        let result = parse("if try-a || try-b; then echo; fi").unwrap();
3357        match &result.statements[0] {
3358            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3359                Expr::BinaryOp { left, op, right } => {
3360                    assert_eq!(*op, BinaryOp::Or);
3361                    assert!(matches!(left.as_ref(), Expr::Command(_)));
3362                    assert!(matches!(right.as_ref(), Expr::Command(_)));
3363                }
3364                other => panic!("expected binary op, got {:?}", other),
3365            },
3366            other => panic!("expected if, got {:?}", other),
3367        }
3368    }
3369
3370    #[test]
3371    fn parse_condition_and_or_precedence() {
3372        // a && b || c should parse as (a && b) || c
3373        let result = parse("if cmd-a && cmd-b || cmd-c; then echo; fi").unwrap();
3374        match &result.statements[0] {
3375            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3376                Expr::BinaryOp { left, op, right } => {
3377                    // Top level should be ||
3378                    assert_eq!(*op, BinaryOp::Or);
3379                    // Left side should be && expression
3380                    match left.as_ref() {
3381                        Expr::BinaryOp { op: inner_op, .. } => {
3382                            assert_eq!(*inner_op, BinaryOp::And);
3383                        }
3384                        other => panic!("expected binary op (&&), got {:?}", other),
3385                    }
3386                    // Right side should be command
3387                    assert!(matches!(right.as_ref(), Expr::Command(_)));
3388                }
3389                other => panic!("expected binary op, got {:?}", other),
3390            },
3391            other => panic!("expected if, got {:?}", other),
3392        }
3393    }
3394
3395    #[test]
3396    fn parse_condition_multiple_and() {
3397        let result = parse("if cmd-a && cmd-b && cmd-c; then echo; fi").unwrap();
3398        match &result.statements[0] {
3399            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3400                Expr::BinaryOp { left, op, .. } => {
3401                    assert_eq!(*op, BinaryOp::And);
3402                    // Left side should also be &&
3403                    match left.as_ref() {
3404                        Expr::BinaryOp { op: inner_op, .. } => {
3405                            assert_eq!(*inner_op, BinaryOp::And);
3406                        }
3407                        other => panic!("expected binary op, got {:?}", other),
3408                    }
3409                }
3410                other => panic!("expected binary op, got {:?}", other),
3411            },
3412            other => panic!("expected if, got {:?}", other),
3413        }
3414    }
3415
3416    #[test]
3417    fn parse_condition_mixed_comparison_and_logical() {
3418        // Shell-compatible: use [[ ]] for comparisons, && to chain them
3419        let result = parse("if [[ ${X} == 5 ]] && [[ ${Y} -gt 0 ]]; then echo; fi").unwrap();
3420        match &result.statements[0] {
3421            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3422                Expr::BinaryOp { left, op, right } => {
3423                    assert_eq!(*op, BinaryOp::And);
3424                    // Left: [[ ${X} == 5 ]]
3425                    match left.as_ref() {
3426                        Expr::Test(test) => match test.as_ref() {
3427                            TestExpr::Comparison { op: left_op, .. } => {
3428                                assert_eq!(*left_op, TestCmpOp::Eq);
3429                            }
3430                            other => panic!("expected comparison, got {:?}", other),
3431                        },
3432                        other => panic!("expected test, got {:?}", other),
3433                    }
3434                    // Right: [[ ${Y} -gt 0 ]]
3435                    match right.as_ref() {
3436                        Expr::Test(test) => match test.as_ref() {
3437                            TestExpr::Comparison { op: right_op, .. } => {
3438                                assert_eq!(*right_op, TestCmpOp::NumGt);
3439                            }
3440                            other => panic!("expected comparison, got {:?}", other),
3441                        },
3442                        other => panic!("expected test, got {:?}", other),
3443                    }
3444                }
3445                other => panic!("expected binary op, got {:?}", other),
3446            },
3447            other => panic!("expected if, got {:?}", other),
3448        }
3449    }
3450
3451    // ═══════════════════════════════════════════════════════════════════════════
3452    // Integration Tests - Complete Scripts
3453    // ═══════════════════════════════════════════════════════════════════════════
3454
3455    /// Level 1: Linear script using core features
3456    #[test]
3457    fn script_level1_linear() {
3458        let script = r#"
3459NAME="kaish"
3460VERSION=1
3461TIMEOUT=30
3462ITEMS="alpha beta gamma"
3463
3464echo "Starting ${NAME} v${VERSION}"
3465cat "README.md" | grep pattern="install" | head count=5
3466fetch url="https://api.example.com/status" timeout=${TIMEOUT} > "/tmp/status.json"
3467echo "Items: ${ITEMS}"
3468"#;
3469        let result = parse(script).unwrap();
3470        let stmts: Vec<_> = result.statements.iter()
3471            .filter(|s| !matches!(s, Stmt::Empty))
3472            .collect();
3473
3474        assert_eq!(stmts.len(), 8);
3475        assert!(matches!(stmts[0], Stmt::Assignment(_)));  // set NAME
3476        assert!(matches!(stmts[1], Stmt::Assignment(_)));  // set VERSION
3477        assert!(matches!(stmts[2], Stmt::Assignment(_)));  // set TIMEOUT
3478        assert!(matches!(stmts[3], Stmt::Assignment(_)));  // set ITEMS
3479        assert!(matches!(stmts[4], Stmt::Command(_)));     // echo "Starting..."
3480        assert!(matches!(stmts[5], Stmt::Pipeline(_)));    // cat | grep | head
3481        assert!(matches!(stmts[6], Stmt::Pipeline(_)));    // fetch (with redirect - Pipeline since it has redirects)
3482        assert!(matches!(stmts[7], Stmt::Command(_)));     // echo "Items: ${ITEMS}"
3483    }
3484
3485    /// Level 2: Script with conditionals (shell-compatible syntax)
3486    #[test]
3487    fn script_level2_branching() {
3488        let script = r#"
3489RESULT=$(kaish-validate "input.json")
3490
3491if [[ ${RESULT.ok} == true ]]; then
3492    echo "Validation passed"
3493    process "input.json" > "output.json"
3494else
3495    echo "Validation failed: ${RESULT.err}"
3496fi
3497
3498if [[ ${COUNT} -gt 0 ]] && [[ ${COUNT} -le 100 ]]; then
3499    echo "Count in valid range"
3500fi
3501
3502if check-network || check-cache; then
3503    fetch url=${URL}
3504fi
3505"#;
3506        let result = parse(script).unwrap();
3507        let stmts: Vec<_> = result.statements.iter()
3508            .filter(|s| !matches!(s, Stmt::Empty))
3509            .collect();
3510
3511        assert_eq!(stmts.len(), 4);
3512
3513        // First: assignment with command substitution
3514        match stmts[0] {
3515            Stmt::Assignment(a) => {
3516                assert_eq!(a.name, "RESULT");
3517                assert!(matches!(&a.value, Expr::CommandSubst(_)));
3518            }
3519            other => panic!("expected assignment, got {:?}", other),
3520        }
3521
3522        // Second: if/else
3523        match stmts[1] {
3524            Stmt::If(if_stmt) => {
3525                assert_eq!(if_stmt.then_branch.len(), 2);
3526                assert!(if_stmt.else_branch.is_some());
3527                assert_eq!(if_stmt.else_branch.as_ref().unwrap().len(), 1);
3528            }
3529            other => panic!("expected if, got {:?}", other),
3530        }
3531
3532        // Third: if with && condition
3533        match stmts[2] {
3534            Stmt::If(if_stmt) => {
3535                match if_stmt.condition.as_ref() {
3536                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
3537                    other => panic!("expected && condition, got {:?}", other),
3538                }
3539            }
3540            other => panic!("expected if, got {:?}", other),
3541        }
3542
3543        // Fourth: if with || of commands
3544        match stmts[3] {
3545            Stmt::If(if_stmt) => {
3546                match if_stmt.condition.as_ref() {
3547                    Expr::BinaryOp { op, left, right } => {
3548                        assert_eq!(*op, BinaryOp::Or);
3549                        assert!(matches!(left.as_ref(), Expr::Command(_)));
3550                        assert!(matches!(right.as_ref(), Expr::Command(_)));
3551                    }
3552                    other => panic!("expected || condition, got {:?}", other),
3553                }
3554            }
3555            other => panic!("expected if, got {:?}", other),
3556        }
3557    }
3558
3559    /// Level 3: Script with loops and function definitions
3560    #[test]
3561    fn script_level3_loops_and_functions() {
3562        let script = r#"
3563greet() {
3564    echo "Hello, $1!"
3565}
3566
3567fetch_all() {
3568    for URL in $@; do
3569        fetch url=${URL}
3570    done
3571}
3572
3573USERS="alice bob charlie"
3574
3575for USER in ${USERS}; do
3576    greet ${USER}
3577    if [[ ${USER} == "bob" ]]; then
3578        echo "Found Bob!"
3579    fi
3580done
3581
3582long-running-task &
3583"#;
3584        let result = parse(script).unwrap();
3585        let stmts: Vec<_> = result.statements.iter()
3586            .filter(|s| !matches!(s, Stmt::Empty))
3587            .collect();
3588
3589        assert_eq!(stmts.len(), 5);
3590
3591        // First function def
3592        match stmts[0] {
3593            Stmt::ToolDef(t) => {
3594                assert_eq!(t.name, "greet");
3595                assert!(t.params.is_empty());
3596            }
3597            other => panic!("expected function def, got {:?}", other),
3598        }
3599
3600        // Second function def with nested for loop
3601        match stmts[1] {
3602            Stmt::ToolDef(t) => {
3603                assert_eq!(t.name, "fetch_all");
3604                assert_eq!(t.body.len(), 1);
3605                assert!(matches!(&t.body[0], Stmt::For(_)));
3606            }
3607            other => panic!("expected function def, got {:?}", other),
3608        }
3609
3610        // Assignment
3611        assert!(matches!(stmts[2], Stmt::Assignment(_)));
3612
3613        // For loop with nested if
3614        match stmts[3] {
3615            Stmt::For(f) => {
3616                assert_eq!(f.variable, "USER");
3617                assert_eq!(f.body.len(), 2);
3618                assert!(matches!(&f.body[0], Stmt::Command(_)));
3619                assert!(matches!(&f.body[1], Stmt::If(_)));
3620            }
3621            other => panic!("expected for loop, got {:?}", other),
3622        }
3623
3624        // Background job
3625        match stmts[4] {
3626            Stmt::Pipeline(p) => {
3627                assert!(p.background);
3628                assert_eq!(p.commands[0].name, "long-running-task");
3629            }
3630            other => panic!("expected pipeline (background), got {:?}", other),
3631        }
3632    }
3633
3634    /// Level 4: Complex nested control flow (shell-compatible syntax)
3635    #[test]
3636    fn script_level4_complex_nesting() {
3637        let script = r#"
3638RESULT=$(cat "config.json" | jq query=".servers" | kaish-validate schema="server-schema.json")
3639
3640if ping host=${HOST} && [[ ${RESULT} == true ]]; then
3641    for SERVER in "prod-1 prod-2"; do
3642        deploy target=${SERVER} port=8080
3643        if [[ $? -ne 0 ]]; then
3644            notify channel="ops" message="Deploy failed"
3645        fi
3646    done
3647fi
3648"#;
3649        let result = parse(script).unwrap();
3650        let stmts: Vec<_> = result.statements.iter()
3651            .filter(|s| !matches!(s, Stmt::Empty))
3652            .collect();
3653
3654        assert_eq!(stmts.len(), 2);
3655
3656        // Command substitution with pipeline
3657        match stmts[0] {
3658            Stmt::Assignment(a) => {
3659                assert_eq!(a.name, "RESULT");
3660                assert_eq!(subst_pipeline(&a.value).commands.len(), 3);
3661            }
3662            other => panic!("expected assignment, got {:?}", other),
3663        }
3664
3665        // If with && condition, containing for loop with nested if
3666        match stmts[1] {
3667            Stmt::If(if_stmt) => {
3668                match if_stmt.condition.as_ref() {
3669                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
3670                    other => panic!("expected && condition, got {:?}", other),
3671                }
3672                assert_eq!(if_stmt.then_branch.len(), 1);
3673                match &if_stmt.then_branch[0] {
3674                    Stmt::For(f) => {
3675                        assert_eq!(f.body.len(), 2);
3676                        assert!(matches!(&f.body[1], Stmt::If(_)));
3677                    }
3678                    other => panic!("expected for in if body, got {:?}", other),
3679                }
3680            }
3681            other => panic!("expected if, got {:?}", other),
3682        }
3683    }
3684
3685    /// Level 5: Edge cases and parser stress test
3686    #[test]
3687    fn script_level5_edge_cases() {
3688        let script = r#"
3689echo ""
3690echo "quotes: \"nested\" here"
3691echo "escapes: \n\t\r\\"
3692echo "unicode: \u2764"
3693
3694X=-99999
3695Y=3.14159265358979
3696Z=-0.001
3697
3698cmd a=1 b="two" c=true d=false e=null
3699
3700if true; then
3701    if false; then
3702        echo "inner"
3703    else
3704        echo "else"
3705    fi
3706fi
3707
3708for I in "a b c"; do
3709    echo ${I}
3710done
3711
3712no_params() {
3713    echo "no params"
3714}
3715
3716function all_args {
3717    echo "args: $@"
3718}
3719
3720a | b | c | d | e &
3721cmd 2> "errors.log"
3722cmd &> "all.log"
3723cmd >> "append.log"
3724cmd < "input.txt"
3725"#;
3726        let result = parse(script).unwrap();
3727        let stmts: Vec<_> = result.statements.iter()
3728            .filter(|s| !matches!(s, Stmt::Empty))
3729            .collect();
3730
3731        // Verify it parses without error
3732        assert!(stmts.len() >= 10, "expected many statements, got {}", stmts.len());
3733
3734        // Background pipeline
3735        let bg_stmt = stmts.iter().find(|s| matches!(s, Stmt::Pipeline(p) if p.background));
3736        assert!(bg_stmt.is_some(), "expected background pipeline");
3737
3738        match bg_stmt.unwrap() {
3739            Stmt::Pipeline(p) => {
3740                assert_eq!(p.commands.len(), 5);
3741                assert!(p.background);
3742            }
3743            _ => unreachable!(),
3744        }
3745    }
3746
3747    // ═══════════════════════════════════════════════════════════════════════════
3748    // Edge Case Tests: Ambiguity Resolution
3749    // ═══════════════════════════════════════════════════════════════════════════
3750
3751    #[test]
3752    fn parse_keyword_as_variable_rejected() {
3753        // Keywords CANNOT be used as variable names - this is intentional
3754        // to avoid ambiguity. Use different names instead.
3755        let result = parse(r#"if="value""#);
3756        assert!(result.is_err(), "if= should fail - 'if' is a keyword");
3757
3758        let result = parse("while=true");
3759        assert!(result.is_err(), "while= should fail - 'while' is a keyword");
3760
3761        let result = parse(r#"then="next""#);
3762        assert!(result.is_err(), "then= should fail - 'then' is a keyword");
3763    }
3764
3765    #[test]
3766    fn parse_set_command_with_flag() {
3767        let result = parse("set -e");
3768        assert!(result.is_ok(), "failed to parse set -e: {:?}", result);
3769        let program = result.unwrap();
3770        match &program.statements[0] {
3771            Stmt::Command(cmd) => {
3772                assert_eq!(cmd.name, "set");
3773                assert_eq!(cmd.args.len(), 1);
3774                match &cmd.args[0] {
3775                    Arg::ShortFlag(f) => assert_eq!(f, "e"),
3776                    other => panic!("expected ShortFlag, got {:?}", other),
3777                }
3778            }
3779            other => panic!("expected Command, got {:?}", other),
3780        }
3781    }
3782
3783    #[test]
3784    fn parse_set_command_no_args() {
3785        let result = parse("set");
3786        assert!(result.is_ok(), "failed to parse set: {:?}", result);
3787        let program = result.unwrap();
3788        match &program.statements[0] {
3789            Stmt::Command(cmd) => {
3790                assert_eq!(cmd.name, "set");
3791                assert_eq!(cmd.args.len(), 0);
3792            }
3793            other => panic!("expected Command, got {:?}", other),
3794        }
3795    }
3796
3797    #[test]
3798    fn parse_set_assignment_vs_command() {
3799        // X=5 should be assignment
3800        let result = parse("X=5");
3801        assert!(result.is_ok());
3802        let program = result.unwrap();
3803        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
3804
3805        // set -e should be command
3806        let result = parse("set -e");
3807        assert!(result.is_ok());
3808        let program = result.unwrap();
3809        assert!(matches!(&program.statements[0], Stmt::Command(_)));
3810    }
3811
3812    #[test]
3813    fn parse_true_as_command() {
3814        let result = parse("true");
3815        assert!(result.is_ok());
3816        let program = result.unwrap();
3817        match &program.statements[0] {
3818            Stmt::Command(cmd) => assert_eq!(cmd.name, "true"),
3819            other => panic!("expected Command(true), got {:?}", other),
3820        }
3821    }
3822
3823    #[test]
3824    fn parse_false_as_command() {
3825        let result = parse("false");
3826        assert!(result.is_ok());
3827        let program = result.unwrap();
3828        match &program.statements[0] {
3829            Stmt::Command(cmd) => assert_eq!(cmd.name, "false"),
3830            other => panic!("expected Command(false), got {:?}", other),
3831        }
3832    }
3833
3834    #[test]
3835    fn parse_dot_as_source_alias() {
3836        let result = parse(". script.kai");
3837        assert!(result.is_ok(), "failed to parse . script.kai: {:?}", result);
3838        let program = result.unwrap();
3839        match &program.statements[0] {
3840            Stmt::Command(cmd) => {
3841                assert_eq!(cmd.name, ".");
3842                assert_eq!(cmd.args.len(), 1);
3843            }
3844            other => panic!("expected Command(.), got {:?}", other),
3845        }
3846    }
3847
3848    #[test]
3849    fn parse_source_command() {
3850        let result = parse("source utils.kai");
3851        assert!(result.is_ok(), "failed to parse source: {:?}", result);
3852        let program = result.unwrap();
3853        match &program.statements[0] {
3854            Stmt::Command(cmd) => {
3855                assert_eq!(cmd.name, "source");
3856                assert_eq!(cmd.args.len(), 1);
3857            }
3858            other => panic!("expected Command(source), got {:?}", other),
3859        }
3860    }
3861
3862    #[test]
3863    fn parse_test_expr_file_test() {
3864        // Paths must be quoted strings in test expressions
3865        let result = parse(r#"[[ -f "/path/file" ]]"#);
3866        assert!(result.is_ok(), "failed to parse file test: {:?}", result);
3867    }
3868
3869    #[test]
3870    fn parse_test_expr_comparison() {
3871        let result = parse(r#"[[ $X == "value" ]]"#);
3872        assert!(result.is_ok(), "failed to parse comparison test: {:?}", result);
3873    }
3874
3875    #[test]
3876    fn parse_test_expr_single_eq() {
3877        // = and == are equivalent inside [[ ]] (matching bash behavior)
3878        let result = parse(r#"[[ $X = "value" ]]"#);
3879        assert!(result.is_ok(), "failed to parse single-= comparison: {:?}", result);
3880        let program = result.unwrap();
3881        match &program.statements[0] {
3882            Stmt::Test(TestExpr::Comparison { op, .. }) => {
3883                assert_eq!(op, &TestCmpOp::Eq);
3884            }
3885            other => panic!("expected Test(Comparison), got {:?}", other),
3886        }
3887    }
3888
3889    #[test]
3890    fn parse_while_loop() {
3891        let result = parse("while true; do echo; done");
3892        assert!(result.is_ok(), "failed to parse while loop: {:?}", result);
3893        let program = result.unwrap();
3894        assert!(matches!(&program.statements[0], Stmt::While(_)));
3895    }
3896
3897    #[test]
3898    fn parse_break_with_level() {
3899        let result = parse("break 2");
3900        assert!(result.is_ok());
3901        let program = result.unwrap();
3902        match &program.statements[0] {
3903            Stmt::Break(Some(n)) => assert_eq!(*n, 2),
3904            other => panic!("expected Break(2), got {:?}", other),
3905        }
3906    }
3907
3908    #[test]
3909    fn parse_continue_with_level() {
3910        let result = parse("continue 3");
3911        assert!(result.is_ok());
3912        let program = result.unwrap();
3913        match &program.statements[0] {
3914            Stmt::Continue(Some(n)) => assert_eq!(*n, 3),
3915            other => panic!("expected Continue(3), got {:?}", other),
3916        }
3917    }
3918
3919    #[test]
3920    fn parse_exit_with_code() {
3921        let result = parse("exit 1");
3922        assert!(result.is_ok());
3923        let program = result.unwrap();
3924        match &program.statements[0] {
3925            Stmt::Exit(Some(expr)) => {
3926                match expr.as_ref() {
3927                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 1),
3928                    other => panic!("expected Int(1), got {:?}", other),
3929                }
3930            }
3931            other => panic!("expected Exit(1), got {:?}", other),
3932        }
3933    }
3934
3935    // ========================================================================
3936    // parse_interpolated_string_spanned — body-internal span tracking for
3937    // heredoc bodies. The byte offsets these tests pin become validator
3938    // issue spans via the HereDocBody → SpannedPart flow.
3939    // ========================================================================
3940
3941    #[test]
3942    fn spanned_literal_only_records_byte_range() {
3943        let parts = parse_interpolated_string_spanned("hello world", 100);
3944        assert_eq!(parts.len(), 1);
3945        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hello world"));
3946        assert_eq!(parts[0].offset, 100, "base_offset must propagate to literals");
3947        assert_eq!(parts[0].len, 11);
3948    }
3949
3950    #[test]
3951    fn spanned_braced_var_at_zero() {
3952        let parts = parse_interpolated_string_spanned("${X}", 50);
3953        assert_eq!(parts.len(), 1);
3954        assert!(matches!(&parts[0].part, StringPart::Var(_)));
3955        assert_eq!(parts[0].offset, 50);
3956        assert_eq!(parts[0].len, 4); // "${X}"
3957    }
3958
3959    #[test]
3960    fn spanned_simple_var_then_literal() {
3961        let parts = parse_interpolated_string_spanned("$X end", 10);
3962        assert_eq!(parts.len(), 2);
3963        assert!(matches!(&parts[0].part, StringPart::Var(_)));
3964        assert_eq!(parts[0].offset, 10);
3965        assert_eq!(parts[0].len, 2); // "$X"
3966        assert!(matches!(&parts[1].part, StringPart::Literal(s) if s == " end"));
3967        assert_eq!(parts[1].offset, 12);
3968        assert_eq!(parts[1].len, 4);
3969    }
3970
3971    #[test]
3972    fn spanned_mixed_literal_var_literal() {
3973        let parts = parse_interpolated_string_spanned("hi ${X} bye", 0);
3974        assert_eq!(parts.len(), 3);
3975        // "hi "
3976        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hi "));
3977        assert_eq!(parts[0].offset, 0);
3978        assert_eq!(parts[0].len, 3);
3979        // ${X}
3980        assert!(matches!(&parts[1].part, StringPart::Var(_)));
3981        assert_eq!(parts[1].offset, 3);
3982        assert_eq!(parts[1].len, 4);
3983        // " bye"
3984        assert!(matches!(&parts[2].part, StringPart::Literal(s) if s == " bye"));
3985        assert_eq!(parts[2].offset, 7);
3986        assert_eq!(parts[2].len, 4);
3987    }
3988
3989    #[test]
3990    fn spanned_positional_param() {
3991        let parts = parse_interpolated_string_spanned("$1 done", 0);
3992        assert_eq!(parts.len(), 2);
3993        assert!(matches!(&parts[0].part, StringPart::Positional(1)));
3994        assert_eq!(parts[0].offset, 0);
3995        assert_eq!(parts[0].len, 2); // "$1"
3996    }
3997
3998    #[test]
3999    fn spanned_special_dollar_dollar() {
4000        let parts = parse_interpolated_string_spanned("$$", 5);
4001        assert_eq!(parts.len(), 1);
4002        assert!(matches!(&parts[0].part, StringPart::CurrentPid));
4003        assert_eq!(parts[0].offset, 5);
4004        assert_eq!(parts[0].len, 2);
4005    }
4006
4007    #[test]
4008    fn spanned_arithmetic_marker_recognised() {
4009        // The lexer wraps arithmetic markers as ${__ARITH:expr__} for
4010        // interpolated heredocs; the spanned parser must produce
4011        // StringPart::Arithmetic for that shape.
4012        let parts = parse_interpolated_string_spanned("${__ARITH:1+2__}", 0);
4013        assert_eq!(parts.len(), 1);
4014        assert!(matches!(&parts[0].part, StringPart::Arithmetic(e) if e == "1+2"));
4015    }
4016
4017    #[test]
4018    fn spanned_default_separator_yields_var_with_default() {
4019        let parts = parse_interpolated_string_spanned("${X:-fallback}", 0);
4020        assert_eq!(parts.len(), 1);
4021        assert!(matches!(&parts[0].part, StringPart::VarWithDefault { .. }));
4022        assert_eq!(parts[0].offset, 0);
4023        assert_eq!(parts[0].len, 14); // "${X:-fallback}"
4024    }
4025
4026    #[test]
4027    fn spanned_no_dollar_runs_one_literal() {
4028        let parts = parse_interpolated_string_spanned("plain text only", 7);
4029        assert_eq!(parts.len(), 1);
4030        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "plain text only"));
4031        assert_eq!(parts[0].offset, 7);
4032        assert_eq!(parts[0].len, 15);
4033    }
4034
4035    #[test]
4036    fn spanned_matches_unspanned_part_count() {
4037        // Spanned and spanless variants must agree on the part decomposition.
4038        // Bug fixes in one should land in the other.
4039        let cases = [
4040            "hello",
4041            "$X",
4042            "${X}",
4043            "${X:-d}",
4044            "hi $A and $B",
4045            "$0 $1 $2",
4046            "$$ $? $#",
4047        ];
4048        for s in &cases {
4049            let unspanned = parse_interpolated_string(s);
4050            let spanned = parse_interpolated_string_spanned(s, 0);
4051            assert_eq!(
4052                unspanned.len(),
4053                spanned.len(),
4054                "part count differs for {:?}",
4055                s
4056            );
4057        }
4058    }
4059
4060    #[test]
4061    fn spanned_multibyte_utf8_before_var_uses_byte_offsets() {
4062        // 🚀 is 4 bytes in UTF-8 and a space is 1 byte, so the literal
4063        // prefix is 5 bytes total. `${X}` then sits at byte offset 5.
4064        // Right-by-luck for char-vs-byte indexing is precisely what this
4065        // test catches: if someone swaps .len_utf8() for 1, offset becomes 2.
4066        let parts = parse_interpolated_string_spanned("🚀 ${X}", 0);
4067        assert_eq!(parts.len(), 2);
4068
4069        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "🚀 "));
4070        assert_eq!(parts[0].offset, 0);
4071        assert_eq!(parts[0].len, 5, "literal len must be bytes, not chars");
4072
4073        assert!(matches!(&parts[1].part, StringPart::Var(_)));
4074        assert_eq!(parts[1].offset, 5, "var offset must be bytes, not chars");
4075        assert_eq!(parts[1].len, 4);
4076    }
4077
4078    #[test]
4079    fn spanned_multibyte_utf8_pure_literal_is_byte_length() {
4080        // "hello 世界 world": 5 + 1 + 6 (3 per CJK char) + 1 + 5 = 18 bytes,
4081        // 13 chars. The `len` field must report 18, not 13.
4082        let parts = parse_interpolated_string_spanned("hello 世界 world", 0);
4083        assert_eq!(parts.len(), 1);
4084        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hello 世界 world"));
4085        assert_eq!(parts[0].offset, 0);
4086        assert_eq!(parts[0].len, 18);
4087    }
4088
4089    #[test]
4090    fn spanned_escape_dollar_consumes_two_bytes_emits_one_char() {
4091        // `\$` is 2 source bytes and resolves to a single literal `$`.
4092        // The literal part's `len` should reflect the SOURCE length (2).
4093        let parts = parse_interpolated_string_spanned("\\$", 0);
4094        assert_eq!(parts.len(), 1);
4095        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "$"));
4096        assert_eq!(parts[0].offset, 0);
4097        assert_eq!(parts[0].len, 2, "len is source byte length, not rendered length");
4098    }
4099
4100    #[test]
4101    fn spanned_escape_backslash_collapses_pair_to_one() {
4102        let parts = parse_interpolated_string_spanned("\\\\", 0);
4103        assert_eq!(parts.len(), 1);
4104        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "\\"));
4105        assert_eq!(parts[0].len, 2);
4106    }
4107}
kaish_kernel/parser.rs

kaish_kernel/
parser.rs