kaish_kernel/
parser.rs

1//! Parser for kaish source code.
2//!
3//! Transforms a token stream from the lexer into an Abstract Syntax Tree.
4//! Uses chumsky for parser combinators with good error recovery.
5
6use crate::ast::{
7    Arg, Assignment, BinaryOp, CaseBranch, CaseStmt, Command, Expr, FileTestOp, ForLoop, IfStmt,
8    Pipeline, Program, Redirect, RedirectKind, SpannedPart, Stmt, StringPart, StringTestOp,
9    TestCmpOp, TestExpr, ToolDef, Value, VarPath, VarSegment, WhileLoop,
10};
11use crate::lexer::{self, HereDocData, Token};
12use chumsky::{input::ValueInput, prelude::*};
13
14/// Span type used throughout the parser.
15pub type Span = SimpleSpan;
16
17/// Parse a raw `${...}` string into an Expr.
18///
19/// Handles:
20/// - Special variables: `${?}` → LastExitCode, `${$}` → CurrentPid
21/// - Simple paths: `${VAR}`, `${VAR.field}`, `${VAR[0]}` → VarRef
22/// - Default values: `${VAR:-default}` → VarWithDefault (with nested expansion support)
23fn parse_var_expr(raw: &str) -> Expr {
24    // Special case: ${?} is the last exit code (same as $?)
25    if raw == "${?}" {
26        return Expr::LastExitCode;
27    }
28
29    // Special case: ${$} is the current PID (same as $$)
30    if raw == "${$}" {
31        return Expr::CurrentPid;
32    }
33
34    // Check for default value syntax: ${VAR:-default}
35    // Need to find :- that's not inside a nested ${...}
36    if let Some(colon_idx) = find_default_separator(raw) {
37        // Extract variable name (between ${ and :-)
38        let name = raw[2..colon_idx].to_string();
39        // Extract default value (between :- and }) and recursively parse it,
40        // after stripping shell quoting from the word (quotes are syntax).
41        let default_str = &raw[colon_idx + 2..raw.len() - 1];
42        let default = parse_interpolated_string(&unquote_default_word(default_str));
43        return Expr::VarWithDefault { name, default };
44    }
45
46    // Regular variable path
47    Expr::VarRef(parse_varpath(raw))
48}
49
/// Strip shell quoting from the WORD of a `${VAR:-WORD}` default before that
/// word is handed to the interpolation parser.
///
/// Quote characters delimit, they are not data: `${X:-"default"}` produces
/// `default`. The rules implemented here are:
/// - a `"` pair is removed and its contents are copied unchanged, so `$`
///   inside double quotes still interpolates downstream;
/// - a `'` pair is removed and every `$` inside it is replaced by the
///   `__KAISH_ESCAPED_DOLLAR__` marker, which `parse_interpolated_string`
///   later turns back into a plain `$` without expanding it;
/// - inside one kind of quote the other kind is ordinary text;
/// - anything outside quotes is copied as-is.
///
/// NOTE(review): the scan does not track nested `$(...)` / `${...}`, so quotes
/// inside a nested substitution are stripped as well — confirm that is intended.
fn unquote_default_word(word: &str) -> String {
    /// Which quote, if any, the scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Quoting {
        Bare,
        Single,
        Double,
    }

    let mut state = Quoting::Bare;
    let mut unquoted = String::with_capacity(word.len());

    for c in word.chars() {
        state = match (state, c) {
            // Opening delimiters are dropped and switch mode.
            (Quoting::Bare, '\'') => Quoting::Single,
            (Quoting::Bare, '"') => Quoting::Double,
            // The matching closing delimiter is dropped too.
            (Quoting::Single, '\'') | (Quoting::Double, '"') => Quoting::Bare,
            // `$` inside single quotes must stay literal downstream.
            (Quoting::Single, '$') => {
                unquoted.push_str("__KAISH_ESCAPED_DOLLAR__");
                Quoting::Single
            }
            // Everything else, including the "other" quote kind, is data.
            (current, other) => {
                unquoted.push(other);
                current
            }
        };
    }

    unquoted
}
76
/// Return the byte index of the first `:-` in `text` that sits at exactly
/// `target_depth` levels of `${...}` nesting.
///
/// Depth starts at 0, goes up by one at every `${` and down by one at every
/// `}` seen while the depth is above zero (a `}` at depth 0 is ignored).
/// The scan is byte-wise; every byte it tests for is ASCII, so a returned
/// index is always a valid `str` slice boundary.
fn find_separator_at_depth(text: &str, target_depth: usize) -> Option<usize> {
    let bytes = text.as_bytes();
    let mut depth = 0usize;

    for (i, &byte) in bytes.iter().enumerate() {
        let next = bytes.get(i + 1).copied();
        match (byte, next) {
            // The `{` of an opener is visited on the next iteration, where it
            // matches none of the arms, so no explicit skip is needed.
            (b'$', Some(b'{')) => depth += 1,
            (b'}', _) if depth > 0 => depth -= 1,
            (b':', Some(b'-')) if depth == target_depth => return Some(i),
            _ => {}
        }
    }

    None
}

/// Find the position of `:-` in a `${VAR:-default}` expression, accounting for nested `${...}`.
///
/// `raw` includes the outer `${` and `}`, so the separator of interest is the
/// first `:-` at nesting depth 1. Returns `None` when there is none.
fn find_default_separator(raw: &str) -> Option<usize> {
    find_separator_at_depth(raw, 1)
}

/// Find the position of `:-` in variable content (without outer braces), accounting for nested `${...}`.
///
/// Because the outer braces are already stripped, the separator of interest
/// is the first `:-` at nesting depth 0. Returns `None` when there is none.
fn find_default_separator_in_content(content: &str) -> Option<usize> {
    find_separator_at_depth(content, 0)
}
128
129/// Parse a raw `${...}` string into a VarPath.
130///
131/// Handles paths like `${VAR}` and `${VAR.field}`. Array indexing is not supported.
132fn parse_varpath(raw: &str) -> VarPath {
133    let segments_strs = lexer::parse_var_ref(raw).unwrap_or_default();
134    let segments = segments_strs
135        .into_iter()
136        .filter(|s| !s.starts_with('['))  // Skip index segments
137        .map(VarSegment::Field)
138        .collect();
139    VarPath { segments }
140}
141
142/// Parse an interpolated string like "Hello ${NAME}!" or "Hello $NAME!" into parts.
143/// Extract a pipeline from a statement if possible.
144fn stmt_to_pipeline(stmt: Stmt) -> Option<Pipeline> {
145    match stmt {
146        Stmt::Pipeline(p) => Some(p),
147        Stmt::Command(cmd) => Some(Pipeline {
148            commands: vec![cmd],
149            background: false,
150        }),
151        _ => None,
152    }
153}
154
155/// Parse an unquoted heredoc body's interpolation while tracking each part's
156/// byte offset in the source.
157///
158/// `base_offset` is added to every part's offset so callers can attribute
159/// positions to a larger source (e.g., heredoc body inside the original
160/// script). Returns parts in source order with offset+len populated.
161///
162/// **Heredoc-specific behaviour**: per POSIX, unquoted heredoc bodies process
163/// three backslash escapes — `\$` (suppress expansion), `\\` (literal
164/// backslash), and `\<newline>` (line continuation). All other backslashes
165/// are kept verbatim. This differs from [`parse_interpolated_string`], which
166/// is called on double-quoted string content where the lexer has already
167/// processed escapes via `__KAISH_ESCAPED_DOLLAR__`.
168///
169/// This sibling of [`parse_interpolated_string`] duplicates parsing logic
170/// for now; unifying them behind a position-tracking core is a follow-up
171/// cleanup. Behaviour MUST stay aligned for the non-escape paths — bug fixes
172/// for the shared interpolation logic here should land there as well.
173fn parse_interpolated_string_spanned(s: &str, base_offset: usize) -> Vec<SpannedPart> {
174    let s = s.replace("__KAISH_ESCAPED_DOLLAR__", "\x00DOLLAR\x00");
175
176    let chars_vec: Vec<char> = s.chars().collect();
177    let mut i = 0;
178    let mut pos: usize = 0;
179
180    let mut parts: Vec<SpannedPart> = Vec::new();
181    let mut current_text = String::new();
182    let mut current_text_start: usize = pos;
183
184    let push_literal =
185        |current_text: &mut String, start: &mut usize, end: usize, parts: &mut Vec<SpannedPart>| {
186            if !current_text.is_empty() {
187                parts.push(SpannedPart {
188                    part: StringPart::Literal(std::mem::take(current_text)),
189                    offset: base_offset + *start,
190                    len: end - *start,
191                });
192                *start = end;
193            }
194        };
195
196    while i < chars_vec.len() {
197        let ch = chars_vec[i];
198
199        if ch == '\x00' {
200            // Escaped-dollar marker: \x00 DOLLAR \x00 → literal '$'
201            let start = pos;
202            i += 1;
203            pos += 1;
204            let mut marker = String::new();
205            while let Some(&c) = chars_vec.get(i) {
206                if c == '\x00' {
207                    i += 1;
208                    pos += 1;
209                    break;
210                }
211                marker.push(c);
212                i += 1;
213                pos += c.len_utf8();
214            }
215            if marker == "DOLLAR" {
216                if current_text.is_empty() {
217                    current_text_start = start;
218                }
219                current_text.push('$');
220            }
221        } else if ch == '\\' {
222            // POSIX heredoc-body escape processing for unquoted heredocs.
223            // Only `\$`, `\\`, and `\<newline>` are escapes; everything else
224            // keeps the backslash verbatim. Each case advances `pos` by the
225            // bytes consumed from the source so subsequent part offsets stay
226            // anchored to original-source coordinates.
227            let next = chars_vec.get(i + 1).copied();
228            match next {
229                Some('$') => {
230                    if current_text.is_empty() {
231                        current_text_start = pos;
232                    }
233                    current_text.push('$');
234                    i += 2;
235                    pos += 2;
236                }
237                Some('\\') => {
238                    if current_text.is_empty() {
239                        current_text_start = pos;
240                    }
241                    current_text.push('\\');
242                    i += 2;
243                    pos += 2;
244                }
245                Some('\n') => {
246                    // Line continuation: consume both bytes, emit nothing.
247                    // The literal run resumes on the next line.
248                    i += 2;
249                    pos += 2;
250                    if current_text.is_empty() {
251                        current_text_start = pos;
252                    }
253                }
254                Some('\r') => {
255                    // \<CR> or \<CR><LF>: line continuation
256                    i += 2;
257                    pos += 2;
258                    if chars_vec.get(i) == Some(&'\n') {
259                        i += 1;
260                        pos += 1;
261                    }
262                    if current_text.is_empty() {
263                        current_text_start = pos;
264                    }
265                }
266                _ => {
267                    // Other backslash sequences: keep `\` literally,
268                    // consume only the backslash. The next iteration will
269                    // process the following char on its own merits.
270                    if current_text.is_empty() {
271                        current_text_start = pos;
272                    }
273                    current_text.push('\\');
274                    i += 1;
275                    pos += 1;
276                }
277            }
278        } else if ch == '$' {
279            // Possible expansion. Save current run before peeking ahead.
280            let part_start = pos;
281            let next = chars_vec.get(i + 1).copied();
282
283            if next == Some('(') && chars_vec.get(i + 2) != Some(&'(') {
284                // $(...) command substitution
285                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
286                i += 2; // consume "$("
287                pos += 2;
288                let mut cmd_content = String::new();
289                let mut depth = 1;
290                while let Some(&c) = chars_vec.get(i) {
291                    i += 1;
292                    pos += c.len_utf8();
293                    if c == '(' {
294                        depth += 1;
295                        cmd_content.push(c);
296                    } else if c == ')' {
297                        depth -= 1;
298                        if depth == 0 {
299                            break;
300                        }
301                        cmd_content.push(c);
302                    } else {
303                        cmd_content.push(c);
304                    }
305                }
306                let inserted = if let Ok(program) = parse(&cmd_content) {
307                    if let Some(stmt) = program.statements.first() {
308                        if let Some(pipeline) = stmt_to_pipeline(stmt.clone()) {
309                            parts.push(SpannedPart {
310                                part: StringPart::CommandSubst(pipeline),
311                                offset: base_offset + part_start,
312                                len: pos - part_start,
313                            });
314                            true
315                        } else {
316                            false
317                        }
318                    } else {
319                        false
320                    }
321                } else {
322                    false
323                };
324                if inserted {
325                    // Successfully pushed a CommandSubst; the next literal
326                    // run will start after the closing ')'.
327                    current_text_start = pos;
328                } else {
329                    // Fall back to literal text. The literal run starts at
330                    // the leading '$' (set above only if current_text was
331                    // empty); leave current_text_start alone otherwise so we
332                    // don't lose an in-progress run.
333                    if current_text.is_empty() {
334                        current_text_start = part_start;
335                    }
336                    current_text.push_str("$(");
337                    current_text.push_str(&cmd_content);
338                    current_text.push(')');
339                }
340            } else if next == Some('{') {
341                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
342                i += 2; // consume "${"
343                pos += 2;
344                let mut var_content = String::new();
345                let mut depth = 1;
346                while let Some(&c) = chars_vec.get(i) {
347                    i += 1;
348                    pos += c.len_utf8();
349                    if c == '{' && var_content.ends_with('$') {
350                        depth += 1;
351                        var_content.push(c);
352                    } else if c == '}' {
353                        depth -= 1;
354                        if depth == 0 {
355                            break;
356                        }
357                        var_content.push(c);
358                    } else {
359                        var_content.push(c);
360                    }
361                }
362                let part = if let Some(name) = var_content.strip_prefix('#') {
363                    StringPart::VarLength(name.to_string())
364                } else if var_content.starts_with("__ARITH:") && var_content.ends_with("__") {
365                    let expr = var_content
366                        .strip_prefix("__ARITH:")
367                        .and_then(|s| s.strip_suffix("__"))
368                        .unwrap_or("");
369                    StringPart::Arithmetic(expr.to_string())
370                } else if let Some(colon_idx) = find_default_separator_in_content(&var_content) {
371                    let name = var_content[..colon_idx].to_string();
372                    let default_str = &var_content[colon_idx + 2..];
373                    // Default value spans recursively kept relative to the
374                    // outer body — the inner parts get their own offsets via
375                    // the recursive call when needed. For now, the default's
376                    // parts are stored without spans (default is a Vec<StringPart>).
377                    let default = parse_interpolated_string(&unquote_default_word(default_str));
378                    StringPart::VarWithDefault { name, default }
379                } else {
380                    StringPart::Var(parse_varpath(&format!("${{{}}}", var_content)))
381                };
382                parts.push(SpannedPart {
383                    part,
384                    offset: base_offset + part_start,
385                    len: pos - part_start,
386                });
387                current_text_start = pos;
388            } else if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
389                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
390                i += 1; // consume '$'
391                pos += 1;
392                if let Some(&digit) = chars_vec.get(i) {
393                    let n = digit.to_digit(10).unwrap_or(0) as usize;
394                    i += 1;
395                    pos += digit.len_utf8();
396                    parts.push(SpannedPart {
397                        part: StringPart::Positional(n),
398                        offset: base_offset + part_start,
399                        len: pos - part_start,
400                    });
401                }
402                current_text_start = pos;
403            } else if next == Some('@') {
404                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
405                i += 2; // consume "$@"
406                pos += 2;
407                parts.push(SpannedPart {
408                    part: StringPart::AllArgs,
409                    offset: base_offset + part_start,
410                    len: pos - part_start,
411                });
412                current_text_start = pos;
413            } else if next == Some('#') {
414                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
415                i += 2; // consume "$#"
416                pos += 2;
417                parts.push(SpannedPart {
418                    part: StringPart::ArgCount,
419                    offset: base_offset + part_start,
420                    len: pos - part_start,
421                });
422                current_text_start = pos;
423            } else if next == Some('?') {
424                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
425                i += 2; // consume "$?"
426                pos += 2;
427                parts.push(SpannedPart {
428                    part: StringPart::LastExitCode,
429                    offset: base_offset + part_start,
430                    len: pos - part_start,
431                });
432                current_text_start = pos;
433            } else if next == Some('$') {
434                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
435                i += 2; // consume "$$"
436                pos += 2;
437                parts.push(SpannedPart {
438                    part: StringPart::CurrentPid,
439                    offset: base_offset + part_start,
440                    len: pos - part_start,
441                });
442                current_text_start = pos;
443            } else if next.map(|c| c.is_ascii_alphabetic() || c == '_').unwrap_or(false) {
444                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
445                i += 1; // consume '$'
446                pos += 1;
447                let mut var_name = String::new();
448                while let Some(&c) = chars_vec.get(i) {
449                    if c.is_ascii_alphanumeric() || c == '_' {
450                        var_name.push(c);
451                        i += 1;
452                        pos += c.len_utf8();
453                    } else {
454                        break;
455                    }
456                }
457                parts.push(SpannedPart {
458                    part: StringPart::Var(VarPath::simple(var_name)),
459                    offset: base_offset + part_start,
460                    len: pos - part_start,
461                });
462                current_text_start = pos;
463            } else {
464                // Bare $ — treat as literal
465                if current_text.is_empty() {
466                    current_text_start = pos;
467                }
468                current_text.push(ch);
469                i += 1;
470                pos += 1;
471            }
472        } else {
473            if current_text.is_empty() {
474                current_text_start = pos;
475            }
476            current_text.push(ch);
477            i += 1;
478            pos += ch.len_utf8();
479        }
480    }
481
482    push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
483
484    parts
485}
486
487fn parse_interpolated_string(s: &str) -> Vec<StringPart> {
488    // First, replace escaped dollar markers with a temporary placeholder
489    // The lexer uses __KAISH_ESCAPED_DOLLAR__ for \$ to prevent re-interpretation
490    let s = s.replace("__KAISH_ESCAPED_DOLLAR__", "\x00DOLLAR\x00");
491
492    let mut parts = Vec::new();
493    let mut current_text = String::new();
494    let mut chars = s.chars().peekable();
495
496    while let Some(ch) = chars.next() {
497        if ch == '\x00' {
498            // This is our escaped dollar marker - skip "DOLLAR" and the closing \x00
499            let mut marker = String::new();
500            while let Some(&c) = chars.peek() {
501                if c == '\x00' {
502                    chars.next(); // consume closing marker
503                    break;
504                }
505                if let Some(c) = chars.next() {
506                    marker.push(c);
507                }
508            }
509            if marker == "DOLLAR" {
510                current_text.push('$');
511            }
512        } else if ch == '$' {
513            // Check for command substitution $(...)
514            if chars.peek() == Some(&'(') {
515                // Command substitution $(...)
516                if !current_text.is_empty() {
517                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
518                }
519
520                // Consume the '('
521                chars.next();
522
523                // Collect until matching ')' accounting for nested parens
524                let mut cmd_content = String::new();
525                let mut paren_depth = 1;
526                for c in chars.by_ref() {
527                    if c == '(' {
528                        paren_depth += 1;
529                        cmd_content.push(c);
530                    } else if c == ')' {
531                        paren_depth -= 1;
532                        if paren_depth == 0 {
533                            break;
534                        }
535                        cmd_content.push(c);
536                    } else {
537                        cmd_content.push(c);
538                    }
539                }
540
541                // Parse the command content as a pipeline
542                // We need to use the main parser for this
543                if let Ok(program) = parse(&cmd_content) {
544                    // Extract the pipeline from the parsed result
545                    if let Some(stmt) = program.statements.first() {
546                        if let Some(pipeline) = stmt_to_pipeline(stmt.clone()) {
547                            parts.push(StringPart::CommandSubst(pipeline));
548                        } else {
549                            // If we can't extract a pipeline, treat as literal
550                            current_text.push_str("$(");
551                            current_text.push_str(&cmd_content);
552                            current_text.push(')');
553                        }
554                    }
555                } else {
556                    // Parse failed - treat as literal
557                    current_text.push_str("$(");
558                    current_text.push_str(&cmd_content);
559                    current_text.push(')');
560                }
561            } else if chars.peek() == Some(&'{') {
562                // Braced variable reference ${...}
563                if !current_text.is_empty() {
564                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
565                }
566
567                // Consume the '{'
568                chars.next();
569
570                // Collect until matching '}', tracking nesting depth
571                let mut var_content = String::new();
572                let mut depth = 1;
573                for c in chars.by_ref() {
574                    if c == '{' && var_content.ends_with('$') {
575                        depth += 1;
576                        var_content.push(c);
577                    } else if c == '}' {
578                        depth -= 1;
579                        if depth == 0 {
580                            break;
581                        }
582                        var_content.push(c);
583                    } else {
584                        var_content.push(c);
585                    }
586                }
587
588                // Parse the content for special syntax
589                let part = if let Some(name) = var_content.strip_prefix('#') {
590                    // Variable length: ${#VAR}
591                    StringPart::VarLength(name.to_string())
592                } else if var_content.starts_with("__ARITH:") && var_content.ends_with("__") {
593                    // Arithmetic expression: ${__ARITH:expr__}
594                    let expr = var_content
595                        .strip_prefix("__ARITH:")
596                        .and_then(|s| s.strip_suffix("__"))
597                        .unwrap_or("");
598                    StringPart::Arithmetic(expr.to_string())
599                } else if let Some(colon_idx) = find_default_separator_in_content(&var_content) {
600                    // Variable with default: ${VAR:-default} - recursively parse the default
601                    let name = var_content[..colon_idx].to_string();
602                    let default_str = &var_content[colon_idx + 2..];
603                    let default = parse_interpolated_string(&unquote_default_word(default_str));
604                    StringPart::VarWithDefault { name, default }
605                } else {
606                    // Regular variable: ${VAR} or ${VAR.field}
607                    StringPart::Var(parse_varpath(&format!("${{{}}}", var_content)))
608                };
609                parts.push(part);
610            } else if chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
611                // Positional parameter $0-$9
612                if !current_text.is_empty() {
613                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
614                }
615                if let Some(digit) = chars.next() {
616                    let n = digit.to_digit(10).unwrap_or(0) as usize;
617                    parts.push(StringPart::Positional(n));
618                }
619            } else if chars.peek() == Some(&'@') {
620                // All arguments $@
621                if !current_text.is_empty() {
622                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
623                }
624                chars.next(); // consume '@'
625                parts.push(StringPart::AllArgs);
626            } else if chars.peek() == Some(&'#') {
627                // Argument count $#
628                if !current_text.is_empty() {
629                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
630                }
631                chars.next(); // consume '#'
632                parts.push(StringPart::ArgCount);
633            } else if chars.peek() == Some(&'?') {
634                // Last exit code $?
635                if !current_text.is_empty() {
636                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
637                }
638                chars.next(); // consume '?'
639                parts.push(StringPart::LastExitCode);
640            } else if chars.peek() == Some(&'$') {
641                // Current PID $$
642                if !current_text.is_empty() {
643                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
644                }
645                chars.next(); // consume second '$'
646                parts.push(StringPart::CurrentPid);
647            } else if chars.peek().map(|c| c.is_ascii_alphabetic() || *c == '_').unwrap_or(false) {
648                // Simple variable reference $NAME
649                if !current_text.is_empty() {
650                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
651                }
652
653                // Collect identifier characters
654                let mut var_name = String::new();
655                while let Some(&c) = chars.peek() {
656                    if c.is_ascii_alphanumeric() || c == '_' {
657                        if let Some(c) = chars.next() {
658                            var_name.push(c);
659                        }
660                    } else {
661                        break;
662                    }
663                }
664
665                parts.push(StringPart::Var(VarPath::simple(var_name)));
666            } else {
667                // Literal $ (not followed by { or identifier start)
668                current_text.push(ch);
669            }
670        } else {
671            current_text.push(ch);
672        }
673    }
674
675    if !current_text.is_empty() {
676        parts.push(StringPart::Literal(current_text));
677    }
678
679    parts
680}
681
682/// Parse error with location and context.
683#[derive(Debug, Clone)]
684pub struct ParseError {
685    pub span: Span,
686    pub message: String,
687}
688
689impl ParseError {
690    /// Format the error against the original source, emitting a 1-indexed
691    /// `line:col [parse]: <message>` prefix and a snippet of the offending
692    /// line. Mirrors `ValidationIssue::format` so error reporting feels
693    /// consistent across pipeline phases.
694    pub fn format(&self, source: &str) -> String {
695        let start = self.span.start;
696        let mut line = 1usize;
697        let mut col = 1usize;
698        for (i, ch) in source.char_indices() {
699            if i >= start {
700                break;
701            }
702            if ch == '\n' {
703                line += 1;
704                col = 1;
705            } else {
706                col += 1;
707            }
708        }
709        let line_content = {
710            let line_start = source[..start.min(source.len())]
711                .rfind('\n')
712                .map_or(0, |i| i + 1);
713            let line_end = source[start.min(source.len())..]
714                .find('\n')
715                .map_or(source.len(), |i| start + i);
716            source.get(line_start..line_end).unwrap_or("")
717        };
718        if line_content.is_empty() {
719            format!("{}:{} [parse]: {}", line, col, self.message)
720        } else {
721            format!(
722                "{}:{} [parse]: {}\n  | {}",
723                line, col, self.message, line_content
724            )
725        }
726    }
727}
728
729impl std::fmt::Display for ParseError {
730    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
731        write!(f, "{} at {:?}", self.message, self.span)
732    }
733}
734
735impl std::error::Error for ParseError {}
736
737/// Parse kaish source code into a Program AST.
738pub fn parse(source: &str) -> Result<Program, Vec<ParseError>> {
739    // Tokenize with logos
740    let tokens = lexer::tokenize(source).map_err(|errs| {
741        errs.into_iter()
742            .map(|e| ParseError {
743                span: (e.span.start..e.span.end).into(),
744                message: format!("lexer error: {}", e.token),
745            })
746            .collect::<Vec<_>>()
747    })?;
748
749    // Convert tokens to (Token, SimpleSpan) pairs
750    let tokens: Vec<(Token, Span)> = tokens
751        .into_iter()
752        .map(|spanned| (spanned.token, (spanned.span.start..spanned.span.end).into()))
753        .collect();
754
755    // End-of-input span
756    let end_span: Span = (source.len()..source.len()).into();
757
758    // Parse using slice-based input (like nano_rust example)
759    let parser = program_parser();
760    let result = parser.parse(tokens.as_slice().map(end_span, |(t, s)| (t, s)));
761
762    let program = result.into_result().map_err(|errs| {
763        errs.into_iter()
764            .map(|e| ParseError {
765                span: *e.span(),
766                message: e.to_string(),
767            })
768            .collect::<Vec<_>>()
769    })?;
770
771    // Structural well-formedness checks that chumsky's grammar can't surface a
772    // clean message for. A command with two stdin sources (`<`/`<<`/`<<<`)
773    // would silently depend on redirect ordering at execution time, so reject
774    // it here — at parse time, which (unlike validation) can never be skipped.
775    if first_ambiguous_stdin(&program.statements) {
776        return Err(vec![ParseError {
777            // Redirects carry no AST span, so anchor at the start of the
778            // source; the message is the actionable part. Precise columns
779            // would require spanning `Redirect` (deferred — see docs/issues.md).
780            span: (0..0).into(),
781            message: "multiple stdin redirects on one command are ambiguous; \
782                      use exactly one of `<`, `<<`, or `<<<`"
783                .to_string(),
784        }]);
785    }
786
787    Ok(program)
788}
789
790/// Parse a single statement (useful for REPL).
791pub fn parse_statement(source: &str) -> Result<Stmt, Vec<ParseError>> {
792    let program = parse(source)?;
793    program
794        .statements
795        .into_iter()
796        .find(|s| !matches!(s, Stmt::Empty))
797        .ok_or_else(|| {
798            vec![ParseError {
799                span: (0..source.len()).into(),
800                message: "empty input".to_string(),
801            }]
802        })
803}
804
805// ═══════════════════════════════════════════════════════════════════════════
806// Parser Combinators - generic over input type
807// ═══════════════════════════════════════════════════════════════════════════
808
809/// Top-level program parser.
810fn program_parser<'tokens, 'src: 'tokens, I>(
811) -> impl Parser<'tokens, I, Program, extra::Err<Rich<'tokens, Token, Span>>>
812where
813    I: ValueInput<'tokens, Token = Token, Span = Span>,
814{
815    statement_parser()
816        .repeated()
817        .collect::<Vec<_>>()
818        .map(|statements| Program { statements })
819}
820
/// Statement parser - dispatches based on leading token.
/// Supports statement-level chaining with && and ||.
///
/// The parser is built inside `recursive` so that compound statements
/// (functions, `if`, `for`, `while`, `case`) can parse nested statements with
/// the same parser. Every statement, including a whole `&&`/`||` chain,
/// consumes any newline/semicolon terminators that follow it.
fn statement_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = Span>,
{
    recursive(|stmt| {
        // Zero or more newline / semicolon tokens.
        let terminator = choice((just(Token::Newline), just(Token::Semi))).repeated();

        // break [N] - break out of N levels of loops (default 1)
        // NOTE(review): `as usize` does not range-check. If `Token::Int`
        // carries a signed integer, a negative count would wrap to a huge
        // value - confirm against the lexer.
        let break_stmt = just(Token::Break)
            .ignore_then(
                select! { Token::Int(n) => n as usize }.or_not()
            )
            .map(Stmt::Break);

        // continue [N] - continue to next iteration, skipping N levels (default 1)
        // NOTE(review): same unchecked `as usize` cast as `break` above.
        let continue_stmt = just(Token::Continue)
            .ignore_then(
                select! { Token::Int(n) => n as usize }.or_not()
            )
            .map(Stmt::Continue);

        // return [expr] - return from a tool
        let return_stmt = just(Token::Return)
            .ignore_then(primary_expr_parser().or_not())
            .map(|e| Stmt::Return(e.map(Box::new)));

        // exit [code] - exit the script
        let exit_stmt = just(Token::Exit)
            .ignore_then(primary_expr_parser().or_not())
            .map(|e| Stmt::Exit(e.map(Box::new)));

        // set command: `set -e`, `set +e`, `set` (no args), `set -o pipefail`
        // This must come BEFORE assignment_parser to handle `set -e` vs `X=value`
        //
        // Strategy: Use lookahead to check what follows `set`:
        // - If followed by a flag (-e, --long, +e): parse as set command
        // - If followed by identifier NOT followed by =: parse as set command (e.g., `set pipefail`)
        // - If followed by nothing (end/newline/semi): parse as set command
        // - If followed by identifier then =: let assignment_parser handle it
        let set_flag_arg = choice((
            select! { Token::ShortFlag(f) => Arg::ShortFlag(f) },
            select! { Token::LongFlag(f) => Arg::LongFlag(f) },
            // PlusFlag for +e, +x etc. - convert to positional arg with + prefix
            select! { Token::PlusFlag(f) => Arg::Positional(Expr::Literal(Value::String(format!("+{}", f)))) },
        ));

        // Option value after `-o`/`+o`: a size literal (`8K`, `1M`) or raw
        // byte count. Stringified so `set.rs` can `parse_size` the
        // `output-limit=<value>` it reconstructs.
        let option_value_str = select! {
            Token::NumberIdent(s) => s,
            Token::Int(n) => n.to_string(),
            Token::Ident(s) => s,
        };

        // `-o output-limit=8K`: `name`, `=`, `value` are three tokens; fold
        // them back into a single `name=value` positional (the form `set.rs`
        // and bash both expect). Without this the `=` is a parse error.
        let set_option_assign = ident_parser()
            .then_ignore(just(Token::Eq))
            .then(option_value_str)
            .map(|(name, value)| {
                Arg::Positional(Expr::Literal(Value::String(format!("{name}={value}"))))
            });

        // Quoted option such as `set -o "output-limit=8K"`: the whole thing is
        // one string token. Accept it as a positional so the quoted form works
        // too (agents reach for it after the unquoted form trips a shell lint).
        let set_quoted_arg = select! {
            Token::String(s) => Arg::Positional(Expr::Literal(Value::String(s))),
            Token::SingleString(s) => Arg::Positional(Expr::Literal(Value::String(s))),
        };

        // set with flags: `set -e`, `set -e -u -o pipefail`
        // The first argument must be a flag; anything accepted by the inner
        // `choice` may follow, any number of times.
        let set_with_flags = just(Token::Set)
            .then(set_flag_arg)
            .then(
                choice((
                    set_flag_arg,
                    // `-o name=value` (try before the bare-ident arm).
                    set_option_assign,
                    set_quoted_arg,
                    // Identifiers like 'pipefail' after -o
                    ident_parser().map(|name| Arg::Positional(Expr::Literal(Value::String(name)))),
                ))
                .repeated()
                .collect::<Vec<_>>(),
            )
            .map(|((_, first_arg), mut rest_args)| {
                let mut args = vec![first_arg];
                args.append(&mut rest_args);
                Stmt::Command(Command {
                    name: "set".to_string(),
                    args,
                    redirects: vec![],
                })
            });

        // set with no args: `set` alone (shows settings)
        // Must be followed by newline, semicolon, end of input, or a chaining operator (&&, ||)
        // `.rewind()` makes the follow-token check a pure lookahead: the token
        // after `set` is inspected but not consumed.
        let set_no_args = just(Token::Set)
            .then(
                choice((
                    just(Token::Newline).to(()),
                    just(Token::Semi).to(()),
                    just(Token::And).to(()),
                    just(Token::Or).to(()),
                    end(),
                ))
                .rewind(),
            )
            .map(|_| Stmt::Command(Command {
                name: "set".to_string(),
                args: vec![],
                redirects: vec![],
            }));

        // Try set_with_flags first (requires at least one flag)
        // Then try set_no_args (no args, followed by terminator)
        // If neither matches, fall through to assignment_parser
        let set_command = set_with_flags.or(set_no_args);

        // Base statement (without chaining)
        // The alternatives are listed in the order they are attempted, so
        // their relative order is significant (e.g. `set_command` before
        // `assignment_parser`, and the pipeline fallback last).
        let base_statement = choice((
            // A bare newline is an empty statement.
            just(Token::Newline).to(Stmt::Empty),
            set_command,
            assignment_parser().map(Stmt::Assignment),
            // Shell-style functions (use $1, $2 positional params)
            posix_function_parser(stmt.clone()).map(Stmt::ToolDef),  // name() { }
            bash_function_parser(stmt.clone()).map(Stmt::ToolDef),   // function name { }
            if_parser(stmt.clone()).map(Stmt::If),
            for_parser(stmt.clone()).map(Stmt::For),
            while_parser(stmt.clone()).map(Stmt::While),
            case_parser(stmt.clone()).map(Stmt::Case),
            break_stmt,
            continue_stmt,
            return_stmt,
            exit_stmt,
            test_expr_stmt_parser().map(Stmt::Test),
            // Note: 'true' and 'false' are handled by command_parser/pipeline_parser
            pipeline_parser().map(|p| {
                // Unwrap single-command pipelines without background and without redirects
                if p.commands.len() == 1 && !p.background {
                    // Only unwrap if no redirects - redirects require pipeline processing
                    if p.commands[0].redirects.is_empty() {
                        // Safe: we just checked len == 1
                        match p.commands.into_iter().next() {
                            Some(cmd) => Stmt::Command(cmd),
                            None => Stmt::Empty, // unreachable but safe
                        }
                    } else {
                        Stmt::Pipeline(p)
                    }
                } else {
                    Stmt::Pipeline(p)
                }
            }),
        ))
        .boxed();

        // Statement chaining with precedence: && binds tighter than ||
        // and_chain = base_stmt { "&&" base_stmt }
        // or_chain  = and_chain { "||" and_chain }
        // Both folds are left-associative: `a && b && c` nests as
        // `AndChain(AndChain(a, b), c)`.
        let and_chain = base_statement
            .clone()
            .foldl(
                just(Token::And).ignore_then(base_statement).repeated(),
                |left, right| Stmt::AndChain {
                    left: Box::new(left),
                    right: Box::new(right),
                },
            );

        // The `||` level wraps whole `&&` chains, then swallows any trailing
        // terminators so the next statement starts on a fresh token.
        and_chain
            .clone()
            .foldl(
                just(Token::Or).ignore_then(and_chain).repeated(),
                |left, right| Stmt::OrChain {
                    left: Box::new(left),
                    right: Box::new(right),
                },
            )
            .then_ignore(terminator)
    })
}
1009
1010/// Assignment: `NAME=value` (bash-style) or `local NAME = value` (scoped)
1011fn assignment_parser<'tokens, I>(
1012) -> impl Parser<'tokens, I, Assignment, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1013where
1014    I: ValueInput<'tokens, Token = Token, Span = Span>,
1015{
1016    // local NAME = value (with spaces around =)
1017    let local_assignment = just(Token::Local)
1018        .ignore_then(ident_parser())
1019        .then_ignore(just(Token::Eq))
1020        .then(expr_parser())
1021        .map(|(name, value)| Assignment {
1022            name,
1023            value,
1024            local: true,
1025        });
1026
1027    // Bash-style: NAME=value (no spaces around =)
1028    // The lexer produces IDENT EQ EXPR, so we parse it here
1029    let bash_assignment = ident_parser()
1030        .then_ignore(just(Token::Eq))
1031        .then(expr_parser())
1032        .map(|(name, value)| Assignment {
1033            name,
1034            value,
1035            local: false,
1036        });
1037
1038    choice((local_assignment, bash_assignment))
1039        .labelled("assignment")
1040        .boxed()
1041}
1042
1043/// POSIX-style function: `name() { body }`
1044///
1045/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
1046fn posix_function_parser<'tokens, I, S>(
1047    stmt: S,
1048) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1049where
1050    I: ValueInput<'tokens, Token = Token, Span = Span>,
1051    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1052{
1053    ident_parser()
1054        .then_ignore(just(Token::LParen))
1055        .then_ignore(just(Token::RParen))
1056        .then_ignore(just(Token::LBrace))
1057        .then_ignore(just(Token::Newline).repeated())
1058        .then(
1059            stmt.repeated()
1060                .collect::<Vec<_>>()
1061                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1062        )
1063        .then_ignore(just(Token::Newline).repeated())
1064        .then_ignore(just(Token::RBrace))
1065        .map(|(name, body)| ToolDef { name, params: vec![], body })
1066        .labelled("POSIX function")
1067        .boxed()
1068}
1069
1070/// Bash-style function: `function name { body }` (without parens)
1071///
1072/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
1073fn bash_function_parser<'tokens, I, S>(
1074    stmt: S,
1075) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1076where
1077    I: ValueInput<'tokens, Token = Token, Span = Span>,
1078    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1079{
1080    just(Token::Function)
1081        .ignore_then(ident_parser())
1082        .then_ignore(just(Token::LBrace))
1083        .then_ignore(just(Token::Newline).repeated())
1084        .then(
1085            stmt.repeated()
1086                .collect::<Vec<_>>()
1087                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1088        )
1089        .then_ignore(just(Token::Newline).repeated())
1090        .then_ignore(just(Token::RBrace))
1091        .map(|(name, body)| ToolDef { name, params: vec![], body })
1092        .labelled("bash function")
1093        .boxed()
1094}
1095
1096/// If statement: `if COND; then STMTS [elif COND; then STMTS]* [else STMTS] fi`
1097///
1098/// elif clauses are desugared to nested if/else:
1099///   `if A; then X elif B; then Y else Z fi`
1100/// becomes:
1101///   `if A; then X else { if B; then Y else Z fi } fi`
1102fn if_parser<'tokens, I, S>(
1103    stmt: S,
1104) -> impl Parser<'tokens, I, IfStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1105where
1106    I: ValueInput<'tokens, Token = Token, Span = Span>,
1107    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1108{
1109    // Parse a single branch: condition + then statements
1110    let branch = condition_parser()
1111        .then_ignore(just(Token::Semi).or_not())
1112        .then_ignore(just(Token::Newline).repeated())
1113        .then_ignore(just(Token::Then))
1114        .then_ignore(just(Token::Newline).repeated())
1115        .then(
1116            stmt.clone()
1117                .repeated()
1118                .collect::<Vec<_>>()
1119                .map(|stmts: Vec<Stmt>| {
1120                    stmts
1121                        .into_iter()
1122                        .filter(|s| !matches!(s, Stmt::Empty))
1123                        .collect::<Vec<_>>()
1124                }),
1125        );
1126
1127    // Parse elif branches: `elif COND; then STMTS`
1128    let elif_branch = just(Token::Elif)
1129        .ignore_then(condition_parser())
1130        .then_ignore(just(Token::Semi).or_not())
1131        .then_ignore(just(Token::Newline).repeated())
1132        .then_ignore(just(Token::Then))
1133        .then_ignore(just(Token::Newline).repeated())
1134        .then(
1135            stmt.clone()
1136                .repeated()
1137                .collect::<Vec<_>>()
1138                .map(|stmts: Vec<Stmt>| {
1139                    stmts
1140                        .into_iter()
1141                        .filter(|s| !matches!(s, Stmt::Empty))
1142                        .collect::<Vec<_>>()
1143                }),
1144        );
1145
1146    // Parse else branch: `else STMTS`
1147    let else_branch = just(Token::Else)
1148        .ignore_then(just(Token::Newline).repeated())
1149        .ignore_then(stmt.repeated().collect::<Vec<_>>())
1150        .map(|stmts: Vec<Stmt>| {
1151            stmts
1152                .into_iter()
1153                .filter(|s| !matches!(s, Stmt::Empty))
1154                .collect::<Vec<_>>()
1155        });
1156
1157    just(Token::If)
1158        .ignore_then(branch)
1159        .then(elif_branch.repeated().collect::<Vec<_>>())
1160        .then(else_branch.or_not())
1161        .then_ignore(just(Token::Fi))
1162        .map(|(((condition, then_branch), elif_branches), else_branch)| {
1163            // Build nested if/else structure from elif branches
1164            build_if_chain(condition, then_branch, elif_branches, else_branch)
1165        })
1166        .labelled("if statement")
1167        .boxed()
1168}
1169
1170/// Build a nested IfStmt chain from elif branches.
1171///
1172/// Transforms:
1173///   if A then X elif B then Y elif C then Z else W fi
1174/// Into:
1175///   IfStmt { cond: A, then: X, else: Some([IfStmt { cond: B, then: Y, else: Some([IfStmt { cond: C, then: Z, else: Some(W) }]) }]) }
1176fn build_if_chain(
1177    condition: Expr,
1178    then_branch: Vec<Stmt>,
1179    mut elif_branches: Vec<(Expr, Vec<Stmt>)>,
1180    else_branch: Option<Vec<Stmt>>,
1181) -> IfStmt {
1182    if elif_branches.is_empty() {
1183        // No elif, just if/else
1184        IfStmt {
1185            condition: Box::new(condition),
1186            then_branch,
1187            else_branch,
1188        }
1189    } else {
1190        // Pop the first elif and recursively build the rest
1191        let (elif_cond, elif_then) = elif_branches.remove(0);
1192        let nested_if = build_if_chain(elif_cond, elif_then, elif_branches, else_branch);
1193        IfStmt {
1194            condition: Box::new(condition),
1195            then_branch,
1196            else_branch: Some(vec![Stmt::If(nested_if)]),
1197        }
1198    }
1199}
1200
1201/// For loop: `for VAR in ITEMS; do STMTS done`
1202fn for_parser<'tokens, I, S>(
1203    stmt: S,
1204) -> impl Parser<'tokens, I, ForLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1205where
1206    I: ValueInput<'tokens, Token = Token, Span = Span>,
1207    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1208{
1209    just(Token::For)
1210        .ignore_then(ident_parser())
1211        .then_ignore(just(Token::In))
1212        .then(expr_parser().repeated().at_least(1).collect::<Vec<_>>())
1213        .then_ignore(just(Token::Semi).or_not())
1214        .then_ignore(just(Token::Newline).repeated())
1215        .then_ignore(just(Token::Do))
1216        .then_ignore(just(Token::Newline).repeated())
1217        .then(
1218            stmt.repeated()
1219                .collect::<Vec<_>>()
1220                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1221        )
1222        .then_ignore(just(Token::Done))
1223        .map(|((variable, items), body)| ForLoop {
1224            variable,
1225            items,
1226            body,
1227        })
1228        .labelled("for loop")
1229        .boxed()
1230}
1231
1232/// While loop: `while condition; do ...; done`
1233fn while_parser<'tokens, I, S>(
1234    stmt: S,
1235) -> impl Parser<'tokens, I, WhileLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1236where
1237    I: ValueInput<'tokens, Token = Token, Span = Span>,
1238    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1239{
1240    just(Token::While)
1241        .ignore_then(condition_parser())
1242        .then_ignore(just(Token::Semi).or_not())
1243        .then_ignore(just(Token::Newline).repeated())
1244        .then_ignore(just(Token::Do))
1245        .then_ignore(just(Token::Newline).repeated())
1246        .then(
1247            stmt.repeated()
1248                .collect::<Vec<_>>()
1249                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1250        )
1251        .then_ignore(just(Token::Done))
1252        .map(|(condition, body)| WhileLoop {
1253            condition: Box::new(condition),
1254            body,
1255        })
1256        .labelled("while loop")
1257        .boxed()
1258}
1259
1260/// Case statement: `case expr in pattern) commands ;; esac`
1261///
1262/// Supports:
1263/// - Single patterns: `pattern) commands ;;`
1264/// - Multiple patterns: `pattern1|pattern2) commands ;;`
1265/// - Optional leading `(` before patterns: `(pattern) commands ;;`
1266fn case_parser<'tokens, I, S>(
1267    stmt: S,
1268) -> impl Parser<'tokens, I, CaseStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1269where
1270    I: ValueInput<'tokens, Token = Token, Span = Span>,
1271    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1272{
1273    // Pattern part: individual tokens that make up a glob pattern
1274    // e.g., "*.rs" is Star + Dot + Ident("rs")
1275    let pattern_part = choice((
1276        select! { Token::GlobWord(s) => s },
1277        select! { Token::Ident(s) => s },
1278        select! { Token::NumberIdent(s) => s },
1279        select! { Token::DottedIdent(s) => s },
1280        select! { Token::String(s) => s },
1281        select! { Token::SingleString(s) => s },
1282        select! { Token::Int(n) => n.to_string() },
1283        select! { Token::Star => "*".to_string() },
1284        select! { Token::Question => "?".to_string() },
1285        select! { Token::Dot => ".".to_string() },
1286        select! { Token::DotDot => "..".to_string() },
1287        select! { Token::Tilde => "~".to_string() },
1288        select! { Token::TildePath(s) => s },
1289        select! { Token::RelativePath(s) => s },
1290        select! { Token::DotSlashPath(s) => s },
1291        select! { Token::Path(p) => p },
1292        select! { Token::VarRef(v) => v },
1293        select! { Token::SimpleVarRef(v) => format!("${}", v) },
1294        // Character class: [a-z], [!abc], [^abc], etc.
1295        just(Token::LBracket)
1296            .ignore_then(
1297                choice((
1298                    select! { Token::Ident(s) => s },
1299                    select! { Token::Int(n) => n.to_string() },
1300                    just(Token::Colon).to(":".to_string()),
1301                    // Negation: ! or ^ at start of char class
1302                    just(Token::Bang).to("!".to_string()),
1303                    // Range like a-z
1304                    select! { Token::ShortFlag(s) => format!("-{}", s) },
1305                ))
1306                .repeated()
1307                .at_least(1)
1308                .collect::<Vec<String>>()
1309            )
1310            .then_ignore(just(Token::RBracket))
1311            .map(|parts| format!("[{}]", parts.join(""))),
1312        // Brace expansion: {a,b,c} or {js,ts}
1313        just(Token::LBrace)
1314            .ignore_then(
1315                choice((
1316                    select! { Token::Ident(s) => s },
1317                    select! { Token::Int(n) => n.to_string() },
1318                ))
1319                .separated_by(just(Token::Comma))
1320                .at_least(1)
1321                .collect::<Vec<String>>()
1322            )
1323            .then_ignore(just(Token::RBrace))
1324            .map(|parts| format!("{{{}}}", parts.join(","))),
1325    ));
1326
1327    // A complete pattern is one or more pattern parts joined together
1328    // e.g., "*.rs" = Star + Dot + Ident
1329    let pattern = pattern_part
1330        .repeated()
1331        .at_least(1)
1332        .collect::<Vec<String>>()
1333        .map(|parts| parts.join(""))
1334        .labelled("case pattern");
1335
1336    // Multiple patterns separated by pipe: `pattern1 | pattern2`
1337    let patterns = pattern
1338        .separated_by(just(Token::Pipe))
1339        .at_least(1)
1340        .collect::<Vec<String>>()
1341        .labelled("case patterns");
1342
1343    // Branch: `[( ] patterns ) commands ;;`
1344    let branch = just(Token::LParen)
1345        .or_not()
1346        .ignore_then(just(Token::Newline).repeated())
1347        .ignore_then(patterns)
1348        .then_ignore(just(Token::RParen))
1349        .then_ignore(just(Token::Newline).repeated())
1350        .then(
1351            stmt.clone()
1352                .repeated()
1353                .collect::<Vec<_>>()
1354                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1355        )
1356        .then_ignore(just(Token::DoubleSemi))
1357        .then_ignore(just(Token::Newline).repeated())
1358        .map(|(patterns, body)| CaseBranch { patterns, body })
1359        .labelled("case branch");
1360
1361    just(Token::Case)
1362        .ignore_then(expr_parser())
1363        .then_ignore(just(Token::In))
1364        .then_ignore(just(Token::Newline).repeated())
1365        .then(branch.repeated().collect::<Vec<_>>())
1366        .then_ignore(just(Token::Esac))
1367        .map(|(expr, branches)| CaseStmt { expr, branches })
1368        .labelled("case statement")
1369        .boxed()
1370}
1371
1372/// Pipeline: `cmd | cmd | cmd [&]`
1373fn pipeline_parser<'tokens, I>(
1374) -> impl Parser<'tokens, I, Pipeline, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1375where
1376    I: ValueInput<'tokens, Token = Token, Span = Span>,
1377{
1378    command_parser()
1379        .separated_by(just(Token::Pipe))
1380        .at_least(1)
1381        .collect::<Vec<_>>()
1382        .then(just(Token::Amp).or_not())
1383        .map(|(commands, bg)| Pipeline {
1384            commands,
1385            background: bg.is_some(),
1386        })
1387        .labelled("pipeline")
1388        .boxed()
1389}
1390
1391/// Command: `name args... [redirects...]`
1392/// Command names can be identifiers, 'true', 'false', or '.' (source alias).
1393fn command_parser<'tokens, I>(
1394) -> impl Parser<'tokens, I, Command, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1395where
1396    I: ValueInput<'tokens, Token = Token, Span = Span>,
1397{
1398    // Command name can be an identifier, path, 'true', 'false', '.' (source alias), or ./path
1399    let command_name = choice((
1400        ident_parser(),
1401        path_parser(),
1402        select! { Token::DotSlashPath(s) => s },
1403        just(Token::True).to("true".to_string()),
1404        just(Token::False).to("false".to_string()),
1405        just(Token::Dot).to(".".to_string()),
1406    ));
1407
1408    // NB: the "at most one stdin source per command" rule is enforced by a
1409    // post-parse scan in `parse()` (see `first_ambiguous_stdin`), NOT here.
1410    // A `try_map` rejection at this level cannot surface its own message: a
1411    // command like `cat <<< a <<< b` also fails the competing statement-level
1412    // assignment/function alternative ("expected '=', or '('"), and chumsky's
1413    // `choice` merge keeps that alternative's error regardless of which span
1414    // our custom error carries. So we accept the command here and reject it
1415    // structurally after parsing, where the message is fully under our control
1416    // (verified empirically 2026-06-07; see docs/issues.md).
1417    command_name
1418        .then(args_list_parser())
1419        .then(redirect_parser().repeated().collect::<Vec<_>>())
1420        .map(|((name, args), redirects)| Command {
1421            name,
1422            args,
1423            redirects,
1424        })
1425        .labelled("command")
1426        .boxed()
1427}
1428
1429/// True if `cmd` has more than one stdin source (`<`, `<<`, `<<<`). Such a
1430/// command would silently depend on redirect ordering at execution time
1431/// (`setup_stdin_redirects` is last-wins), so `parse()` rejects it loudly.
1432fn command_has_ambiguous_stdin(cmd: &Command) -> bool {
1433    cmd.redirects
1434        .iter()
1435        .filter(|r| {
1436            matches!(
1437                r.kind,
1438                RedirectKind::Stdin | RedirectKind::HereDoc | RedirectKind::HereString
1439            )
1440        })
1441        .count()
1442        > 1
1443}
1444
1445/// Find the first command anywhere in `stmts` (recursing into pipelines,
1446/// control-flow bodies, chains, and tool definitions) that has more than one
1447/// stdin source. Used by `parse()` to reject the ambiguity after parsing.
1448fn first_ambiguous_stdin(stmts: &[Stmt]) -> bool {
1449    stmts.iter().any(stmt_has_ambiguous_stdin)
1450}
1451
1452fn stmt_has_ambiguous_stdin(stmt: &Stmt) -> bool {
1453    match stmt {
1454        Stmt::Command(c) => command_has_ambiguous_stdin(c),
1455        Stmt::Pipeline(p) => p.commands.iter().any(command_has_ambiguous_stdin),
1456        Stmt::If(i) => {
1457            first_ambiguous_stdin(&i.then_branch)
1458                || i.else_branch
1459                    .as_deref()
1460                    .is_some_and(first_ambiguous_stdin)
1461        }
1462        Stmt::For(f) => first_ambiguous_stdin(&f.body),
1463        Stmt::While(w) => first_ambiguous_stdin(&w.body),
1464        Stmt::Case(c) => c.branches.iter().any(|b| first_ambiguous_stdin(&b.body)),
1465        Stmt::ToolDef(t) => first_ambiguous_stdin(&t.body),
1466        Stmt::AndChain { left, right } | Stmt::OrChain { left, right } => {
1467            stmt_has_ambiguous_stdin(left) || stmt_has_ambiguous_stdin(right)
1468        }
1469        Stmt::Assignment(_)
1470        | Stmt::Break(_)
1471        | Stmt::Continue(_)
1472        | Stmt::Return(_)
1473        | Stmt::Exit(_)
1474        | Stmt::Test(_)
1475        | Stmt::Empty => false,
1476    }
1477}
1478
1479/// Arguments list parser that handles `--` flag terminator.
1480///
1481/// After `--`, all subsequent flags are converted to positional string arguments.
1482fn args_list_parser<'tokens, I>(
1483) -> impl Parser<'tokens, I, Vec<Arg>, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1484where
1485    I: ValueInput<'tokens, Token = Token, Span = Span>,
1486{
1487    // Arguments before `--` (normal parsing)
1488    let pre_dash = arg_before_double_dash_parser()
1489        .repeated()
1490        .collect::<Vec<_>>();
1491
1492    // The `--` marker itself
1493    let double_dash = select! {
1494        Token::DoubleDash => Arg::DoubleDash,
1495    };
1496
1497    // Arguments after `--` (flags become positional strings)
1498    let post_dash_arg = choice((
1499        // Flags become positional strings
1500        select! {
1501            Token::ShortFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("-{}", name)))),
1502            Token::LongFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("--{}", name)))),
1503        },
1504        // Everything else stays the same
1505        primary_expr_parser().map(Arg::Positional),
1506    ));
1507
1508    let post_dash = post_dash_arg.repeated().collect::<Vec<_>>();
1509
1510    // Combine: args_before ++ [--] ++ args_after
1511    pre_dash
1512        .then(double_dash.then(post_dash).or_not())
1513        .map(|(mut args, maybe_dd)| {
1514            if let Some((dd, post)) = maybe_dd {
1515                args.push(dd);
1516                args.extend(post);
1517            }
1518            args
1519        })
1520}
1521
1522/// Argument parser for arguments before `--` (normal flag handling).
1523fn arg_before_double_dash_parser<'tokens, I>(
1524) -> impl Parser<'tokens, I, Arg, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1525where
1526    I: ValueInput<'tokens, Token = Token, Span = Span>,
1527{
1528    // Long flag with value: --name=value
1529    let long_flag_with_value = select! {
1530        Token::LongFlag(name) => name,
1531    }
1532    .then_ignore(just(Token::Eq))
1533    .then(primary_expr_parser())
1534    .map(|(key, value)| Arg::Named { key, value });
1535
1536    // Boolean long flag: --name
1537    let long_flag = select! {
1538        Token::LongFlag(name) => Arg::LongFlag(name),
1539    };
1540
1541    // Boolean short flag: -x
1542    let short_flag = select! {
1543        Token::ShortFlag(name) => Arg::ShortFlag(name),
1544    };
1545
1546    // Shell assignment in argv position: name=value (must not have spaces around =).
1547    // Produces Arg::WordAssign; the kernel routes it through tool_args.named
1548    // only for shell-assignment-accepting builtins (export, alias). For every
1549    // other command it materialises as a `"name=value"` positional, matching
1550    // bash semantics (`cat foo=bar` opens a file named `foo=bar`).
1551    let named = select! {
1552        Token::Ident(s) => s,
1553    }
1554    .map_with(|s, e| -> (String, Span) { (s, e.span()) })
1555    .then(just(Token::Eq).map_with(|_, e| -> Span { e.span() }))
1556    .then(primary_expr_parser().map_with(|expr, e| -> (Expr, Span) { (expr, e.span()) }))
1557    .try_map(|(((key, key_span), eq_span), (value, value_span)): (((String, Span), Span), (Expr, Span)), span| {
1558        // Check that key ends where = starts and = ends where value starts
1559        if key_span.end != eq_span.start || eq_span.end != value_span.start {
1560            Err(Rich::custom(
1561                span,
1562                "shell assignment must not have spaces around '=' (use 'key=value' not 'key = value')",
1563            ))
1564        } else {
1565            Ok(Arg::WordAssign { key, value })
1566        }
1567    });
1568
1569    // Positional argument
1570    let positional = primary_expr_parser().map(Arg::Positional);
1571
1572    // Order matters: try more specific patterns first
1573    // Note: DoubleDash is NOT included here - it's handled by args_list_parser
1574    choice((
1575        long_flag_with_value,
1576        long_flag,
1577        short_flag,
1578        named,
1579        positional,
1580    ))
1581    .boxed()
1582}
1583
1584/// Redirect: `> file`, `>> file`, `< file`, `<< heredoc`, `2> file`, `&> file`, `2>&1`
1585fn redirect_parser<'tokens, I>(
1586) -> impl Parser<'tokens, I, Redirect, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1587where
1588    I: ValueInput<'tokens, Token = Token, Span = Span>,
1589{
1590    // Regular redirects: >, >>, <, 2>, &>
1591    let regular_redirect = select! {
1592        Token::GtGt => RedirectKind::StdoutAppend,
1593        Token::Gt => RedirectKind::StdoutOverwrite,
1594        Token::Lt => RedirectKind::Stdin,
1595        Token::Stderr => RedirectKind::Stderr,
1596        Token::Both => RedirectKind::Both,
1597    }
1598    .then(primary_expr_parser())
1599    .map(|(kind, target)| Redirect { kind, target });
1600
1601    // Here-doc redirect: << content
1602    // Quoted delimiters (<<'EOF' or <<"EOF") produce literal heredocs (no expansion).
1603    // Unquoted delimiters produce interpolated heredocs (variables are expanded).
1604    // For literal heredocs the `<<-EOF` tab stripping is applied here at parse
1605    // time (the body is fully known); for interpolated heredocs the stripping
1606    // is deferred to the interpreter so source byte offsets in `parts` stay
1607    // aligned with the original source for span reporting.
1608    let heredoc_redirect = just(Token::HereDocStart)
1609        .ignore_then(select! { Token::HereDoc(data) => data })
1610        .map(|data: HereDocData| {
1611            let target = if data.literal {
1612                let body = if data.strip_tabs {
1613                    crate::interpreter::strip_leading_tabs(&data.content)
1614                } else {
1615                    data.content
1616                };
1617                Expr::Literal(Value::String(body))
1618            } else {
1619                let parts = parse_interpolated_string_spanned(
1620                    &data.content,
1621                    data.body_start_offset,
1622                );
1623                // If there's only one literal part and no tab stripping is
1624                // needed, simplify to Expr::Literal — keeps the AST shape
1625                // identical to the pre-spans path for trivial bodies.
1626                if parts.len() == 1 && !data.strip_tabs {
1627                    if let StringPart::Literal(text) = &parts[0].part {
1628                        return Redirect {
1629                            kind: RedirectKind::HereDoc,
1630                            target: Expr::Literal(Value::String(text.clone())),
1631                        };
1632                    }
1633                }
1634                Expr::HereDocBody {
1635                    parts,
1636                    strip_tabs: data.strip_tabs,
1637                }
1638            };
1639            Redirect {
1640                kind: RedirectKind::HereDoc,
1641                target,
1642            }
1643        });
1644
1645    // Here-string redirect: <<< word
1646    // The target is any single expression; kaish's existing Expr machinery
1647    // handles interpolation, single-quoted literals, and command substitution.
1648    let herestring_redirect = just(Token::HereString)
1649        .ignore_then(primary_expr_parser())
1650        .map(|target| Redirect {
1651            kind: RedirectKind::HereString,
1652            target,
1653        });
1654
1655    // Merge stderr to stdout: 2>&1 (no target needed - implicit)
1656    let merge_stderr_redirect = just(Token::StderrToStdout)
1657        .map(|_| Redirect {
1658            kind: RedirectKind::MergeStderr,
1659            // Target is unused for MergeStderr, but we need something
1660            target: Expr::Literal(Value::Null),
1661        });
1662
1663    // Merge stdout to stderr: 1>&2 or >&2 (no target needed - implicit)
1664    let merge_stdout_redirect = choice((
1665        just(Token::StdoutToStderr),
1666        just(Token::StdoutToStderr2),
1667    ))
1668    .map(|_| Redirect {
1669        kind: RedirectKind::MergeStdout,
1670        // Target is unused for MergeStdout, but we need something
1671        target: Expr::Literal(Value::Null),
1672    });
1673
1674    choice((
1675        heredoc_redirect,
1676        herestring_redirect,
1677        merge_stderr_redirect,
1678        merge_stdout_redirect,
1679        regular_redirect,
1680    ))
1681    .labelled("redirect")
1682    .boxed()
1683}
1684
1685/// Test expression parser for `[[ ... ]]` syntax.
1686///
1687/// Supports:
1688/// - File tests: `[[ -f path ]]`, `[[ -d path ]]`, etc.
1689/// - String tests: `[[ -z str ]]`, `[[ -n str ]]`
1690/// - Comparisons: `[[ $X == "value" ]]`, `[[ $NUM -gt 5 ]]`
1691/// - Compound: `[[ -f a && -d b ]]`, `[[ -z x || -n y ]]`, `[[ ! -f file ]]`
1692///
1693/// Precedence (highest to lowest): `!` > `&&` > `||`
1694fn test_expr_stmt_parser<'tokens, I>(
1695) -> impl Parser<'tokens, I, TestExpr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1696where
1697    I: ValueInput<'tokens, Token = Token, Span = Span>,
1698{
1699    // File test operators: -e, -f, -d, -r, -w, -x
1700    let file_test_op = select! {
1701        Token::ShortFlag(s) if s == "e" => FileTestOp::Exists,
1702        Token::ShortFlag(s) if s == "f" => FileTestOp::IsFile,
1703        Token::ShortFlag(s) if s == "d" => FileTestOp::IsDir,
1704        Token::ShortFlag(s) if s == "r" => FileTestOp::Readable,
1705        Token::ShortFlag(s) if s == "w" => FileTestOp::Writable,
1706        Token::ShortFlag(s) if s == "x" => FileTestOp::Executable,
1707    };
1708
1709    // String test operators: -z, -n
1710    let string_test_op = select! {
1711        Token::ShortFlag(s) if s == "z" => StringTestOp::IsEmpty,
1712        Token::ShortFlag(s) if s == "n" => StringTestOp::IsNonEmpty,
1713    };
1714
1715    // Comparison operators: =, ==, !=, =~, !~, >, <, >=, <=, -gt, -lt, -ge, -le, -eq, -ne
1716    // Note: = and == are equivalent inside [[ ]] (matching bash behavior)
1717    let cmp_op = choice((
1718        just(Token::EqEq).to(TestCmpOp::Eq),
1719        just(Token::Eq).to(TestCmpOp::Eq),
1720        just(Token::NotEq).to(TestCmpOp::NotEq),
1721        just(Token::Match).to(TestCmpOp::Match),
1722        just(Token::NotMatch).to(TestCmpOp::NotMatch),
1723        just(Token::Gt).to(TestCmpOp::Gt),
1724        just(Token::Lt).to(TestCmpOp::Lt),
1725        just(Token::GtEq).to(TestCmpOp::GtEq),
1726        just(Token::LtEq).to(TestCmpOp::LtEq),
1727        select! { Token::ShortFlag(s) if s == "eq" => TestCmpOp::NumEq },
1728        select! { Token::ShortFlag(s) if s == "ne" => TestCmpOp::NumNotEq },
1729        select! { Token::ShortFlag(s) if s == "gt" => TestCmpOp::NumGt },
1730        select! { Token::ShortFlag(s) if s == "lt" => TestCmpOp::NumLt },
1731        select! { Token::ShortFlag(s) if s == "ge" => TestCmpOp::NumGtEq },
1732        select! { Token::ShortFlag(s) if s == "le" => TestCmpOp::NumLtEq },
1733    ));
1734
1735    // File test: -f path
1736    let file_test = file_test_op
1737        .then(primary_expr_parser())
1738        .map(|(op, path)| TestExpr::FileTest {
1739            op,
1740            path: Box::new(path),
1741        });
1742
1743    // String test: -z str
1744    let string_test = string_test_op
1745        .then(primary_expr_parser())
1746        .map(|(op, value)| TestExpr::StringTest {
1747            op,
1748            value: Box::new(value),
1749        });
1750
1751    // Comparison: $X == "value" or $NUM -gt 5
1752    let comparison = primary_expr_parser()
1753        .then(cmp_op)
1754        .then(primary_expr_parser())
1755        .map(|((left, op), right)| TestExpr::Comparison {
1756            left: Box::new(left),
1757            op,
1758            right: Box::new(right),
1759        });
1760
1761    // Primary test expression (atomic - no compound operators)
1762    let primary_test = choice((file_test, string_test, comparison));
1763
1764    // Build compound expressions with proper precedence:
1765    // Grammar:
1766    //   test_expr = or_expr
1767    //   or_expr   = and_expr { "||" and_expr }
1768    //   and_expr  = unary_expr { "&&" unary_expr }
1769    //   unary_expr = "!" unary_expr | primary_test
1770    //
1771    // Precedence: ! (highest) > && > ||
1772
1773    // Unary NOT binds tighter than `&&`/`||`, so it must recurse at the
1774    // unary level — `! A || B` is `(!A) || B`, NOT `!(A || B)`. The inner
1775    // `recursive` lets `!` chain (`! ! expr`) while bottoming out at a
1776    // primary test, so the bang never swallows a following `&&`/`||` operand.
1777    let unary = recursive(|unary| {
1778        let not_expr = just(Token::Bang)
1779            .ignore_then(unary)
1780            .map(|expr| TestExpr::Not { expr: Box::new(expr) });
1781        choice((not_expr, primary_test.clone()))
1782    });
1783
1784    // AND level: unary && unary && ...
1785    let and_expr = unary.clone().foldl(
1786        just(Token::And).ignore_then(unary).repeated(),
1787        |left, right| TestExpr::And {
1788            left: Box::new(left),
1789            right: Box::new(right),
1790        },
1791    );
1792
1793    // OR level: and_expr || and_expr || ...
1794    let compound_test = and_expr.clone().foldl(
1795        just(Token::Or).ignore_then(and_expr).repeated(),
1796        |left, right| TestExpr::Or {
1797            left: Box::new(left),
1798            right: Box::new(right),
1799        },
1800    );
1801
1802    // [[ ]] is two consecutive bracket tokens (not a single TestStart token)
1803    // to avoid conflicts with nested array syntax like [[1, 2], [3, 4]]
1804    just(Token::LBracket)
1805        .then(just(Token::LBracket))
1806        .ignore_then(compound_test)
1807        .then_ignore(just(Token::RBracket).then(just(Token::RBracket)))
1808        .labelled("test expression")
1809        .boxed()
1810}
1811
1812/// Condition parser: supports [[ ]] test expressions and commands with && / || chaining.
1813///
1814/// Shell semantics: conditions are commands whose exit codes determine truthiness.
1815/// - `if true; then` → runs `true` builtin, exit code 0 = truthy
1816/// - `if grep -q pattern file; then` → runs command, checks exit code
1817/// - `if a && b; then` → runs `a`, if exit 0, runs `b`
1818///
1819/// Use `[[ ]]` for comparisons: `if [[ $X -gt 5 ]]; then`
1820///
1821/// Grammar (with precedence - && binds tighter than ||):
1822///   condition = or_expr
1823///   or_expr   = and_expr { "||" and_expr }
1824///   and_expr  = base { "&&" base }
1825///   base      = test_expr | command
1826fn condition_parser<'tokens, I>(
1827) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1828where
1829    I: ValueInput<'tokens, Token = Token, Span = Span>,
1830{
1831    // [[ ]] test expression - wrap as Expr::Test
1832    let test_expr_condition = test_expr_stmt_parser().map(|test| Expr::Test(Box::new(test)));
1833
1834    // Command as condition (includes true/false as command names)
1835    // The command's exit code determines truthiness (0 = true, non-zero = false)
1836    let command_condition = command_parser().map(Expr::Command);
1837
1838    // Base: test expr OR command
1839    let base = choice((test_expr_condition, command_condition));
1840
1841    // && has higher precedence than ||
1842    // First chain with && (higher precedence)
1843    let and_expr = base.clone().foldl(
1844        just(Token::And).ignore_then(base).repeated(),
1845        |left, right| Expr::BinaryOp {
1846            left: Box::new(left),
1847            op: BinaryOp::And,
1848            right: Box::new(right),
1849        },
1850    );
1851
1852    // Then chain with || (lower precedence)
1853    and_expr
1854        .clone()
1855        .foldl(
1856            just(Token::Or).ignore_then(and_expr).repeated(),
1857            |left, right| Expr::BinaryOp {
1858                left: Box::new(left),
1859                op: BinaryOp::Or,
1860                right: Box::new(right),
1861            },
1862        )
1863        .labelled("condition")
1864        .boxed()
1865}
1866
1867/// Expression parser - supports && and || binary operators.
1868fn expr_parser<'tokens, I>(
1869) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1870where
1871    I: ValueInput<'tokens, Token = Token, Span = Span>,
1872{
1873    // For now, just primary expressions. Can extend for && / || later if needed.
1874    primary_expr_parser()
1875}
1876
1877/// Primary expression: literal, variable reference, command substitution, or bare identifier.
1878///
1879/// Uses `recursive` to support nested command substitution like `$(echo $(date))`.
1880fn primary_expr_parser<'tokens, I>(
1881) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1882where
1883    I: ValueInput<'tokens, Token = Token, Span = Span>,
1884{
1885    // Positional parameters: $0-$9, $@, $#, ${#VAR}, $?, $$
1886    let positional = select! {
1887        Token::Positional(n) => Expr::Positional(n),
1888        Token::AllArgs => Expr::AllArgs,
1889        Token::ArgCount => Expr::ArgCount,
1890        Token::VarLength(name) => Expr::VarLength(name),
1891        Token::LastExitCode => Expr::LastExitCode,
1892        Token::CurrentPid => Expr::CurrentPid,
1893    };
1894
1895    // Arithmetic expression: $((expr)) - preprocessed into Arithmetic token
1896    let arithmetic = select! {
1897        Token::Arithmetic(expr_str) => Expr::Arithmetic(expr_str),
1898    };
1899
1900    // Keywords that can also be used as barewords in argument position
1901    // (e.g., `echo done` should work even though `done` is a keyword)
1902    let keyword_as_bareword = select! {
1903        Token::Done => "done",
1904        Token::Fi => "fi",
1905        Token::Then => "then",
1906        Token::Else => "else",
1907        Token::Elif => "elif",
1908        Token::In => "in",
1909        Token::Do => "do",
1910        Token::Esac => "esac",
1911        // `set` in argument position is the literal word (`echo set`,
1912        // `kaish-output-limit set 1K`); the `set` *builtin* is only matched
1913        // when `Token::Set` leads a statement (see `set_command`), so this
1914        // arm never shadows it.
1915        Token::Set => "set",
1916    }
1917    .map(|s| Expr::Literal(Value::String(s.to_string())));
1918
1919    // Bare words starting with + or - (e.g., date +%s, cat -)
1920    let plus_minus_bare = select! {
1921        Token::PlusBare(s) => Expr::Literal(Value::String(s)),
1922        Token::MinusBare(s) => Expr::Literal(Value::String(s)),
1923        Token::MinusAlone => Expr::Literal(Value::String("-".to_string())),
1924    };
1925
1926    // Glob patterns: merged GlobWord tokens and bare Star/Question
1927    let glob_pattern = select! {
1928        Token::GlobWord(s) => Expr::GlobPattern(s),
1929        Token::Star => Expr::GlobPattern("*".to_string()),
1930        Token::Question => Expr::GlobPattern("?".to_string()),
1931    };
1932
1933    recursive(|expr| {
1934        choice((
1935            positional,
1936            arithmetic,
1937            cmd_subst_parser(expr.clone()),
1938            var_expr_parser(),
1939            interpolated_string_parser(),
1940            literal_parser().map(Expr::Literal),
1941            // Glob patterns before ident (GlobWord is more specific)
1942            glob_pattern,
1943            // Bare identifiers become string literals (shell barewords)
1944            ident_parser().map(|s| Expr::Literal(Value::String(s))),
1945            // Absolute paths become string literals
1946            path_parser().map(|s| Expr::Literal(Value::String(s))),
1947            // Bare words starting with + or - (date +%s, cat -)
1948            // Shell navigation tokens
1949            select! {
1950                // Bare `.` in argument/expression position is the literal
1951                // current-directory path (`find .`, `ls .`, `echo .`). The
1952                // `source` alias is unaffected: `command_parser` consumes a
1953                // *leading* `.` as the command name before args are parsed,
1954                // so only a `.` that follows a command reaches here.
1955                Token::Dot => Expr::Literal(Value::String(".".into())),
1956                Token::DotDot => Expr::Literal(Value::String("..".into())),
1957                Token::Tilde => Expr::Literal(Value::String("~".into())),
1958                Token::TildePath(s) => Expr::Literal(Value::String(s)),
1959                Token::RelativePath(s) => Expr::Literal(Value::String(s)),
1960                Token::DotSlashPath(s) => Expr::Literal(Value::String(s)),
1961                // Digit-leading bareword (SHA prefix `019dda1c`, UUIDs).
1962                Token::NumberIdent(s) => Expr::Literal(Value::String(s)),
1963                // Dot-prefixed bareword (`.gitignore`, `.parent`, `.parent.parent`).
1964                // Distinct from `Token::Dot` (the source alias), which only
1965                // matches a bare `.` and requires whitespace before its file
1966                // argument.
1967                Token::DottedIdent(s) => Expr::Literal(Value::String(s)),
1968                // Job specifier `%1` for wait/kill — flows as the literal
1969                // string "%1"; the builtins interpret the leading `%`.
1970                Token::JobSpec(s) => Expr::Literal(Value::String(s)),
1971            },
1972            plus_minus_bare,
1973            // Keywords can be used as barewords in argument position
1974            keyword_as_bareword,
1975        ))
1976        .labelled("expression")
1977    })
1978    .boxed()
1979}
1980
1981/// Variable reference: `${VAR}`, `${VAR.field}`, `${VAR:-default}`, or `$VAR` (simple form).
1982/// Returns Expr directly to support both VarRef and VarWithDefault.
1983fn var_expr_parser<'tokens, I>(
1984) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1985where
1986    I: ValueInput<'tokens, Token = Token, Span = Span>,
1987{
1988    select! {
1989        Token::VarRef(raw) => parse_var_expr(&raw),
1990        Token::SimpleVarRef(name) => Expr::VarRef(VarPath::simple(name)),
1991    }
1992    .labelled("variable reference")
1993}
1994
1995/// Command substitution: `$(pipeline)` - runs a pipeline and returns its result.
1996///
1997/// Accepts a recursive expression parser to support nested command substitution.
1998fn cmd_subst_parser<'tokens, I, E>(
1999    expr: E,
2000) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2001where
2002    I: ValueInput<'tokens, Token = Token, Span = Span>,
2003    E: Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone,
2004{
2005    // Argument parser using the recursive expression parser
2006    // Long flag with value: --name=value
2007    let long_flag_with_value = select! {
2008        Token::LongFlag(name) => name,
2009    }
2010    .then_ignore(just(Token::Eq))
2011    .then(expr.clone())
2012    .map(|(key, value)| Arg::Named { key, value });
2013
2014    // Boolean long flag: --name
2015    let long_flag = select! {
2016        Token::LongFlag(name) => Arg::LongFlag(name),
2017    };
2018
2019    // Boolean short flag: -x
2020    let short_flag = select! {
2021        Token::ShortFlag(name) => Arg::ShortFlag(name),
2022    };
2023
2024    // Shell assignment in argv position: name=value (see arg_before_double_dash_parser).
2025    let named = ident_parser()
2026        .then_ignore(just(Token::Eq))
2027        .then(expr.clone())
2028        .map(|(key, value)| Arg::WordAssign { key, value });
2029
2030    // Positional argument
2031    let positional = expr.map(Arg::Positional);
2032
2033    let arg = choice((
2034        long_flag_with_value,
2035        long_flag,
2036        short_flag,
2037        named,
2038        positional,
2039    ));
2040
2041    // Command name parser - accepts identifiers and boolean keywords (true/false are builtins)
2042    let command_name = choice((
2043        ident_parser(),
2044        just(Token::True).to("true".to_string()),
2045        just(Token::False).to("false".to_string()),
2046    ));
2047
2048    // Command parser
2049    let command = command_name
2050        .then(arg.repeated().collect::<Vec<_>>())
2051        .map(|(name, args)| Command {
2052            name,
2053            args,
2054            redirects: vec![],
2055        });
2056
2057    // Pipeline parser
2058    let pipeline = command
2059        .separated_by(just(Token::Pipe))
2060        .at_least(1)
2061        .collect::<Vec<_>>()
2062        .map(|commands| Pipeline {
2063            commands,
2064            background: false,
2065        });
2066
2067    just(Token::CmdSubstStart)
2068        .ignore_then(pipeline)
2069        .then_ignore(just(Token::RParen))
2070        .map(|pipeline| Expr::CommandSubst(Box::new(pipeline)))
2071        .labelled("command substitution")
2072}
2073
2074/// String parser - handles double-quoted strings (with interpolation) and single-quoted (literal).
2075fn interpolated_string_parser<'tokens, I>(
2076) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2077where
2078    I: ValueInput<'tokens, Token = Token, Span = Span>,
2079{
2080    // Double-quoted string: may contain $VAR or ${VAR} interpolation
2081    let double_quoted = select! {
2082        Token::String(s) => s,
2083    }
2084    .map(|s| {
2085        // Check if string contains interpolation markers (${} or $NAME) or escaped dollars
2086        if s.contains('$') || s.contains("__KAISH_ESCAPED_DOLLAR__") {
2087            // Parse interpolated parts
2088            let parts = parse_interpolated_string(&s);
2089            if parts.len() == 1
2090                && let StringPart::Literal(text) = &parts[0] {
2091                    return Expr::Literal(Value::String(text.clone()));
2092                }
2093            Expr::Interpolated(parts)
2094        } else {
2095            Expr::Literal(Value::String(s))
2096        }
2097    });
2098
2099    // Single-quoted string: literal, no interpolation
2100    let single_quoted = select! {
2101        Token::SingleString(s) => Expr::Literal(Value::String(s)),
2102    };
2103
2104    choice((single_quoted, double_quoted)).labelled("string")
2105}
2106
2107/// Literal value parser (excluding strings, which are handled by interpolated_string_parser).
2108fn literal_parser<'tokens, I>(
2109) -> impl Parser<'tokens, I, Value, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2110where
2111    I: ValueInput<'tokens, Token = Token, Span = Span>,
2112{
2113    choice((
2114        select! {
2115            Token::True => Value::Bool(true),
2116            Token::False => Value::Bool(false),
2117        },
2118        select! {
2119            Token::Int(n) => Value::Int(n),
2120            Token::Float(f) => Value::Float(f),
2121        },
2122    ))
2123    .labelled("literal")
2124    .boxed()
2125}
2126
2127/// Identifier parser.
2128fn ident_parser<'tokens, I>(
2129) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2130where
2131    I: ValueInput<'tokens, Token = Token, Span = Span>,
2132{
2133    select! {
2134        Token::Ident(s) => s,
2135    }
2136    .labelled("identifier")
2137}
2138
2139/// Path parser: matches absolute paths like `/tmp/out`, `/etc/hosts`.
2140fn path_parser<'tokens, I>(
2141) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2142where
2143    I: ValueInput<'tokens, Token = Token, Span = Span>,
2144{
2145    select! {
2146        Token::Path(s) => s,
2147    }
2148    .labelled("path")
2149}
2150
2151#[cfg(test)]
2152mod tests {
2153    use super::*;
2154
2155    #[test]
2156    fn parse_empty() {
2157        let result = parse("");
2158        assert!(result.is_ok());
2159        assert_eq!(result.expect("ok").statements.len(), 0);
2160    }
2161
2162    #[test]
2163    fn parse_newlines_only() {
2164        let result = parse("\n\n\n");
2165        assert!(result.is_ok());
2166    }
2167
2168    #[test]
2169    fn parse_simple_command() {
2170        let result = parse("echo");
2171        assert!(result.is_ok());
2172        let program = result.expect("ok");
2173        assert_eq!(program.statements.len(), 1);
2174        assert!(matches!(&program.statements[0], Stmt::Command(_)));
2175    }
2176
2177    #[test]
2178    fn parse_command_with_string_arg() {
2179        let result = parse(r#"echo "hello""#);
2180        assert!(result.is_ok());
2181        let program = result.expect("ok");
2182        match &program.statements[0] {
2183            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 1),
2184            _ => panic!("expected Command"),
2185        }
2186    }
2187
2188    #[test]
2189    fn parse_assignment() {
2190        let result = parse("X=5");
2191        assert!(result.is_ok());
2192        let program = result.expect("ok");
2193        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
2194    }
2195
2196    #[test]
2197    fn parse_pipeline() {
2198        let result = parse("a | b | c");
2199        assert!(result.is_ok());
2200        let program = result.expect("ok");
2201        match &program.statements[0] {
2202            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
2203            _ => panic!("expected Pipeline"),
2204        }
2205    }
2206
2207    #[test]
2208    fn parse_background_job() {
2209        let result = parse("cmd &");
2210        assert!(result.is_ok());
2211        let program = result.expect("ok");
2212        match &program.statements[0] {
2213            Stmt::Pipeline(p) => assert!(p.background),
2214            _ => panic!("expected Pipeline with background"),
2215        }
2216    }
2217
2218    #[test]
2219    fn parse_if_simple() {
2220        let result = parse("if true; then echo; fi");
2221        assert!(result.is_ok());
2222        let program = result.expect("ok");
2223        assert!(matches!(&program.statements[0], Stmt::If(_)));
2224    }
2225
2226    #[test]
2227    fn parse_if_else() {
2228        let result = parse("if true; then echo; else echo; fi");
2229        assert!(result.is_ok());
2230        let program = result.expect("ok");
2231        match &program.statements[0] {
2232            Stmt::If(if_stmt) => assert!(if_stmt.else_branch.is_some()),
2233            _ => panic!("expected If"),
2234        }
2235    }
2236
2237    #[test]
2238    fn parse_elif_simple() {
2239        let result = parse("if true; then echo a; elif false; then echo b; fi");
2240        assert!(result.is_ok(), "parse failed: {:?}", result);
2241        let program = result.expect("ok");
2242        match &program.statements[0] {
2243            Stmt::If(if_stmt) => {
2244                // elif is desugared to nested if in else
2245                assert!(if_stmt.else_branch.is_some());
2246                let else_branch = if_stmt.else_branch.as_ref().unwrap();
2247                assert_eq!(else_branch.len(), 1);
2248                assert!(matches!(&else_branch[0], Stmt::If(_)));
2249            }
2250            _ => panic!("expected If"),
2251        }
2252    }
2253
2254    #[test]
2255    fn parse_elif_with_else() {
2256        let result = parse("if true; then echo a; elif false; then echo b; else echo c; fi");
2257        assert!(result.is_ok(), "parse failed: {:?}", result);
2258        let program = result.expect("ok");
2259        match &program.statements[0] {
2260            Stmt::If(outer_if) => {
2261                // Check nested structure: if -> elif -> else
2262                let else_branch = outer_if.else_branch.as_ref().expect("outer else");
2263                assert_eq!(else_branch.len(), 1);
2264                match &else_branch[0] {
2265                    Stmt::If(inner_if) => {
2266                        // The inner if (from elif) should have the final else
2267                        assert!(inner_if.else_branch.is_some());
2268                    }
2269                    _ => panic!("expected nested If from elif"),
2270                }
2271            }
2272            _ => panic!("expected If"),
2273        }
2274    }
2275
2276    #[test]
2277    fn parse_multiple_elif() {
2278        // Shell-compatible: use [[ ]] for comparisons
2279        let result = parse(
2280            "if [[ ${X} == 1 ]]; then echo one; elif [[ ${X} == 2 ]]; then echo two; elif [[ ${X} == 3 ]]; then echo three; else echo other; fi",
2281        );
2282        assert!(result.is_ok(), "parse failed: {:?}", result);
2283    }
2284
2285    #[test]
2286    fn parse_for_loop() {
2287        let result = parse("for X in items; do echo; done");
2288        assert!(result.is_ok());
2289        let program = result.expect("ok");
2290        assert!(matches!(&program.statements[0], Stmt::For(_)));
2291    }
2292
2293    #[test]
2294    fn parse_brackets_not_array_literal() {
2295        // Array literals are no longer supported, [ is just a regular char
2296        let result = parse("cmd [1");
2297        // This should fail or parse unexpectedly - arrays are removed
2298        // Just verify we don't crash
2299        let _ = result;
2300    }
2301
2302    #[test]
2303    fn parse_named_arg() {
2304        // Bareword key=value parses as WordAssign — the kernel decides per
2305        // command whether to route it to tool_args.named (export/alias) or
2306        // stringify to a positional (every other builtin).
2307        let result = parse("cmd foo=5");
2308        assert!(result.is_ok());
2309        let program = result.expect("ok");
2310        match &program.statements[0] {
2311            Stmt::Command(cmd) => {
2312                assert_eq!(cmd.args.len(), 1);
2313                assert!(matches!(&cmd.args[0], Arg::WordAssign { .. }));
2314            }
2315            _ => panic!("expected Command"),
2316        }
2317    }
2318
2319    #[test]
2320    fn parse_short_flag() {
2321        let result = parse("ls -l");
2322        assert!(result.is_ok());
2323        let program = result.expect("ok");
2324        match &program.statements[0] {
2325            Stmt::Command(cmd) => {
2326                assert_eq!(cmd.name, "ls");
2327                assert_eq!(cmd.args.len(), 1);
2328                match &cmd.args[0] {
2329                    Arg::ShortFlag(name) => assert_eq!(name, "l"),
2330                    _ => panic!("expected ShortFlag"),
2331                }
2332            }
2333            _ => panic!("expected Command"),
2334        }
2335    }
2336
2337    #[test]
2338    fn parse_long_flag() {
2339        let result = parse("git push --force");
2340        assert!(result.is_ok());
2341        let program = result.expect("ok");
2342        match &program.statements[0] {
2343            Stmt::Command(cmd) => {
2344                assert_eq!(cmd.name, "git");
2345                assert_eq!(cmd.args.len(), 2);
2346                match &cmd.args[0] {
2347                    Arg::Positional(Expr::Literal(Value::String(s))) => assert_eq!(s, "push"),
2348                    _ => panic!("expected Positional push"),
2349                }
2350                match &cmd.args[1] {
2351                    Arg::LongFlag(name) => assert_eq!(name, "force"),
2352                    _ => panic!("expected LongFlag"),
2353                }
2354            }
2355            _ => panic!("expected Command"),
2356        }
2357    }
2358
2359    #[test]
2360    fn parse_long_flag_with_value() {
2361        let result = parse(r#"git commit --message="hello""#);
2362        assert!(result.is_ok());
2363        let program = result.expect("ok");
2364        match &program.statements[0] {
2365            Stmt::Command(cmd) => {
2366                assert_eq!(cmd.name, "git");
2367                assert_eq!(cmd.args.len(), 2);
2368                match &cmd.args[1] {
2369                    Arg::Named { key, value } => {
2370                        assert_eq!(key, "message");
2371                        match value {
2372                            Expr::Literal(Value::String(s)) => assert_eq!(s, "hello"),
2373                            _ => panic!("expected String value"),
2374                        }
2375                    }
2376                    _ => panic!("expected Named from --flag=value"),
2377                }
2378            }
2379            _ => panic!("expected Command"),
2380        }
2381    }
2382
2383    #[test]
2384    fn parse_mixed_flags_and_args() {
2385        let result = parse(r#"git commit -m "message" --amend"#);
2386        assert!(result.is_ok());
2387        let program = result.expect("ok");
2388        match &program.statements[0] {
2389            Stmt::Command(cmd) => {
2390                assert_eq!(cmd.name, "git");
2391                assert_eq!(cmd.args.len(), 4);
2392                // commit (positional)
2393                assert!(matches!(&cmd.args[0], Arg::Positional(_)));
2394                // -m (short flag)
2395                match &cmd.args[1] {
2396                    Arg::ShortFlag(name) => assert_eq!(name, "m"),
2397                    _ => panic!("expected ShortFlag -m"),
2398                }
2399                // "message" (positional)
2400                assert!(matches!(&cmd.args[2], Arg::Positional(_)));
2401                // --amend (long flag)
2402                match &cmd.args[3] {
2403                    Arg::LongFlag(name) => assert_eq!(name, "amend"),
2404                    _ => panic!("expected LongFlag --amend"),
2405                }
2406            }
2407            _ => panic!("expected Command"),
2408        }
2409    }
2410
2411    #[test]
2412    fn parse_redirect_stdout() {
2413        let result = parse("cmd > file");
2414        assert!(result.is_ok());
2415        let program = result.expect("ok");
2416        // Commands with redirects stay as Pipeline, not Command
2417        match &program.statements[0] {
2418            Stmt::Pipeline(p) => {
2419                assert_eq!(p.commands.len(), 1);
2420                let cmd = &p.commands[0];
2421                assert_eq!(cmd.redirects.len(), 1);
2422                assert!(matches!(cmd.redirects[0].kind, RedirectKind::StdoutOverwrite));
2423            }
2424            _ => panic!("expected Pipeline"),
2425        }
2426    }
2427
2428    #[test]
2429    fn parse_var_ref() {
2430        let result = parse("echo ${VAR}");
2431        assert!(result.is_ok());
2432        let program = result.expect("ok");
2433        match &program.statements[0] {
2434            Stmt::Command(cmd) => {
2435                assert_eq!(cmd.args.len(), 1);
2436                assert!(matches!(&cmd.args[0], Arg::Positional(Expr::VarRef(_))));
2437            }
2438            _ => panic!("expected Command"),
2439        }
2440    }
2441
2442    #[test]
2443    fn parse_multiple_statements() {
2444        let result = parse("a\nb\nc");
2445        assert!(result.is_ok());
2446        let program = result.expect("ok");
2447        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
2448        assert_eq!(non_empty.len(), 3);
2449    }
2450
2451    #[test]
2452    fn parse_semicolon_separated() {
2453        let result = parse("a; b; c");
2454        assert!(result.is_ok());
2455        let program = result.expect("ok");
2456        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
2457        assert_eq!(non_empty.len(), 3);
2458    }
2459
2460    #[test]
2461    fn parse_complex_pipeline() {
2462        let result = parse(r#"cat file | grep pattern="foo" | head count=10"#);
2463        assert!(result.is_ok());
2464        let program = result.expect("ok");
2465        match &program.statements[0] {
2466            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
2467            _ => panic!("expected Pipeline"),
2468        }
2469    }
2470
2471    #[test]
2472    fn parse_json_as_string_arg() {
2473        // JSON arrays/objects should be passed as string arguments
2474        let result = parse(r#"cmd '[[1, 2], [3, 4]]'"#);
2475        assert!(result.is_ok());
2476    }
2477
2478    #[test]
2479    fn parse_mixed_args() {
2480        let result = parse(r#"cmd pos1 key="val" pos2 num=42"#);
2481        assert!(result.is_ok());
2482        let program = result.expect("ok");
2483        match &program.statements[0] {
2484            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 4),
2485            _ => panic!("expected Command"),
2486        }
2487    }
2488
2489    #[test]
2490    fn error_unterminated_string() {
2491        let result = parse(r#"echo "hello"#);
2492        assert!(result.is_err());
2493    }
2494
2495    #[test]
2496    fn error_unterminated_var_ref() {
2497        let result = parse("echo ${VAR");
2498        assert!(result.is_err());
2499    }
2500
2501    #[test]
2502    fn error_missing_fi() {
2503        let result = parse("if true; then echo");
2504        assert!(result.is_err());
2505    }
2506
2507    #[test]
2508    fn error_missing_done() {
2509        let result = parse("for X in items; do echo");
2510        assert!(result.is_err());
2511    }
2512
2513    #[test]
2514    fn parse_nested_cmd_subst() {
2515        // Nested command substitution is supported
2516        let result = parse("X=$(echo $(date))").unwrap();
2517        match &result.statements[0] {
2518            Stmt::Assignment(a) => {
2519                assert_eq!(a.name, "X");
2520                match &a.value {
2521                    Expr::CommandSubst(outer) => {
2522                        assert_eq!(outer.commands[0].name, "echo");
2523                        // The argument should be another command substitution
2524                        match &outer.commands[0].args[0] {
2525                            Arg::Positional(Expr::CommandSubst(inner)) => {
2526                                assert_eq!(inner.commands[0].name, "date");
2527                            }
2528                            other => panic!("expected nested cmd subst, got {:?}", other),
2529                        }
2530                    }
2531                    other => panic!("expected cmd subst, got {:?}", other),
2532                }
2533            }
2534            other => panic!("expected assignment, got {:?}", other),
2535        }
2536    }
2537
2538    #[test]
2539    fn parse_deeply_nested_cmd_subst() {
2540        // Three levels deep
2541        let result = parse("X=$(a $(b $(c)))").unwrap();
2542        match &result.statements[0] {
2543            Stmt::Assignment(a) => match &a.value {
2544                Expr::CommandSubst(level1) => {
2545                    assert_eq!(level1.commands[0].name, "a");
2546                    match &level1.commands[0].args[0] {
2547                        Arg::Positional(Expr::CommandSubst(level2)) => {
2548                            assert_eq!(level2.commands[0].name, "b");
2549                            match &level2.commands[0].args[0] {
2550                                Arg::Positional(Expr::CommandSubst(level3)) => {
2551                                    assert_eq!(level3.commands[0].name, "c");
2552                                }
2553                                other => panic!("expected level3 cmd subst, got {:?}", other),
2554                            }
2555                        }
2556                        other => panic!("expected level2 cmd subst, got {:?}", other),
2557                    }
2558                }
2559                other => panic!("expected cmd subst, got {:?}", other),
2560            },
2561            other => panic!("expected assignment, got {:?}", other),
2562        }
2563    }
2564
2565    // ═══════════════════════════════════════════════════════════════════════════
2566    // Value Preservation Tests - These test that actual values are captured
2567    // ═══════════════════════════════════════════════════════════════════════════
2568
2569    #[test]
2570    fn value_int_preserved() {
2571        let result = parse("X=42").unwrap();
2572        match &result.statements[0] {
2573            Stmt::Assignment(a) => {
2574                assert_eq!(a.name, "X");
2575                match &a.value {
2576                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2577                    other => panic!("expected int literal, got {:?}", other),
2578                }
2579            }
2580            other => panic!("expected assignment, got {:?}", other),
2581        }
2582    }
2583
2584    #[test]
2585    fn value_negative_int_preserved() {
2586        let result = parse("X=-99").unwrap();
2587        match &result.statements[0] {
2588            Stmt::Assignment(a) => match &a.value {
2589                Expr::Literal(Value::Int(n)) => assert_eq!(*n, -99),
2590                other => panic!("expected int, got {:?}", other),
2591            },
2592            other => panic!("expected assignment, got {:?}", other),
2593        }
2594    }
2595
2596    #[test]
2597    fn value_float_preserved() {
2598        let result = parse("PI=3.14").unwrap();
2599        match &result.statements[0] {
2600            Stmt::Assignment(a) => match &a.value {
2601                Expr::Literal(Value::Float(f)) => assert!((*f - 3.14).abs() < 0.001),
2602                other => panic!("expected float, got {:?}", other),
2603            },
2604            other => panic!("expected assignment, got {:?}", other),
2605        }
2606    }
2607
2608    #[test]
2609    fn value_string_preserved() {
2610        let result = parse(r#"echo "hello world""#).unwrap();
2611        match &result.statements[0] {
2612            Stmt::Command(cmd) => {
2613                assert_eq!(cmd.name, "echo");
2614                match &cmd.args[0] {
2615                    Arg::Positional(Expr::Literal(Value::String(s))) => {
2616                        assert_eq!(s, "hello world");
2617                    }
2618                    other => panic!("expected string arg, got {:?}", other),
2619                }
2620            }
2621            other => panic!("expected command, got {:?}", other),
2622        }
2623    }
2624
2625    #[test]
2626    fn value_string_with_escapes_preserved() {
2627        let result = parse(r#"echo "line1\nline2""#).unwrap();
2628        match &result.statements[0] {
2629            Stmt::Command(cmd) => match &cmd.args[0] {
2630                Arg::Positional(Expr::Literal(Value::String(s))) => {
2631                    assert_eq!(s, "line1\nline2");
2632                }
2633                other => panic!("expected string, got {:?}", other),
2634            },
2635            other => panic!("expected command, got {:?}", other),
2636        }
2637    }
2638
2639    #[test]
2640    fn value_command_name_preserved() {
2641        let result = parse("my-command").unwrap();
2642        match &result.statements[0] {
2643            Stmt::Command(cmd) => assert_eq!(cmd.name, "my-command"),
2644            other => panic!("expected command, got {:?}", other),
2645        }
2646    }
2647
2648    #[test]
2649    fn value_assignment_name_preserved() {
2650        let result = parse("MY_VAR=1").unwrap();
2651        match &result.statements[0] {
2652            Stmt::Assignment(a) => assert_eq!(a.name, "MY_VAR"),
2653            other => panic!("expected assignment, got {:?}", other),
2654        }
2655    }
2656
2657    #[test]
2658    fn value_for_variable_preserved() {
2659        let result = parse("for ITEM in items; do echo; done").unwrap();
2660        match &result.statements[0] {
2661            Stmt::For(f) => assert_eq!(f.variable, "ITEM"),
2662            other => panic!("expected for, got {:?}", other),
2663        }
2664    }
2665
2666    #[test]
2667    fn value_varref_name_preserved() {
2668        let result = parse("echo ${MESSAGE}").unwrap();
2669        match &result.statements[0] {
2670            Stmt::Command(cmd) => match &cmd.args[0] {
2671                Arg::Positional(Expr::VarRef(path)) => {
2672                    assert_eq!(path.segments.len(), 1);
2673                    let VarSegment::Field(name) = &path.segments[0];
2674                    assert_eq!(name, "MESSAGE");
2675                }
2676                other => panic!("expected varref, got {:?}", other),
2677            },
2678            other => panic!("expected command, got {:?}", other),
2679        }
2680    }
2681
2682    #[test]
2683    fn value_varref_field_access_preserved() {
2684        let result = parse("echo ${RESULT.data}").unwrap();
2685        match &result.statements[0] {
2686            Stmt::Command(cmd) => match &cmd.args[0] {
2687                Arg::Positional(Expr::VarRef(path)) => {
2688                    assert_eq!(path.segments.len(), 2);
2689                    let VarSegment::Field(a) = &path.segments[0];
2690                    let VarSegment::Field(b) = &path.segments[1];
2691                    assert_eq!(a, "RESULT");
2692                    assert_eq!(b, "data");
2693                }
2694                other => panic!("expected varref, got {:?}", other),
2695            },
2696            other => panic!("expected command, got {:?}", other),
2697        }
2698    }
2699
2700    #[test]
2701    fn value_varref_index_ignored() {
2702        // Index segments are no longer supported - they're filtered out by parse_varpath
2703        let result = parse("echo ${ITEMS[0]}").unwrap();
2704        match &result.statements[0] {
2705            Stmt::Command(cmd) => match &cmd.args[0] {
2706                Arg::Positional(Expr::VarRef(path)) => {
2707                    // Index segment [0] is skipped, only ITEMS remains
2708                    assert_eq!(path.segments.len(), 1);
2709                    let VarSegment::Field(name) = &path.segments[0];
2710                    assert_eq!(name, "ITEMS");
2711                }
2712                other => panic!("expected varref, got {:?}", other),
2713            },
2714            other => panic!("expected command, got {:?}", other),
2715        }
2716    }
2717
2718    #[test]
2719    fn value_named_arg_preserved() {
2720        // Bareword key=value parses as WordAssign — the kernel decides per
2721        // command whether to route into args.named (export/alias) or
2722        // stringify as a positional.
2723        let result = parse("cmd count=42").unwrap();
2724        match &result.statements[0] {
2725            Stmt::Command(cmd) => {
2726                assert_eq!(cmd.name, "cmd");
2727                match &cmd.args[0] {
2728                    Arg::WordAssign { key, value } => {
2729                        assert_eq!(key, "count");
2730                        match value {
2731                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2732                            other => panic!("expected int, got {:?}", other),
2733                        }
2734                    }
2735                    other => panic!("expected WordAssign arg, got {:?}", other),
2736                }
2737            }
2738            other => panic!("expected command, got {:?}", other),
2739        }
2740    }
2741
2742    #[test]
2743    fn value_function_def_name_preserved() {
2744        let result = parse("greet() { echo }").unwrap();
2745        match &result.statements[0] {
2746            Stmt::ToolDef(t) => {
2747                assert_eq!(t.name, "greet");
2748                assert!(t.params.is_empty());
2749            }
2750            other => panic!("expected function def, got {:?}", other),
2751        }
2752    }
2753
2754    // ═══════════════════════════════════════════════════════════════════════════
2755    // New Feature Tests - Comparisons, Interpolation, Nested Structures
2756    // ═══════════════════════════════════════════════════════════════════════════
2757
2758    #[test]
2759    fn parse_comparison_equals() {
2760        // Shell-compatible: use [[ ]] for comparisons
2761        let result = parse("if [[ ${X} == 5 ]]; then echo; fi").unwrap();
2762        match &result.statements[0] {
2763            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2764                Expr::Test(test) => match test.as_ref() {
2765                    TestExpr::Comparison { left, op, right } => {
2766                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
2767                        assert_eq!(*op, TestCmpOp::Eq);
2768                        match right.as_ref() {
2769                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 5),
2770                            other => panic!("expected int, got {:?}", other),
2771                        }
2772                    }
2773                    other => panic!("expected comparison, got {:?}", other),
2774                },
2775                other => panic!("expected test expr, got {:?}", other),
2776            },
2777            other => panic!("expected if, got {:?}", other),
2778        }
2779    }
2780
2781    #[test]
2782    fn parse_comparison_not_equals() {
2783        let result = parse("if [[ ${X} != 0 ]]; then echo; fi").unwrap();
2784        match &result.statements[0] {
2785            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2786                Expr::Test(test) => match test.as_ref() {
2787                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotEq),
2788                    other => panic!("expected comparison, got {:?}", other),
2789                },
2790                other => panic!("expected test expr, got {:?}", other),
2791            },
2792            other => panic!("expected if, got {:?}", other),
2793        }
2794    }
2795
2796    #[test]
2797    fn parse_comparison_less_than() {
2798        let result = parse("if [[ ${COUNT} -lt 10 ]]; then echo; fi").unwrap();
2799        match &result.statements[0] {
2800            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2801                Expr::Test(test) => match test.as_ref() {
2802                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumLt),
2803                    other => panic!("expected comparison, got {:?}", other),
2804                },
2805                other => panic!("expected test expr, got {:?}", other),
2806            },
2807            other => panic!("expected if, got {:?}", other),
2808        }
2809    }
2810
2811    #[test]
2812    fn parse_comparison_greater_than() {
2813        let result = parse("if [[ ${COUNT} -gt 0 ]]; then echo; fi").unwrap();
2814        match &result.statements[0] {
2815            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2816                Expr::Test(test) => match test.as_ref() {
2817                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumGt),
2818                    other => panic!("expected comparison, got {:?}", other),
2819                },
2820                other => panic!("expected test expr, got {:?}", other),
2821            },
2822            other => panic!("expected if, got {:?}", other),
2823        }
2824    }
2825
2826    #[test]
2827    fn parse_comparison_less_equal() {
2828        let result = parse("if [[ ${X} -le 100 ]]; then echo; fi").unwrap();
2829        match &result.statements[0] {
2830            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2831                Expr::Test(test) => match test.as_ref() {
2832                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumLtEq),
2833                    other => panic!("expected comparison, got {:?}", other),
2834                },
2835                other => panic!("expected test expr, got {:?}", other),
2836            },
2837            other => panic!("expected if, got {:?}", other),
2838        }
2839    }
2840
2841    #[test]
2842    fn parse_comparison_greater_equal() {
2843        let result = parse("if [[ ${X} -ge 1 ]]; then echo; fi").unwrap();
2844        match &result.statements[0] {
2845            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2846                Expr::Test(test) => match test.as_ref() {
2847                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumGtEq),
2848                    other => panic!("expected comparison, got {:?}", other),
2849                },
2850                other => panic!("expected test expr, got {:?}", other),
2851            },
2852            other => panic!("expected if, got {:?}", other),
2853        }
2854    }
2855
2856    #[test]
2857    fn parse_regex_match() {
2858        let result = parse(r#"if [[ ${NAME} =~ "^test" ]]; then echo; fi"#).unwrap();
2859        match &result.statements[0] {
2860            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2861                Expr::Test(test) => match test.as_ref() {
2862                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::Match),
2863                    other => panic!("expected comparison, got {:?}", other),
2864                },
2865                other => panic!("expected test expr, got {:?}", other),
2866            },
2867            other => panic!("expected if, got {:?}", other),
2868        }
2869    }
2870
2871    #[test]
2872    fn parse_regex_not_match() {
2873        let result = parse(r#"if [[ ${NAME} !~ "^test" ]]; then echo; fi"#).unwrap();
2874        match &result.statements[0] {
2875            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2876                Expr::Test(test) => match test.as_ref() {
2877                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotMatch),
2878                    other => panic!("expected comparison, got {:?}", other),
2879                },
2880                other => panic!("expected test expr, got {:?}", other),
2881            },
2882            other => panic!("expected if, got {:?}", other),
2883        }
2884    }
2885
2886    #[test]
2887    fn parse_string_interpolation() {
2888        let result = parse(r#"echo "Hello ${NAME}!""#).unwrap();
2889        match &result.statements[0] {
2890            Stmt::Command(cmd) => match &cmd.args[0] {
2891                Arg::Positional(Expr::Interpolated(parts)) => {
2892                    assert_eq!(parts.len(), 3);
2893                    match &parts[0] {
2894                        StringPart::Literal(s) => assert_eq!(s, "Hello "),
2895                        other => panic!("expected literal, got {:?}", other),
2896                    }
2897                    match &parts[1] {
2898                        StringPart::Var(path) => {
2899                            assert_eq!(path.segments.len(), 1);
2900                            let VarSegment::Field(name) = &path.segments[0];
2901                            assert_eq!(name, "NAME");
2902                        }
2903                        other => panic!("expected var, got {:?}", other),
2904                    }
2905                    match &parts[2] {
2906                        StringPart::Literal(s) => assert_eq!(s, "!"),
2907                        other => panic!("expected literal, got {:?}", other),
2908                    }
2909                }
2910                other => panic!("expected interpolated, got {:?}", other),
2911            },
2912            other => panic!("expected command, got {:?}", other),
2913        }
2914    }
2915
2916    #[test]
2917    fn parse_string_interpolation_multiple_vars() {
2918        let result = parse(r#"echo "${FIRST} and ${SECOND}""#).unwrap();
2919        match &result.statements[0] {
2920            Stmt::Command(cmd) => match &cmd.args[0] {
2921                Arg::Positional(Expr::Interpolated(parts)) => {
2922                    // ${FIRST} + " and " + ${SECOND} = 3 parts
2923                    assert_eq!(parts.len(), 3);
2924                    assert!(matches!(&parts[0], StringPart::Var(_)));
2925                    assert!(matches!(&parts[1], StringPart::Literal(_)));
2926                    assert!(matches!(&parts[2], StringPart::Var(_)));
2927                }
2928                other => panic!("expected interpolated, got {:?}", other),
2929            },
2930            other => panic!("expected command, got {:?}", other),
2931        }
2932    }
2933
2934    #[test]
2935    fn parse_empty_function_body() {
2936        let result = parse("empty() { }").unwrap();
2937        match &result.statements[0] {
2938            Stmt::ToolDef(t) => {
2939                assert_eq!(t.name, "empty");
2940                assert!(t.params.is_empty());
2941                assert!(t.body.is_empty());
2942            }
2943            other => panic!("expected function def, got {:?}", other),
2944        }
2945    }
2946
2947    #[test]
2948    fn parse_bash_style_function() {
2949        let result = parse("function greet { echo hello }").unwrap();
2950        match &result.statements[0] {
2951            Stmt::ToolDef(t) => {
2952                assert_eq!(t.name, "greet");
2953                assert!(t.params.is_empty());
2954                assert_eq!(t.body.len(), 1);
2955            }
2956            other => panic!("expected function def, got {:?}", other),
2957        }
2958    }
2959
2960    #[test]
2961    fn parse_comparison_string_values() {
2962        let result = parse(r#"if [[ ${STATUS} == "ok" ]]; then echo; fi"#).unwrap();
2963        match &result.statements[0] {
2964            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2965                Expr::Test(test) => match test.as_ref() {
2966                    TestExpr::Comparison { left, op, right } => {
2967                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
2968                        assert_eq!(*op, TestCmpOp::Eq);
2969                        match right.as_ref() {
2970                            Expr::Literal(Value::String(s)) => assert_eq!(s, "ok"),
2971                            other => panic!("expected string, got {:?}", other),
2972                        }
2973                    }
2974                    other => panic!("expected comparison, got {:?}", other),
2975                },
2976                other => panic!("expected test expr, got {:?}", other),
2977            },
2978            other => panic!("expected if, got {:?}", other),
2979        }
2980    }
2981
2982    // ═══════════════════════════════════════════════════════════════════════════
2983    // Command Substitution Tests
2984    // ═══════════════════════════════════════════════════════════════════════════
2985
2986    #[test]
2987    fn parse_cmd_subst_simple() {
2988        let result = parse("X=$(echo)").unwrap();
2989        match &result.statements[0] {
2990            Stmt::Assignment(a) => {
2991                assert_eq!(a.name, "X");
2992                match &a.value {
2993                    Expr::CommandSubst(pipeline) => {
2994                        assert_eq!(pipeline.commands.len(), 1);
2995                        assert_eq!(pipeline.commands[0].name, "echo");
2996                    }
2997                    other => panic!("expected command subst, got {:?}", other),
2998                }
2999            }
3000            other => panic!("expected assignment, got {:?}", other),
3001        }
3002    }
3003
3004    #[test]
3005    fn parse_cmd_subst_with_args() {
3006        let result = parse(r#"X=$(fetch url="http://example.com")"#).unwrap();
3007        match &result.statements[0] {
3008            Stmt::Assignment(a) => match &a.value {
3009                Expr::CommandSubst(pipeline) => {
3010                    assert_eq!(pipeline.commands[0].name, "fetch");
3011                    assert_eq!(pipeline.commands[0].args.len(), 1);
3012                    match &pipeline.commands[0].args[0] {
3013                        Arg::WordAssign { key, .. } => assert_eq!(key, "url"),
3014                        other => panic!("expected WordAssign arg, got {:?}", other),
3015                    }
3016                }
3017                other => panic!("expected command subst, got {:?}", other),
3018            },
3019            other => panic!("expected assignment, got {:?}", other),
3020        }
3021    }
3022
3023    #[test]
3024    fn parse_cmd_subst_pipeline() {
3025        let result = parse("X=$(cat file | grep pattern)").unwrap();
3026        match &result.statements[0] {
3027            Stmt::Assignment(a) => match &a.value {
3028                Expr::CommandSubst(pipeline) => {
3029                    assert_eq!(pipeline.commands.len(), 2);
3030                    assert_eq!(pipeline.commands[0].name, "cat");
3031                    assert_eq!(pipeline.commands[1].name, "grep");
3032                }
3033                other => panic!("expected command subst, got {:?}", other),
3034            },
3035            other => panic!("expected assignment, got {:?}", other),
3036        }
3037    }
3038
3039    #[test]
3040    fn parse_cmd_subst_in_condition() {
3041        // Shell-compatible: conditions are commands, not command substitutions
3042        let result = parse("if kaish-validate; then echo; fi").unwrap();
3043        match &result.statements[0] {
3044            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3045                Expr::Command(cmd) => {
3046                    assert_eq!(cmd.name, "kaish-validate");
3047                }
3048                other => panic!("expected command, got {:?}", other),
3049            },
3050            other => panic!("expected if, got {:?}", other),
3051        }
3052    }
3053
3054    #[test]
3055    fn parse_cmd_subst_in_command_arg() {
3056        let result = parse("echo $(whoami)").unwrap();
3057        match &result.statements[0] {
3058            Stmt::Command(cmd) => {
3059                assert_eq!(cmd.name, "echo");
3060                match &cmd.args[0] {
3061                    Arg::Positional(Expr::CommandSubst(pipeline)) => {
3062                        assert_eq!(pipeline.commands[0].name, "whoami");
3063                    }
3064                    other => panic!("expected command subst, got {:?}", other),
3065                }
3066            }
3067            other => panic!("expected command, got {:?}", other),
3068        }
3069    }
3070
3071    // ═══════════════════════════════════════════════════════════════════════════
3072    // Logical Operator Tests (&&, ||)
3073    // ═══════════════════════════════════════════════════════════════════════════
3074
3075    #[test]
3076    fn parse_condition_and() {
3077        // Shell-compatible: commands chained with &&
3078        let result = parse("if check-a && check-b; then echo; fi").unwrap();
3079        match &result.statements[0] {
3080            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3081                Expr::BinaryOp { left, op, right } => {
3082                    assert_eq!(*op, BinaryOp::And);
3083                    assert!(matches!(left.as_ref(), Expr::Command(_)));
3084                    assert!(matches!(right.as_ref(), Expr::Command(_)));
3085                }
3086                other => panic!("expected binary op, got {:?}", other),
3087            },
3088            other => panic!("expected if, got {:?}", other),
3089        }
3090    }
3091
3092    #[test]
3093    fn parse_condition_or() {
3094        let result = parse("if try-a || try-b; then echo; fi").unwrap();
3095        match &result.statements[0] {
3096            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3097                Expr::BinaryOp { left, op, right } => {
3098                    assert_eq!(*op, BinaryOp::Or);
3099                    assert!(matches!(left.as_ref(), Expr::Command(_)));
3100                    assert!(matches!(right.as_ref(), Expr::Command(_)));
3101                }
3102                other => panic!("expected binary op, got {:?}", other),
3103            },
3104            other => panic!("expected if, got {:?}", other),
3105        }
3106    }
3107
3108    #[test]
3109    fn parse_condition_and_or_precedence() {
3110        // a && b || c should parse as (a && b) || c
3111        let result = parse("if cmd-a && cmd-b || cmd-c; then echo; fi").unwrap();
3112        match &result.statements[0] {
3113            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3114                Expr::BinaryOp { left, op, right } => {
3115                    // Top level should be ||
3116                    assert_eq!(*op, BinaryOp::Or);
3117                    // Left side should be && expression
3118                    match left.as_ref() {
3119                        Expr::BinaryOp { op: inner_op, .. } => {
3120                            assert_eq!(*inner_op, BinaryOp::And);
3121                        }
3122                        other => panic!("expected binary op (&&), got {:?}", other),
3123                    }
3124                    // Right side should be command
3125                    assert!(matches!(right.as_ref(), Expr::Command(_)));
3126                }
3127                other => panic!("expected binary op, got {:?}", other),
3128            },
3129            other => panic!("expected if, got {:?}", other),
3130        }
3131    }
3132
3133    #[test]
3134    fn parse_condition_multiple_and() {
3135        let result = parse("if cmd-a && cmd-b && cmd-c; then echo; fi").unwrap();
3136        match &result.statements[0] {
3137            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3138                Expr::BinaryOp { left, op, .. } => {
3139                    assert_eq!(*op, BinaryOp::And);
3140                    // Left side should also be &&
3141                    match left.as_ref() {
3142                        Expr::BinaryOp { op: inner_op, .. } => {
3143                            assert_eq!(*inner_op, BinaryOp::And);
3144                        }
3145                        other => panic!("expected binary op, got {:?}", other),
3146                    }
3147                }
3148                other => panic!("expected binary op, got {:?}", other),
3149            },
3150            other => panic!("expected if, got {:?}", other),
3151        }
3152    }
3153
3154    #[test]
3155    fn parse_condition_mixed_comparison_and_logical() {
3156        // Shell-compatible: use [[ ]] for comparisons, && to chain them
3157        let result = parse("if [[ ${X} == 5 ]] && [[ ${Y} -gt 0 ]]; then echo; fi").unwrap();
3158        match &result.statements[0] {
3159            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3160                Expr::BinaryOp { left, op, right } => {
3161                    assert_eq!(*op, BinaryOp::And);
3162                    // Left: [[ ${X} == 5 ]]
3163                    match left.as_ref() {
3164                        Expr::Test(test) => match test.as_ref() {
3165                            TestExpr::Comparison { op: left_op, .. } => {
3166                                assert_eq!(*left_op, TestCmpOp::Eq);
3167                            }
3168                            other => panic!("expected comparison, got {:?}", other),
3169                        },
3170                        other => panic!("expected test, got {:?}", other),
3171                    }
3172                    // Right: [[ ${Y} -gt 0 ]]
3173                    match right.as_ref() {
3174                        Expr::Test(test) => match test.as_ref() {
3175                            TestExpr::Comparison { op: right_op, .. } => {
3176                                assert_eq!(*right_op, TestCmpOp::NumGt);
3177                            }
3178                            other => panic!("expected comparison, got {:?}", other),
3179                        },
3180                        other => panic!("expected test, got {:?}", other),
3181                    }
3182                }
3183                other => panic!("expected binary op, got {:?}", other),
3184            },
3185            other => panic!("expected if, got {:?}", other),
3186        }
3187    }
3188
3189    // ═══════════════════════════════════════════════════════════════════════════
3190    // Integration Tests - Complete Scripts
3191    // ═══════════════════════════════════════════════════════════════════════════
3192
3193    /// Level 1: Linear script using core features
3194    #[test]
3195    fn script_level1_linear() {
3196        let script = r#"
3197NAME="kaish"
3198VERSION=1
3199TIMEOUT=30
3200ITEMS="alpha beta gamma"
3201
3202echo "Starting ${NAME} v${VERSION}"
3203cat "README.md" | grep pattern="install" | head count=5
3204fetch url="https://api.example.com/status" timeout=${TIMEOUT} > "/tmp/status.json"
3205echo "Items: ${ITEMS}"
3206"#;
3207        let result = parse(script).unwrap();
3208        let stmts: Vec<_> = result.statements.iter()
3209            .filter(|s| !matches!(s, Stmt::Empty))
3210            .collect();
3211
3212        assert_eq!(stmts.len(), 8);
3213        assert!(matches!(stmts[0], Stmt::Assignment(_)));  // set NAME
3214        assert!(matches!(stmts[1], Stmt::Assignment(_)));  // set VERSION
3215        assert!(matches!(stmts[2], Stmt::Assignment(_)));  // set TIMEOUT
3216        assert!(matches!(stmts[3], Stmt::Assignment(_)));  // set ITEMS
3217        assert!(matches!(stmts[4], Stmt::Command(_)));     // echo "Starting..."
3218        assert!(matches!(stmts[5], Stmt::Pipeline(_)));    // cat | grep | head
3219        assert!(matches!(stmts[6], Stmt::Pipeline(_)));    // fetch (with redirect - Pipeline since it has redirects)
3220        assert!(matches!(stmts[7], Stmt::Command(_)));     // echo "Items: ${ITEMS}"
3221    }
3222
3223    /// Level 2: Script with conditionals (shell-compatible syntax)
3224    #[test]
3225    fn script_level2_branching() {
3226        let script = r#"
3227RESULT=$(kaish-validate "input.json")
3228
3229if [[ ${RESULT.ok} == true ]]; then
3230    echo "Validation passed"
3231    process "input.json" > "output.json"
3232else
3233    echo "Validation failed: ${RESULT.err}"
3234fi
3235
3236if [[ ${COUNT} -gt 0 ]] && [[ ${COUNT} -le 100 ]]; then
3237    echo "Count in valid range"
3238fi
3239
3240if check-network || check-cache; then
3241    fetch url=${URL}
3242fi
3243"#;
3244        let result = parse(script).unwrap();
3245        let stmts: Vec<_> = result.statements.iter()
3246            .filter(|s| !matches!(s, Stmt::Empty))
3247            .collect();
3248
3249        assert_eq!(stmts.len(), 4);
3250
3251        // First: assignment with command substitution
3252        match stmts[0] {
3253            Stmt::Assignment(a) => {
3254                assert_eq!(a.name, "RESULT");
3255                assert!(matches!(&a.value, Expr::CommandSubst(_)));
3256            }
3257            other => panic!("expected assignment, got {:?}", other),
3258        }
3259
3260        // Second: if/else
3261        match stmts[1] {
3262            Stmt::If(if_stmt) => {
3263                assert_eq!(if_stmt.then_branch.len(), 2);
3264                assert!(if_stmt.else_branch.is_some());
3265                assert_eq!(if_stmt.else_branch.as_ref().unwrap().len(), 1);
3266            }
3267            other => panic!("expected if, got {:?}", other),
3268        }
3269
3270        // Third: if with && condition
3271        match stmts[2] {
3272            Stmt::If(if_stmt) => {
3273                match if_stmt.condition.as_ref() {
3274                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
3275                    other => panic!("expected && condition, got {:?}", other),
3276                }
3277            }
3278            other => panic!("expected if, got {:?}", other),
3279        }
3280
3281        // Fourth: if with || of commands
3282        match stmts[3] {
3283            Stmt::If(if_stmt) => {
3284                match if_stmt.condition.as_ref() {
3285                    Expr::BinaryOp { op, left, right } => {
3286                        assert_eq!(*op, BinaryOp::Or);
3287                        assert!(matches!(left.as_ref(), Expr::Command(_)));
3288                        assert!(matches!(right.as_ref(), Expr::Command(_)));
3289                    }
3290                    other => panic!("expected || condition, got {:?}", other),
3291                }
3292            }
3293            other => panic!("expected if, got {:?}", other),
3294        }
3295    }
3296
3297    /// Level 3: Script with loops and function definitions
3298    #[test]
3299    fn script_level3_loops_and_functions() {
3300        let script = r#"
3301greet() {
3302    echo "Hello, $1!"
3303}
3304
3305fetch_all() {
3306    for URL in $@; do
3307        fetch url=${URL}
3308    done
3309}
3310
3311USERS="alice bob charlie"
3312
3313for USER in ${USERS}; do
3314    greet ${USER}
3315    if [[ ${USER} == "bob" ]]; then
3316        echo "Found Bob!"
3317    fi
3318done
3319
3320long-running-task &
3321"#;
3322        let result = parse(script).unwrap();
3323        let stmts: Vec<_> = result.statements.iter()
3324            .filter(|s| !matches!(s, Stmt::Empty))
3325            .collect();
3326
3327        assert_eq!(stmts.len(), 5);
3328
3329        // First function def
3330        match stmts[0] {
3331            Stmt::ToolDef(t) => {
3332                assert_eq!(t.name, "greet");
3333                assert!(t.params.is_empty());
3334            }
3335            other => panic!("expected function def, got {:?}", other),
3336        }
3337
3338        // Second function def with nested for loop
3339        match stmts[1] {
3340            Stmt::ToolDef(t) => {
3341                assert_eq!(t.name, "fetch_all");
3342                assert_eq!(t.body.len(), 1);
3343                assert!(matches!(&t.body[0], Stmt::For(_)));
3344            }
3345            other => panic!("expected function def, got {:?}", other),
3346        }
3347
3348        // Assignment
3349        assert!(matches!(stmts[2], Stmt::Assignment(_)));
3350
3351        // For loop with nested if
3352        match stmts[3] {
3353            Stmt::For(f) => {
3354                assert_eq!(f.variable, "USER");
3355                assert_eq!(f.body.len(), 2);
3356                assert!(matches!(&f.body[0], Stmt::Command(_)));
3357                assert!(matches!(&f.body[1], Stmt::If(_)));
3358            }
3359            other => panic!("expected for loop, got {:?}", other),
3360        }
3361
3362        // Background job
3363        match stmts[4] {
3364            Stmt::Pipeline(p) => {
3365                assert!(p.background);
3366                assert_eq!(p.commands[0].name, "long-running-task");
3367            }
3368            other => panic!("expected pipeline (background), got {:?}", other),
3369        }
3370    }
3371
3372    /// Level 4: Complex nested control flow (shell-compatible syntax)
3373    #[test]
3374    fn script_level4_complex_nesting() {
3375        let script = r#"
3376RESULT=$(cat "config.json" | jq query=".servers" | kaish-validate schema="server-schema.json")
3377
3378if ping host=${HOST} && [[ ${RESULT} == true ]]; then
3379    for SERVER in "prod-1 prod-2"; do
3380        deploy target=${SERVER} port=8080
3381        if [[ $? -ne 0 ]]; then
3382            notify channel="ops" message="Deploy failed"
3383        fi
3384    done
3385fi
3386"#;
3387        let result = parse(script).unwrap();
3388        let stmts: Vec<_> = result.statements.iter()
3389            .filter(|s| !matches!(s, Stmt::Empty))
3390            .collect();
3391
3392        assert_eq!(stmts.len(), 2);
3393
3394        // Command substitution with pipeline
3395        match stmts[0] {
3396            Stmt::Assignment(a) => {
3397                assert_eq!(a.name, "RESULT");
3398                match &a.value {
3399                    Expr::CommandSubst(pipeline) => {
3400                        assert_eq!(pipeline.commands.len(), 3);
3401                    }
3402                    other => panic!("expected command subst, got {:?}", other),
3403                }
3404            }
3405            other => panic!("expected assignment, got {:?}", other),
3406        }
3407
3408        // If with && condition, containing for loop with nested if
3409        match stmts[1] {
3410            Stmt::If(if_stmt) => {
3411                match if_stmt.condition.as_ref() {
3412                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
3413                    other => panic!("expected && condition, got {:?}", other),
3414                }
3415                assert_eq!(if_stmt.then_branch.len(), 1);
3416                match &if_stmt.then_branch[0] {
3417                    Stmt::For(f) => {
3418                        assert_eq!(f.body.len(), 2);
3419                        assert!(matches!(&f.body[1], Stmt::If(_)));
3420                    }
3421                    other => panic!("expected for in if body, got {:?}", other),
3422                }
3423            }
3424            other => panic!("expected if, got {:?}", other),
3425        }
3426    }
3427
3428    /// Level 5: Edge cases and parser stress test
3429    #[test]
3430    fn script_level5_edge_cases() {
3431        let script = r#"
3432echo ""
3433echo "quotes: \"nested\" here"
3434echo "escapes: \n\t\r\\"
3435echo "unicode: \u2764"
3436
3437X=-99999
3438Y=3.14159265358979
3439Z=-0.001
3440
3441cmd a=1 b="two" c=true d=false e=null
3442
3443if true; then
3444    if false; then
3445        echo "inner"
3446    else
3447        echo "else"
3448    fi
3449fi
3450
3451for I in "a b c"; do
3452    echo ${I}
3453done
3454
3455no_params() {
3456    echo "no params"
3457}
3458
3459function all_args {
3460    echo "args: $@"
3461}
3462
3463a | b | c | d | e &
3464cmd 2> "errors.log"
3465cmd &> "all.log"
3466cmd >> "append.log"
3467cmd < "input.txt"
3468"#;
3469        let result = parse(script).unwrap();
3470        let stmts: Vec<_> = result.statements.iter()
3471            .filter(|s| !matches!(s, Stmt::Empty))
3472            .collect();
3473
3474        // Verify it parses without error
3475        assert!(stmts.len() >= 10, "expected many statements, got {}", stmts.len());
3476
3477        // Background pipeline
3478        let bg_stmt = stmts.iter().find(|s| matches!(s, Stmt::Pipeline(p) if p.background));
3479        assert!(bg_stmt.is_some(), "expected background pipeline");
3480
3481        match bg_stmt.unwrap() {
3482            Stmt::Pipeline(p) => {
3483                assert_eq!(p.commands.len(), 5);
3484                assert!(p.background);
3485            }
3486            _ => unreachable!(),
3487        }
3488    }
3489
3490    // ═══════════════════════════════════════════════════════════════════════════
3491    // Edge Case Tests: Ambiguity Resolution
3492    // ═══════════════════════════════════════════════════════════════════════════
3493
3494    #[test]
3495    fn parse_keyword_as_variable_rejected() {
3496        // Keywords CANNOT be used as variable names - this is intentional
3497        // to avoid ambiguity. Use different names instead.
3498        let result = parse(r#"if="value""#);
3499        assert!(result.is_err(), "if= should fail - 'if' is a keyword");
3500
3501        let result = parse("while=true");
3502        assert!(result.is_err(), "while= should fail - 'while' is a keyword");
3503
3504        let result = parse(r#"then="next""#);
3505        assert!(result.is_err(), "then= should fail - 'then' is a keyword");
3506    }
3507
3508    #[test]
3509    fn parse_set_command_with_flag() {
3510        let result = parse("set -e");
3511        assert!(result.is_ok(), "failed to parse set -e: {:?}", result);
3512        let program = result.unwrap();
3513        match &program.statements[0] {
3514            Stmt::Command(cmd) => {
3515                assert_eq!(cmd.name, "set");
3516                assert_eq!(cmd.args.len(), 1);
3517                match &cmd.args[0] {
3518                    Arg::ShortFlag(f) => assert_eq!(f, "e"),
3519                    other => panic!("expected ShortFlag, got {:?}", other),
3520                }
3521            }
3522            other => panic!("expected Command, got {:?}", other),
3523        }
3524    }
3525
3526    #[test]
3527    fn parse_set_command_no_args() {
3528        let result = parse("set");
3529        assert!(result.is_ok(), "failed to parse set: {:?}", result);
3530        let program = result.unwrap();
3531        match &program.statements[0] {
3532            Stmt::Command(cmd) => {
3533                assert_eq!(cmd.name, "set");
3534                assert_eq!(cmd.args.len(), 0);
3535            }
3536            other => panic!("expected Command, got {:?}", other),
3537        }
3538    }
3539
3540    #[test]
3541    fn parse_set_assignment_vs_command() {
3542        // X=5 should be assignment
3543        let result = parse("X=5");
3544        assert!(result.is_ok());
3545        let program = result.unwrap();
3546        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
3547
3548        // set -e should be command
3549        let result = parse("set -e");
3550        assert!(result.is_ok());
3551        let program = result.unwrap();
3552        assert!(matches!(&program.statements[0], Stmt::Command(_)));
3553    }
3554
3555    #[test]
3556    fn parse_true_as_command() {
3557        let result = parse("true");
3558        assert!(result.is_ok());
3559        let program = result.unwrap();
3560        match &program.statements[0] {
3561            Stmt::Command(cmd) => assert_eq!(cmd.name, "true"),
3562            other => panic!("expected Command(true), got {:?}", other),
3563        }
3564    }
3565
3566    #[test]
3567    fn parse_false_as_command() {
3568        let result = parse("false");
3569        assert!(result.is_ok());
3570        let program = result.unwrap();
3571        match &program.statements[0] {
3572            Stmt::Command(cmd) => assert_eq!(cmd.name, "false"),
3573            other => panic!("expected Command(false), got {:?}", other),
3574        }
3575    }
3576
3577    #[test]
3578    fn parse_dot_as_source_alias() {
3579        let result = parse(". script.kai");
3580        assert!(result.is_ok(), "failed to parse . script.kai: {:?}", result);
3581        let program = result.unwrap();
3582        match &program.statements[0] {
3583            Stmt::Command(cmd) => {
3584                assert_eq!(cmd.name, ".");
3585                assert_eq!(cmd.args.len(), 1);
3586            }
3587            other => panic!("expected Command(.), got {:?}", other),
3588        }
3589    }
3590
3591    #[test]
3592    fn parse_source_command() {
3593        let result = parse("source utils.kai");
3594        assert!(result.is_ok(), "failed to parse source: {:?}", result);
3595        let program = result.unwrap();
3596        match &program.statements[0] {
3597            Stmt::Command(cmd) => {
3598                assert_eq!(cmd.name, "source");
3599                assert_eq!(cmd.args.len(), 1);
3600            }
3601            other => panic!("expected Command(source), got {:?}", other),
3602        }
3603    }
3604
3605    #[test]
3606    fn parse_test_expr_file_test() {
3607        // Paths must be quoted strings in test expressions
3608        let result = parse(r#"[[ -f "/path/file" ]]"#);
3609        assert!(result.is_ok(), "failed to parse file test: {:?}", result);
3610    }
3611
3612    #[test]
3613    fn parse_test_expr_comparison() {
3614        let result = parse(r#"[[ $X == "value" ]]"#);
3615        assert!(result.is_ok(), "failed to parse comparison test: {:?}", result);
3616    }
3617
3618    #[test]
3619    fn parse_test_expr_single_eq() {
3620        // = and == are equivalent inside [[ ]] (matching bash behavior)
3621        let result = parse(r#"[[ $X = "value" ]]"#);
3622        assert!(result.is_ok(), "failed to parse single-= comparison: {:?}", result);
3623        let program = result.unwrap();
3624        match &program.statements[0] {
3625            Stmt::Test(TestExpr::Comparison { op, .. }) => {
3626                assert_eq!(op, &TestCmpOp::Eq);
3627            }
3628            other => panic!("expected Test(Comparison), got {:?}", other),
3629        }
3630    }
3631
3632    #[test]
3633    fn parse_while_loop() {
3634        let result = parse("while true; do echo; done");
3635        assert!(result.is_ok(), "failed to parse while loop: {:?}", result);
3636        let program = result.unwrap();
3637        assert!(matches!(&program.statements[0], Stmt::While(_)));
3638    }
3639
3640    #[test]
3641    fn parse_break_with_level() {
3642        let result = parse("break 2");
3643        assert!(result.is_ok());
3644        let program = result.unwrap();
3645        match &program.statements[0] {
3646            Stmt::Break(Some(n)) => assert_eq!(*n, 2),
3647            other => panic!("expected Break(2), got {:?}", other),
3648        }
3649    }
3650
3651    #[test]
3652    fn parse_continue_with_level() {
3653        let result = parse("continue 3");
3654        assert!(result.is_ok());
3655        let program = result.unwrap();
3656        match &program.statements[0] {
3657            Stmt::Continue(Some(n)) => assert_eq!(*n, 3),
3658            other => panic!("expected Continue(3), got {:?}", other),
3659        }
3660    }
3661
3662    #[test]
3663    fn parse_exit_with_code() {
3664        let result = parse("exit 1");
3665        assert!(result.is_ok());
3666        let program = result.unwrap();
3667        match &program.statements[0] {
3668            Stmt::Exit(Some(expr)) => {
3669                match expr.as_ref() {
3670                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 1),
3671                    other => panic!("expected Int(1), got {:?}", other),
3672                }
3673            }
3674            other => panic!("expected Exit(1), got {:?}", other),
3675        }
3676    }
3677
3678    // ========================================================================
3679    // parse_interpolated_string_spanned — body-internal span tracking for
3680    // heredoc bodies. The byte offsets these tests pin become validator
3681    // issue spans via the HereDocBody → SpannedPart flow.
3682    // ========================================================================
3683
3684    #[test]
3685    fn spanned_literal_only_records_byte_range() {
3686        let parts = parse_interpolated_string_spanned("hello world", 100);
3687        assert_eq!(parts.len(), 1);
3688        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hello world"));
3689        assert_eq!(parts[0].offset, 100, "base_offset must propagate to literals");
3690        assert_eq!(parts[0].len, 11);
3691    }
3692
3693    #[test]
3694    fn spanned_braced_var_at_zero() {
3695        let parts = parse_interpolated_string_spanned("${X}", 50);
3696        assert_eq!(parts.len(), 1);
3697        assert!(matches!(&parts[0].part, StringPart::Var(_)));
3698        assert_eq!(parts[0].offset, 50);
3699        assert_eq!(parts[0].len, 4); // "${X}"
3700    }
3701
3702    #[test]
3703    fn spanned_simple_var_then_literal() {
3704        let parts = parse_interpolated_string_spanned("$X end", 10);
3705        assert_eq!(parts.len(), 2);
3706        assert!(matches!(&parts[0].part, StringPart::Var(_)));
3707        assert_eq!(parts[0].offset, 10);
3708        assert_eq!(parts[0].len, 2); // "$X"
3709        assert!(matches!(&parts[1].part, StringPart::Literal(s) if s == " end"));
3710        assert_eq!(parts[1].offset, 12);
3711        assert_eq!(parts[1].len, 4);
3712    }
3713
3714    #[test]
3715    fn spanned_mixed_literal_var_literal() {
3716        let parts = parse_interpolated_string_spanned("hi ${X} bye", 0);
3717        assert_eq!(parts.len(), 3);
3718        // "hi "
3719        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hi "));
3720        assert_eq!(parts[0].offset, 0);
3721        assert_eq!(parts[0].len, 3);
3722        // ${X}
3723        assert!(matches!(&parts[1].part, StringPart::Var(_)));
3724        assert_eq!(parts[1].offset, 3);
3725        assert_eq!(parts[1].len, 4);
3726        // " bye"
3727        assert!(matches!(&parts[2].part, StringPart::Literal(s) if s == " bye"));
3728        assert_eq!(parts[2].offset, 7);
3729        assert_eq!(parts[2].len, 4);
3730    }
3731
3732    #[test]
3733    fn spanned_positional_param() {
3734        let parts = parse_interpolated_string_spanned("$1 done", 0);
3735        assert_eq!(parts.len(), 2);
3736        assert!(matches!(&parts[0].part, StringPart::Positional(1)));
3737        assert_eq!(parts[0].offset, 0);
3738        assert_eq!(parts[0].len, 2); // "$1"
3739    }
3740
3741    #[test]
3742    fn spanned_special_dollar_dollar() {
3743        let parts = parse_interpolated_string_spanned("$$", 5);
3744        assert_eq!(parts.len(), 1);
3745        assert!(matches!(&parts[0].part, StringPart::CurrentPid));
3746        assert_eq!(parts[0].offset, 5);
3747        assert_eq!(parts[0].len, 2);
3748    }
3749
3750    #[test]
3751    fn spanned_arithmetic_marker_recognised() {
3752        // The lexer wraps arithmetic markers as ${__ARITH:expr__} for
3753        // interpolated heredocs; the spanned parser must produce
3754        // StringPart::Arithmetic for that shape.
3755        let parts = parse_interpolated_string_spanned("${__ARITH:1+2__}", 0);
3756        assert_eq!(parts.len(), 1);
3757        assert!(matches!(&parts[0].part, StringPart::Arithmetic(e) if e == "1+2"));
3758    }
3759
3760    #[test]
3761    fn spanned_default_separator_yields_var_with_default() {
3762        let parts = parse_interpolated_string_spanned("${X:-fallback}", 0);
3763        assert_eq!(parts.len(), 1);
3764        assert!(matches!(&parts[0].part, StringPart::VarWithDefault { .. }));
3765        assert_eq!(parts[0].offset, 0);
3766        assert_eq!(parts[0].len, 14); // "${X:-fallback}"
3767    }
3768
3769    #[test]
3770    fn spanned_no_dollar_runs_one_literal() {
3771        let parts = parse_interpolated_string_spanned("plain text only", 7);
3772        assert_eq!(parts.len(), 1);
3773        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "plain text only"));
3774        assert_eq!(parts[0].offset, 7);
3775        assert_eq!(parts[0].len, 15);
3776    }
3777
3778    #[test]
3779    fn spanned_matches_unspanned_part_count() {
3780        // Spanned and spanless variants must agree on the part decomposition.
3781        // Bug fixes in one should land in the other.
3782        let cases = [
3783            "hello",
3784            "$X",
3785            "${X}",
3786            "${X:-d}",
3787            "hi $A and $B",
3788            "$0 $1 $2",
3789            "$$ $? $#",
3790        ];
3791        for s in &cases {
3792            let unspanned = parse_interpolated_string(s);
3793            let spanned = parse_interpolated_string_spanned(s, 0);
3794            assert_eq!(
3795                unspanned.len(),
3796                spanned.len(),
3797                "part count differs for {:?}",
3798                s
3799            );
3800        }
3801    }
3802
3803    #[test]
3804    fn spanned_multibyte_utf8_before_var_uses_byte_offsets() {
3805        // 🚀 is 4 bytes in UTF-8 and a space is 1 byte, so the literal
3806        // prefix is 5 bytes total. `${X}` then sits at byte offset 5.
3807        // Right-by-luck for char-vs-byte indexing is precisely what this
3808        // test catches: if someone swaps .len_utf8() for 1, offset becomes 2.
3809        let parts = parse_interpolated_string_spanned("🚀 ${X}", 0);
3810        assert_eq!(parts.len(), 2);
3811
3812        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "🚀 "));
3813        assert_eq!(parts[0].offset, 0);
3814        assert_eq!(parts[0].len, 5, "literal len must be bytes, not chars");
3815
3816        assert!(matches!(&parts[1].part, StringPart::Var(_)));
3817        assert_eq!(parts[1].offset, 5, "var offset must be bytes, not chars");
3818        assert_eq!(parts[1].len, 4);
3819    }
3820
3821    #[test]
3822    fn spanned_multibyte_utf8_pure_literal_is_byte_length() {
3823        // "hello 世界 world": 5 + 1 + 6 (3 per CJK char) + 1 + 5 = 18 bytes,
3824        // 13 chars. The `len` field must report 18, not 13.
3825        let parts = parse_interpolated_string_spanned("hello 世界 world", 0);
3826        assert_eq!(parts.len(), 1);
3827        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hello 世界 world"));
3828        assert_eq!(parts[0].offset, 0);
3829        assert_eq!(parts[0].len, 18);
3830    }
3831
3832    #[test]
3833    fn spanned_escape_dollar_consumes_two_bytes_emits_one_char() {
3834        // `\$` is 2 source bytes and resolves to a single literal `$`.
3835        // The literal part's `len` should reflect the SOURCE length (2).
3836        let parts = parse_interpolated_string_spanned("\\$", 0);
3837        assert_eq!(parts.len(), 1);
3838        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "$"));
3839        assert_eq!(parts[0].offset, 0);
3840        assert_eq!(parts[0].len, 2, "len is source byte length, not rendered length");
3841    }
3842
3843    #[test]
3844    fn spanned_escape_backslash_collapses_pair_to_one() {
3845        let parts = parse_interpolated_string_spanned("\\\\", 0);
3846        assert_eq!(parts.len(), 1);
3847        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "\\"));
3848        assert_eq!(parts[0].len, 2);
3849    }
3850}
kaish_kernel/parser.rs

kaish_kernel/
parser.rs