kaish_kernel/
parser.rs

1//! Parser for kaish source code.
2//!
3//! Transforms a token stream from the lexer into an Abstract Syntax Tree.
4//! Uses chumsky for parser combinators with good error recovery.
5
6use crate::ast::{
7    Arg, Assignment, BinaryOp, CaseBranch, CaseStmt, Command, Expr, FileTestOp, ForLoop, IfStmt,
8    Pipeline, Program, Redirect, RedirectKind, SpannedPart, Stmt, StringPart, StringTestOp,
9    TestCmpOp, TestExpr, ToolDef, Value, VarPath, VarSegment, WhileLoop,
10};
11use crate::lexer::{self, HereDocData, Token};
12use chumsky::{input::ValueInput, prelude::*};
13
14/// Span type used throughout the parser.
15pub type Span = SimpleSpan;
16
17/// Parse a raw `${...}` string into an Expr.
18///
19/// Handles:
20/// - Special variables: `${?}` → LastExitCode, `${$}` → CurrentPid
21/// - Simple paths: `${VAR}`, `${VAR.field}`, `${VAR[0]}` → VarRef
22/// - Default values: `${VAR:-default}` → VarWithDefault (with nested expansion support)
23fn parse_var_expr(raw: &str) -> Expr {
24    // Special case: ${?} is the last exit code (same as $?)
25    if raw == "${?}" {
26        return Expr::LastExitCode;
27    }
28
29    // Special case: ${$} is the current PID (same as $$)
30    if raw == "${$}" {
31        return Expr::CurrentPid;
32    }
33
34    // Check for default value syntax: ${VAR:-default}
35    // Need to find :- that's not inside a nested ${...}
36    if let Some(colon_idx) = find_default_separator(raw) {
37        // Extract variable name (between ${ and :-)
38        let name = raw[2..colon_idx].to_string();
39        // Extract default value (between :- and }) and recursively parse it
40        let default_str = &raw[colon_idx + 2..raw.len() - 1];
41        let default = parse_interpolated_string(default_str);
42        return Expr::VarWithDefault { name, default };
43    }
44
45    // Regular variable path
46    Expr::VarRef(parse_varpath(raw))
47}
48
/// Locate the `:-` that separates name from default in a full `${VAR:-default}`
/// expression.
///
/// Every `${` raises the nesting level and every `}` seen while nested lowers
/// it. Only a `:-` at level 1 (directly inside the outermost `${...}`) counts,
/// so separators belonging to nested expansions are skipped. Returns the byte
/// index of the `:`, or `None` when there is no such separator.
fn find_default_separator(raw: &str) -> Option<usize> {
    let bytes = raw.as_bytes();
    let mut nesting = 0;
    let mut cursor = 0;

    while let Some(&byte) = bytes.get(cursor) {
        // Look at the current byte together with its successor (if any).
        match (byte, bytes.get(cursor + 1).copied()) {
            (b'$', Some(b'{')) => {
                nesting += 1;
                cursor += 2;
            }
            (b'}', _) if nesting > 0 => {
                nesting -= 1;
                cursor += 1;
            }
            (b':', Some(b'-')) if nesting == 1 => return Some(cursor),
            _ => cursor += 1,
        }
    }

    None
}
74
/// Locate the `:-` separator in the *inside* of a braced reference, i.e. the
/// text between `${` and `}` with the outer braces already removed.
///
/// Nested `${...}` groups are tracked so that a `:-` inside one of them is
/// ignored; only a separator at nesting level 0 is reported. Returns the byte
/// index of the `:`, or `None` when no top-level separator exists.
fn find_default_separator_in_content(content: &str) -> Option<usize> {
    let mut rest = content.as_bytes();
    let mut offset = 0;
    let mut nesting = 0;

    loop {
        match rest {
            [] => return None,
            [b'$', b'{', tail @ ..] => {
                nesting += 1;
                offset += 2;
                rest = tail;
            }
            // A `}` only closes something when a nested group is open.
            [b'}', tail @ ..] if nesting > 0 => {
                nesting -= 1;
                offset += 1;
                rest = tail;
            }
            [b':', b'-', ..] if nesting == 0 => return Some(offset),
            [_, tail @ ..] => {
                offset += 1;
                rest = tail;
            }
        }
    }
}
100
101/// Parse a raw `${...}` string into a VarPath.
102///
103/// Handles paths like `${VAR}` and `${VAR.field}`. Array indexing is not supported.
104fn parse_varpath(raw: &str) -> VarPath {
105    let segments_strs = lexer::parse_var_ref(raw).unwrap_or_default();
106    let segments = segments_strs
107        .into_iter()
108        .filter(|s| !s.starts_with('['))  // Skip index segments
109        .map(VarSegment::Field)
110        .collect();
111    VarPath { segments }
112}
113
114/// Parse an interpolated string like "Hello ${NAME}!" or "Hello $NAME!" into parts.
115/// Extract a pipeline from a statement if possible.
116fn stmt_to_pipeline(stmt: Stmt) -> Option<Pipeline> {
117    match stmt {
118        Stmt::Pipeline(p) => Some(p),
119        Stmt::Command(cmd) => Some(Pipeline {
120            commands: vec![cmd],
121            background: false,
122        }),
123        _ => None,
124    }
125}
126
127/// Parse an unquoted heredoc body's interpolation while tracking each part's
128/// byte offset in the source.
129///
130/// `base_offset` is added to every part's offset so callers can attribute
131/// positions to a larger source (e.g., heredoc body inside the original
132/// script). Returns parts in source order with offset+len populated.
133///
134/// **Heredoc-specific behaviour**: per POSIX, unquoted heredoc bodies process
135/// three backslash escapes — `\$` (suppress expansion), `\\` (literal
136/// backslash), and `\<newline>` (line continuation). All other backslashes
137/// are kept verbatim. This differs from [`parse_interpolated_string`], which
138/// is called on double-quoted string content where the lexer has already
139/// processed escapes via `__KAISH_ESCAPED_DOLLAR__`.
140///
141/// This sibling of [`parse_interpolated_string`] duplicates parsing logic
142/// for now; unifying them behind a position-tracking core is a follow-up
143/// cleanup. Behaviour MUST stay aligned for the non-escape paths — bug fixes
144/// for the shared interpolation logic here should land there as well.
145fn parse_interpolated_string_spanned(s: &str, base_offset: usize) -> Vec<SpannedPart> {
146    let s = s.replace("__KAISH_ESCAPED_DOLLAR__", "\x00DOLLAR\x00");
147
148    let chars_vec: Vec<char> = s.chars().collect();
149    let mut i = 0;
150    let mut pos: usize = 0;
151
152    let mut parts: Vec<SpannedPart> = Vec::new();
153    let mut current_text = String::new();
154    let mut current_text_start: usize = pos;
155
156    let push_literal =
157        |current_text: &mut String, start: &mut usize, end: usize, parts: &mut Vec<SpannedPart>| {
158            if !current_text.is_empty() {
159                parts.push(SpannedPart {
160                    part: StringPart::Literal(std::mem::take(current_text)),
161                    offset: base_offset + *start,
162                    len: end - *start,
163                });
164                *start = end;
165            }
166        };
167
168    while i < chars_vec.len() {
169        let ch = chars_vec[i];
170
171        if ch == '\x00' {
172            // Escaped-dollar marker: \x00 DOLLAR \x00 → literal '$'
173            let start = pos;
174            i += 1;
175            pos += 1;
176            let mut marker = String::new();
177            while let Some(&c) = chars_vec.get(i) {
178                if c == '\x00' {
179                    i += 1;
180                    pos += 1;
181                    break;
182                }
183                marker.push(c);
184                i += 1;
185                pos += c.len_utf8();
186            }
187            if marker == "DOLLAR" {
188                if current_text.is_empty() {
189                    current_text_start = start;
190                }
191                current_text.push('$');
192            }
193        } else if ch == '\\' {
194            // POSIX heredoc-body escape processing for unquoted heredocs.
195            // Only `\$`, `\\`, and `\<newline>` are escapes; everything else
196            // keeps the backslash verbatim. Each case advances `pos` by the
197            // bytes consumed from the source so subsequent part offsets stay
198            // anchored to original-source coordinates.
199            let next = chars_vec.get(i + 1).copied();
200            match next {
201                Some('$') => {
202                    if current_text.is_empty() {
203                        current_text_start = pos;
204                    }
205                    current_text.push('$');
206                    i += 2;
207                    pos += 2;
208                }
209                Some('\\') => {
210                    if current_text.is_empty() {
211                        current_text_start = pos;
212                    }
213                    current_text.push('\\');
214                    i += 2;
215                    pos += 2;
216                }
217                Some('\n') => {
218                    // Line continuation: consume both bytes, emit nothing.
219                    // The literal run resumes on the next line.
220                    i += 2;
221                    pos += 2;
222                    if current_text.is_empty() {
223                        current_text_start = pos;
224                    }
225                }
226                Some('\r') => {
227                    // \<CR> or \<CR><LF>: line continuation
228                    i += 2;
229                    pos += 2;
230                    if chars_vec.get(i) == Some(&'\n') {
231                        i += 1;
232                        pos += 1;
233                    }
234                    if current_text.is_empty() {
235                        current_text_start = pos;
236                    }
237                }
238                _ => {
239                    // Other backslash sequences: keep `\` literally,
240                    // consume only the backslash. The next iteration will
241                    // process the following char on its own merits.
242                    if current_text.is_empty() {
243                        current_text_start = pos;
244                    }
245                    current_text.push('\\');
246                    i += 1;
247                    pos += 1;
248                }
249            }
250        } else if ch == '$' {
251            // Possible expansion. Save current run before peeking ahead.
252            let part_start = pos;
253            let next = chars_vec.get(i + 1).copied();
254
255            if next == Some('(') && chars_vec.get(i + 2) != Some(&'(') {
256                // $(...) command substitution
257                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
258                i += 2; // consume "$("
259                pos += 2;
260                let mut cmd_content = String::new();
261                let mut depth = 1;
262                while let Some(&c) = chars_vec.get(i) {
263                    i += 1;
264                    pos += c.len_utf8();
265                    if c == '(' {
266                        depth += 1;
267                        cmd_content.push(c);
268                    } else if c == ')' {
269                        depth -= 1;
270                        if depth == 0 {
271                            break;
272                        }
273                        cmd_content.push(c);
274                    } else {
275                        cmd_content.push(c);
276                    }
277                }
278                let inserted = if let Ok(program) = parse(&cmd_content) {
279                    if let Some(stmt) = program.statements.first() {
280                        if let Some(pipeline) = stmt_to_pipeline(stmt.clone()) {
281                            parts.push(SpannedPart {
282                                part: StringPart::CommandSubst(pipeline),
283                                offset: base_offset + part_start,
284                                len: pos - part_start,
285                            });
286                            true
287                        } else {
288                            false
289                        }
290                    } else {
291                        false
292                    }
293                } else {
294                    false
295                };
296                if inserted {
297                    // Successfully pushed a CommandSubst; the next literal
298                    // run will start after the closing ')'.
299                    current_text_start = pos;
300                } else {
301                    // Fall back to literal text. The literal run starts at
302                    // the leading '$' (set above only if current_text was
303                    // empty); leave current_text_start alone otherwise so we
304                    // don't lose an in-progress run.
305                    if current_text.is_empty() {
306                        current_text_start = part_start;
307                    }
308                    current_text.push_str("$(");
309                    current_text.push_str(&cmd_content);
310                    current_text.push(')');
311                }
312            } else if next == Some('{') {
313                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
314                i += 2; // consume "${"
315                pos += 2;
316                let mut var_content = String::new();
317                let mut depth = 1;
318                while let Some(&c) = chars_vec.get(i) {
319                    i += 1;
320                    pos += c.len_utf8();
321                    if c == '{' && var_content.ends_with('$') {
322                        depth += 1;
323                        var_content.push(c);
324                    } else if c == '}' {
325                        depth -= 1;
326                        if depth == 0 {
327                            break;
328                        }
329                        var_content.push(c);
330                    } else {
331                        var_content.push(c);
332                    }
333                }
334                let part = if let Some(name) = var_content.strip_prefix('#') {
335                    StringPart::VarLength(name.to_string())
336                } else if var_content.starts_with("__ARITH:") && var_content.ends_with("__") {
337                    let expr = var_content
338                        .strip_prefix("__ARITH:")
339                        .and_then(|s| s.strip_suffix("__"))
340                        .unwrap_or("");
341                    StringPart::Arithmetic(expr.to_string())
342                } else if let Some(colon_idx) = find_default_separator_in_content(&var_content) {
343                    let name = var_content[..colon_idx].to_string();
344                    let default_str = &var_content[colon_idx + 2..];
345                    // Default value spans recursively kept relative to the
346                    // outer body — the inner parts get their own offsets via
347                    // the recursive call when needed. For now, the default's
348                    // parts are stored without spans (default is a Vec<StringPart>).
349                    let default = parse_interpolated_string(default_str);
350                    StringPart::VarWithDefault { name, default }
351                } else {
352                    StringPart::Var(parse_varpath(&format!("${{{}}}", var_content)))
353                };
354                parts.push(SpannedPart {
355                    part,
356                    offset: base_offset + part_start,
357                    len: pos - part_start,
358                });
359                current_text_start = pos;
360            } else if next.map(|c| c.is_ascii_digit()).unwrap_or(false) {
361                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
362                i += 1; // consume '$'
363                pos += 1;
364                if let Some(&digit) = chars_vec.get(i) {
365                    let n = digit.to_digit(10).unwrap_or(0) as usize;
366                    i += 1;
367                    pos += digit.len_utf8();
368                    parts.push(SpannedPart {
369                        part: StringPart::Positional(n),
370                        offset: base_offset + part_start,
371                        len: pos - part_start,
372                    });
373                }
374                current_text_start = pos;
375            } else if next == Some('@') {
376                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
377                i += 2; // consume "$@"
378                pos += 2;
379                parts.push(SpannedPart {
380                    part: StringPart::AllArgs,
381                    offset: base_offset + part_start,
382                    len: pos - part_start,
383                });
384                current_text_start = pos;
385            } else if next == Some('#') {
386                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
387                i += 2; // consume "$#"
388                pos += 2;
389                parts.push(SpannedPart {
390                    part: StringPart::ArgCount,
391                    offset: base_offset + part_start,
392                    len: pos - part_start,
393                });
394                current_text_start = pos;
395            } else if next == Some('?') {
396                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
397                i += 2; // consume "$?"
398                pos += 2;
399                parts.push(SpannedPart {
400                    part: StringPart::LastExitCode,
401                    offset: base_offset + part_start,
402                    len: pos - part_start,
403                });
404                current_text_start = pos;
405            } else if next == Some('$') {
406                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
407                i += 2; // consume "$$"
408                pos += 2;
409                parts.push(SpannedPart {
410                    part: StringPart::CurrentPid,
411                    offset: base_offset + part_start,
412                    len: pos - part_start,
413                });
414                current_text_start = pos;
415            } else if next.map(|c| c.is_ascii_alphabetic() || c == '_').unwrap_or(false) {
416                push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
417                i += 1; // consume '$'
418                pos += 1;
419                let mut var_name = String::new();
420                while let Some(&c) = chars_vec.get(i) {
421                    if c.is_ascii_alphanumeric() || c == '_' {
422                        var_name.push(c);
423                        i += 1;
424                        pos += c.len_utf8();
425                    } else {
426                        break;
427                    }
428                }
429                parts.push(SpannedPart {
430                    part: StringPart::Var(VarPath::simple(var_name)),
431                    offset: base_offset + part_start,
432                    len: pos - part_start,
433                });
434                current_text_start = pos;
435            } else {
436                // Bare $ — treat as literal
437                if current_text.is_empty() {
438                    current_text_start = pos;
439                }
440                current_text.push(ch);
441                i += 1;
442                pos += 1;
443            }
444        } else {
445            if current_text.is_empty() {
446                current_text_start = pos;
447            }
448            current_text.push(ch);
449            i += 1;
450            pos += ch.len_utf8();
451        }
452    }
453
454    push_literal(&mut current_text, &mut current_text_start, pos, &mut parts);
455
456    parts
457}
458
459fn parse_interpolated_string(s: &str) -> Vec<StringPart> {
460    // First, replace escaped dollar markers with a temporary placeholder
461    // The lexer uses __KAISH_ESCAPED_DOLLAR__ for \$ to prevent re-interpretation
462    let s = s.replace("__KAISH_ESCAPED_DOLLAR__", "\x00DOLLAR\x00");
463
464    let mut parts = Vec::new();
465    let mut current_text = String::new();
466    let mut chars = s.chars().peekable();
467
468    while let Some(ch) = chars.next() {
469        if ch == '\x00' {
470            // This is our escaped dollar marker - skip "DOLLAR" and the closing \x00
471            let mut marker = String::new();
472            while let Some(&c) = chars.peek() {
473                if c == '\x00' {
474                    chars.next(); // consume closing marker
475                    break;
476                }
477                if let Some(c) = chars.next() {
478                    marker.push(c);
479                }
480            }
481            if marker == "DOLLAR" {
482                current_text.push('$');
483            }
484        } else if ch == '$' {
485            // Check for command substitution $(...)
486            if chars.peek() == Some(&'(') {
487                // Command substitution $(...)
488                if !current_text.is_empty() {
489                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
490                }
491
492                // Consume the '('
493                chars.next();
494
495                // Collect until matching ')' accounting for nested parens
496                let mut cmd_content = String::new();
497                let mut paren_depth = 1;
498                for c in chars.by_ref() {
499                    if c == '(' {
500                        paren_depth += 1;
501                        cmd_content.push(c);
502                    } else if c == ')' {
503                        paren_depth -= 1;
504                        if paren_depth == 0 {
505                            break;
506                        }
507                        cmd_content.push(c);
508                    } else {
509                        cmd_content.push(c);
510                    }
511                }
512
513                // Parse the command content as a pipeline
514                // We need to use the main parser for this
515                if let Ok(program) = parse(&cmd_content) {
516                    // Extract the pipeline from the parsed result
517                    if let Some(stmt) = program.statements.first() {
518                        if let Some(pipeline) = stmt_to_pipeline(stmt.clone()) {
519                            parts.push(StringPart::CommandSubst(pipeline));
520                        } else {
521                            // If we can't extract a pipeline, treat as literal
522                            current_text.push_str("$(");
523                            current_text.push_str(&cmd_content);
524                            current_text.push(')');
525                        }
526                    }
527                } else {
528                    // Parse failed - treat as literal
529                    current_text.push_str("$(");
530                    current_text.push_str(&cmd_content);
531                    current_text.push(')');
532                }
533            } else if chars.peek() == Some(&'{') {
534                // Braced variable reference ${...}
535                if !current_text.is_empty() {
536                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
537                }
538
539                // Consume the '{'
540                chars.next();
541
542                // Collect until matching '}', tracking nesting depth
543                let mut var_content = String::new();
544                let mut depth = 1;
545                for c in chars.by_ref() {
546                    if c == '{' && var_content.ends_with('$') {
547                        depth += 1;
548                        var_content.push(c);
549                    } else if c == '}' {
550                        depth -= 1;
551                        if depth == 0 {
552                            break;
553                        }
554                        var_content.push(c);
555                    } else {
556                        var_content.push(c);
557                    }
558                }
559
560                // Parse the content for special syntax
561                let part = if let Some(name) = var_content.strip_prefix('#') {
562                    // Variable length: ${#VAR}
563                    StringPart::VarLength(name.to_string())
564                } else if var_content.starts_with("__ARITH:") && var_content.ends_with("__") {
565                    // Arithmetic expression: ${__ARITH:expr__}
566                    let expr = var_content
567                        .strip_prefix("__ARITH:")
568                        .and_then(|s| s.strip_suffix("__"))
569                        .unwrap_or("");
570                    StringPart::Arithmetic(expr.to_string())
571                } else if let Some(colon_idx) = find_default_separator_in_content(&var_content) {
572                    // Variable with default: ${VAR:-default} - recursively parse the default
573                    let name = var_content[..colon_idx].to_string();
574                    let default_str = &var_content[colon_idx + 2..];
575                    let default = parse_interpolated_string(default_str);
576                    StringPart::VarWithDefault { name, default }
577                } else {
578                    // Regular variable: ${VAR} or ${VAR.field}
579                    StringPart::Var(parse_varpath(&format!("${{{}}}", var_content)))
580                };
581                parts.push(part);
582            } else if chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
583                // Positional parameter $0-$9
584                if !current_text.is_empty() {
585                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
586                }
587                if let Some(digit) = chars.next() {
588                    let n = digit.to_digit(10).unwrap_or(0) as usize;
589                    parts.push(StringPart::Positional(n));
590                }
591            } else if chars.peek() == Some(&'@') {
592                // All arguments $@
593                if !current_text.is_empty() {
594                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
595                }
596                chars.next(); // consume '@'
597                parts.push(StringPart::AllArgs);
598            } else if chars.peek() == Some(&'#') {
599                // Argument count $#
600                if !current_text.is_empty() {
601                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
602                }
603                chars.next(); // consume '#'
604                parts.push(StringPart::ArgCount);
605            } else if chars.peek() == Some(&'?') {
606                // Last exit code $?
607                if !current_text.is_empty() {
608                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
609                }
610                chars.next(); // consume '?'
611                parts.push(StringPart::LastExitCode);
612            } else if chars.peek() == Some(&'$') {
613                // Current PID $$
614                if !current_text.is_empty() {
615                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
616                }
617                chars.next(); // consume second '$'
618                parts.push(StringPart::CurrentPid);
619            } else if chars.peek().map(|c| c.is_ascii_alphabetic() || *c == '_').unwrap_or(false) {
620                // Simple variable reference $NAME
621                if !current_text.is_empty() {
622                    parts.push(StringPart::Literal(std::mem::take(&mut current_text)));
623                }
624
625                // Collect identifier characters
626                let mut var_name = String::new();
627                while let Some(&c) = chars.peek() {
628                    if c.is_ascii_alphanumeric() || c == '_' {
629                        if let Some(c) = chars.next() {
630                            var_name.push(c);
631                        }
632                    } else {
633                        break;
634                    }
635                }
636
637                parts.push(StringPart::Var(VarPath::simple(var_name)));
638            } else {
639                // Literal $ (not followed by { or identifier start)
640                current_text.push(ch);
641            }
642        } else {
643            current_text.push(ch);
644        }
645    }
646
647    if !current_text.is_empty() {
648        parts.push(StringPart::Literal(current_text));
649    }
650
651    parts
652}
653
654/// Parse error with location and context.
655#[derive(Debug, Clone)]
656pub struct ParseError {
657    pub span: Span,
658    pub message: String,
659}
660
661impl ParseError {
662    /// Format the error against the original source, emitting a 1-indexed
663    /// `line:col [parse]: <message>` prefix and a snippet of the offending
664    /// line. Mirrors `ValidationIssue::format` so error reporting feels
665    /// consistent across pipeline phases.
666    pub fn format(&self, source: &str) -> String {
667        let start = self.span.start;
668        let mut line = 1usize;
669        let mut col = 1usize;
670        for (i, ch) in source.char_indices() {
671            if i >= start {
672                break;
673            }
674            if ch == '\n' {
675                line += 1;
676                col = 1;
677            } else {
678                col += 1;
679            }
680        }
681        let line_content = {
682            let line_start = source[..start.min(source.len())]
683                .rfind('\n')
684                .map_or(0, |i| i + 1);
685            let line_end = source[start.min(source.len())..]
686                .find('\n')
687                .map_or(source.len(), |i| start + i);
688            source.get(line_start..line_end).unwrap_or("")
689        };
690        if line_content.is_empty() {
691            format!("{}:{} [parse]: {}", line, col, self.message)
692        } else {
693            format!(
694                "{}:{} [parse]: {}\n  | {}",
695                line, col, self.message, line_content
696            )
697        }
698    }
699}
700
701impl std::fmt::Display for ParseError {
702    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
703        write!(f, "{} at {:?}", self.message, self.span)
704    }
705}
706
707impl std::error::Error for ParseError {}
708
709/// Parse kaish source code into a Program AST.
710pub fn parse(source: &str) -> Result<Program, Vec<ParseError>> {
711    // Tokenize with logos
712    let tokens = lexer::tokenize(source).map_err(|errs| {
713        errs.into_iter()
714            .map(|e| ParseError {
715                span: (e.span.start..e.span.end).into(),
716                message: format!("lexer error: {}", e.token),
717            })
718            .collect::<Vec<_>>()
719    })?;
720
721    // Convert tokens to (Token, SimpleSpan) pairs
722    let tokens: Vec<(Token, Span)> = tokens
723        .into_iter()
724        .map(|spanned| (spanned.token, (spanned.span.start..spanned.span.end).into()))
725        .collect();
726
727    // End-of-input span
728    let end_span: Span = (source.len()..source.len()).into();
729
730    // Parse using slice-based input (like nano_rust example)
731    let parser = program_parser();
732    let result = parser.parse(tokens.as_slice().map(end_span, |(t, s)| (t, s)));
733
734    result.into_result().map_err(|errs| {
735        errs.into_iter()
736            .map(|e| ParseError {
737                span: *e.span(),
738                message: e.to_string(),
739            })
740            .collect()
741    })
742}
743
744/// Parse a single statement (useful for REPL).
745pub fn parse_statement(source: &str) -> Result<Stmt, Vec<ParseError>> {
746    let program = parse(source)?;
747    program
748        .statements
749        .into_iter()
750        .find(|s| !matches!(s, Stmt::Empty))
751        .ok_or_else(|| {
752            vec![ParseError {
753                span: (0..source.len()).into(),
754                message: "empty input".to_string(),
755            }]
756        })
757}
758
759// ═══════════════════════════════════════════════════════════════════════════
760// Parser Combinators - generic over input type
761// ═══════════════════════════════════════════════════════════════════════════
762
763/// Top-level program parser.
764fn program_parser<'tokens, 'src: 'tokens, I>(
765) -> impl Parser<'tokens, I, Program, extra::Err<Rich<'tokens, Token, Span>>>
766where
767    I: ValueInput<'tokens, Token = Token, Span = Span>,
768{
769    statement_parser()
770        .repeated()
771        .collect::<Vec<_>>()
772        .map(|statements| Program { statements })
773}
774
775/// Statement parser - dispatches based on leading token.
776/// Supports statement-level chaining with && and ||.
777fn statement_parser<'tokens, I>(
778) -> impl Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
779where
780    I: ValueInput<'tokens, Token = Token, Span = Span>,
781{
782    recursive(|stmt| {
783        let terminator = choice((just(Token::Newline), just(Token::Semi))).repeated();
784
785        // break [N] - break out of N levels of loops (default 1)
786        let break_stmt = just(Token::Break)
787            .ignore_then(
788                select! { Token::Int(n) => n as usize }.or_not()
789            )
790            .map(Stmt::Break);
791
792        // continue [N] - continue to next iteration, skipping N levels (default 1)
793        let continue_stmt = just(Token::Continue)
794            .ignore_then(
795                select! { Token::Int(n) => n as usize }.or_not()
796            )
797            .map(Stmt::Continue);
798
799        // return [expr] - return from a tool
800        let return_stmt = just(Token::Return)
801            .ignore_then(primary_expr_parser().or_not())
802            .map(|e| Stmt::Return(e.map(Box::new)));
803
804        // exit [code] - exit the script
805        let exit_stmt = just(Token::Exit)
806            .ignore_then(primary_expr_parser().or_not())
807            .map(|e| Stmt::Exit(e.map(Box::new)));
808
809        // set command: `set -e`, `set +e`, `set` (no args), `set -o pipefail`
810        // This must come BEFORE assignment_parser to handle `set -e` vs `X=value`
811        //
812        // Strategy: Use lookahead to check what follows `set`:
813        // - If followed by a flag (-e, --long, +e): parse as set command
814        // - If followed by identifier NOT followed by =: parse as set command (e.g., `set pipefail`)
815        // - If followed by nothing (end/newline/semi): parse as set command
816        // - If followed by identifier then =: let assignment_parser handle it
817        let set_flag_arg = choice((
818            select! { Token::ShortFlag(f) => Arg::ShortFlag(f) },
819            select! { Token::LongFlag(f) => Arg::LongFlag(f) },
820            // PlusFlag for +e, +x etc. - convert to positional arg with + prefix
821            select! { Token::PlusFlag(f) => Arg::Positional(Expr::Literal(Value::String(format!("+{}", f)))) },
822        ));
823
824        // set with flags: `set -e`, `set -e -u -o pipefail`
825        let set_with_flags = just(Token::Set)
826            .then(set_flag_arg)
827            .then(
828                choice((
829                    set_flag_arg,
830                    // Identifiers like 'pipefail' after -o
831                    ident_parser().map(|name| Arg::Positional(Expr::Literal(Value::String(name)))),
832                ))
833                .repeated()
834                .collect::<Vec<_>>(),
835            )
836            .map(|((_, first_arg), mut rest_args)| {
837                let mut args = vec![first_arg];
838                args.append(&mut rest_args);
839                Stmt::Command(Command {
840                    name: "set".to_string(),
841                    args,
842                    redirects: vec![],
843                })
844            });
845
846        // set with no args: `set` alone (shows settings)
847        // Must be followed by newline, semicolon, end of input, or a chaining operator (&&, ||)
848        let set_no_args = just(Token::Set)
849            .then(
850                choice((
851                    just(Token::Newline).to(()),
852                    just(Token::Semi).to(()),
853                    just(Token::And).to(()),
854                    just(Token::Or).to(()),
855                    end(),
856                ))
857                .rewind(),
858            )
859            .map(|_| Stmt::Command(Command {
860                name: "set".to_string(),
861                args: vec![],
862                redirects: vec![],
863            }));
864
865        // Try set_with_flags first (requires at least one flag)
866        // Then try set_no_args (no args, followed by terminator)
867        // If neither matches, fall through to assignment_parser
868        let set_command = set_with_flags.or(set_no_args);
869
870        // Base statement (without chaining)
871        let base_statement = choice((
872            just(Token::Newline).to(Stmt::Empty),
873            set_command,
874            assignment_parser().map(Stmt::Assignment),
875            // Shell-style functions (use $1, $2 positional params)
876            posix_function_parser(stmt.clone()).map(Stmt::ToolDef),  // name() { }
877            bash_function_parser(stmt.clone()).map(Stmt::ToolDef),   // function name { }
878            if_parser(stmt.clone()).map(Stmt::If),
879            for_parser(stmt.clone()).map(Stmt::For),
880            while_parser(stmt.clone()).map(Stmt::While),
881            case_parser(stmt.clone()).map(Stmt::Case),
882            break_stmt,
883            continue_stmt,
884            return_stmt,
885            exit_stmt,
886            test_expr_stmt_parser().map(Stmt::Test),
887            // Note: 'true' and 'false' are handled by command_parser/pipeline_parser
888            pipeline_parser().map(|p| {
889                // Unwrap single-command pipelines without background and without redirects
890                if p.commands.len() == 1 && !p.background {
891                    // Only unwrap if no redirects - redirects require pipeline processing
892                    if p.commands[0].redirects.is_empty() {
893                        // Safe: we just checked len == 1
894                        match p.commands.into_iter().next() {
895                            Some(cmd) => Stmt::Command(cmd),
896                            None => Stmt::Empty, // unreachable but safe
897                        }
898                    } else {
899                        Stmt::Pipeline(p)
900                    }
901                } else {
902                    Stmt::Pipeline(p)
903                }
904            }),
905        ))
906        .boxed();
907
908        // Statement chaining with precedence: && binds tighter than ||
909        // and_chain = base_stmt { "&&" base_stmt }
910        // or_chain  = and_chain { "||" and_chain }
911        let and_chain = base_statement
912            .clone()
913            .foldl(
914                just(Token::And).ignore_then(base_statement).repeated(),
915                |left, right| Stmt::AndChain {
916                    left: Box::new(left),
917                    right: Box::new(right),
918                },
919            );
920
921        and_chain
922            .clone()
923            .foldl(
924                just(Token::Or).ignore_then(and_chain).repeated(),
925                |left, right| Stmt::OrChain {
926                    left: Box::new(left),
927                    right: Box::new(right),
928                },
929            )
930            .then_ignore(terminator)
931    })
932}
933
934/// Assignment: `NAME=value` (bash-style) or `local NAME = value` (scoped)
935fn assignment_parser<'tokens, I>(
936) -> impl Parser<'tokens, I, Assignment, extra::Err<Rich<'tokens, Token, Span>>> + Clone
937where
938    I: ValueInput<'tokens, Token = Token, Span = Span>,
939{
940    // local NAME = value (with spaces around =)
941    let local_assignment = just(Token::Local)
942        .ignore_then(ident_parser())
943        .then_ignore(just(Token::Eq))
944        .then(expr_parser())
945        .map(|(name, value)| Assignment {
946            name,
947            value,
948            local: true,
949        });
950
951    // Bash-style: NAME=value (no spaces around =)
952    // The lexer produces IDENT EQ EXPR, so we parse it here
953    let bash_assignment = ident_parser()
954        .then_ignore(just(Token::Eq))
955        .then(expr_parser())
956        .map(|(name, value)| Assignment {
957            name,
958            value,
959            local: false,
960        });
961
962    choice((local_assignment, bash_assignment))
963        .labelled("assignment")
964        .boxed()
965}
966
967/// POSIX-style function: `name() { body }`
968///
969/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
970fn posix_function_parser<'tokens, I, S>(
971    stmt: S,
972) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
973where
974    I: ValueInput<'tokens, Token = Token, Span = Span>,
975    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
976{
977    ident_parser()
978        .then_ignore(just(Token::LParen))
979        .then_ignore(just(Token::RParen))
980        .then_ignore(just(Token::LBrace))
981        .then_ignore(just(Token::Newline).repeated())
982        .then(
983            stmt.repeated()
984                .collect::<Vec<_>>()
985                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
986        )
987        .then_ignore(just(Token::Newline).repeated())
988        .then_ignore(just(Token::RBrace))
989        .map(|(name, body)| ToolDef { name, params: vec![], body })
990        .labelled("POSIX function")
991        .boxed()
992}
993
994/// Bash-style function: `function name { body }` (without parens)
995///
996/// Produces a ToolDef with empty params - uses positional params ($1, $2, etc.)
997fn bash_function_parser<'tokens, I, S>(
998    stmt: S,
999) -> impl Parser<'tokens, I, ToolDef, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1000where
1001    I: ValueInput<'tokens, Token = Token, Span = Span>,
1002    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1003{
1004    just(Token::Function)
1005        .ignore_then(ident_parser())
1006        .then_ignore(just(Token::LBrace))
1007        .then_ignore(just(Token::Newline).repeated())
1008        .then(
1009            stmt.repeated()
1010                .collect::<Vec<_>>()
1011                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1012        )
1013        .then_ignore(just(Token::Newline).repeated())
1014        .then_ignore(just(Token::RBrace))
1015        .map(|(name, body)| ToolDef { name, params: vec![], body })
1016        .labelled("bash function")
1017        .boxed()
1018}
1019
1020/// If statement: `if COND; then STMTS [elif COND; then STMTS]* [else STMTS] fi`
1021///
1022/// elif clauses are desugared to nested if/else:
1023///   `if A; then X elif B; then Y else Z fi`
1024/// becomes:
1025///   `if A; then X else { if B; then Y else Z fi } fi`
1026fn if_parser<'tokens, I, S>(
1027    stmt: S,
1028) -> impl Parser<'tokens, I, IfStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1029where
1030    I: ValueInput<'tokens, Token = Token, Span = Span>,
1031    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1032{
1033    // Parse a single branch: condition + then statements
1034    let branch = condition_parser()
1035        .then_ignore(just(Token::Semi).or_not())
1036        .then_ignore(just(Token::Newline).repeated())
1037        .then_ignore(just(Token::Then))
1038        .then_ignore(just(Token::Newline).repeated())
1039        .then(
1040            stmt.clone()
1041                .repeated()
1042                .collect::<Vec<_>>()
1043                .map(|stmts: Vec<Stmt>| {
1044                    stmts
1045                        .into_iter()
1046                        .filter(|s| !matches!(s, Stmt::Empty))
1047                        .collect::<Vec<_>>()
1048                }),
1049        );
1050
1051    // Parse elif branches: `elif COND; then STMTS`
1052    let elif_branch = just(Token::Elif)
1053        .ignore_then(condition_parser())
1054        .then_ignore(just(Token::Semi).or_not())
1055        .then_ignore(just(Token::Newline).repeated())
1056        .then_ignore(just(Token::Then))
1057        .then_ignore(just(Token::Newline).repeated())
1058        .then(
1059            stmt.clone()
1060                .repeated()
1061                .collect::<Vec<_>>()
1062                .map(|stmts: Vec<Stmt>| {
1063                    stmts
1064                        .into_iter()
1065                        .filter(|s| !matches!(s, Stmt::Empty))
1066                        .collect::<Vec<_>>()
1067                }),
1068        );
1069
1070    // Parse else branch: `else STMTS`
1071    let else_branch = just(Token::Else)
1072        .ignore_then(just(Token::Newline).repeated())
1073        .ignore_then(stmt.repeated().collect::<Vec<_>>())
1074        .map(|stmts: Vec<Stmt>| {
1075            stmts
1076                .into_iter()
1077                .filter(|s| !matches!(s, Stmt::Empty))
1078                .collect::<Vec<_>>()
1079        });
1080
1081    just(Token::If)
1082        .ignore_then(branch)
1083        .then(elif_branch.repeated().collect::<Vec<_>>())
1084        .then(else_branch.or_not())
1085        .then_ignore(just(Token::Fi))
1086        .map(|(((condition, then_branch), elif_branches), else_branch)| {
1087            // Build nested if/else structure from elif branches
1088            build_if_chain(condition, then_branch, elif_branches, else_branch)
1089        })
1090        .labelled("if statement")
1091        .boxed()
1092}
1093
1094/// Build a nested IfStmt chain from elif branches.
1095///
1096/// Transforms:
1097///   if A then X elif B then Y elif C then Z else W fi
1098/// Into:
1099///   IfStmt { cond: A, then: X, else: Some([IfStmt { cond: B, then: Y, else: Some([IfStmt { cond: C, then: Z, else: Some(W) }]) }]) }
1100fn build_if_chain(
1101    condition: Expr,
1102    then_branch: Vec<Stmt>,
1103    mut elif_branches: Vec<(Expr, Vec<Stmt>)>,
1104    else_branch: Option<Vec<Stmt>>,
1105) -> IfStmt {
1106    if elif_branches.is_empty() {
1107        // No elif, just if/else
1108        IfStmt {
1109            condition: Box::new(condition),
1110            then_branch,
1111            else_branch,
1112        }
1113    } else {
1114        // Pop the first elif and recursively build the rest
1115        let (elif_cond, elif_then) = elif_branches.remove(0);
1116        let nested_if = build_if_chain(elif_cond, elif_then, elif_branches, else_branch);
1117        IfStmt {
1118            condition: Box::new(condition),
1119            then_branch,
1120            else_branch: Some(vec![Stmt::If(nested_if)]),
1121        }
1122    }
1123}
1124
1125/// For loop: `for VAR in ITEMS; do STMTS done`
1126fn for_parser<'tokens, I, S>(
1127    stmt: S,
1128) -> impl Parser<'tokens, I, ForLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1129where
1130    I: ValueInput<'tokens, Token = Token, Span = Span>,
1131    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1132{
1133    just(Token::For)
1134        .ignore_then(ident_parser())
1135        .then_ignore(just(Token::In))
1136        .then(expr_parser().repeated().at_least(1).collect::<Vec<_>>())
1137        .then_ignore(just(Token::Semi).or_not())
1138        .then_ignore(just(Token::Newline).repeated())
1139        .then_ignore(just(Token::Do))
1140        .then_ignore(just(Token::Newline).repeated())
1141        .then(
1142            stmt.repeated()
1143                .collect::<Vec<_>>()
1144                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1145        )
1146        .then_ignore(just(Token::Done))
1147        .map(|((variable, items), body)| ForLoop {
1148            variable,
1149            items,
1150            body,
1151        })
1152        .labelled("for loop")
1153        .boxed()
1154}
1155
1156/// While loop: `while condition; do ...; done`
1157fn while_parser<'tokens, I, S>(
1158    stmt: S,
1159) -> impl Parser<'tokens, I, WhileLoop, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1160where
1161    I: ValueInput<'tokens, Token = Token, Span = Span>,
1162    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1163{
1164    just(Token::While)
1165        .ignore_then(condition_parser())
1166        .then_ignore(just(Token::Semi).or_not())
1167        .then_ignore(just(Token::Newline).repeated())
1168        .then_ignore(just(Token::Do))
1169        .then_ignore(just(Token::Newline).repeated())
1170        .then(
1171            stmt.repeated()
1172                .collect::<Vec<_>>()
1173                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1174        )
1175        .then_ignore(just(Token::Done))
1176        .map(|(condition, body)| WhileLoop {
1177            condition: Box::new(condition),
1178            body,
1179        })
1180        .labelled("while loop")
1181        .boxed()
1182}
1183
1184/// Case statement: `case expr in pattern) commands ;; esac`
1185///
1186/// Supports:
1187/// - Single patterns: `pattern) commands ;;`
1188/// - Multiple patterns: `pattern1|pattern2) commands ;;`
1189/// - Optional leading `(` before patterns: `(pattern) commands ;;`
1190fn case_parser<'tokens, I, S>(
1191    stmt: S,
1192) -> impl Parser<'tokens, I, CaseStmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1193where
1194    I: ValueInput<'tokens, Token = Token, Span = Span>,
1195    S: Parser<'tokens, I, Stmt, extra::Err<Rich<'tokens, Token, Span>>> + Clone + 'tokens,
1196{
1197    // Pattern part: individual tokens that make up a glob pattern
1198    // e.g., "*.rs" is Star + Dot + Ident("rs")
1199    let pattern_part = choice((
1200        select! { Token::GlobWord(s) => s },
1201        select! { Token::Ident(s) => s },
1202        select! { Token::NumberIdent(s) => s },
1203        select! { Token::DottedIdent(s) => s },
1204        select! { Token::String(s) => s },
1205        select! { Token::SingleString(s) => s },
1206        select! { Token::Int(n) => n.to_string() },
1207        select! { Token::Star => "*".to_string() },
1208        select! { Token::Question => "?".to_string() },
1209        select! { Token::Dot => ".".to_string() },
1210        select! { Token::DotDot => "..".to_string() },
1211        select! { Token::Tilde => "~".to_string() },
1212        select! { Token::TildePath(s) => s },
1213        select! { Token::RelativePath(s) => s },
1214        select! { Token::DotSlashPath(s) => s },
1215        select! { Token::Path(p) => p },
1216        select! { Token::VarRef(v) => v },
1217        select! { Token::SimpleVarRef(v) => format!("${}", v) },
1218        // Character class: [a-z], [!abc], [^abc], etc.
1219        just(Token::LBracket)
1220            .ignore_then(
1221                choice((
1222                    select! { Token::Ident(s) => s },
1223                    select! { Token::Int(n) => n.to_string() },
1224                    just(Token::Colon).to(":".to_string()),
1225                    // Negation: ! or ^ at start of char class
1226                    just(Token::Bang).to("!".to_string()),
1227                    // Range like a-z
1228                    select! { Token::ShortFlag(s) => format!("-{}", s) },
1229                ))
1230                .repeated()
1231                .at_least(1)
1232                .collect::<Vec<String>>()
1233            )
1234            .then_ignore(just(Token::RBracket))
1235            .map(|parts| format!("[{}]", parts.join(""))),
1236        // Brace expansion: {a,b,c} or {js,ts}
1237        just(Token::LBrace)
1238            .ignore_then(
1239                choice((
1240                    select! { Token::Ident(s) => s },
1241                    select! { Token::Int(n) => n.to_string() },
1242                ))
1243                .separated_by(just(Token::Comma))
1244                .at_least(1)
1245                .collect::<Vec<String>>()
1246            )
1247            .then_ignore(just(Token::RBrace))
1248            .map(|parts| format!("{{{}}}", parts.join(","))),
1249    ));
1250
1251    // A complete pattern is one or more pattern parts joined together
1252    // e.g., "*.rs" = Star + Dot + Ident
1253    let pattern = pattern_part
1254        .repeated()
1255        .at_least(1)
1256        .collect::<Vec<String>>()
1257        .map(|parts| parts.join(""))
1258        .labelled("case pattern");
1259
1260    // Multiple patterns separated by pipe: `pattern1 | pattern2`
1261    let patterns = pattern
1262        .separated_by(just(Token::Pipe))
1263        .at_least(1)
1264        .collect::<Vec<String>>()
1265        .labelled("case patterns");
1266
1267    // Branch: `[( ] patterns ) commands ;;`
1268    let branch = just(Token::LParen)
1269        .or_not()
1270        .ignore_then(just(Token::Newline).repeated())
1271        .ignore_then(patterns)
1272        .then_ignore(just(Token::RParen))
1273        .then_ignore(just(Token::Newline).repeated())
1274        .then(
1275            stmt.clone()
1276                .repeated()
1277                .collect::<Vec<_>>()
1278                .map(|stmts| stmts.into_iter().filter(|s| !matches!(s, Stmt::Empty)).collect()),
1279        )
1280        .then_ignore(just(Token::DoubleSemi))
1281        .then_ignore(just(Token::Newline).repeated())
1282        .map(|(patterns, body)| CaseBranch { patterns, body })
1283        .labelled("case branch");
1284
1285    just(Token::Case)
1286        .ignore_then(expr_parser())
1287        .then_ignore(just(Token::In))
1288        .then_ignore(just(Token::Newline).repeated())
1289        .then(branch.repeated().collect::<Vec<_>>())
1290        .then_ignore(just(Token::Esac))
1291        .map(|(expr, branches)| CaseStmt { expr, branches })
1292        .labelled("case statement")
1293        .boxed()
1294}
1295
1296/// Pipeline: `cmd | cmd | cmd [&]`
1297fn pipeline_parser<'tokens, I>(
1298) -> impl Parser<'tokens, I, Pipeline, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1299where
1300    I: ValueInput<'tokens, Token = Token, Span = Span>,
1301{
1302    command_parser()
1303        .separated_by(just(Token::Pipe))
1304        .at_least(1)
1305        .collect::<Vec<_>>()
1306        .then(just(Token::Amp).or_not())
1307        .map(|(commands, bg)| Pipeline {
1308            commands,
1309            background: bg.is_some(),
1310        })
1311        .labelled("pipeline")
1312        .boxed()
1313}
1314
1315/// Command: `name args... [redirects...]`
1316/// Command names can be identifiers, 'true', 'false', or '.' (source alias).
1317fn command_parser<'tokens, I>(
1318) -> impl Parser<'tokens, I, Command, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1319where
1320    I: ValueInput<'tokens, Token = Token, Span = Span>,
1321{
1322    // Command name can be an identifier, path, 'true', 'false', '.' (source alias), or ./path
1323    let command_name = choice((
1324        ident_parser(),
1325        path_parser(),
1326        select! { Token::DotSlashPath(s) => s },
1327        just(Token::True).to("true".to_string()),
1328        just(Token::False).to("false".to_string()),
1329        just(Token::Dot).to(".".to_string()),
1330    ));
1331
1332    command_name
1333        .then(args_list_parser())
1334        .then(redirect_parser().repeated().collect::<Vec<_>>())
1335        .try_map(|((name, args), redirects), span| {
1336            // At most one stdin-source redirect per command. Multiple `<<<`,
1337            // or mixing `<` with `<<` or `<<<`, would silently depend on
1338            // ordering — reject loudly instead.
1339            let stdin_sources = redirects
1340                .iter()
1341                .filter(|r| {
1342                    matches!(
1343                        r.kind,
1344                        RedirectKind::Stdin
1345                            | RedirectKind::HereDoc
1346                            | RedirectKind::HereString
1347                    )
1348                })
1349                .count();
1350            if stdin_sources > 1 {
1351                return Err(Rich::custom(
1352                    span,
1353                    "multiple stdin redirects on one command are ambiguous; \
1354                     use exactly one of `<`, `<<`, or `<<<`",
1355                ));
1356            }
1357            Ok(Command {
1358                name,
1359                args,
1360                redirects,
1361            })
1362        })
1363        .labelled("command")
1364        .boxed()
1365}
1366
1367/// Arguments list parser that handles `--` flag terminator.
1368///
1369/// After `--`, all subsequent flags are converted to positional string arguments.
1370fn args_list_parser<'tokens, I>(
1371) -> impl Parser<'tokens, I, Vec<Arg>, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1372where
1373    I: ValueInput<'tokens, Token = Token, Span = Span>,
1374{
1375    // Arguments before `--` (normal parsing)
1376    let pre_dash = arg_before_double_dash_parser()
1377        .repeated()
1378        .collect::<Vec<_>>();
1379
1380    // The `--` marker itself
1381    let double_dash = select! {
1382        Token::DoubleDash => Arg::DoubleDash,
1383    };
1384
1385    // Arguments after `--` (flags become positional strings)
1386    let post_dash_arg = choice((
1387        // Flags become positional strings
1388        select! {
1389            Token::ShortFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("-{}", name)))),
1390            Token::LongFlag(name) => Arg::Positional(Expr::Literal(Value::String(format!("--{}", name)))),
1391        },
1392        // Everything else stays the same
1393        primary_expr_parser().map(Arg::Positional),
1394    ));
1395
1396    let post_dash = post_dash_arg.repeated().collect::<Vec<_>>();
1397
1398    // Combine: args_before ++ [--] ++ args_after
1399    pre_dash
1400        .then(double_dash.then(post_dash).or_not())
1401        .map(|(mut args, maybe_dd)| {
1402            if let Some((dd, post)) = maybe_dd {
1403                args.push(dd);
1404                args.extend(post);
1405            }
1406            args
1407        })
1408}
1409
1410/// Argument parser for arguments before `--` (normal flag handling).
1411fn arg_before_double_dash_parser<'tokens, I>(
1412) -> impl Parser<'tokens, I, Arg, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1413where
1414    I: ValueInput<'tokens, Token = Token, Span = Span>,
1415{
1416    // Long flag with value: --name=value
1417    let long_flag_with_value = select! {
1418        Token::LongFlag(name) => name,
1419    }
1420    .then_ignore(just(Token::Eq))
1421    .then(primary_expr_parser())
1422    .map(|(key, value)| Arg::Named { key, value });
1423
1424    // Boolean long flag: --name
1425    let long_flag = select! {
1426        Token::LongFlag(name) => Arg::LongFlag(name),
1427    };
1428
1429    // Boolean short flag: -x
1430    let short_flag = select! {
1431        Token::ShortFlag(name) => Arg::ShortFlag(name),
1432    };
1433
1434    // Named argument: name=value (must not have spaces around =)
1435    // We use map_with to capture spans and validate adjacency
1436    let named = select! {
1437        Token::Ident(s) => s,
1438    }
1439    .map_with(|s, e| -> (String, Span) { (s, e.span()) })
1440    .then(just(Token::Eq).map_with(|_, e| -> Span { e.span() }))
1441    .then(primary_expr_parser().map_with(|expr, e| -> (Expr, Span) { (expr, e.span()) }))
1442    .try_map(|(((key, key_span), eq_span), (value, value_span)): (((String, Span), Span), (Expr, Span)), span| {
1443        // Check that key ends where = starts and = ends where value starts
1444        if key_span.end != eq_span.start || eq_span.end != value_span.start {
1445            Err(Rich::custom(
1446                span,
1447                "named argument must not have spaces around '=' (use 'key=value' not 'key = value')",
1448            ))
1449        } else {
1450            Ok(Arg::Named { key, value })
1451        }
1452    });
1453
1454    // Positional argument
1455    let positional = primary_expr_parser().map(Arg::Positional);
1456
1457    // Order matters: try more specific patterns first
1458    // Note: DoubleDash is NOT included here - it's handled by args_list_parser
1459    choice((
1460        long_flag_with_value,
1461        long_flag,
1462        short_flag,
1463        named,
1464        positional,
1465    ))
1466    .boxed()
1467}
1468
1469/// Redirect: `> file`, `>> file`, `< file`, `<< heredoc`, `2> file`, `&> file`, `2>&1`
1470fn redirect_parser<'tokens, I>(
1471) -> impl Parser<'tokens, I, Redirect, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1472where
1473    I: ValueInput<'tokens, Token = Token, Span = Span>,
1474{
1475    // Regular redirects: >, >>, <, 2>, &>
1476    let regular_redirect = select! {
1477        Token::GtGt => RedirectKind::StdoutAppend,
1478        Token::Gt => RedirectKind::StdoutOverwrite,
1479        Token::Lt => RedirectKind::Stdin,
1480        Token::Stderr => RedirectKind::Stderr,
1481        Token::Both => RedirectKind::Both,
1482    }
1483    .then(primary_expr_parser())
1484    .map(|(kind, target)| Redirect { kind, target });
1485
1486    // Here-doc redirect: << content
1487    // Quoted delimiters (<<'EOF' or <<"EOF") produce literal heredocs (no expansion).
1488    // Unquoted delimiters produce interpolated heredocs (variables are expanded).
1489    // For literal heredocs the `<<-EOF` tab stripping is applied here at parse
1490    // time (the body is fully known); for interpolated heredocs the stripping
1491    // is deferred to the interpreter so source byte offsets in `parts` stay
1492    // aligned with the original source for span reporting.
1493    let heredoc_redirect = just(Token::HereDocStart)
1494        .ignore_then(select! { Token::HereDoc(data) => data })
1495        .map(|data: HereDocData| {
1496            let target = if data.literal {
1497                let body = if data.strip_tabs {
1498                    crate::interpreter::strip_leading_tabs(&data.content)
1499                } else {
1500                    data.content
1501                };
1502                Expr::Literal(Value::String(body))
1503            } else {
1504                let parts = parse_interpolated_string_spanned(
1505                    &data.content,
1506                    data.body_start_offset,
1507                );
1508                // If there's only one literal part and no tab stripping is
1509                // needed, simplify to Expr::Literal — keeps the AST shape
1510                // identical to the pre-spans path for trivial bodies.
1511                if parts.len() == 1 && !data.strip_tabs {
1512                    if let StringPart::Literal(text) = &parts[0].part {
1513                        return Redirect {
1514                            kind: RedirectKind::HereDoc,
1515                            target: Expr::Literal(Value::String(text.clone())),
1516                        };
1517                    }
1518                }
1519                Expr::HereDocBody {
1520                    parts,
1521                    strip_tabs: data.strip_tabs,
1522                }
1523            };
1524            Redirect {
1525                kind: RedirectKind::HereDoc,
1526                target,
1527            }
1528        });
1529
1530    // Here-string redirect: <<< word
1531    // The target is any single expression; kaish's existing Expr machinery
1532    // handles interpolation, single-quoted literals, and command substitution.
1533    let herestring_redirect = just(Token::HereString)
1534        .ignore_then(primary_expr_parser())
1535        .map(|target| Redirect {
1536            kind: RedirectKind::HereString,
1537            target,
1538        });
1539
1540    // Merge stderr to stdout: 2>&1 (no target needed - implicit)
1541    let merge_stderr_redirect = just(Token::StderrToStdout)
1542        .map(|_| Redirect {
1543            kind: RedirectKind::MergeStderr,
1544            // Target is unused for MergeStderr, but we need something
1545            target: Expr::Literal(Value::Null),
1546        });
1547
1548    // Merge stdout to stderr: 1>&2 or >&2 (no target needed - implicit)
1549    let merge_stdout_redirect = choice((
1550        just(Token::StdoutToStderr),
1551        just(Token::StdoutToStderr2),
1552    ))
1553    .map(|_| Redirect {
1554        kind: RedirectKind::MergeStdout,
1555        // Target is unused for MergeStdout, but we need something
1556        target: Expr::Literal(Value::Null),
1557    });
1558
1559    choice((
1560        heredoc_redirect,
1561        herestring_redirect,
1562        merge_stderr_redirect,
1563        merge_stdout_redirect,
1564        regular_redirect,
1565    ))
1566    .labelled("redirect")
1567    .boxed()
1568}
1569
1570/// Test expression parser for `[[ ... ]]` syntax.
1571///
1572/// Supports:
1573/// - File tests: `[[ -f path ]]`, `[[ -d path ]]`, etc.
1574/// - String tests: `[[ -z str ]]`, `[[ -n str ]]`
1575/// - Comparisons: `[[ $X == "value" ]]`, `[[ $NUM -gt 5 ]]`
1576/// - Compound: `[[ -f a && -d b ]]`, `[[ -z x || -n y ]]`, `[[ ! -f file ]]`
1577///
1578/// Precedence (highest to lowest): `!` > `&&` > `||`
1579fn test_expr_stmt_parser<'tokens, I>(
1580) -> impl Parser<'tokens, I, TestExpr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1581where
1582    I: ValueInput<'tokens, Token = Token, Span = Span>,
1583{
1584    // File test operators: -e, -f, -d, -r, -w, -x
1585    let file_test_op = select! {
1586        Token::ShortFlag(s) if s == "e" => FileTestOp::Exists,
1587        Token::ShortFlag(s) if s == "f" => FileTestOp::IsFile,
1588        Token::ShortFlag(s) if s == "d" => FileTestOp::IsDir,
1589        Token::ShortFlag(s) if s == "r" => FileTestOp::Readable,
1590        Token::ShortFlag(s) if s == "w" => FileTestOp::Writable,
1591        Token::ShortFlag(s) if s == "x" => FileTestOp::Executable,
1592    };
1593
1594    // String test operators: -z, -n
1595    let string_test_op = select! {
1596        Token::ShortFlag(s) if s == "z" => StringTestOp::IsEmpty,
1597        Token::ShortFlag(s) if s == "n" => StringTestOp::IsNonEmpty,
1598    };
1599
1600    // Comparison operators: =, ==, !=, =~, !~, >, <, >=, <=, -gt, -lt, -ge, -le, -eq, -ne
1601    // Note: = and == are equivalent inside [[ ]] (matching bash behavior)
1602    let cmp_op = choice((
1603        just(Token::EqEq).to(TestCmpOp::Eq),
1604        just(Token::Eq).to(TestCmpOp::Eq),
1605        just(Token::NotEq).to(TestCmpOp::NotEq),
1606        just(Token::Match).to(TestCmpOp::Match),
1607        just(Token::NotMatch).to(TestCmpOp::NotMatch),
1608        just(Token::Gt).to(TestCmpOp::Gt),
1609        just(Token::Lt).to(TestCmpOp::Lt),
1610        just(Token::GtEq).to(TestCmpOp::GtEq),
1611        just(Token::LtEq).to(TestCmpOp::LtEq),
1612        select! { Token::ShortFlag(s) if s == "eq" => TestCmpOp::NumEq },
1613        select! { Token::ShortFlag(s) if s == "ne" => TestCmpOp::NumNotEq },
1614        select! { Token::ShortFlag(s) if s == "gt" => TestCmpOp::NumGt },
1615        select! { Token::ShortFlag(s) if s == "lt" => TestCmpOp::NumLt },
1616        select! { Token::ShortFlag(s) if s == "ge" => TestCmpOp::NumGtEq },
1617        select! { Token::ShortFlag(s) if s == "le" => TestCmpOp::NumLtEq },
1618    ));
1619
1620    // File test: -f path
1621    let file_test = file_test_op
1622        .then(primary_expr_parser())
1623        .map(|(op, path)| TestExpr::FileTest {
1624            op,
1625            path: Box::new(path),
1626        });
1627
1628    // String test: -z str
1629    let string_test = string_test_op
1630        .then(primary_expr_parser())
1631        .map(|(op, value)| TestExpr::StringTest {
1632            op,
1633            value: Box::new(value),
1634        });
1635
1636    // Comparison: $X == "value" or $NUM -gt 5
1637    let comparison = primary_expr_parser()
1638        .then(cmp_op)
1639        .then(primary_expr_parser())
1640        .map(|((left, op), right)| TestExpr::Comparison {
1641            left: Box::new(left),
1642            op,
1643            right: Box::new(right),
1644        });
1645
1646    // Primary test expression (atomic - no compound operators)
1647    let primary_test = choice((file_test, string_test, comparison));
1648
1649    // Build compound expressions with proper precedence:
1650    // Grammar:
1651    //   test_expr = or_expr
1652    //   or_expr   = and_expr { "||" and_expr }
1653    //   and_expr  = unary_expr { "&&" unary_expr }
1654    //   unary_expr = "!" unary_expr | primary_test
1655    //
1656    // Precedence: ! (highest) > && > ||
1657
1658    // Use recursive for the unary NOT operator
1659    let compound_test = recursive(|compound| {
1660        // Unary NOT: ! expr (can be chained: ! ! expr)
1661        let not_expr = just(Token::Bang)
1662            .ignore_then(compound.clone())
1663            .map(|expr| TestExpr::Not { expr: Box::new(expr) });
1664
1665        // Unary level: ! or primary
1666        let unary = choice((not_expr, primary_test.clone()));
1667
1668        // AND level: unary && unary && ...
1669        let and_expr = unary.clone().foldl(
1670            just(Token::And).ignore_then(unary).repeated(),
1671            |left, right| TestExpr::And {
1672                left: Box::new(left),
1673                right: Box::new(right),
1674            },
1675        );
1676
1677        // OR level: and_expr || and_expr || ...
1678        and_expr.clone().foldl(
1679            just(Token::Or).ignore_then(and_expr).repeated(),
1680            |left, right| TestExpr::Or {
1681                left: Box::new(left),
1682                right: Box::new(right),
1683            },
1684        )
1685    });
1686
1687    // [[ ]] is two consecutive bracket tokens (not a single TestStart token)
1688    // to avoid conflicts with nested array syntax like [[1, 2], [3, 4]]
1689    just(Token::LBracket)
1690        .then(just(Token::LBracket))
1691        .ignore_then(compound_test)
1692        .then_ignore(just(Token::RBracket).then(just(Token::RBracket)))
1693        .labelled("test expression")
1694        .boxed()
1695}
1696
1697/// Condition parser: supports [[ ]] test expressions and commands with && / || chaining.
1698///
1699/// Shell semantics: conditions are commands whose exit codes determine truthiness.
1700/// - `if true; then` → runs `true` builtin, exit code 0 = truthy
1701/// - `if grep -q pattern file; then` → runs command, checks exit code
1702/// - `if a && b; then` → runs `a`, if exit 0, runs `b`
1703///
1704/// Use `[[ ]]` for comparisons: `if [[ $X -gt 5 ]]; then`
1705///
1706/// Grammar (with precedence - && binds tighter than ||):
1707///   condition = or_expr
1708///   or_expr   = and_expr { "||" and_expr }
1709///   and_expr  = base { "&&" base }
1710///   base      = test_expr | command
1711fn condition_parser<'tokens, I>(
1712) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1713where
1714    I: ValueInput<'tokens, Token = Token, Span = Span>,
1715{
1716    // [[ ]] test expression - wrap as Expr::Test
1717    let test_expr_condition = test_expr_stmt_parser().map(|test| Expr::Test(Box::new(test)));
1718
1719    // Command as condition (includes true/false as command names)
1720    // The command's exit code determines truthiness (0 = true, non-zero = false)
1721    let command_condition = command_parser().map(Expr::Command);
1722
1723    // Base: test expr OR command
1724    let base = choice((test_expr_condition, command_condition));
1725
1726    // && has higher precedence than ||
1727    // First chain with && (higher precedence)
1728    let and_expr = base.clone().foldl(
1729        just(Token::And).ignore_then(base).repeated(),
1730        |left, right| Expr::BinaryOp {
1731            left: Box::new(left),
1732            op: BinaryOp::And,
1733            right: Box::new(right),
1734        },
1735    );
1736
1737    // Then chain with || (lower precedence)
1738    and_expr
1739        .clone()
1740        .foldl(
1741            just(Token::Or).ignore_then(and_expr).repeated(),
1742            |left, right| Expr::BinaryOp {
1743                left: Box::new(left),
1744                op: BinaryOp::Or,
1745                right: Box::new(right),
1746            },
1747        )
1748        .labelled("condition")
1749        .boxed()
1750}
1751
/// Expression parser.
///
/// Currently this is a thin alias for [`primary_expr_parser`]: it parses a
/// single primary expression and does NOT handle `&&` / `||` binary
/// operators at this level.
fn expr_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = Span>,
{
    // For now, just primary expressions. Can extend for && / || later if needed.
    primary_expr_parser()
}
1761
1762/// Primary expression: literal, variable reference, command substitution, or bare identifier.
1763///
1764/// Uses `recursive` to support nested command substitution like `$(echo $(date))`.
1765fn primary_expr_parser<'tokens, I>(
1766) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1767where
1768    I: ValueInput<'tokens, Token = Token, Span = Span>,
1769{
1770    // Positional parameters: $0-$9, $@, $#, ${#VAR}, $?, $$
1771    let positional = select! {
1772        Token::Positional(n) => Expr::Positional(n),
1773        Token::AllArgs => Expr::AllArgs,
1774        Token::ArgCount => Expr::ArgCount,
1775        Token::VarLength(name) => Expr::VarLength(name),
1776        Token::LastExitCode => Expr::LastExitCode,
1777        Token::CurrentPid => Expr::CurrentPid,
1778    };
1779
1780    // Arithmetic expression: $((expr)) - preprocessed into Arithmetic token
1781    let arithmetic = select! {
1782        Token::Arithmetic(expr_str) => Expr::Arithmetic(expr_str),
1783    };
1784
1785    // Keywords that can also be used as barewords in argument position
1786    // (e.g., `echo done` should work even though `done` is a keyword)
1787    let keyword_as_bareword = select! {
1788        Token::Done => "done",
1789        Token::Fi => "fi",
1790        Token::Then => "then",
1791        Token::Else => "else",
1792        Token::Elif => "elif",
1793        Token::In => "in",
1794        Token::Do => "do",
1795        Token::Esac => "esac",
1796    }
1797    .map(|s| Expr::Literal(Value::String(s.to_string())));
1798
1799    // Bare words starting with + or - (e.g., date +%s, cat -)
1800    let plus_minus_bare = select! {
1801        Token::PlusBare(s) => Expr::Literal(Value::String(s)),
1802        Token::MinusBare(s) => Expr::Literal(Value::String(s)),
1803        Token::MinusAlone => Expr::Literal(Value::String("-".to_string())),
1804    };
1805
1806    // Glob patterns: merged GlobWord tokens and bare Star/Question
1807    let glob_pattern = select! {
1808        Token::GlobWord(s) => Expr::GlobPattern(s),
1809        Token::Star => Expr::GlobPattern("*".to_string()),
1810        Token::Question => Expr::GlobPattern("?".to_string()),
1811    };
1812
1813    recursive(|expr| {
1814        choice((
1815            positional,
1816            arithmetic,
1817            cmd_subst_parser(expr.clone()),
1818            var_expr_parser(),
1819            interpolated_string_parser(),
1820            literal_parser().map(Expr::Literal),
1821            // Glob patterns before ident (GlobWord is more specific)
1822            glob_pattern,
1823            // Bare identifiers become string literals (shell barewords)
1824            ident_parser().map(|s| Expr::Literal(Value::String(s))),
1825            // Absolute paths become string literals
1826            path_parser().map(|s| Expr::Literal(Value::String(s))),
1827            // Bare words starting with + or - (date +%s, cat -)
1828            // Shell navigation tokens
1829            select! {
1830                Token::DotDot => Expr::Literal(Value::String("..".into())),
1831                Token::Tilde => Expr::Literal(Value::String("~".into())),
1832                Token::TildePath(s) => Expr::Literal(Value::String(s)),
1833                Token::RelativePath(s) => Expr::Literal(Value::String(s)),
1834                Token::DotSlashPath(s) => Expr::Literal(Value::String(s)),
1835                // Digit-leading bareword (SHA prefix `019dda1c`, UUIDs).
1836                Token::NumberIdent(s) => Expr::Literal(Value::String(s)),
1837                // Dot-prefixed bareword (`.gitignore`, `.parent`, `.parent.parent`).
1838                // Distinct from `Token::Dot` (the source alias), which only
1839                // matches a bare `.` and requires whitespace before its file
1840                // argument.
1841                Token::DottedIdent(s) => Expr::Literal(Value::String(s)),
1842            },
1843            plus_minus_bare,
1844            // Keywords can be used as barewords in argument position
1845            keyword_as_bareword,
1846        ))
1847        .labelled("expression")
1848    })
1849    .boxed()
1850}
1851
1852/// Variable reference: `${VAR}`, `${VAR.field}`, `${VAR:-default}`, or `$VAR` (simple form).
1853/// Returns Expr directly to support both VarRef and VarWithDefault.
1854fn var_expr_parser<'tokens, I>(
1855) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1856where
1857    I: ValueInput<'tokens, Token = Token, Span = Span>,
1858{
1859    select! {
1860        Token::VarRef(raw) => parse_var_expr(&raw),
1861        Token::SimpleVarRef(name) => Expr::VarRef(VarPath::simple(name)),
1862    }
1863    .labelled("variable reference")
1864}
1865
1866/// Command substitution: `$(pipeline)` - runs a pipeline and returns its result.
1867///
1868/// Accepts a recursive expression parser to support nested command substitution.
1869fn cmd_subst_parser<'tokens, I, E>(
1870    expr: E,
1871) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1872where
1873    I: ValueInput<'tokens, Token = Token, Span = Span>,
1874    E: Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone,
1875{
1876    // Argument parser using the recursive expression parser
1877    // Long flag with value: --name=value
1878    let long_flag_with_value = select! {
1879        Token::LongFlag(name) => name,
1880    }
1881    .then_ignore(just(Token::Eq))
1882    .then(expr.clone())
1883    .map(|(key, value)| Arg::Named { key, value });
1884
1885    // Boolean long flag: --name
1886    let long_flag = select! {
1887        Token::LongFlag(name) => Arg::LongFlag(name),
1888    };
1889
1890    // Boolean short flag: -x
1891    let short_flag = select! {
1892        Token::ShortFlag(name) => Arg::ShortFlag(name),
1893    };
1894
1895    // Named argument: name=value
1896    let named = ident_parser()
1897        .then_ignore(just(Token::Eq))
1898        .then(expr.clone())
1899        .map(|(key, value)| Arg::Named { key, value });
1900
1901    // Positional argument
1902    let positional = expr.map(Arg::Positional);
1903
1904    let arg = choice((
1905        long_flag_with_value,
1906        long_flag,
1907        short_flag,
1908        named,
1909        positional,
1910    ));
1911
1912    // Command name parser - accepts identifiers and boolean keywords (true/false are builtins)
1913    let command_name = choice((
1914        ident_parser(),
1915        just(Token::True).to("true".to_string()),
1916        just(Token::False).to("false".to_string()),
1917    ));
1918
1919    // Command parser
1920    let command = command_name
1921        .then(arg.repeated().collect::<Vec<_>>())
1922        .map(|(name, args)| Command {
1923            name,
1924            args,
1925            redirects: vec![],
1926        });
1927
1928    // Pipeline parser
1929    let pipeline = command
1930        .separated_by(just(Token::Pipe))
1931        .at_least(1)
1932        .collect::<Vec<_>>()
1933        .map(|commands| Pipeline {
1934            commands,
1935            background: false,
1936        });
1937
1938    just(Token::CmdSubstStart)
1939        .ignore_then(pipeline)
1940        .then_ignore(just(Token::RParen))
1941        .map(|pipeline| Expr::CommandSubst(Box::new(pipeline)))
1942        .labelled("command substitution")
1943}
1944
1945/// String parser - handles double-quoted strings (with interpolation) and single-quoted (literal).
1946fn interpolated_string_parser<'tokens, I>(
1947) -> impl Parser<'tokens, I, Expr, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1948where
1949    I: ValueInput<'tokens, Token = Token, Span = Span>,
1950{
1951    // Double-quoted string: may contain $VAR or ${VAR} interpolation
1952    let double_quoted = select! {
1953        Token::String(s) => s,
1954    }
1955    .map(|s| {
1956        // Check if string contains interpolation markers (${} or $NAME) or escaped dollars
1957        if s.contains('$') || s.contains("__KAISH_ESCAPED_DOLLAR__") {
1958            // Parse interpolated parts
1959            let parts = parse_interpolated_string(&s);
1960            if parts.len() == 1
1961                && let StringPart::Literal(text) = &parts[0] {
1962                    return Expr::Literal(Value::String(text.clone()));
1963                }
1964            Expr::Interpolated(parts)
1965        } else {
1966            Expr::Literal(Value::String(s))
1967        }
1968    });
1969
1970    // Single-quoted string: literal, no interpolation
1971    let single_quoted = select! {
1972        Token::SingleString(s) => Expr::Literal(Value::String(s)),
1973    };
1974
1975    choice((single_quoted, double_quoted)).labelled("string")
1976}
1977
1978/// Literal value parser (excluding strings, which are handled by interpolated_string_parser).
1979fn literal_parser<'tokens, I>(
1980) -> impl Parser<'tokens, I, Value, extra::Err<Rich<'tokens, Token, Span>>> + Clone
1981where
1982    I: ValueInput<'tokens, Token = Token, Span = Span>,
1983{
1984    choice((
1985        select! {
1986            Token::True => Value::Bool(true),
1987            Token::False => Value::Bool(false),
1988        },
1989        select! {
1990            Token::Int(n) => Value::Int(n),
1991            Token::Float(f) => Value::Float(f),
1992        },
1993    ))
1994    .labelled("literal")
1995    .boxed()
1996}
1997
1998/// Identifier parser.
1999fn ident_parser<'tokens, I>(
2000) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2001where
2002    I: ValueInput<'tokens, Token = Token, Span = Span>,
2003{
2004    select! {
2005        Token::Ident(s) => s,
2006    }
2007    .labelled("identifier")
2008}
2009
2010/// Path parser: matches absolute paths like `/tmp/out`, `/etc/hosts`.
2011fn path_parser<'tokens, I>(
2012) -> impl Parser<'tokens, I, String, extra::Err<Rich<'tokens, Token, Span>>> + Clone
2013where
2014    I: ValueInput<'tokens, Token = Token, Span = Span>,
2015{
2016    select! {
2017        Token::Path(s) => s,
2018    }
2019    .labelled("path")
2020}
2021
2022#[cfg(test)]
2023mod tests {
2024    use super::*;
2025
2026    #[test]
2027    fn parse_empty() {
2028        let result = parse("");
2029        assert!(result.is_ok());
2030        assert_eq!(result.expect("ok").statements.len(), 0);
2031    }
2032
2033    #[test]
2034    fn parse_newlines_only() {
2035        let result = parse("\n\n\n");
2036        assert!(result.is_ok());
2037    }
2038
2039    #[test]
2040    fn parse_simple_command() {
2041        let result = parse("echo");
2042        assert!(result.is_ok());
2043        let program = result.expect("ok");
2044        assert_eq!(program.statements.len(), 1);
2045        assert!(matches!(&program.statements[0], Stmt::Command(_)));
2046    }
2047
2048    #[test]
2049    fn parse_command_with_string_arg() {
2050        let result = parse(r#"echo "hello""#);
2051        assert!(result.is_ok());
2052        let program = result.expect("ok");
2053        match &program.statements[0] {
2054            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 1),
2055            _ => panic!("expected Command"),
2056        }
2057    }
2058
2059    #[test]
2060    fn parse_assignment() {
2061        let result = parse("X=5");
2062        assert!(result.is_ok());
2063        let program = result.expect("ok");
2064        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
2065    }
2066
2067    #[test]
2068    fn parse_pipeline() {
2069        let result = parse("a | b | c");
2070        assert!(result.is_ok());
2071        let program = result.expect("ok");
2072        match &program.statements[0] {
2073            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
2074            _ => panic!("expected Pipeline"),
2075        }
2076    }
2077
2078    #[test]
2079    fn parse_background_job() {
2080        let result = parse("cmd &");
2081        assert!(result.is_ok());
2082        let program = result.expect("ok");
2083        match &program.statements[0] {
2084            Stmt::Pipeline(p) => assert!(p.background),
2085            _ => panic!("expected Pipeline with background"),
2086        }
2087    }
2088
2089    #[test]
2090    fn parse_if_simple() {
2091        let result = parse("if true; then echo; fi");
2092        assert!(result.is_ok());
2093        let program = result.expect("ok");
2094        assert!(matches!(&program.statements[0], Stmt::If(_)));
2095    }
2096
2097    #[test]
2098    fn parse_if_else() {
2099        let result = parse("if true; then echo; else echo; fi");
2100        assert!(result.is_ok());
2101        let program = result.expect("ok");
2102        match &program.statements[0] {
2103            Stmt::If(if_stmt) => assert!(if_stmt.else_branch.is_some()),
2104            _ => panic!("expected If"),
2105        }
2106    }
2107
2108    #[test]
2109    fn parse_elif_simple() {
2110        let result = parse("if true; then echo a; elif false; then echo b; fi");
2111        assert!(result.is_ok(), "parse failed: {:?}", result);
2112        let program = result.expect("ok");
2113        match &program.statements[0] {
2114            Stmt::If(if_stmt) => {
2115                // elif is desugared to nested if in else
2116                assert!(if_stmt.else_branch.is_some());
2117                let else_branch = if_stmt.else_branch.as_ref().unwrap();
2118                assert_eq!(else_branch.len(), 1);
2119                assert!(matches!(&else_branch[0], Stmt::If(_)));
2120            }
2121            _ => panic!("expected If"),
2122        }
2123    }
2124
2125    #[test]
2126    fn parse_elif_with_else() {
2127        let result = parse("if true; then echo a; elif false; then echo b; else echo c; fi");
2128        assert!(result.is_ok(), "parse failed: {:?}", result);
2129        let program = result.expect("ok");
2130        match &program.statements[0] {
2131            Stmt::If(outer_if) => {
2132                // Check nested structure: if -> elif -> else
2133                let else_branch = outer_if.else_branch.as_ref().expect("outer else");
2134                assert_eq!(else_branch.len(), 1);
2135                match &else_branch[0] {
2136                    Stmt::If(inner_if) => {
2137                        // The inner if (from elif) should have the final else
2138                        assert!(inner_if.else_branch.is_some());
2139                    }
2140                    _ => panic!("expected nested If from elif"),
2141                }
2142            }
2143            _ => panic!("expected If"),
2144        }
2145    }
2146
2147    #[test]
2148    fn parse_multiple_elif() {
2149        // Shell-compatible: use [[ ]] for comparisons
2150        let result = parse(
2151            "if [[ ${X} == 1 ]]; then echo one; elif [[ ${X} == 2 ]]; then echo two; elif [[ ${X} == 3 ]]; then echo three; else echo other; fi",
2152        );
2153        assert!(result.is_ok(), "parse failed: {:?}", result);
2154    }
2155
2156    #[test]
2157    fn parse_for_loop() {
2158        let result = parse("for X in items; do echo; done");
2159        assert!(result.is_ok());
2160        let program = result.expect("ok");
2161        assert!(matches!(&program.statements[0], Stmt::For(_)));
2162    }
2163
2164    #[test]
2165    fn parse_brackets_not_array_literal() {
2166        // Array literals are no longer supported, [ is just a regular char
2167        let result = parse("cmd [1");
2168        // This should fail or parse unexpectedly - arrays are removed
2169        // Just verify we don't crash
2170        let _ = result;
2171    }
2172
2173    #[test]
2174    fn parse_named_arg() {
2175        let result = parse("cmd foo=5");
2176        assert!(result.is_ok());
2177        let program = result.expect("ok");
2178        match &program.statements[0] {
2179            Stmt::Command(cmd) => {
2180                assert_eq!(cmd.args.len(), 1);
2181                assert!(matches!(&cmd.args[0], Arg::Named { .. }));
2182            }
2183            _ => panic!("expected Command"),
2184        }
2185    }
2186
2187    #[test]
2188    fn parse_short_flag() {
2189        let result = parse("ls -l");
2190        assert!(result.is_ok());
2191        let program = result.expect("ok");
2192        match &program.statements[0] {
2193            Stmt::Command(cmd) => {
2194                assert_eq!(cmd.name, "ls");
2195                assert_eq!(cmd.args.len(), 1);
2196                match &cmd.args[0] {
2197                    Arg::ShortFlag(name) => assert_eq!(name, "l"),
2198                    _ => panic!("expected ShortFlag"),
2199                }
2200            }
2201            _ => panic!("expected Command"),
2202        }
2203    }
2204
2205    #[test]
2206    fn parse_long_flag() {
2207        let result = parse("git push --force");
2208        assert!(result.is_ok());
2209        let program = result.expect("ok");
2210        match &program.statements[0] {
2211            Stmt::Command(cmd) => {
2212                assert_eq!(cmd.name, "git");
2213                assert_eq!(cmd.args.len(), 2);
2214                match &cmd.args[0] {
2215                    Arg::Positional(Expr::Literal(Value::String(s))) => assert_eq!(s, "push"),
2216                    _ => panic!("expected Positional push"),
2217                }
2218                match &cmd.args[1] {
2219                    Arg::LongFlag(name) => assert_eq!(name, "force"),
2220                    _ => panic!("expected LongFlag"),
2221                }
2222            }
2223            _ => panic!("expected Command"),
2224        }
2225    }
2226
2227    #[test]
2228    fn parse_long_flag_with_value() {
2229        let result = parse(r#"git commit --message="hello""#);
2230        assert!(result.is_ok());
2231        let program = result.expect("ok");
2232        match &program.statements[0] {
2233            Stmt::Command(cmd) => {
2234                assert_eq!(cmd.name, "git");
2235                assert_eq!(cmd.args.len(), 2);
2236                match &cmd.args[1] {
2237                    Arg::Named { key, value } => {
2238                        assert_eq!(key, "message");
2239                        match value {
2240                            Expr::Literal(Value::String(s)) => assert_eq!(s, "hello"),
2241                            _ => panic!("expected String value"),
2242                        }
2243                    }
2244                    _ => panic!("expected Named from --flag=value"),
2245                }
2246            }
2247            _ => panic!("expected Command"),
2248        }
2249    }
2250
2251    #[test]
2252    fn parse_mixed_flags_and_args() {
2253        let result = parse(r#"git commit -m "message" --amend"#);
2254        assert!(result.is_ok());
2255        let program = result.expect("ok");
2256        match &program.statements[0] {
2257            Stmt::Command(cmd) => {
2258                assert_eq!(cmd.name, "git");
2259                assert_eq!(cmd.args.len(), 4);
2260                // commit (positional)
2261                assert!(matches!(&cmd.args[0], Arg::Positional(_)));
2262                // -m (short flag)
2263                match &cmd.args[1] {
2264                    Arg::ShortFlag(name) => assert_eq!(name, "m"),
2265                    _ => panic!("expected ShortFlag -m"),
2266                }
2267                // "message" (positional)
2268                assert!(matches!(&cmd.args[2], Arg::Positional(_)));
2269                // --amend (long flag)
2270                match &cmd.args[3] {
2271                    Arg::LongFlag(name) => assert_eq!(name, "amend"),
2272                    _ => panic!("expected LongFlag --amend"),
2273                }
2274            }
2275            _ => panic!("expected Command"),
2276        }
2277    }
2278
2279    #[test]
2280    fn parse_redirect_stdout() {
2281        let result = parse("cmd > file");
2282        assert!(result.is_ok());
2283        let program = result.expect("ok");
2284        // Commands with redirects stay as Pipeline, not Command
2285        match &program.statements[0] {
2286            Stmt::Pipeline(p) => {
2287                assert_eq!(p.commands.len(), 1);
2288                let cmd = &p.commands[0];
2289                assert_eq!(cmd.redirects.len(), 1);
2290                assert!(matches!(cmd.redirects[0].kind, RedirectKind::StdoutOverwrite));
2291            }
2292            _ => panic!("expected Pipeline"),
2293        }
2294    }
2295
2296    #[test]
2297    fn parse_var_ref() {
2298        let result = parse("echo ${VAR}");
2299        assert!(result.is_ok());
2300        let program = result.expect("ok");
2301        match &program.statements[0] {
2302            Stmt::Command(cmd) => {
2303                assert_eq!(cmd.args.len(), 1);
2304                assert!(matches!(&cmd.args[0], Arg::Positional(Expr::VarRef(_))));
2305            }
2306            _ => panic!("expected Command"),
2307        }
2308    }
2309
2310    #[test]
2311    fn parse_multiple_statements() {
2312        let result = parse("a\nb\nc");
2313        assert!(result.is_ok());
2314        let program = result.expect("ok");
2315        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
2316        assert_eq!(non_empty.len(), 3);
2317    }
2318
2319    #[test]
2320    fn parse_semicolon_separated() {
2321        let result = parse("a; b; c");
2322        assert!(result.is_ok());
2323        let program = result.expect("ok");
2324        let non_empty: Vec<_> = program.statements.iter().filter(|s| !matches!(s, Stmt::Empty)).collect();
2325        assert_eq!(non_empty.len(), 3);
2326    }
2327
2328    #[test]
2329    fn parse_complex_pipeline() {
2330        let result = parse(r#"cat file | grep pattern="foo" | head count=10"#);
2331        assert!(result.is_ok());
2332        let program = result.expect("ok");
2333        match &program.statements[0] {
2334            Stmt::Pipeline(p) => assert_eq!(p.commands.len(), 3),
2335            _ => panic!("expected Pipeline"),
2336        }
2337    }
2338
2339    #[test]
2340    fn parse_json_as_string_arg() {
2341        // JSON arrays/objects should be passed as string arguments
2342        let result = parse(r#"cmd '[[1, 2], [3, 4]]'"#);
2343        assert!(result.is_ok());
2344    }
2345
2346    #[test]
2347    fn parse_mixed_args() {
2348        let result = parse(r#"cmd pos1 key="val" pos2 num=42"#);
2349        assert!(result.is_ok());
2350        let program = result.expect("ok");
2351        match &program.statements[0] {
2352            Stmt::Command(cmd) => assert_eq!(cmd.args.len(), 4),
2353            _ => panic!("expected Command"),
2354        }
2355    }
2356
2357    #[test]
2358    fn error_unterminated_string() {
2359        let result = parse(r#"echo "hello"#);
2360        assert!(result.is_err());
2361    }
2362
2363    #[test]
2364    fn error_unterminated_var_ref() {
2365        let result = parse("echo ${VAR");
2366        assert!(result.is_err());
2367    }
2368
2369    #[test]
2370    fn error_missing_fi() {
2371        let result = parse("if true; then echo");
2372        assert!(result.is_err());
2373    }
2374
2375    #[test]
2376    fn error_missing_done() {
2377        let result = parse("for X in items; do echo");
2378        assert!(result.is_err());
2379    }
2380
2381    #[test]
2382    fn parse_nested_cmd_subst() {
2383        // Nested command substitution is supported
2384        let result = parse("X=$(echo $(date))").unwrap();
2385        match &result.statements[0] {
2386            Stmt::Assignment(a) => {
2387                assert_eq!(a.name, "X");
2388                match &a.value {
2389                    Expr::CommandSubst(outer) => {
2390                        assert_eq!(outer.commands[0].name, "echo");
2391                        // The argument should be another command substitution
2392                        match &outer.commands[0].args[0] {
2393                            Arg::Positional(Expr::CommandSubst(inner)) => {
2394                                assert_eq!(inner.commands[0].name, "date");
2395                            }
2396                            other => panic!("expected nested cmd subst, got {:?}", other),
2397                        }
2398                    }
2399                    other => panic!("expected cmd subst, got {:?}", other),
2400                }
2401            }
2402            other => panic!("expected assignment, got {:?}", other),
2403        }
2404    }
2405
2406    #[test]
2407    fn parse_deeply_nested_cmd_subst() {
2408        // Three levels deep
2409        let result = parse("X=$(a $(b $(c)))").unwrap();
2410        match &result.statements[0] {
2411            Stmt::Assignment(a) => match &a.value {
2412                Expr::CommandSubst(level1) => {
2413                    assert_eq!(level1.commands[0].name, "a");
2414                    match &level1.commands[0].args[0] {
2415                        Arg::Positional(Expr::CommandSubst(level2)) => {
2416                            assert_eq!(level2.commands[0].name, "b");
2417                            match &level2.commands[0].args[0] {
2418                                Arg::Positional(Expr::CommandSubst(level3)) => {
2419                                    assert_eq!(level3.commands[0].name, "c");
2420                                }
2421                                other => panic!("expected level3 cmd subst, got {:?}", other),
2422                            }
2423                        }
2424                        other => panic!("expected level2 cmd subst, got {:?}", other),
2425                    }
2426                }
2427                other => panic!("expected cmd subst, got {:?}", other),
2428            },
2429            other => panic!("expected assignment, got {:?}", other),
2430        }
2431    }
2432
2433    // ═══════════════════════════════════════════════════════════════════════════
2434    // Value Preservation Tests - These test that actual values are captured
2435    // ═══════════════════════════════════════════════════════════════════════════
2436
2437    #[test]
2438    fn value_int_preserved() {
2439        let result = parse("X=42").unwrap();
2440        match &result.statements[0] {
2441            Stmt::Assignment(a) => {
2442                assert_eq!(a.name, "X");
2443                match &a.value {
2444                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2445                    other => panic!("expected int literal, got {:?}", other),
2446                }
2447            }
2448            other => panic!("expected assignment, got {:?}", other),
2449        }
2450    }
2451
2452    #[test]
2453    fn value_negative_int_preserved() {
2454        let result = parse("X=-99").unwrap();
2455        match &result.statements[0] {
2456            Stmt::Assignment(a) => match &a.value {
2457                Expr::Literal(Value::Int(n)) => assert_eq!(*n, -99),
2458                other => panic!("expected int, got {:?}", other),
2459            },
2460            other => panic!("expected assignment, got {:?}", other),
2461        }
2462    }
2463
2464    #[test]
2465    fn value_float_preserved() {
2466        let result = parse("PI=3.14").unwrap();
2467        match &result.statements[0] {
2468            Stmt::Assignment(a) => match &a.value {
2469                Expr::Literal(Value::Float(f)) => assert!((*f - 3.14).abs() < 0.001),
2470                other => panic!("expected float, got {:?}", other),
2471            },
2472            other => panic!("expected assignment, got {:?}", other),
2473        }
2474    }
2475
2476    #[test]
2477    fn value_string_preserved() {
2478        let result = parse(r#"echo "hello world""#).unwrap();
2479        match &result.statements[0] {
2480            Stmt::Command(cmd) => {
2481                assert_eq!(cmd.name, "echo");
2482                match &cmd.args[0] {
2483                    Arg::Positional(Expr::Literal(Value::String(s))) => {
2484                        assert_eq!(s, "hello world");
2485                    }
2486                    other => panic!("expected string arg, got {:?}", other),
2487                }
2488            }
2489            other => panic!("expected command, got {:?}", other),
2490        }
2491    }
2492
2493    #[test]
2494    fn value_string_with_escapes_preserved() {
2495        let result = parse(r#"echo "line1\nline2""#).unwrap();
2496        match &result.statements[0] {
2497            Stmt::Command(cmd) => match &cmd.args[0] {
2498                Arg::Positional(Expr::Literal(Value::String(s))) => {
2499                    assert_eq!(s, "line1\nline2");
2500                }
2501                other => panic!("expected string, got {:?}", other),
2502            },
2503            other => panic!("expected command, got {:?}", other),
2504        }
2505    }
2506
2507    #[test]
2508    fn value_command_name_preserved() {
2509        let result = parse("my-command").unwrap();
2510        match &result.statements[0] {
2511            Stmt::Command(cmd) => assert_eq!(cmd.name, "my-command"),
2512            other => panic!("expected command, got {:?}", other),
2513        }
2514    }
2515
2516    #[test]
2517    fn value_assignment_name_preserved() {
2518        let result = parse("MY_VAR=1").unwrap();
2519        match &result.statements[0] {
2520            Stmt::Assignment(a) => assert_eq!(a.name, "MY_VAR"),
2521            other => panic!("expected assignment, got {:?}", other),
2522        }
2523    }
2524
2525    #[test]
2526    fn value_for_variable_preserved() {
2527        let result = parse("for ITEM in items; do echo; done").unwrap();
2528        match &result.statements[0] {
2529            Stmt::For(f) => assert_eq!(f.variable, "ITEM"),
2530            other => panic!("expected for, got {:?}", other),
2531        }
2532    }
2533
2534    #[test]
2535    fn value_varref_name_preserved() {
2536        let result = parse("echo ${MESSAGE}").unwrap();
2537        match &result.statements[0] {
2538            Stmt::Command(cmd) => match &cmd.args[0] {
2539                Arg::Positional(Expr::VarRef(path)) => {
2540                    assert_eq!(path.segments.len(), 1);
2541                    let VarSegment::Field(name) = &path.segments[0];
2542                    assert_eq!(name, "MESSAGE");
2543                }
2544                other => panic!("expected varref, got {:?}", other),
2545            },
2546            other => panic!("expected command, got {:?}", other),
2547        }
2548    }
2549
2550    #[test]
2551    fn value_varref_field_access_preserved() {
2552        let result = parse("echo ${RESULT.data}").unwrap();
2553        match &result.statements[0] {
2554            Stmt::Command(cmd) => match &cmd.args[0] {
2555                Arg::Positional(Expr::VarRef(path)) => {
2556                    assert_eq!(path.segments.len(), 2);
2557                    let VarSegment::Field(a) = &path.segments[0];
2558                    let VarSegment::Field(b) = &path.segments[1];
2559                    assert_eq!(a, "RESULT");
2560                    assert_eq!(b, "data");
2561                }
2562                other => panic!("expected varref, got {:?}", other),
2563            },
2564            other => panic!("expected command, got {:?}", other),
2565        }
2566    }
2567
2568    #[test]
2569    fn value_varref_index_ignored() {
2570        // Index segments are no longer supported - they're filtered out by parse_varpath
2571        let result = parse("echo ${ITEMS[0]}").unwrap();
2572        match &result.statements[0] {
2573            Stmt::Command(cmd) => match &cmd.args[0] {
2574                Arg::Positional(Expr::VarRef(path)) => {
2575                    // Index segment [0] is skipped, only ITEMS remains
2576                    assert_eq!(path.segments.len(), 1);
2577                    let VarSegment::Field(name) = &path.segments[0];
2578                    assert_eq!(name, "ITEMS");
2579                }
2580                other => panic!("expected varref, got {:?}", other),
2581            },
2582            other => panic!("expected command, got {:?}", other),
2583        }
2584    }
2585
2586    #[test]
2587    fn value_named_arg_preserved() {
2588        let result = parse("cmd count=42").unwrap();
2589        match &result.statements[0] {
2590            Stmt::Command(cmd) => {
2591                assert_eq!(cmd.name, "cmd");
2592                match &cmd.args[0] {
2593                    Arg::Named { key, value } => {
2594                        assert_eq!(key, "count");
2595                        match value {
2596                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 42),
2597                            other => panic!("expected int, got {:?}", other),
2598                        }
2599                    }
2600                    other => panic!("expected named arg, got {:?}", other),
2601                }
2602            }
2603            other => panic!("expected command, got {:?}", other),
2604        }
2605    }
2606
2607    #[test]
2608    fn value_function_def_name_preserved() {
2609        let result = parse("greet() { echo }").unwrap();
2610        match &result.statements[0] {
2611            Stmt::ToolDef(t) => {
2612                assert_eq!(t.name, "greet");
2613                assert!(t.params.is_empty());
2614            }
2615            other => panic!("expected function def, got {:?}", other),
2616        }
2617    }
2618
2619    // ═══════════════════════════════════════════════════════════════════════════
2620    // New Feature Tests - Comparisons, Interpolation, Nested Structures
2621    // ═══════════════════════════════════════════════════════════════════════════
2622
2623    #[test]
2624    fn parse_comparison_equals() {
2625        // Shell-compatible: use [[ ]] for comparisons
2626        let result = parse("if [[ ${X} == 5 ]]; then echo; fi").unwrap();
2627        match &result.statements[0] {
2628            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2629                Expr::Test(test) => match test.as_ref() {
2630                    TestExpr::Comparison { left, op, right } => {
2631                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
2632                        assert_eq!(*op, TestCmpOp::Eq);
2633                        match right.as_ref() {
2634                            Expr::Literal(Value::Int(n)) => assert_eq!(*n, 5),
2635                            other => panic!("expected int, got {:?}", other),
2636                        }
2637                    }
2638                    other => panic!("expected comparison, got {:?}", other),
2639                },
2640                other => panic!("expected test expr, got {:?}", other),
2641            },
2642            other => panic!("expected if, got {:?}", other),
2643        }
2644    }
2645
2646    #[test]
2647    fn parse_comparison_not_equals() {
2648        let result = parse("if [[ ${X} != 0 ]]; then echo; fi").unwrap();
2649        match &result.statements[0] {
2650            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2651                Expr::Test(test) => match test.as_ref() {
2652                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotEq),
2653                    other => panic!("expected comparison, got {:?}", other),
2654                },
2655                other => panic!("expected test expr, got {:?}", other),
2656            },
2657            other => panic!("expected if, got {:?}", other),
2658        }
2659    }
2660
2661    #[test]
2662    fn parse_comparison_less_than() {
2663        let result = parse("if [[ ${COUNT} -lt 10 ]]; then echo; fi").unwrap();
2664        match &result.statements[0] {
2665            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2666                Expr::Test(test) => match test.as_ref() {
2667                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumLt),
2668                    other => panic!("expected comparison, got {:?}", other),
2669                },
2670                other => panic!("expected test expr, got {:?}", other),
2671            },
2672            other => panic!("expected if, got {:?}", other),
2673        }
2674    }
2675
2676    #[test]
2677    fn parse_comparison_greater_than() {
2678        let result = parse("if [[ ${COUNT} -gt 0 ]]; then echo; fi").unwrap();
2679        match &result.statements[0] {
2680            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2681                Expr::Test(test) => match test.as_ref() {
2682                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumGt),
2683                    other => panic!("expected comparison, got {:?}", other),
2684                },
2685                other => panic!("expected test expr, got {:?}", other),
2686            },
2687            other => panic!("expected if, got {:?}", other),
2688        }
2689    }
2690
2691    #[test]
2692    fn parse_comparison_less_equal() {
2693        let result = parse("if [[ ${X} -le 100 ]]; then echo; fi").unwrap();
2694        match &result.statements[0] {
2695            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2696                Expr::Test(test) => match test.as_ref() {
2697                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumLtEq),
2698                    other => panic!("expected comparison, got {:?}", other),
2699                },
2700                other => panic!("expected test expr, got {:?}", other),
2701            },
2702            other => panic!("expected if, got {:?}", other),
2703        }
2704    }
2705
2706    #[test]
2707    fn parse_comparison_greater_equal() {
2708        let result = parse("if [[ ${X} -ge 1 ]]; then echo; fi").unwrap();
2709        match &result.statements[0] {
2710            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2711                Expr::Test(test) => match test.as_ref() {
2712                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NumGtEq),
2713                    other => panic!("expected comparison, got {:?}", other),
2714                },
2715                other => panic!("expected test expr, got {:?}", other),
2716            },
2717            other => panic!("expected if, got {:?}", other),
2718        }
2719    }
2720
2721    #[test]
2722    fn parse_regex_match() {
2723        let result = parse(r#"if [[ ${NAME} =~ "^test" ]]; then echo; fi"#).unwrap();
2724        match &result.statements[0] {
2725            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2726                Expr::Test(test) => match test.as_ref() {
2727                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::Match),
2728                    other => panic!("expected comparison, got {:?}", other),
2729                },
2730                other => panic!("expected test expr, got {:?}", other),
2731            },
2732            other => panic!("expected if, got {:?}", other),
2733        }
2734    }
2735
2736    #[test]
2737    fn parse_regex_not_match() {
2738        let result = parse(r#"if [[ ${NAME} !~ "^test" ]]; then echo; fi"#).unwrap();
2739        match &result.statements[0] {
2740            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2741                Expr::Test(test) => match test.as_ref() {
2742                    TestExpr::Comparison { op, .. } => assert_eq!(*op, TestCmpOp::NotMatch),
2743                    other => panic!("expected comparison, got {:?}", other),
2744                },
2745                other => panic!("expected test expr, got {:?}", other),
2746            },
2747            other => panic!("expected if, got {:?}", other),
2748        }
2749    }
2750
2751    #[test]
2752    fn parse_string_interpolation() {
2753        let result = parse(r#"echo "Hello ${NAME}!""#).unwrap();
2754        match &result.statements[0] {
2755            Stmt::Command(cmd) => match &cmd.args[0] {
2756                Arg::Positional(Expr::Interpolated(parts)) => {
2757                    assert_eq!(parts.len(), 3);
2758                    match &parts[0] {
2759                        StringPart::Literal(s) => assert_eq!(s, "Hello "),
2760                        other => panic!("expected literal, got {:?}", other),
2761                    }
2762                    match &parts[1] {
2763                        StringPart::Var(path) => {
2764                            assert_eq!(path.segments.len(), 1);
2765                            let VarSegment::Field(name) = &path.segments[0];
2766                            assert_eq!(name, "NAME");
2767                        }
2768                        other => panic!("expected var, got {:?}", other),
2769                    }
2770                    match &parts[2] {
2771                        StringPart::Literal(s) => assert_eq!(s, "!"),
2772                        other => panic!("expected literal, got {:?}", other),
2773                    }
2774                }
2775                other => panic!("expected interpolated, got {:?}", other),
2776            },
2777            other => panic!("expected command, got {:?}", other),
2778        }
2779    }
2780
2781    #[test]
2782    fn parse_string_interpolation_multiple_vars() {
2783        let result = parse(r#"echo "${FIRST} and ${SECOND}""#).unwrap();
2784        match &result.statements[0] {
2785            Stmt::Command(cmd) => match &cmd.args[0] {
2786                Arg::Positional(Expr::Interpolated(parts)) => {
2787                    // ${FIRST} + " and " + ${SECOND} = 3 parts
2788                    assert_eq!(parts.len(), 3);
2789                    assert!(matches!(&parts[0], StringPart::Var(_)));
2790                    assert!(matches!(&parts[1], StringPart::Literal(_)));
2791                    assert!(matches!(&parts[2], StringPart::Var(_)));
2792                }
2793                other => panic!("expected interpolated, got {:?}", other),
2794            },
2795            other => panic!("expected command, got {:?}", other),
2796        }
2797    }
2798
2799    #[test]
2800    fn parse_empty_function_body() {
2801        let result = parse("empty() { }").unwrap();
2802        match &result.statements[0] {
2803            Stmt::ToolDef(t) => {
2804                assert_eq!(t.name, "empty");
2805                assert!(t.params.is_empty());
2806                assert!(t.body.is_empty());
2807            }
2808            other => panic!("expected function def, got {:?}", other),
2809        }
2810    }
2811
2812    #[test]
2813    fn parse_bash_style_function() {
2814        let result = parse("function greet { echo hello }").unwrap();
2815        match &result.statements[0] {
2816            Stmt::ToolDef(t) => {
2817                assert_eq!(t.name, "greet");
2818                assert!(t.params.is_empty());
2819                assert_eq!(t.body.len(), 1);
2820            }
2821            other => panic!("expected function def, got {:?}", other),
2822        }
2823    }
2824
2825    #[test]
2826    fn parse_comparison_string_values() {
2827        let result = parse(r#"if [[ ${STATUS} == "ok" ]]; then echo; fi"#).unwrap();
2828        match &result.statements[0] {
2829            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2830                Expr::Test(test) => match test.as_ref() {
2831                    TestExpr::Comparison { left, op, right } => {
2832                        assert!(matches!(left.as_ref(), Expr::VarRef(_)));
2833                        assert_eq!(*op, TestCmpOp::Eq);
2834                        match right.as_ref() {
2835                            Expr::Literal(Value::String(s)) => assert_eq!(s, "ok"),
2836                            other => panic!("expected string, got {:?}", other),
2837                        }
2838                    }
2839                    other => panic!("expected comparison, got {:?}", other),
2840                },
2841                other => panic!("expected test expr, got {:?}", other),
2842            },
2843            other => panic!("expected if, got {:?}", other),
2844        }
2845    }
2846
2847    // ═══════════════════════════════════════════════════════════════════════════
2848    // Command Substitution Tests
2849    // ═══════════════════════════════════════════════════════════════════════════
2850
2851    #[test]
2852    fn parse_cmd_subst_simple() {
2853        let result = parse("X=$(echo)").unwrap();
2854        match &result.statements[0] {
2855            Stmt::Assignment(a) => {
2856                assert_eq!(a.name, "X");
2857                match &a.value {
2858                    Expr::CommandSubst(pipeline) => {
2859                        assert_eq!(pipeline.commands.len(), 1);
2860                        assert_eq!(pipeline.commands[0].name, "echo");
2861                    }
2862                    other => panic!("expected command subst, got {:?}", other),
2863                }
2864            }
2865            other => panic!("expected assignment, got {:?}", other),
2866        }
2867    }
2868
2869    #[test]
2870    fn parse_cmd_subst_with_args() {
2871        let result = parse(r#"X=$(fetch url="http://example.com")"#).unwrap();
2872        match &result.statements[0] {
2873            Stmt::Assignment(a) => match &a.value {
2874                Expr::CommandSubst(pipeline) => {
2875                    assert_eq!(pipeline.commands[0].name, "fetch");
2876                    assert_eq!(pipeline.commands[0].args.len(), 1);
2877                    match &pipeline.commands[0].args[0] {
2878                        Arg::Named { key, .. } => assert_eq!(key, "url"),
2879                        other => panic!("expected named arg, got {:?}", other),
2880                    }
2881                }
2882                other => panic!("expected command subst, got {:?}", other),
2883            },
2884            other => panic!("expected assignment, got {:?}", other),
2885        }
2886    }
2887
2888    #[test]
2889    fn parse_cmd_subst_pipeline() {
2890        let result = parse("X=$(cat file | grep pattern)").unwrap();
2891        match &result.statements[0] {
2892            Stmt::Assignment(a) => match &a.value {
2893                Expr::CommandSubst(pipeline) => {
2894                    assert_eq!(pipeline.commands.len(), 2);
2895                    assert_eq!(pipeline.commands[0].name, "cat");
2896                    assert_eq!(pipeline.commands[1].name, "grep");
2897                }
2898                other => panic!("expected command subst, got {:?}", other),
2899            },
2900            other => panic!("expected assignment, got {:?}", other),
2901        }
2902    }
2903
2904    #[test]
2905    fn parse_cmd_subst_in_condition() {
2906        // Shell-compatible: conditions are commands, not command substitutions
2907        let result = parse("if kaish-validate; then echo; fi").unwrap();
2908        match &result.statements[0] {
2909            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2910                Expr::Command(cmd) => {
2911                    assert_eq!(cmd.name, "kaish-validate");
2912                }
2913                other => panic!("expected command, got {:?}", other),
2914            },
2915            other => panic!("expected if, got {:?}", other),
2916        }
2917    }
2918
2919    #[test]
2920    fn parse_cmd_subst_in_command_arg() {
2921        let result = parse("echo $(whoami)").unwrap();
2922        match &result.statements[0] {
2923            Stmt::Command(cmd) => {
2924                assert_eq!(cmd.name, "echo");
2925                match &cmd.args[0] {
2926                    Arg::Positional(Expr::CommandSubst(pipeline)) => {
2927                        assert_eq!(pipeline.commands[0].name, "whoami");
2928                    }
2929                    other => panic!("expected command subst, got {:?}", other),
2930                }
2931            }
2932            other => panic!("expected command, got {:?}", other),
2933        }
2934    }
2935
2936    // ═══════════════════════════════════════════════════════════════════════════
2937    // Logical Operator Tests (&&, ||)
2938    // ═══════════════════════════════════════════════════════════════════════════
2939
2940    #[test]
2941    fn parse_condition_and() {
2942        // Shell-compatible: commands chained with &&
2943        let result = parse("if check-a && check-b; then echo; fi").unwrap();
2944        match &result.statements[0] {
2945            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2946                Expr::BinaryOp { left, op, right } => {
2947                    assert_eq!(*op, BinaryOp::And);
2948                    assert!(matches!(left.as_ref(), Expr::Command(_)));
2949                    assert!(matches!(right.as_ref(), Expr::Command(_)));
2950                }
2951                other => panic!("expected binary op, got {:?}", other),
2952            },
2953            other => panic!("expected if, got {:?}", other),
2954        }
2955    }
2956
2957    #[test]
2958    fn parse_condition_or() {
2959        let result = parse("if try-a || try-b; then echo; fi").unwrap();
2960        match &result.statements[0] {
2961            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2962                Expr::BinaryOp { left, op, right } => {
2963                    assert_eq!(*op, BinaryOp::Or);
2964                    assert!(matches!(left.as_ref(), Expr::Command(_)));
2965                    assert!(matches!(right.as_ref(), Expr::Command(_)));
2966                }
2967                other => panic!("expected binary op, got {:?}", other),
2968            },
2969            other => panic!("expected if, got {:?}", other),
2970        }
2971    }
2972
2973    #[test]
2974    fn parse_condition_and_or_precedence() {
2975        // a && b || c should parse as (a && b) || c
2976        let result = parse("if cmd-a && cmd-b || cmd-c; then echo; fi").unwrap();
2977        match &result.statements[0] {
2978            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
2979                Expr::BinaryOp { left, op, right } => {
2980                    // Top level should be ||
2981                    assert_eq!(*op, BinaryOp::Or);
2982                    // Left side should be && expression
2983                    match left.as_ref() {
2984                        Expr::BinaryOp { op: inner_op, .. } => {
2985                            assert_eq!(*inner_op, BinaryOp::And);
2986                        }
2987                        other => panic!("expected binary op (&&), got {:?}", other),
2988                    }
2989                    // Right side should be command
2990                    assert!(matches!(right.as_ref(), Expr::Command(_)));
2991                }
2992                other => panic!("expected binary op, got {:?}", other),
2993            },
2994            other => panic!("expected if, got {:?}", other),
2995        }
2996    }
2997
2998    #[test]
2999    fn parse_condition_multiple_and() {
3000        let result = parse("if cmd-a && cmd-b && cmd-c; then echo; fi").unwrap();
3001        match &result.statements[0] {
3002            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3003                Expr::BinaryOp { left, op, .. } => {
3004                    assert_eq!(*op, BinaryOp::And);
3005                    // Left side should also be &&
3006                    match left.as_ref() {
3007                        Expr::BinaryOp { op: inner_op, .. } => {
3008                            assert_eq!(*inner_op, BinaryOp::And);
3009                        }
3010                        other => panic!("expected binary op, got {:?}", other),
3011                    }
3012                }
3013                other => panic!("expected binary op, got {:?}", other),
3014            },
3015            other => panic!("expected if, got {:?}", other),
3016        }
3017    }
3018
3019    #[test]
3020    fn parse_condition_mixed_comparison_and_logical() {
3021        // Shell-compatible: use [[ ]] for comparisons, && to chain them
3022        let result = parse("if [[ ${X} == 5 ]] && [[ ${Y} -gt 0 ]]; then echo; fi").unwrap();
3023        match &result.statements[0] {
3024            Stmt::If(if_stmt) => match if_stmt.condition.as_ref() {
3025                Expr::BinaryOp { left, op, right } => {
3026                    assert_eq!(*op, BinaryOp::And);
3027                    // Left: [[ ${X} == 5 ]]
3028                    match left.as_ref() {
3029                        Expr::Test(test) => match test.as_ref() {
3030                            TestExpr::Comparison { op: left_op, .. } => {
3031                                assert_eq!(*left_op, TestCmpOp::Eq);
3032                            }
3033                            other => panic!("expected comparison, got {:?}", other),
3034                        },
3035                        other => panic!("expected test, got {:?}", other),
3036                    }
3037                    // Right: [[ ${Y} -gt 0 ]]
3038                    match right.as_ref() {
3039                        Expr::Test(test) => match test.as_ref() {
3040                            TestExpr::Comparison { op: right_op, .. } => {
3041                                assert_eq!(*right_op, TestCmpOp::NumGt);
3042                            }
3043                            other => panic!("expected comparison, got {:?}", other),
3044                        },
3045                        other => panic!("expected test, got {:?}", other),
3046                    }
3047                }
3048                other => panic!("expected binary op, got {:?}", other),
3049            },
3050            other => panic!("expected if, got {:?}", other),
3051        }
3052    }
3053
3054    // ═══════════════════════════════════════════════════════════════════════════
3055    // Integration Tests - Complete Scripts
3056    // ═══════════════════════════════════════════════════════════════════════════
3057
3058    /// Level 1: Linear script using core features
3059    #[test]
3060    fn script_level1_linear() {
3061        let script = r#"
3062NAME="kaish"
3063VERSION=1
3064TIMEOUT=30
3065ITEMS="alpha beta gamma"
3066
3067echo "Starting ${NAME} v${VERSION}"
3068cat "README.md" | grep pattern="install" | head count=5
3069fetch url="https://api.example.com/status" timeout=${TIMEOUT} > "/tmp/status.json"
3070echo "Items: ${ITEMS}"
3071"#;
3072        let result = parse(script).unwrap();
3073        let stmts: Vec<_> = result.statements.iter()
3074            .filter(|s| !matches!(s, Stmt::Empty))
3075            .collect();
3076
3077        assert_eq!(stmts.len(), 8);
3078        assert!(matches!(stmts[0], Stmt::Assignment(_)));  // set NAME
3079        assert!(matches!(stmts[1], Stmt::Assignment(_)));  // set VERSION
3080        assert!(matches!(stmts[2], Stmt::Assignment(_)));  // set TIMEOUT
3081        assert!(matches!(stmts[3], Stmt::Assignment(_)));  // set ITEMS
3082        assert!(matches!(stmts[4], Stmt::Command(_)));     // echo "Starting..."
3083        assert!(matches!(stmts[5], Stmt::Pipeline(_)));    // cat | grep | head
3084        assert!(matches!(stmts[6], Stmt::Pipeline(_)));    // fetch (with redirect - Pipeline since it has redirects)
3085        assert!(matches!(stmts[7], Stmt::Command(_)));     // echo "Items: ${ITEMS}"
3086    }
3087
3088    /// Level 2: Script with conditionals (shell-compatible syntax)
3089    #[test]
3090    fn script_level2_branching() {
3091        let script = r#"
3092RESULT=$(kaish-validate "input.json")
3093
3094if [[ ${RESULT.ok} == true ]]; then
3095    echo "Validation passed"
3096    process "input.json" > "output.json"
3097else
3098    echo "Validation failed: ${RESULT.err}"
3099fi
3100
3101if [[ ${COUNT} -gt 0 ]] && [[ ${COUNT} -le 100 ]]; then
3102    echo "Count in valid range"
3103fi
3104
3105if check-network || check-cache; then
3106    fetch url=${URL}
3107fi
3108"#;
3109        let result = parse(script).unwrap();
3110        let stmts: Vec<_> = result.statements.iter()
3111            .filter(|s| !matches!(s, Stmt::Empty))
3112            .collect();
3113
3114        assert_eq!(stmts.len(), 4);
3115
3116        // First: assignment with command substitution
3117        match stmts[0] {
3118            Stmt::Assignment(a) => {
3119                assert_eq!(a.name, "RESULT");
3120                assert!(matches!(&a.value, Expr::CommandSubst(_)));
3121            }
3122            other => panic!("expected assignment, got {:?}", other),
3123        }
3124
3125        // Second: if/else
3126        match stmts[1] {
3127            Stmt::If(if_stmt) => {
3128                assert_eq!(if_stmt.then_branch.len(), 2);
3129                assert!(if_stmt.else_branch.is_some());
3130                assert_eq!(if_stmt.else_branch.as_ref().unwrap().len(), 1);
3131            }
3132            other => panic!("expected if, got {:?}", other),
3133        }
3134
3135        // Third: if with && condition
3136        match stmts[2] {
3137            Stmt::If(if_stmt) => {
3138                match if_stmt.condition.as_ref() {
3139                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
3140                    other => panic!("expected && condition, got {:?}", other),
3141                }
3142            }
3143            other => panic!("expected if, got {:?}", other),
3144        }
3145
3146        // Fourth: if with || of commands
3147        match stmts[3] {
3148            Stmt::If(if_stmt) => {
3149                match if_stmt.condition.as_ref() {
3150                    Expr::BinaryOp { op, left, right } => {
3151                        assert_eq!(*op, BinaryOp::Or);
3152                        assert!(matches!(left.as_ref(), Expr::Command(_)));
3153                        assert!(matches!(right.as_ref(), Expr::Command(_)));
3154                    }
3155                    other => panic!("expected || condition, got {:?}", other),
3156                }
3157            }
3158            other => panic!("expected if, got {:?}", other),
3159        }
3160    }
3161
3162    /// Level 3: Script with loops and function definitions
3163    #[test]
3164    fn script_level3_loops_and_functions() {
3165        let script = r#"
3166greet() {
3167    echo "Hello, $1!"
3168}
3169
3170fetch_all() {
3171    for URL in $@; do
3172        fetch url=${URL}
3173    done
3174}
3175
3176USERS="alice bob charlie"
3177
3178for USER in ${USERS}; do
3179    greet ${USER}
3180    if [[ ${USER} == "bob" ]]; then
3181        echo "Found Bob!"
3182    fi
3183done
3184
3185long-running-task &
3186"#;
3187        let result = parse(script).unwrap();
3188        let stmts: Vec<_> = result.statements.iter()
3189            .filter(|s| !matches!(s, Stmt::Empty))
3190            .collect();
3191
3192        assert_eq!(stmts.len(), 5);
3193
3194        // First function def
3195        match stmts[0] {
3196            Stmt::ToolDef(t) => {
3197                assert_eq!(t.name, "greet");
3198                assert!(t.params.is_empty());
3199            }
3200            other => panic!("expected function def, got {:?}", other),
3201        }
3202
3203        // Second function def with nested for loop
3204        match stmts[1] {
3205            Stmt::ToolDef(t) => {
3206                assert_eq!(t.name, "fetch_all");
3207                assert_eq!(t.body.len(), 1);
3208                assert!(matches!(&t.body[0], Stmt::For(_)));
3209            }
3210            other => panic!("expected function def, got {:?}", other),
3211        }
3212
3213        // Assignment
3214        assert!(matches!(stmts[2], Stmt::Assignment(_)));
3215
3216        // For loop with nested if
3217        match stmts[3] {
3218            Stmt::For(f) => {
3219                assert_eq!(f.variable, "USER");
3220                assert_eq!(f.body.len(), 2);
3221                assert!(matches!(&f.body[0], Stmt::Command(_)));
3222                assert!(matches!(&f.body[1], Stmt::If(_)));
3223            }
3224            other => panic!("expected for loop, got {:?}", other),
3225        }
3226
3227        // Background job
3228        match stmts[4] {
3229            Stmt::Pipeline(p) => {
3230                assert!(p.background);
3231                assert_eq!(p.commands[0].name, "long-running-task");
3232            }
3233            other => panic!("expected pipeline (background), got {:?}", other),
3234        }
3235    }
3236
3237    /// Level 4: Complex nested control flow (shell-compatible syntax)
3238    #[test]
3239    fn script_level4_complex_nesting() {
3240        let script = r#"
3241RESULT=$(cat "config.json" | jq query=".servers" | kaish-validate schema="server-schema.json")
3242
3243if ping host=${HOST} && [[ ${RESULT} == true ]]; then
3244    for SERVER in "prod-1 prod-2"; do
3245        deploy target=${SERVER} port=8080
3246        if [[ $? -ne 0 ]]; then
3247            notify channel="ops" message="Deploy failed"
3248        fi
3249    done
3250fi
3251"#;
3252        let result = parse(script).unwrap();
3253        let stmts: Vec<_> = result.statements.iter()
3254            .filter(|s| !matches!(s, Stmt::Empty))
3255            .collect();
3256
3257        assert_eq!(stmts.len(), 2);
3258
3259        // Command substitution with pipeline
3260        match stmts[0] {
3261            Stmt::Assignment(a) => {
3262                assert_eq!(a.name, "RESULT");
3263                match &a.value {
3264                    Expr::CommandSubst(pipeline) => {
3265                        assert_eq!(pipeline.commands.len(), 3);
3266                    }
3267                    other => panic!("expected command subst, got {:?}", other),
3268                }
3269            }
3270            other => panic!("expected assignment, got {:?}", other),
3271        }
3272
3273        // If with && condition, containing for loop with nested if
3274        match stmts[1] {
3275            Stmt::If(if_stmt) => {
3276                match if_stmt.condition.as_ref() {
3277                    Expr::BinaryOp { op, .. } => assert_eq!(*op, BinaryOp::And),
3278                    other => panic!("expected && condition, got {:?}", other),
3279                }
3280                assert_eq!(if_stmt.then_branch.len(), 1);
3281                match &if_stmt.then_branch[0] {
3282                    Stmt::For(f) => {
3283                        assert_eq!(f.body.len(), 2);
3284                        assert!(matches!(&f.body[1], Stmt::If(_)));
3285                    }
3286                    other => panic!("expected for in if body, got {:?}", other),
3287                }
3288            }
3289            other => panic!("expected if, got {:?}", other),
3290        }
3291    }
3292
3293    /// Level 5: Edge cases and parser stress test
3294    #[test]
3295    fn script_level5_edge_cases() {
3296        let script = r#"
3297echo ""
3298echo "quotes: \"nested\" here"
3299echo "escapes: \n\t\r\\"
3300echo "unicode: \u2764"
3301
3302X=-99999
3303Y=3.14159265358979
3304Z=-0.001
3305
3306cmd a=1 b="two" c=true d=false e=null
3307
3308if true; then
3309    if false; then
3310        echo "inner"
3311    else
3312        echo "else"
3313    fi
3314fi
3315
3316for I in "a b c"; do
3317    echo ${I}
3318done
3319
3320no_params() {
3321    echo "no params"
3322}
3323
3324function all_args {
3325    echo "args: $@"
3326}
3327
3328a | b | c | d | e &
3329cmd 2> "errors.log"
3330cmd &> "all.log"
3331cmd >> "append.log"
3332cmd < "input.txt"
3333"#;
3334        let result = parse(script).unwrap();
3335        let stmts: Vec<_> = result.statements.iter()
3336            .filter(|s| !matches!(s, Stmt::Empty))
3337            .collect();
3338
3339        // Verify it parses without error
3340        assert!(stmts.len() >= 10, "expected many statements, got {}", stmts.len());
3341
3342        // Background pipeline
3343        let bg_stmt = stmts.iter().find(|s| matches!(s, Stmt::Pipeline(p) if p.background));
3344        assert!(bg_stmt.is_some(), "expected background pipeline");
3345
3346        match bg_stmt.unwrap() {
3347            Stmt::Pipeline(p) => {
3348                assert_eq!(p.commands.len(), 5);
3349                assert!(p.background);
3350            }
3351            _ => unreachable!(),
3352        }
3353    }
3354
3355    // ═══════════════════════════════════════════════════════════════════════════
3356    // Edge Case Tests: Ambiguity Resolution
3357    // ═══════════════════════════════════════════════════════════════════════════
3358
3359    #[test]
3360    fn parse_keyword_as_variable_rejected() {
3361        // Keywords CANNOT be used as variable names - this is intentional
3362        // to avoid ambiguity. Use different names instead.
3363        let result = parse(r#"if="value""#);
3364        assert!(result.is_err(), "if= should fail - 'if' is a keyword");
3365
3366        let result = parse("while=true");
3367        assert!(result.is_err(), "while= should fail - 'while' is a keyword");
3368
3369        let result = parse(r#"then="next""#);
3370        assert!(result.is_err(), "then= should fail - 'then' is a keyword");
3371    }
3372
3373    #[test]
3374    fn parse_set_command_with_flag() {
3375        let result = parse("set -e");
3376        assert!(result.is_ok(), "failed to parse set -e: {:?}", result);
3377        let program = result.unwrap();
3378        match &program.statements[0] {
3379            Stmt::Command(cmd) => {
3380                assert_eq!(cmd.name, "set");
3381                assert_eq!(cmd.args.len(), 1);
3382                match &cmd.args[0] {
3383                    Arg::ShortFlag(f) => assert_eq!(f, "e"),
3384                    other => panic!("expected ShortFlag, got {:?}", other),
3385                }
3386            }
3387            other => panic!("expected Command, got {:?}", other),
3388        }
3389    }
3390
3391    #[test]
3392    fn parse_set_command_no_args() {
3393        let result = parse("set");
3394        assert!(result.is_ok(), "failed to parse set: {:?}", result);
3395        let program = result.unwrap();
3396        match &program.statements[0] {
3397            Stmt::Command(cmd) => {
3398                assert_eq!(cmd.name, "set");
3399                assert_eq!(cmd.args.len(), 0);
3400            }
3401            other => panic!("expected Command, got {:?}", other),
3402        }
3403    }
3404
3405    #[test]
3406    fn parse_set_assignment_vs_command() {
3407        // X=5 should be assignment
3408        let result = parse("X=5");
3409        assert!(result.is_ok());
3410        let program = result.unwrap();
3411        assert!(matches!(&program.statements[0], Stmt::Assignment(_)));
3412
3413        // set -e should be command
3414        let result = parse("set -e");
3415        assert!(result.is_ok());
3416        let program = result.unwrap();
3417        assert!(matches!(&program.statements[0], Stmt::Command(_)));
3418    }
3419
3420    #[test]
3421    fn parse_true_as_command() {
3422        let result = parse("true");
3423        assert!(result.is_ok());
3424        let program = result.unwrap();
3425        match &program.statements[0] {
3426            Stmt::Command(cmd) => assert_eq!(cmd.name, "true"),
3427            other => panic!("expected Command(true), got {:?}", other),
3428        }
3429    }
3430
3431    #[test]
3432    fn parse_false_as_command() {
3433        let result = parse("false");
3434        assert!(result.is_ok());
3435        let program = result.unwrap();
3436        match &program.statements[0] {
3437            Stmt::Command(cmd) => assert_eq!(cmd.name, "false"),
3438            other => panic!("expected Command(false), got {:?}", other),
3439        }
3440    }
3441
3442    #[test]
3443    fn parse_dot_as_source_alias() {
3444        let result = parse(". script.kai");
3445        assert!(result.is_ok(), "failed to parse . script.kai: {:?}", result);
3446        let program = result.unwrap();
3447        match &program.statements[0] {
3448            Stmt::Command(cmd) => {
3449                assert_eq!(cmd.name, ".");
3450                assert_eq!(cmd.args.len(), 1);
3451            }
3452            other => panic!("expected Command(.), got {:?}", other),
3453        }
3454    }
3455
3456    #[test]
3457    fn parse_source_command() {
3458        let result = parse("source utils.kai");
3459        assert!(result.is_ok(), "failed to parse source: {:?}", result);
3460        let program = result.unwrap();
3461        match &program.statements[0] {
3462            Stmt::Command(cmd) => {
3463                assert_eq!(cmd.name, "source");
3464                assert_eq!(cmd.args.len(), 1);
3465            }
3466            other => panic!("expected Command(source), got {:?}", other),
3467        }
3468    }
3469
3470    #[test]
3471    fn parse_test_expr_file_test() {
3472        // Paths must be quoted strings in test expressions
3473        let result = parse(r#"[[ -f "/path/file" ]]"#);
3474        assert!(result.is_ok(), "failed to parse file test: {:?}", result);
3475    }
3476
3477    #[test]
3478    fn parse_test_expr_comparison() {
3479        let result = parse(r#"[[ $X == "value" ]]"#);
3480        assert!(result.is_ok(), "failed to parse comparison test: {:?}", result);
3481    }
3482
3483    #[test]
3484    fn parse_test_expr_single_eq() {
3485        // = and == are equivalent inside [[ ]] (matching bash behavior)
3486        let result = parse(r#"[[ $X = "value" ]]"#);
3487        assert!(result.is_ok(), "failed to parse single-= comparison: {:?}", result);
3488        let program = result.unwrap();
3489        match &program.statements[0] {
3490            Stmt::Test(TestExpr::Comparison { op, .. }) => {
3491                assert_eq!(op, &TestCmpOp::Eq);
3492            }
3493            other => panic!("expected Test(Comparison), got {:?}", other),
3494        }
3495    }
3496
3497    #[test]
3498    fn parse_while_loop() {
3499        let result = parse("while true; do echo; done");
3500        assert!(result.is_ok(), "failed to parse while loop: {:?}", result);
3501        let program = result.unwrap();
3502        assert!(matches!(&program.statements[0], Stmt::While(_)));
3503    }
3504
3505    #[test]
3506    fn parse_break_with_level() {
3507        let result = parse("break 2");
3508        assert!(result.is_ok());
3509        let program = result.unwrap();
3510        match &program.statements[0] {
3511            Stmt::Break(Some(n)) => assert_eq!(*n, 2),
3512            other => panic!("expected Break(2), got {:?}", other),
3513        }
3514    }
3515
3516    #[test]
3517    fn parse_continue_with_level() {
3518        let result = parse("continue 3");
3519        assert!(result.is_ok());
3520        let program = result.unwrap();
3521        match &program.statements[0] {
3522            Stmt::Continue(Some(n)) => assert_eq!(*n, 3),
3523            other => panic!("expected Continue(3), got {:?}", other),
3524        }
3525    }
3526
3527    #[test]
3528    fn parse_exit_with_code() {
3529        let result = parse("exit 1");
3530        assert!(result.is_ok());
3531        let program = result.unwrap();
3532        match &program.statements[0] {
3533            Stmt::Exit(Some(expr)) => {
3534                match expr.as_ref() {
3535                    Expr::Literal(Value::Int(n)) => assert_eq!(*n, 1),
3536                    other => panic!("expected Int(1), got {:?}", other),
3537                }
3538            }
3539            other => panic!("expected Exit(1), got {:?}", other),
3540        }
3541    }
3542
3543    // ========================================================================
3544    // parse_interpolated_string_spanned — body-internal span tracking for
3545    // heredoc bodies. The byte offsets these tests pin become validator
3546    // issue spans via the HereDocBody → SpannedPart flow.
3547    // ========================================================================
3548
3549    #[test]
3550    fn spanned_literal_only_records_byte_range() {
3551        let parts = parse_interpolated_string_spanned("hello world", 100);
3552        assert_eq!(parts.len(), 1);
3553        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hello world"));
3554        assert_eq!(parts[0].offset, 100, "base_offset must propagate to literals");
3555        assert_eq!(parts[0].len, 11);
3556    }
3557
3558    #[test]
3559    fn spanned_braced_var_at_zero() {
3560        let parts = parse_interpolated_string_spanned("${X}", 50);
3561        assert_eq!(parts.len(), 1);
3562        assert!(matches!(&parts[0].part, StringPart::Var(_)));
3563        assert_eq!(parts[0].offset, 50);
3564        assert_eq!(parts[0].len, 4); // "${X}"
3565    }
3566
3567    #[test]
3568    fn spanned_simple_var_then_literal() {
3569        let parts = parse_interpolated_string_spanned("$X end", 10);
3570        assert_eq!(parts.len(), 2);
3571        assert!(matches!(&parts[0].part, StringPart::Var(_)));
3572        assert_eq!(parts[0].offset, 10);
3573        assert_eq!(parts[0].len, 2); // "$X"
3574        assert!(matches!(&parts[1].part, StringPart::Literal(s) if s == " end"));
3575        assert_eq!(parts[1].offset, 12);
3576        assert_eq!(parts[1].len, 4);
3577    }
3578
3579    #[test]
3580    fn spanned_mixed_literal_var_literal() {
3581        let parts = parse_interpolated_string_spanned("hi ${X} bye", 0);
3582        assert_eq!(parts.len(), 3);
3583        // "hi "
3584        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hi "));
3585        assert_eq!(parts[0].offset, 0);
3586        assert_eq!(parts[0].len, 3);
3587        // ${X}
3588        assert!(matches!(&parts[1].part, StringPart::Var(_)));
3589        assert_eq!(parts[1].offset, 3);
3590        assert_eq!(parts[1].len, 4);
3591        // " bye"
3592        assert!(matches!(&parts[2].part, StringPart::Literal(s) if s == " bye"));
3593        assert_eq!(parts[2].offset, 7);
3594        assert_eq!(parts[2].len, 4);
3595    }
3596
3597    #[test]
3598    fn spanned_positional_param() {
3599        let parts = parse_interpolated_string_spanned("$1 done", 0);
3600        assert_eq!(parts.len(), 2);
3601        assert!(matches!(&parts[0].part, StringPart::Positional(1)));
3602        assert_eq!(parts[0].offset, 0);
3603        assert_eq!(parts[0].len, 2); // "$1"
3604    }
3605
3606    #[test]
3607    fn spanned_special_dollar_dollar() {
3608        let parts = parse_interpolated_string_spanned("$$", 5);
3609        assert_eq!(parts.len(), 1);
3610        assert!(matches!(&parts[0].part, StringPart::CurrentPid));
3611        assert_eq!(parts[0].offset, 5);
3612        assert_eq!(parts[0].len, 2);
3613    }
3614
3615    #[test]
3616    fn spanned_arithmetic_marker_recognised() {
3617        // The lexer wraps arithmetic markers as ${__ARITH:expr__} for
3618        // interpolated heredocs; the spanned parser must produce
3619        // StringPart::Arithmetic for that shape.
3620        let parts = parse_interpolated_string_spanned("${__ARITH:1+2__}", 0);
3621        assert_eq!(parts.len(), 1);
3622        assert!(matches!(&parts[0].part, StringPart::Arithmetic(e) if e == "1+2"));
3623    }
3624
3625    #[test]
3626    fn spanned_default_separator_yields_var_with_default() {
3627        let parts = parse_interpolated_string_spanned("${X:-fallback}", 0);
3628        assert_eq!(parts.len(), 1);
3629        assert!(matches!(&parts[0].part, StringPart::VarWithDefault { .. }));
3630        assert_eq!(parts[0].offset, 0);
3631        assert_eq!(parts[0].len, 14); // "${X:-fallback}"
3632    }
3633
3634    #[test]
3635    fn spanned_no_dollar_runs_one_literal() {
3636        let parts = parse_interpolated_string_spanned("plain text only", 7);
3637        assert_eq!(parts.len(), 1);
3638        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "plain text only"));
3639        assert_eq!(parts[0].offset, 7);
3640        assert_eq!(parts[0].len, 15);
3641    }
3642
3643    #[test]
3644    fn spanned_matches_unspanned_part_count() {
3645        // Spanned and spanless variants must agree on the part decomposition.
3646        // Bug fixes in one should land in the other.
3647        let cases = [
3648            "hello",
3649            "$X",
3650            "${X}",
3651            "${X:-d}",
3652            "hi $A and $B",
3653            "$0 $1 $2",
3654            "$$ $? $#",
3655        ];
3656        for s in &cases {
3657            let unspanned = parse_interpolated_string(s);
3658            let spanned = parse_interpolated_string_spanned(s, 0);
3659            assert_eq!(
3660                unspanned.len(),
3661                spanned.len(),
3662                "part count differs for {:?}",
3663                s
3664            );
3665        }
3666    }
3667
3668    #[test]
3669    fn spanned_multibyte_utf8_before_var_uses_byte_offsets() {
3670        // 🚀 is 4 bytes in UTF-8 and a space is 1 byte, so the literal
3671        // prefix is 5 bytes total. `${X}` then sits at byte offset 5.
3672        // Right-by-luck for char-vs-byte indexing is precisely what this
3673        // test catches: if someone swaps .len_utf8() for 1, offset becomes 2.
3674        let parts = parse_interpolated_string_spanned("🚀 ${X}", 0);
3675        assert_eq!(parts.len(), 2);
3676
3677        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "🚀 "));
3678        assert_eq!(parts[0].offset, 0);
3679        assert_eq!(parts[0].len, 5, "literal len must be bytes, not chars");
3680
3681        assert!(matches!(&parts[1].part, StringPart::Var(_)));
3682        assert_eq!(parts[1].offset, 5, "var offset must be bytes, not chars");
3683        assert_eq!(parts[1].len, 4);
3684    }
3685
3686    #[test]
3687    fn spanned_multibyte_utf8_pure_literal_is_byte_length() {
3688        // "hello 世界 world": 5 + 1 + 6 (3 per CJK char) + 1 + 5 = 18 bytes,
3689        // 13 chars. The `len` field must report 18, not 13.
3690        let parts = parse_interpolated_string_spanned("hello 世界 world", 0);
3691        assert_eq!(parts.len(), 1);
3692        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "hello 世界 world"));
3693        assert_eq!(parts[0].offset, 0);
3694        assert_eq!(parts[0].len, 18);
3695    }
3696
3697    #[test]
3698    fn spanned_escape_dollar_consumes_two_bytes_emits_one_char() {
3699        // `\$` is 2 source bytes and resolves to a single literal `$`.
3700        // The literal part's `len` should reflect the SOURCE length (2).
3701        let parts = parse_interpolated_string_spanned("\\$", 0);
3702        assert_eq!(parts.len(), 1);
3703        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "$"));
3704        assert_eq!(parts[0].offset, 0);
3705        assert_eq!(parts[0].len, 2, "len is source byte length, not rendered length");
3706    }
3707
3708    #[test]
3709    fn spanned_escape_backslash_collapses_pair_to_one() {
3710        let parts = parse_interpolated_string_spanned("\\\\", 0);
3711        assert_eq!(parts.len(), 1);
3712        assert!(matches!(&parts[0].part, StringPart::Literal(s) if s == "\\"));
3713        assert_eq!(parts[0].len, 2);
3714    }
3715}
kaish_kernel/parser.rs

kaish_kernel/
parser.rs