zshrs_parse/
parser.rs

1//! Zsh parser - Direct port from zsh/Src/parse.c
2//!
3//! This parser takes tokens from the ZshLexer and builds an AST.
4//! It follows the zsh grammar closely, producing structures that
5//! can be executed by the shell executor.
6
7use crate::lexer::ZshLexer;
8use crate::tokens::LexTok;
9use serde::{Deserialize, Serialize};
10
/// Root AST node for a complete program: the lists it is made of.
///
/// Nested program bodies elsewhere in this AST (loop bodies, function
/// bodies, subshells, ...) reuse this same type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshProgram {
    /// The lists making up the program.
    pub lists: Vec<ZshList>,
}
16
/// One list: a sublist plus the flags recorded for it.
/// Lists are separated from each other by `;`, `&` or newline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshList {
    /// The `&&` / `||` chain that forms this list.
    pub sublist: ZshSublist,
    /// Async / disown markers for this list.
    pub flags: ListFlags,
}
23
/// Execution flags carried by a [`ZshList`]. The derived `Default`
/// yields both flags cleared.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct ListFlags {
    /// Run asynchronously (`&`)
    pub async_: bool,
    /// Disown after running (`&|` or `&!`)
    pub disown: bool,
}
31
/// A sublist: pipelines connected by `&&` or `||`, stored as a singly
/// linked chain (head pipeline plus an optional operator + tail).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshSublist {
    /// First pipeline of the chain.
    pub pipe: ZshPipe,
    /// Connecting operator and the remainder of the chain, if any.
    pub next: Option<(SublistOp, Box<ZshSublist>)>,
    /// Coproc / negation markers for this sublist.
    pub flags: SublistFlags,
}
39
/// Operator joining two entries of a [`ZshSublist`] chain.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SublistOp {
    And, // &&
    Or,  // ||
}
45
/// Flags carried by a [`ZshSublist`]. The derived `Default` yields both
/// flags cleared.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct SublistFlags {
    /// Coproc
    pub coproc: bool,
    /// Negated with `!`
    pub not: bool,
}
53
/// A pipeline: commands connected by `|`, stored as a singly linked
/// chain of stages.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshPipe {
    /// Command run by this stage.
    pub cmd: ZshCommand,
    /// Following stage, or `None` when this is the last one.
    pub next: Option<Box<ZshPipe>>,
    /// Line number recorded for this stage (presumably the source line
    /// the stage started on — confirm against the parser).
    pub lineno: u64,
    /// `|&` between this stage and the next — merge stderr into the
    /// pipe so the next stage's stdin sees both stdout AND stderr from
    /// this stage. When `next` is None this flag is meaningless.
    /// Absent in serialized input => `false` (`serde(default)`).
    #[serde(default)]
    pub merge_stderr: bool,
}
66
/// A single command: a simple command or one of the compound / special
/// forms listed below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshCommand {
    Simple(ZshSimple),
    Subsh(Box<ZshProgram>), // (list)
    Cursh(Box<ZshProgram>), // {list}
    For(ZshFor),
    Case(ZshCase),
    If(ZshIf),
    While(ZshWhile),
    /// Shares its payload type with `While`.
    Until(ZshWhile),
    Repeat(ZshRepeat),
    FuncDef(ZshFuncDef),
    /// `time` with an optional sublist payload (presumably `None` is a
    /// bare `time` — confirm against the parser).
    Time(Option<Box<ZshSublist>>),
    Cond(ZshCond), // [[ ... ]]
    Arith(String), // (( ... ))
    Try(ZshTry),   // { ... } always { ... }
    /// Compound command with trailing redirects:
    /// `{ cmd } 2>&1`, `(...) >file`, `if ...; fi >file`, etc.
    /// Simple commands carry redirects in their own struct; this wrapper
    /// is only used for compound forms.
    Redirected(Box<ZshCommand>, Vec<ZshRedir>),
}
90
/// A simple command: its assignments, words and redirections, each kept
/// in its own vector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshSimple {
    /// Assignments attached to the command.
    pub assigns: Vec<ZshAssign>,
    /// Command words as strings (may still carry lexer token bytes; see
    /// `simple_name_with_inoutpar`, which untokenizes them).
    pub words: Vec<String>,
    /// Redirections attached to the command.
    pub redirs: Vec<ZshRedir>,
}
98
/// A single assignment: target name, value, and whether it appends.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshAssign {
    /// Name being assigned to.
    pub name: String,
    /// Scalar or array value.
    pub value: ZshAssignValue,
    /// `true` for the `+=` form.
    pub append: bool, // +=
}
106
/// Right-hand side of a [`ZshAssign`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshAssignValue {
    /// A single string value.
    Scalar(String),
    /// A list of string elements.
    Array(Vec<String>),
}
112
/// A single redirection attached to a command.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshRedir {
    /// Which redirection operator was used.
    pub rtype: RedirType,
    /// File descriptor number for the redirection.
    pub fd: i32,
    /// Operand text (presumably the target word / filename — confirm
    /// against `parse_redirection`).
    pub name: String,
    /// Here-document body and terminator, once resolved.
    pub heredoc: Option<HereDocInfo>,
    /// Variable name for the `{var}>file` form.
    pub varid: Option<String>, // {var}>file
    /// Index into ZshLexer.heredocs[] for body lookup. Filled in by
    /// `parse_redirection` for Heredoc/HeredocDash, then resolved into
    /// `heredoc.content` by `fill_heredoc_bodies` after process_heredocs
    /// has run for the line. Never serialized (`serde(skip)`).
    #[serde(skip)]
    pub heredoc_idx: Option<usize>,
}
128
/// Body and terminator of a here-document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HereDocInfo {
    /// Text of the here-document body.
    pub content: String,
    /// Terminator word that ends the body.
    pub terminator: String,
    /// Originally-quoted terminator (`<<'EOF'`, `<<"EOF"`). When true the
    /// body is passed verbatim — no `$var` / `$(cmd)` / `$((expr))`
    /// expansion. Plain `<<EOF` runs all expansions.
    /// Absent in serialized input => `false` (`serde(default)`).
    #[serde(default)]
    pub quoted: bool,
}
139
/// Redirection operator of a [`ZshRedir`]. The trailing comment on each
/// variant shows the shell syntax it stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RedirType {
    Write,        // >
    Writenow,     // >|
    Append,       // >>
    Appendnow,    // >>|
    Read,         // <
    ReadWrite,    // <>
    Heredoc,      // <<
    HeredocDash,  // <<-
    Herestr,      // <<<
    MergeIn,      // <&
    MergeOut,     // >&
    ErrWrite,     // &>
    ErrWritenow,  // &>|
    ErrAppend,    // >>&
    ErrAppendnow, // >>&|
    InPipe,       // < <(...)
    OutPipe,      // > >(...)
}
161
/// A `for` loop (also used for `select`, see `is_select`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshFor {
    /// Loop variable name.
    pub var: String,
    /// What the loop iterates over.
    pub list: ForList,
    /// Loop body.
    pub body: Box<ZshProgram>,
    /// True if this was parsed as `select` rather than `for`. Both share
    /// the same parser, so the compiler routes on this flag.
    /// Absent in serialized input => `false` (`serde(default)`).
    #[serde(default)]
    pub is_select: bool,
}
173
/// Iteration source of a [`ZshFor`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ForList {
    /// Explicit word list.
    Words(Vec<String>),
    /// Three-expression form; each part is kept as raw text.
    CStyle {
        init: String,
        cond: String,
        step: String,
    },
    /// No explicit list (presumably iterates the positional parameters,
    /// as the name suggests — confirm against the executor).
    Positional,
}
184
/// A `case` statement: the word being matched and its arms.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshCase {
    /// Word being matched.
    pub word: String,
    /// The arms of the statement.
    pub arms: Vec<CaseArm>,
}
191
/// One arm of a [`ZshCase`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CaseArm {
    /// Patterns attached to this arm.
    pub patterns: Vec<String>,
    /// Commands belonging to this arm.
    pub body: ZshProgram,
    /// How the arm was terminated (`;;`, `;&` or `;|`).
    pub terminator: CaseTerm,
}
198
/// Terminator that closes a [`CaseArm`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CaseTerm {
    Break,    // ;;
    Continue, // ;&
    TestNext, // ;|
}
205
/// An `if` statement with optional `elif` branches and `else` block.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshIf {
    /// Condition of the leading `if`.
    pub cond: Box<ZshProgram>,
    /// Body of the leading `if`.
    pub then: Box<ZshProgram>,
    /// `elif` branches as two-program tuples; `fill_in_command` treats
    /// them as (condition, body) pairs.
    pub elif: Vec<(ZshProgram, ZshProgram)>,
    /// `else` body, if present.
    pub else_: Option<Box<ZshProgram>>,
}
214
/// A `while` or `until` loop; both `ZshCommand::While` and
/// `ZshCommand::Until` carry this struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshWhile {
    /// Loop condition.
    pub cond: Box<ZshProgram>,
    /// Loop body.
    pub body: Box<ZshProgram>,
    /// Marks the `until` form.
    pub until: bool,
}
222
/// A `repeat` loop.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshRepeat {
    /// Repetition count, kept as text (not a parsed number).
    pub count: String,
    /// Loop body.
    pub body: Box<ZshProgram>,
}
229
/// A function definition. One definition may declare several names that
/// share the same body.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshFuncDef {
    /// Names the body is registered under.
    pub names: Vec<String>,
    /// Function body.
    pub body: Box<ZshProgram>,
    /// Tracing flag for the function (semantics not visible in this
    /// chunk — TODO confirm what sets it).
    pub tracing: bool,
    /// Anonymous-function call args. `() { body } a b` parses as a
    /// FuncDef (auto-named) with `auto_call_args = Some(vec!["a", "b"])`.
    /// compile_funcdef registers the function then emits a Simple call
    /// with these args.
    #[serde(default)]
    pub auto_call_args: Option<Vec<String>>,
    /// Original source text of the function body (the bytes between
    /// `{` and `}`, without the braces themselves), captured at parse
    /// time. Populated for `function name { body }` and `function name() { body }`
    /// forms; left None for the synthesized inline-funcdef recovery
    /// path. ZshCompiler::compile_funcdef forwards it to
    /// `BUILTIN_REGISTER_COMPILED_FN` so introspection (`whence`, `which`,
    /// `${functions[name]}`) has canonical source text.
    #[serde(default)]
    pub body_source: Option<String>,
}
252
/// Conditional expression `[[ ... ]]`, as a tree: `Not` / `And` / `Or`
/// combine sub-conditions, the remaining variants are leaves holding
/// their operator and operands as strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshCond {
    Not(Box<ZshCond>),
    And(Box<ZshCond>, Box<ZshCond>),
    Or(Box<ZshCond>, Box<ZshCond>),
    Unary(String, String),          // -f file, -n str, etc.
    Binary(String, String, String), // str = pat, a -eq b, etc.
    Regex(String, String),          // str =~ regex
}
263
/// Try/always block: `{ ... } always { ... }`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshTry {
    /// The leading `{ ... }` block.
    pub try_block: Box<ZshProgram>,
    /// The block after `always`.
    pub always: Box<ZshProgram>,
}
270
/// Zsh parameter expansion flags. The trailing comment on each variant
/// gives the flag's spelling followed by a short description.
/// Variants with payloads carry the flag's arguments (separator,
/// length, fill character, ...).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshParamFlag {
    Lower,                 // L - lowercase
    Upper,                 // U - uppercase
    Capitalize,            // C - capitalize words
    Join(String),          // j:sep: - join array with separator
    JoinNewline,           // F - join with newlines
    Split(String),         // s:sep: - split string into array
    SplitLines,            // f - split on newlines
    SplitWords,            // z - split into words (shell parsing)
    Type,                  // t - type of variable
    Words,                 // w - word splitting
    Quote,                 // qq - single-quote always
    QuoteIfNeeded,         // q+ - single-quote only if needed
    DoubleQuote,           // qqq - double-quote
    DollarQuote,           // qqqq - $'...' style
    QuoteBackslash,        // q / b / B - backslash-escape special chars
    Unique,                // u - unique elements only
    Reverse,               // O - reverse sort
    Sort,                  // o - sort
    NumericSort,           // n - numeric sort
    IndexSort,             // a - sort in array index order
    Keys,                  // k - associative array keys
    Values,                // v - associative array values
    Length,                // # - length (character codes)
    CountChars,            // c - count total characters
    Expand,                // e - perform shell expansions
    PromptExpand,          // % - expand prompt escapes
    PromptExpandFull,      // %% - full prompt expansion
    Visible,               // V - make non-printable chars visible
    Directory,             // D - substitute directory names
    Head(usize),           // [1,n] - first n elements
    Tail(usize),           // [-n,-1] - last n elements
    PadLeft(usize, char),  // l:len:fill: - pad left
    PadRight(usize, char), // r:len:fill: - pad right
    Width(usize),          // m - use width for padding
    Match,                 // M - include matched portion
    Remove,                // R - include non-matched portion (complement of M)
    Subscript,             // S - subscript scanning
    Parameter,             // P - use value as parameter name (indirection)
    Glob,                  // ~ - glob patterns in pattern
    /// `@` flag — force array-context behavior even inside DQ. zsh's
    /// `"${(@o)arr}"` keeps the sort active and splices each element as
    /// its own word. Without this, the array-only flags became no-ops
    /// in DQ.
    At,
}
319
/// List operator (for shell command lists). Used as the separator half
/// of each entry in `ShellCommand::List`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ListOp {
    And,     // &&
    Or,      // ||
    Semi,    // ;
    Amp,     // &
    Newline, // \n
}
329
/// Shell word: either a single literal or a concatenation of sub-words.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ShellWord {
    /// Plain text token. Most ZWC-decoded words land here. Goes through
    /// `expand_string` (plus glob/tilde/etc. as text-level transforms) for
    /// final output.
    Literal(String),
    /// Concatenation of sub-words. ZWC array decoding produces this with
    /// child Literals; nothing else constructs it now that the legacy
    /// hand-rolled parser is gone.
    Concat(Vec<ShellWord>),
}
342
/// Variable modifier for parameter expansion.
///
/// Only the two anchored-replace variants document their concrete
/// syntax here; the other variant names presumably follow the usual
/// `${var...}` operators — confirm against the expansion code before
/// relying on a specific mapping.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VarModifier {
    Default(ShellWord),
    DefaultAssign(ShellWord),
    Error(ShellWord),
    Alternate(ShellWord),
    Length,
    /// Two numeric operands; the second is optional.
    Substring(i64, Option<i64>),
    RemovePrefix(ShellWord),
    RemovePrefixLong(ShellWord),
    RemoveSuffix(ShellWord),
    RemoveSuffixLong(ShellWord),
    Replace(ShellWord, ShellWord),
    ReplaceAll(ShellWord, ShellWord),
    /// `${var/#pat/repl}` — anchored at start (prefix only).
    /// Per Src/subst.c paramsubst's `/`-arm with SUB_START.
    ReplacePrefix(ShellWord, ShellWord),
    /// `${var/%pat/repl}` — anchored at end (suffix only).
    /// Per Src/subst.c paramsubst's `/`-arm with SUB_END.
    ReplaceSuffix(ShellWord, ShellWord),
    Upper,
    Lower,
}
367
/// Shell command — the older `shell_ast`-compatible command type, kept
/// alongside the `Zsh*` AST above.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ShellCommand {
    Simple(SimpleCommand),
    /// Pipeline stages plus a boolean flag (meaning not visible in this
    /// chunk — presumably negation; TODO confirm).
    Pipeline(Vec<ShellCommand>, bool),
    /// Commands, each paired with a list operator.
    List(Vec<(ShellCommand, ListOp)>),
    Compound(CompoundCommand),
    /// Function name and its body.
    FunctionDef(String, Box<ShellCommand>),
}
377
/// Simple command with assignments, words, and redirects (older
/// `ShellCommand` AST).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimpleCommand {
    /// (name, value, flag) triples; the flag's meaning is not visible
    /// here — presumably "append" (`+=`); TODO confirm.
    pub assignments: Vec<(String, ShellWord, bool)>,
    /// Command words.
    pub words: Vec<ShellWord>,
    /// Redirections attached to the command.
    pub redirects: Vec<Redirect>,
}
385
/// Redirect (older `ShellCommand` AST counterpart of [`ZshRedir`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Redirect {
    /// Explicit file descriptor, if one was given.
    pub fd: Option<i32>,
    /// Redirection operator.
    pub op: RedirectOp,
    /// Redirection target word.
    pub target: ShellWord,
    /// Here-document body, when the redirect carries one.
    pub heredoc_content: Option<String>,
    /// Variable name for a variable-held descriptor (presumably the
    /// `{var}>file` form, mirroring `ZshRedir::varid` — confirm).
    pub fd_var: Option<String>,
}
395
/// Redirect operator for the older [`Redirect`] type. The concrete
/// syntax for each variant is not spelled out in this file; see
/// [`RedirType`] for the annotated operator set of the newer AST.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RedirectOp {
    Write,
    Append,
    Read,
    ReadWrite,
    Clobber,
    DupRead,
    DupWrite,
    HereDoc,
    HereString,
    WriteBoth,
    AppendBoth,
}
411
/// Compound command (older `ShellCommand` AST). Bodies and conditions
/// are plain vectors of [`ShellCommand`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CompoundCommand {
    BraceGroup(Vec<ShellCommand>),
    Subshell(Vec<ShellCommand>),
    If {
        /// One two-vector tuple per branch (presumably condition then
        /// body, with `if` first and `elif`s after — confirm).
        conditions: Vec<(Vec<ShellCommand>, Vec<ShellCommand>)>,
        else_part: Option<Vec<ShellCommand>>,
    },
    For {
        var: String,
        /// Optional word list (`None` when no list was given).
        words: Option<Vec<ShellWord>>,
        body: Vec<ShellCommand>,
    },
    /// Three-expression `for`; each part is kept as raw text.
    ForArith {
        init: String,
        cond: String,
        step: String,
        body: Vec<ShellCommand>,
    },
    While {
        condition: Vec<ShellCommand>,
        body: Vec<ShellCommand>,
    },
    Until {
        condition: Vec<ShellCommand>,
        body: Vec<ShellCommand>,
    },
    Case {
        word: ShellWord,
        /// One (patterns, body, terminator) triple per arm.
        cases: Vec<(Vec<ShellWord>, Vec<ShellCommand>, CaseTerminator)>,
    },
    Select {
        var: String,
        words: Option<Vec<ShellWord>>,
        body: Vec<ShellCommand>,
    },
    Coproc {
        name: Option<String>,
        body: Box<ShellCommand>,
    },
    /// repeat N do ... done
    Repeat {
        count: String,
        body: Vec<ShellCommand>,
    },
    /// { try-block } always { always-block }
    Try {
        try_body: Vec<ShellCommand>,
        always_body: Vec<ShellCommand>,
    },
    /// Arithmetic command holding its expression text.
    Arith(String),
    /// A command together with the redirects attached to it.
    WithRedirects(Box<ShellCommand>, Vec<Redirect>),
}
466
/// Case terminator for the older `CompoundCommand::Case` arms.
///
/// Unlike [`CaseTerm`], this enum does not derive `PartialEq` / `Eq`,
/// so values must be compared with `match` / `matches!`.
/// NOTE(review): the mapping of these names onto `;;` / `;&` / `;|` is
/// not documented here and the names differ from `CaseTerm` — confirm
/// before converting between the two.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum CaseTerminator {
    Break,
    Fallthrough,
    Continue,
}
474
/// A parse error: a message plus the line it was reported for.
/// Displayed as `parse error at line <line>: <message>`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParseError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Line number reported with the error.
    pub line: u64,
}
481
482impl std::fmt::Display for ParseError {
483    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
484        write!(f, "parse error at line {}: {}", self.line, self.message)
485    }
486}
487
// Marker impl: `ParseError` relies on the trait's default methods, so it
// can be used wherever a `std::error::Error` is expected.
impl std::error::Error for ParseError {}
489
/// The Zsh parser: owns the lexer it pulls tokens from, the errors
/// collected so far, and two safety counters.
pub struct ZshParser<'a> {
    /// Token source; borrows the input text for `'a`.
    lexer: ZshLexer<'a>,
    /// Errors collected while parsing.
    errors: Vec<ParseError>,
    /// Global iteration counter to prevent infinite loops
    /// (bumped and tested by `check_limit`).
    global_iterations: usize,
    /// Recursion depth counter to prevent stack overflow
    /// (tested against `MAX_RECURSION_DEPTH` by `check_recursion`).
    recursion_depth: usize,
}
499
/// Deepest `recursion_depth` the parser tolerates; `check_recursion`
/// reports the limit as exceeded once the counter is above this value.
const MAX_RECURSION_DEPTH: usize = 500;
501
/// Saved parse context. Direct port of zsh's `struct parse_stack`
/// declared in zsh/Src/zsh.h and used by parse.c:295-355
/// (`parse_context_save` / `parse_context_restore`). Pushes per-
/// parse-call state so a nested parse (e.g. inside command
/// substitution) doesn't clobber the outer parse.
///
/// zshrs port note: zsh's parse_stack tracks wordcode-buffer state
/// (ecbuf, eclen, ecused, ecnpats, ecstrs, ecsoffs, ecssub, ecnfunc).
/// zshrs builds AST trees instead so those fields collapse to a
/// recursion_depth + global_iterations save. The lexer-side fields
/// (incmdpos, incond, etc.) live on ZshLexer here so they get saved
/// via the lexer's own `LexStack` rather than being duplicated here.
///
/// The derived `Default` gives both counters as zero.
#[derive(Debug, Default, Clone)]
pub struct ParseStack {
    /// Parser's `recursion_depth` at the time of the save.
    pub recursion_depth: usize,
    /// Parser's `global_iterations` at the time of the save.
    pub global_iterations: usize,
}
519
520/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
521/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
522/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
523/// during scanning (in source order).
524fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
525    for list in &mut prog.lists {
526        fill_in_sublist(&mut list.sublist, bodies);
527    }
528}
529
530fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
531    fill_in_pipe(&mut sub.pipe, bodies);
532    if let Some(next) = &mut sub.next {
533        fill_in_sublist(&mut next.1, bodies);
534    }
535}
536
537fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
538    fill_in_command(&mut pipe.cmd, bodies);
539    if let Some(next) = &mut pipe.next {
540        fill_in_pipe(next, bodies);
541    }
542}
543
544fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
545    match cmd {
546        ZshCommand::Simple(s) => {
547            for r in &mut s.redirs {
548                resolve_redir(r, bodies);
549            }
550        }
551        ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
552        ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
553        ZshCommand::If(i) => {
554            fill_heredoc_bodies(&mut i.cond, bodies);
555            fill_heredoc_bodies(&mut i.then, bodies);
556            for (c, b) in &mut i.elif {
557                fill_heredoc_bodies(c, bodies);
558                fill_heredoc_bodies(b, bodies);
559            }
560            if let Some(e) = &mut i.else_ {
561                fill_heredoc_bodies(e, bodies);
562            }
563        }
564        ZshCommand::While(w) | ZshCommand::Until(w) => {
565            fill_heredoc_bodies(&mut w.cond, bodies);
566            fill_heredoc_bodies(&mut w.body, bodies);
567        }
568        ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
569        ZshCommand::Case(c) => {
570            for arm in &mut c.arms {
571                fill_heredoc_bodies(&mut arm.body, bodies);
572            }
573        }
574        ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
575        ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
576        ZshCommand::Try(t) => {
577            fill_heredoc_bodies(&mut t.try_block, bodies);
578            fill_heredoc_bodies(&mut t.always, bodies);
579        }
580        ZshCommand::Redirected(inner, redirs) => {
581            for r in redirs {
582                resolve_redir(r, bodies);
583            }
584            fill_in_command(inner, bodies);
585        }
586        ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
587    }
588}
589
590fn resolve_redir(r: &mut ZshRedir, bodies: &[HereDocInfo]) {
591    if let Some(idx) = r.heredoc_idx {
592        if let Some(info) = bodies.get(idx) {
593            r.heredoc = Some(info.clone());
594        }
595    }
596}
597
598/// If `list` is a Simple containing one word that ends in the
599/// `<INPAR><OUTPAR>` token pair (the lexer-port encoding of `()`),
600/// return the bare name. Used by `parse_program_until` to detect
601/// `name() {body}` style function definitions where the lexer
602/// hasn't split the `()` from the name.
603/// Detect the `name() …` shape inside a Simple. Returns the function
604/// name and (when the body was already inlined into the same Simple,
605/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
606/// Returns None for non-funcdef shapes.
607fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
608    if list.flags.async_ || list.sublist.next.is_some() {
609        return None;
610    }
611    let pipe = &list.sublist.pipe;
612    if pipe.next.is_some() {
613        return None;
614    }
615    let simple = match &pipe.cmd {
616        ZshCommand::Simple(s) => s,
617        _ => return None,
618    };
619    if simple.words.is_empty() || !simple.assigns.is_empty() {
620        return None;
621    }
622    let suffix = "\u{88}\u{8a}"; // INPAR + OUTPAR
623                                 // Find the FIRST word ending in `()`. zsh accepts the
624                                 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
625                                 // par_funcdef wordlist) — words[0..i-1] are extra names,
626                                 // words[i] is `lastname()`. Words after are the body argv
627                                 // (one-line shorthand, `name() cmd args`).
628    let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
629    let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
630    for w in &simple.words[..par_idx] {
631        // Earlier names must be bare identifiers, NOT contain
632        // tokens that imply they're not function names (no `()`,
633        // no quotes, no expansions). zsh's lexer enforces this
634        // at the wordlist level; we approximate by requiring the
635        // word be an identifier-shaped token after untokenize.
636        let bare = crate::lexer::untokenize(w);
637        let valid = !bare.is_empty()
638            && bare
639                .chars()
640                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
641        if !valid {
642            return None;
643        }
644        names.push(bare);
645    }
646    let last = &simple.words[par_idx];
647    let bare = &last[..last.len() - suffix.len()];
648    if bare.is_empty() {
649        return None;
650    }
651    names.push(crate::lexer::untokenize(bare));
652    let rest = simple.words[par_idx + 1..].to_vec();
653    Some((names, rest))
654}
655
656impl<'a> ZshParser<'a> {
657    /// Create a new parser
658    pub fn new(input: &'a str) -> Self {
659        ZshParser {
660            lexer: ZshLexer::new(input),
661            errors: Vec::new(),
662            global_iterations: 0,
663            recursion_depth: 0,
664        }
665    }
666
667    /// Check iteration limit; returns true if exceeded
668    #[inline]
669    fn check_limit(&mut self) -> bool {
670        self.global_iterations += 1;
671        self.global_iterations > 10_000
672    }
673
674    /// Check recursion depth; returns true if exceeded
675    #[inline]
676    fn check_recursion(&mut self) -> bool {
677        self.recursion_depth > MAX_RECURSION_DEPTH
678    }
679
680    /// Save parse context onto a `ParseStack`. Direct port of
681    /// zsh/Src/parse.c:295-320 `parse_context_save`. Pushes
682    /// recursion_depth + global_iterations and resets to zero so
683    /// a nested parse can't trigger the outer parse's limits.
684    /// Lexer-side state (incmdpos / incond / etc.) saves via the
685    /// lexer's own `LexStack` since those fields live on ZshLexer.
686    pub fn parse_context_save(&mut self, ps: &mut ParseStack) {
687        // parse.c:299-317 — save parser state. zshrs collapses zsh's
688        // wordcode-buffer fields (ecbuf/eclen/ecused/ecnpats/ecstrs/
689        // ecsoffs/ecssub/ecnfunc) into the recursion+iteration pair
690        // since the AST builder doesn't use a flat wordcode buffer.
691        ps.recursion_depth = self.recursion_depth;
692        ps.global_iterations = self.global_iterations;
693        // parse.c:318-319 — clear the buffer + heredoc list so a
694        // nested parse starts from a clean slate.
695        self.recursion_depth = 0;
696        self.global_iterations = 0;
697    }
698
699    /// Restore parse context from a `ParseStack`. Direct port of
700    /// zsh/Src/parse.c:326-355 `parse_context_restore`. Inverse of
701    /// `parse_context_save`. Also clears any half-built AST state
702    /// to prevent leaking into the outer parse.
703    pub fn parse_context_restore(&mut self, ps: &ParseStack) {
704        // parse.c:330-331 — free any in-progress wordcode buffer.
705        // zshrs has no equivalent — AST nodes are owned by their
706        // parent so dropping the parser frees them.
707
708        // parse.c:333-352 — restore saved state.
709        self.recursion_depth = ps.recursion_depth;
710        self.global_iterations = ps.global_iterations;
711
712        // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
713        // error flag so the outer parse sees a clean state. zshrs
714        // tracks errors per-parser; clearing means dropping any
715        // partial errors collected during the nested parse.
716        self.errors.clear();
717    }
718
719    /// Initialize parser status. Direct port of zsh/Src/parse.c:489-503
720    /// `init_parse_status`. Clears the per-parse-call lexer flags
721    /// so a fresh parse starts from cmd-position with no nesting
722    /// state inherited from a prior parse.
723    pub fn init_parse_status(&mut self) {
724        // parse.c:500-502 — `incasepat = incond = inredir = infor =
725        // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
726        self.lexer.incasepat = 0;
727        self.lexer.incond = 0;
728        self.lexer.inredir = false;
729        self.lexer.infor = 0;
730        self.lexer.intypeset = false;
731        self.lexer.incmdpos = true;
732    }
733
734    /// Initialize parser for a fresh parse. Direct port of
735    /// zsh/Src/parse.c:507-525 `init_parse`. C source allocates a
736    /// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
737    /// per-parse-call counters, and calls init_parse_status. zshrs
738    /// has no flat wordcode buffer (AST is built inline) so this
739    /// function reduces to init_parse_status + recursion_depth/
740    /// global_iterations clear.
741    pub fn init_parse(&mut self) {
742        // parse.c:513-520 — init wordcode buffer. zshrs no-op.
743        self.recursion_depth = 0;
744        self.global_iterations = 0;
745        // parse.c:522 — `init_parse_status();`
746        self.init_parse_status();
747    }
748
749    /// Check whether the parsed program is empty. Direct port of
750    /// zsh/Src/parse.c:583-587 `empty_eprog`. C version checks
751    /// `*p->prog == WCB_END()` (single end-of-wordcode marker).
752    /// zshrs version checks the AST node count.
753    pub fn empty_eprog(prog: &ZshProgram) -> bool {
754        prog.lists.is_empty()
755    }
756
    /// Drop every pending here-document. Port of
    /// zsh/Src/parse.c:589-600 `clear_hdocs`, which walks the global
    /// `hdocs` linked list and frees each node. Here the pending
    /// heredocs live in the lexer's `heredocs` collection, so emptying
    /// it plays the same role.
    pub fn clear_hdocs(&mut self) {
        // Nothing else on the parser or lexer is touched.
        self.lexer.heredocs.clear();
    }
765
766    /// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
767    /// 612-631 `parse_event`. Reads one event from the lexer (a
768    /// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
769    /// returns the resulting ZshProgram.
770    ///
771    /// `endtok` is the token that terminates the event — usually
772    /// ENDINPUT, but for command-style substitutions the closing
773    /// `)` (zsh's CMD_SUBST_CLOSE).
774    ///
775    /// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
776    /// allocated wordcode program). zshrs returns a `ZshProgram`
777    /// (AST root). Same role at the parse-output boundary.
778    pub fn parse_event(&mut self, endtok: LexTok) -> Option<ZshProgram> {
779        // parse.c:616-619 — reset state and prime the lexer.
780        self.lexer.tok = LexTok::Endinput;
781        self.lexer.incmdpos = true;
782        self.lexer.zshlex();
783        // parse.c:620 — `init_parse();`
784        self.init_parse();
785
786        // parse.c:622-625 — drive par_event; on failure clear hdocs.
787        if !self.par_event(endtok) {
788            self.clear_hdocs();
789            return None;
790        }
791        // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
792        // parse for a substitution that doesn't need its own eprog.
793        // zshrs returns an empty program in that case (caller
794        // discards).
795        if endtok != LexTok::Endinput {
796            return Some(ZshProgram { lists: Vec::new() });
797        }
798        // parse.c:630 — `bld_eprog(1);` — build the final eprog.
799        // zshrs has already built the AST via parse_program_until,
800        // but parse_event uses par_event directly so we need to
801        // collect what par_event accumulated.
802        Some(self.parse_program_until(None))
803    }
804
805    /// Parse one event (sublist with optional separator). Direct
806    /// port of zsh/Src/parse.c:633-695 `par_event`. Returns true if
807    /// an event was successfully parsed, false on EOF / endtok.
808    ///
809    /// zshrs port note: the C version emits wordcodes via ecadd/
810    /// set_list_code; zshrs's parser builds AST nodes via
811    /// parse_sublist + parse_list. Same flow, different output.
812    pub fn par_event(&mut self, endtok: LexTok) -> bool {
813        // parse.c:639-643 — skip leading SEPERs.
814        while self.lexer.tok == LexTok::Seper {
815            // parse.c:640-641 — at top-level (endtok == ENDINPUT),
816            // a SEPER on a fresh line ends the event.
817            if self.lexer.isnewlin > 0 && endtok == LexTok::Endinput {
818                return false;
819            }
820            self.lexer.zshlex();
821        }
822        // parse.c:644-647 — terminate on EOF or matching close-token.
823        if self.lexer.tok == LexTok::Endinput {
824            return false;
825        }
826        if self.lexer.tok == endtok {
827            return true;
828        }
829        // parse.c:649-... — drive parse_sublist + handle terminator.
830        // zshrs's parse_sublist already builds the AST node directly.
831        match self.parse_sublist() {
832            Some(_) => {
833                // parse.c:651-693 — terminator handling. zshrs's
834                // parse_list wraps this; for parse_event we just
835                // confirm the sublist parsed.
836                true
837            }
838            None => false,
839        }
840    }
841
    /// Parse one list — non-recursing variant. Port of
    /// zsh/Src/parse.c:807-817 `par_list1`.
    ///
    /// Returns whatever `parse_sublist` returns: the parsed sublist, or
    /// `None` when no sublist could be parsed. No trailing separator is
    /// consumed here and no `ZshList` wrapper is built.
    pub fn par_list1(&mut self) -> Option<ZshSublist> {
        // parse.c:810-816 — the C body is a single par_sublist call
        // wrapped in eu/ecused bookkeeping; with no wordcode buffer only
        // the call itself remains.
        self.parse_sublist()
    }
853
    /// API-parity stub for `setheredoc` in zsh/Src/parse.c (cited in
    /// this port as parse.c:2347-2361). **This method does nothing**:
    /// every argument is ignored and no redirection node is modified.
    ///
    /// zshrs port note: zsh's setheredoc patches the wordcode in place
    /// via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`. Per the
    /// port notes, zshrs instead carries heredoc bodies in the lexer's
    /// `heredocs` collection and attaches them to redirections in a
    /// post-parse pass, so there is nothing to patch here.
    ///
    /// Parameters mirror the C signature and are all unused:
    /// `_pc` (wordcode position), `_redir_type`, `_doc` (body text),
    /// `_term` (terminator) and `_munged_term`.
    pub fn setheredoc(
        &mut self,
        _pc: usize,
        _redir_type: i32,
        _doc: &str,
        _term: &str,
        _munged_term: &str,
    ) {
        // Intentionally empty: heredoc wiring is driven from
        // self.lexer.heredocs by the post-parse pass, not from here.
    }
878
879    /// Parse a wordlist for `for ... in WORDS;`. Direct port of
880    /// zsh/Src/parse.c:2362-2378 `par_wordlist`. Reads STRING tokens
881    /// until the next SEPER / SEMI / NEWLIN.
882    pub fn par_wordlist(&mut self) -> Vec<String> {
883        let mut out = Vec::new();
884        // parse.c:2362-2378 — collect STRINGs into the wordlist.
885        while self.lexer.tok == LexTok::String {
886            if let Some(text) = self.lexer.tokstr.clone() {
887                out.push(text);
888            }
889            self.lexer.zshlex();
890        }
891        out
892    }
893
894    /// Parse a newline-separated wordlist. Direct port of
895    /// zsh/Src/parse.c:2379-2398 `par_nl_wordlist`. Like
896    /// par_wordlist but tolerates leading/trailing newlines.
897    pub fn par_nl_wordlist(&mut self) -> Vec<String> {
898        // parse.c:2380-2381 — skip leading newlines.
899        while self.lexer.tok == LexTok::Newlin {
900            self.lexer.zshlex();
901        }
902        let out = self.par_wordlist();
903        // parse.c:2395-2397 — skip trailing newlines.
904        while self.lexer.tok == LexTok::Newlin {
905            self.lexer.zshlex();
906        }
907        out
908    }
909
910    /// Get the integer value of the next token in a cond expression.
911    /// Direct port of zsh/Src/parse.c:2643-2658 `get_cond_num`.
912    /// Used for `[[ N OP M ]]` numeric tests where N/M are integer
913    /// literals or variable references.
914    pub fn get_cond_num(&mut self) -> Option<i64> {
915        if self.lexer.tok != LexTok::String {
916            return None;
917        }
918        let text = self.lexer.tokstr.as_ref()?.clone();
919        // parse.c:2647-2655 — parse as integer with optional sign.
920        let parsed = text.parse::<i64>().ok()?;
921        self.lexer.zshlex();
922        Some(parsed)
923    }
924
    /// Report a parser-level error with message `msg`. Counterpart of
    /// `yyerror` in zsh/Src/parse.c (cited in this port as
    /// parse.c:2733-2766).
    ///
    /// This is a thin delegate to `self.error(msg)`; all recording of
    /// the message happens there.
    ///
    /// NOTE(review): presumably `self.error` appends to `self.errors`,
    /// which `parse()` returns in its `Err` variant — confirm against
    /// the definition of `error`.
    pub fn yyerror(&mut self, msg: &str) {
        // parse.c:2735-2765 — the C version collects the offending
        // token's text and line number and sets errflag; here that is
        // left entirely to self.error().
        self.error(msg);
    }
935
936    // ============================================================
937    // Wordcode emission stubs (parse.c private helpers)
938    //
939    // The following functions are direct counterparts of zsh's
940    // private wordcode-emission helpers in parse.c. zsh uses these
941    // to write u32 opcodes into a flat `ecbuf` array; zshrs builds
942    // an AST tree and never emits wordcode at the parse layer.
943    // The implementations are documented stubs that preserve the
944    // function signatures + cite the C source. Real wordcode would
945    // be emitted later by compile_zsh.rs walking the AST.
946    //
947    // Listed for port-surface completeness so every parse.c symbol
948    // has a Rust counterpart even when the algorithm is moot in the
949    // AST architecture.
950    // ============================================================
951
    /// API-parity stub for `set_list_code` in zsh/Src/parse.c (cited in
    /// this port as parse.c:736-749). Does nothing; all arguments are
    /// ignored.
    ///
    /// In C, an `ecadd(0)` placeholder is emitted before par_sublist
    /// runs and this function later rewrites that slot with
    /// `WCB_LIST(type, distance)` once the sublist's length is known.
    ///
    /// zshrs port note: AST nodes are built inline, so there is no
    /// placeholder to patch — a `ZshList { sublist, flags }` is created
    /// with its final flags from the start.
    pub fn set_list_code(_p: usize, _type_code: i32, _cmplx: bool) {
        // parse.c:740-748 — wordcode patching. zshrs no-op.
    }
966
    /// API-parity stub for `set_sublist_code` in zsh/Src/parse.c (cited
    /// in this port as parse.c:753-763). Does nothing; all arguments
    /// are ignored.
    ///
    /// In C this plays the same role as `set_list_code`, one level
    /// down: it patches a sublist placeholder wordcode with its real
    /// opcode. The AST-based port has no placeholder to patch.
    pub fn set_sublist_code(_p: usize, _type_code: i32, _flags: i32, _skip: i32, _cmplx: bool) {
        // parse.c:757-762 — wordcode patching. zshrs no-op.
    }
973
    /// API-parity stub for `ecadd` in zsh/Src/parse.c (cited in this
    /// port as parse.c:396-408). Ignores `_c` and always returns `0`.
    ///
    /// In C this appends one wordcode to `ecbuf` (growing it on demand)
    /// and returns the index of the new slot. The AST-based port has
    /// no wordcode buffer, so the returned index is meaningless and
    /// must not be used as a position.
    pub fn ecadd(_c: u32) -> usize {
        // parse.c:399-407 — append to ecbuf with grow-on-demand.
        // zshrs no-op.
        0
    }
982
    /// API-parity stub for `ecdel` in zsh/Src/parse.c (cited in this
    /// port as parse.c:412-421). Does nothing; `_p` is ignored.
    ///
    /// In C this removes the wordcode at position `p` from the buffer.
    /// The AST-based port has no wordcode buffer to edit.
    pub fn ecdel(_p: usize) {
        // parse.c:415-420 — memmove + decrement ecused. zshrs no-op.
    }
988
    /// API-parity stub for `ecstrcode` in zsh/Src/parse.c (cited in
    /// this port as parse.c:425-471). Ignores `_s` and always returns
    /// `0`.
    ///
    /// Per the port notes, the C version packs short strings directly
    /// into a single wordcode and stores longer ones in a de-duplicated
    /// binary tree (Eccstr). The AST stores strings directly, so no
    /// encoding takes place and the returned value carries no meaning.
    pub fn ecstrcode(_s: &str) -> u32 {
        // parse.c:432-470 — the actual encoding logic. zshrs no-op.
        0
    }
999
    /// API-parity stub for `ecispace` in zsh/Src/parse.c (cited in this
    /// port as parse.c:371-388). Does nothing; `_p` and `_n` are
    /// ignored.
    ///
    /// In C this inserts `n` empty wordcode slots at position `p`,
    /// reserving room for a forward-jump opcode that is patched once
    /// its target is known. The AST-based port has no wordcode buffer,
    /// so there is nothing to reserve.
    pub fn ecispace(_p: usize, _n: usize) {
        // parse.c:376-387 — grow + memmove + adjust hdocs. zshrs no-op.
    }
1008
    /// API-parity stub for `ecadjusthere` in zsh/Src/parse.c (cited in
    /// this port as parse.c:359-367). Does nothing; `_p` and `_d` are
    /// ignored.
    ///
    /// In C this walks the pending-heredoc list and shifts each stored
    /// wordcode position by `d` after ecispace / ecdel move the buffer.
    /// Per the port notes, zshrs tracks heredocs by index in the
    /// lexer's collection rather than by wordcode offset, so nothing
    /// needs adjusting.
    pub fn ecadjusthere(_p: usize, _d: i32) {
        // parse.c:362-366 — walk hdocs list, bump pc by d. zshrs no-op.
    }
1017
1018    // ============================================================
1019    // Eprog runtime ops (parse.c:2767-2853)
1020    //
1021    // dupeprog / useeprog / freeeprog are zsh's reference-counting
1022    // helpers for executable programs. zshrs's AST is owned by
1023    // value (Rust ownership); cloning is a tree-deep copy via
1024    // Clone, "use" is a no-op (the executor borrows the AST), and
1025    // "free" is automatic on drop.
1026    // ============================================================
1027
    /// Return an independent copy of `prog`. Counterpart of `dupeprog`
    /// in zsh/Src/parse.c (cited in this port as parse.c:2767-2812).
    ///
    /// The C version deep-copies the wordcode array, string table and
    /// pattern programs; here the copy is whatever `ZshProgram`'s
    /// `Clone` implementation produces. `prog` itself is not modified.
    pub fn dupeprog(prog: &ZshProgram) -> ZshProgram {
        prog.clone()
    }
1034
    /// API-parity stub for `useeprog` in zsh/Src/parse.c (cited in this
    /// port as parse.c:2813-2822). Does nothing; `_prog` is ignored.
    ///
    /// In C this bumps the program's reference count. The AST is
    /// managed by Rust ownership and borrowing, so there is no count
    /// to maintain.
    pub fn useeprog(_prog: &ZshProgram) {
        // parse.c:2815-2821 — `prog->nref++` if not heap-allocated.
        // zshrs no-op.
    }
1042
    /// Release a program. Counterpart of `freeeprog` in zsh/Src/parse.c
    /// (cited in this port as parse.c:2823-2854).
    ///
    /// The body is empty: `_prog` is taken by value, so it is dropped
    /// when this function returns. The C version instead decrements a
    /// reference count and frees the program when it reaches zero.
    pub fn freeeprog(_prog: ZshProgram) {
        // parse.c:2825-2853 — decrement nref, free if zero. zshrs
        // drops via Rust ownership.
    }
1050
1051    // ============================================================
1052    // Wordcode runtime getters (parse.c:2853-3060)
1053    //
1054    // These read packed wordcode out of a running Eprog at execution
1055    // time. zshrs's executor walks the AST directly so these are
1056    // stubs that preserve the C signatures + cite the source.
1057    // ============================================================
1058
    /// API-parity stub for `ecgetstr` in zsh/Src/parse.c (cited in this
    /// port as parse.c:2853-2887). Ignores `_dup` and always returns an
    /// empty `String`.
    ///
    /// Per the port notes, the C version unpacks a string from the
    /// wordcode stream (inline for short strings, via the strs table
    /// for longer ones). The AST stores strings directly, so there is
    /// nothing to unpack.
    pub fn ecgetstr(_dup: bool) -> String {
        // parse.c:2858-2886 — wordcode unpack logic. zshrs no-op.
        String::new()
    }
1067
    /// API-parity stub for `ecrawstr` in zsh/Src/parse.c (cited in this
    /// port as parse.c:2890-2913). Always returns an empty `String`.
    ///
    /// In C this reads a packed string without advancing the wordcode
    /// pointer; the AST-based port has no wordcode stream to read.
    pub fn ecrawstr() -> String {
        String::new()
    }
1074
    /// API-parity stub for `ecgetarr` in zsh/Src/parse.c (cited in this
    /// port as parse.c:2916-2933). Ignores `_num` and `_dup` and always
    /// returns an empty `Vec`.
    ///
    /// In C this reads a string array out of the wordcode stream; the
    /// AST-based port has no wordcode stream to read.
    pub fn ecgetarr(_num: usize, _dup: bool) -> Vec<String> {
        Vec::new()
    }
1080
    /// API-parity stub for `ecgetlist` in zsh/Src/parse.c (cited in
    /// this port as parse.c:2936-2955). Ignores `_num` and `_dup` and
    /// always returns an empty `Vec`.
    ///
    /// In C this reads a linked list of strings out of the wordcode
    /// stream; the AST-based port has no wordcode stream to read.
    pub fn ecgetlist(_num: usize, _dup: bool) -> Vec<String> {
        Vec::new()
    }
1086
    /// API-parity stub for `ecgetredirs` in zsh/Src/parse.c (cited in
    /// this port as parse.c:2958-2991). Always returns an empty `Vec`.
    ///
    /// In C this decodes a run of redirection wordcodes. In zshrs,
    /// redirections live on the AST as `ZshRedir` nodes, so this
    /// function never yields any.
    pub fn ecgetredirs() -> Vec<ZshRedir> {
        Vec::new()
    }
1093
    /// API-parity stub for `eccopyredirs` in zsh/Src/parse.c (cited in
    /// this port as parse.c:3001-3060). Always returns `None`.
    ///
    /// In C this copies consecutive redirection wordcodes into a new
    /// Eprog; the AST-based port has no wordcode to copy.
    pub fn eccopyredirs() -> Option<ZshProgram> {
        None
    }
1100
    /// API-parity stub for `init_eprog` in zsh/Src/parse.c (cited in
    /// this port as parse.c:3068-3075). Does nothing.
    ///
    /// In C this sets up the dummy Eprog used as a placeholder. The AST
    /// has no equivalent dummy node — an empty program is simply
    /// `ZshProgram { lists: vec![] }`.
    pub fn init_eprog() {
        // parse.c:3071-3074 — set up dummy_eprog_code = WCB_END().
        // zshrs no-op.
    }
1109
1110    /// Parse the complete input
1111    pub fn parse(&mut self) -> Result<ZshProgram, Vec<ParseError>> {
1112        self.lexer.zshlex();
1113
1114        let mut program = self.parse_program_until(None);
1115
1116        if !self.errors.is_empty() {
1117            return Err(std::mem::take(&mut self.errors));
1118        }
1119        // Surface lexer-level errors (unmatched quote/heredoc/etc.)
1120        // that the parser silently rolls past. zsh aborts with a
1121        // diagnostic in this case; mirror it.
1122        if let Some(msg) = self.lexer.error.clone() {
1123            return Err(vec![ParseError {
1124                message: msg,
1125                line: 1,
1126            }]);
1127        }
1128
1129        // Post-pass: wire heredoc bodies (collected by lexer.process_heredocs)
1130        // back into ZshRedir.heredoc fields via heredoc_idx.
1131        let bodies: Vec<HereDocInfo> = self
1132            .lexer
1133            .heredocs
1134            .iter()
1135            .map(|h| HereDocInfo {
1136                content: h.content.clone(),
1137                terminator: h.terminator.clone(),
1138                quoted: h.quoted,
1139            })
1140            .collect();
1141        if !bodies.is_empty() {
1142            fill_heredoc_bodies(&mut program, &bodies);
1143        }
1144
1145        Ok(program)
1146    }
1147
    /// Parse a program (a sequence of lists) with no caller-supplied
    /// end tokens.
    ///
    /// Thin wrapper around `parse_program_until(None)`: parsing stops
    /// at end of input, at a lexer error, or at one of the
    /// block-closing tokens that `parse_program_until` always stops on.
    /// The port notes relate this to the `parse_event` / `parse_list` /
    /// `par_event` flow in zsh/Src/parse.c (cited as parse.c:614-720).
    fn parse_program(&mut self) -> ZshProgram {
        self.parse_program_until(None)
    }
1158
1159    /// Parse a program until we hit an end token
1160    /// Parse a program until one of `end_tokens` is seen (or EOF).
1161    /// Drives parse_list in a loop. C equivalent: the body of par_event
1162    /// (parse.c:635-695) iterating par_list against the lexer.
1163    fn parse_program_until(&mut self, end_tokens: Option<&[LexTok]>) -> ZshProgram {
1164        let mut lists = Vec::new();
1165
1166        loop {
1167            if self.check_limit() {
1168                self.error("parser exceeded global iteration limit");
1169                break;
1170            }
1171
1172            // Skip separators
1173            while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1174                if self.check_limit() {
1175                    self.error("parser exceeded global iteration limit");
1176                    return ZshProgram { lists };
1177                }
1178                self.lexer.zshlex();
1179            }
1180
1181            if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
1182                break;
1183            }
1184
1185            // Check for end tokens
1186            if let Some(end_toks) = end_tokens {
1187                if end_toks.contains(&self.lexer.tok) {
1188                    break;
1189                }
1190            }
1191
1192            // Also stop at these tokens when not explicitly looking for them
1193            // Note: Else/Elif/Then are NOT here - they're handled by parse_if
1194            // to allow nested if statements inside case arms, loops, etc.
1195            match self.lexer.tok {
1196                LexTok::Outbrace
1197                | LexTok::Dsemi
1198                | LexTok::Semiamp
1199                | LexTok::Semibar
1200                | LexTok::Done
1201                | LexTok::Fi
1202                | LexTok::Esac
1203                | LexTok::Zend => break,
1204                _ => {}
1205            }
1206
1207            match self.parse_list() {
1208                Some(list) => {
1209                    let detected = simple_name_with_inoutpar(&list);
1210                    lists.push(list);
1211                    // Synthesize a FuncDef for the `name() { body }` shape
1212                    // at parse time so body_source is captured while the
1213                    // lexer still has the input. The lexer port emits
1214                    // `name(` as a single Word ending in `<INPAR><OUTPAR>`,
1215                    // so the Simple list is followed by an Inbrace once
1216                    // separators are skipped. For `name() cmd args` the
1217                    // body has already been swallowed into the same
1218                    // Simple's words tail — synthesize directly from there.
1219                    if let Some((names, body_argv)) = detected {
1220                        if !body_argv.is_empty() {
1221                            // One-line body already in the Simple. Build
1222                            // a Simple from body_argv as the function body.
1223                            lists.pop();
1224                            let body_simple = ZshCommand::Simple(ZshSimple {
1225                                assigns: Vec::new(),
1226                                words: body_argv,
1227                                redirs: Vec::new(),
1228                            });
1229                            let body_list = ZshList {
1230                                sublist: ZshSublist {
1231                                    pipe: ZshPipe {
1232                                        cmd: body_simple,
1233                                        next: None,
1234                                        lineno: self.lexer.lineno,
1235                                        merge_stderr: false,
1236                                    },
1237                                    next: None,
1238                                    flags: SublistFlags::default(),
1239                                },
1240                                flags: ListFlags::default(),
1241                            };
1242                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1243                                names,
1244                                body: Box::new(ZshProgram {
1245                                    lists: vec![body_list],
1246                                }),
1247                                tracing: false,
1248                                auto_call_args: None,
1249                                body_source: None,
1250                            });
1251                            let synthetic = ZshList {
1252                                sublist: ZshSublist {
1253                                    pipe: ZshPipe {
1254                                        cmd: funcdef,
1255                                        next: None,
1256                                        lineno: self.lexer.lineno,
1257                                        merge_stderr: false,
1258                                    },
1259                                    next: None,
1260                                    flags: SublistFlags::default(),
1261                                },
1262                                flags: ListFlags::default(),
1263                            };
1264                            lists.push(synthetic);
1265                            continue;
1266                        }
1267                        // Else: words.len() == 1 (only the trailing `name()`
1268                        // word), brace body follows. `names` may carry
1269                        // multiple identifiers from the `fna fnb fnc()`
1270                        // shorthand — all share the same brace body per
1271                        // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
1272                        // Skip separators on the real lexer; safe because
1273                        // parse_program's next iteration would also skip them.
1274                        while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1275                            self.lexer.zshlex();
1276                        }
1277                        if self.lexer.tok == LexTok::Inbrace {
1278                            // Capture body_start BEFORE the lexer
1279                            // advances past the first body token. The
1280                            // outer zshlex() consumed `{`; lexer.pos
1281                            // is now right after `{`. The next
1282                            // `zshlex()` would advance past `echo`,
1283                            // making body_start land mid-body and
1284                            // lose the first word — `typeset -f f`
1285                            // printed `a; echo b` instead of
1286                            // `echo a; echo b` for `f() { echo a;
1287                            // echo b }`.
1288                            let body_start = self.lexer.pos;
1289                            self.lexer.zshlex();
1290                            let body = self.parse_program();
1291                            let body_end = if self.lexer.tok == LexTok::Outbrace {
1292                                self.lexer.pos.saturating_sub(1)
1293                            } else {
1294                                self.lexer.pos
1295                            };
1296                            let body_source = self
1297                                .lexer
1298                                .input
1299                                .get(body_start..body_end)
1300                                .map(|s| s.trim().to_string())
1301                                .filter(|s| !s.is_empty());
1302                            if self.lexer.tok == LexTok::Outbrace {
1303                                self.lexer.zshlex();
1304                            }
1305                            // Replace the Simple list with a FuncDef list.
1306                            lists.pop();
1307                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1308                                names,
1309                                body: Box::new(body),
1310                                tracing: false,
1311                                auto_call_args: None,
1312                                body_source,
1313                            });
1314                            let synthetic = ZshList {
1315                                sublist: ZshSublist {
1316                                    pipe: ZshPipe {
1317                                        cmd: funcdef,
1318                                        next: None,
1319                                        lineno: self.lexer.lineno,
1320                                        merge_stderr: false,
1321                                    },
1322                                    next: None,
1323                                    flags: SublistFlags::default(),
1324                                },
1325                                flags: ListFlags::default(),
1326                            };
1327                            lists.push(synthetic);
1328                        } else if !matches!(
1329                            self.lexer.tok,
1330                            LexTok::Endinput | LexTok::Outbrace | LexTok::Seper | LexTok::Newlin
1331                        ) {
1332                            // No-brace one-line body: `foo() echo hello`.
1333                            // Parse a single command for the body.
1334                            let body_cmd = self.parse_cmd();
1335                            if let Some(cmd) = body_cmd {
1336                                let body_list = ZshList {
1337                                    sublist: ZshSublist {
1338                                        pipe: ZshPipe {
1339                                            cmd,
1340                                            next: None,
1341                                            lineno: self.lexer.lineno,
1342                                            merge_stderr: false,
1343                                        },
1344                                        next: None,
1345                                        flags: SublistFlags::default(),
1346                                    },
1347                                    flags: ListFlags::default(),
1348                                };
1349                                lists.pop();
1350                                let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1351                                    names: names.clone(),
1352                                    body: Box::new(ZshProgram {
1353                                        lists: vec![body_list],
1354                                    }),
1355                                    tracing: false,
1356                                    auto_call_args: None,
1357                                    body_source: None,
1358                                });
1359                                let synthetic = ZshList {
1360                                    sublist: ZshSublist {
1361                                        pipe: ZshPipe {
1362                                            cmd: funcdef,
1363                                            next: None,
1364                                            lineno: self.lexer.lineno,
1365                                            merge_stderr: false,
1366                                        },
1367                                        next: None,
1368                                        flags: SublistFlags::default(),
1369                                    },
1370                                    flags: ListFlags::default(),
1371                                };
1372                                lists.push(synthetic);
1373                            }
1374                        }
1375                    }
1376                }
1377                None => break,
1378            }
1379        }
1380
1381        ZshProgram { lists }
1382    }
1383
1384    /// Parse a list (sublist with optional & or ;).
1385    ///
1386    /// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
1387    /// par_list1 wrapper at parse.c:807-817).
1388    ///
1389    /// **Structural divergence**: zsh's parse.c emits flat wordcode
1390    /// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
1391    /// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
1392    /// builds an AST node `ZshList { sublist, flags }` instead. The
1393    /// async/sync/disown discrimination at parse.c:785-790 maps to
1394    /// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
1395    /// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
1396    /// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
1397    /// representation. This divergence is repository-wide: every
1398    /// `par_*` function emits wordcode in C, every `parse_*` builds
1399    /// AST in Rust. The compile_zsh module then traverses the AST to
1400    /// emit fusevm bytecode, which serves the same role as zsh's
1401    /// wordcode but with a different opcode set and execution model.
1402    fn parse_list(&mut self) -> Option<ZshList> {
1403        let sublist = self.parse_sublist()?;
1404
1405        let flags = match self.lexer.tok {
1406            LexTok::Amper => {
1407                self.lexer.zshlex();
1408                ListFlags {
1409                    async_: true,
1410                    disown: false,
1411                }
1412            }
1413            LexTok::Amperbang => {
1414                self.lexer.zshlex();
1415                ListFlags {
1416                    async_: true,
1417                    disown: true,
1418                }
1419            }
1420            LexTok::Seper | LexTok::Semi | LexTok::Newlin => {
1421                self.lexer.zshlex();
1422                ListFlags::default()
1423            }
1424            _ => ListFlags::default(),
1425        };
1426
1427        Some(ZshList { sublist, flags })
1428    }
1429
1430    /// Parse a sublist (pipelines connected by && or ||).
1431    ///
1432    /// Direct port of zsh/Src/parse.c:825-867 `par_sublist` and
1433    /// par_sublist2 at parse.c:869-892. par_sublist handles the
1434    /// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
1435    /// handles the leading `!` negation and `coproc` keyword.
1436    ///
1437    /// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
1438    /// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
1439    /// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
1440    fn parse_sublist(&mut self) -> Option<ZshSublist> {
1441        self.recursion_depth += 1;
1442        if self.check_recursion() {
1443            self.error("parse_sublist: max recursion depth exceeded");
1444            self.recursion_depth -= 1;
1445            return None;
1446        }
1447
1448        let mut flags = SublistFlags::default();
1449
1450        // Handle coproc and !
1451        if self.lexer.tok == LexTok::Coproc {
1452            flags.coproc = true;
1453            self.lexer.zshlex();
1454        } else if self.lexer.tok == LexTok::Bang {
1455            flags.not = true;
1456            self.lexer.zshlex();
1457        }
1458
1459        let pipe = match self.parse_pipe() {
1460            Some(p) => p,
1461            None => {
1462                self.recursion_depth -= 1;
1463                return None;
1464            }
1465        };
1466
1467        // Check for && or ||
1468        let next = match self.lexer.tok {
1469            LexTok::Damper => {
1470                self.lexer.zshlex();
1471                self.skip_separators();
1472                self.parse_sublist().map(|s| (SublistOp::And, Box::new(s)))
1473            }
1474            LexTok::Dbar => {
1475                self.lexer.zshlex();
1476                self.skip_separators();
1477                self.parse_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1478            }
1479            _ => None,
1480        };
1481
1482        self.recursion_depth -= 1;
1483        Some(ZshSublist { pipe, next, flags })
1484    }
1485
1486    /// Parse a pipeline
1487    /// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1488    /// zsh/Src/parse.c:894-956 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1489    /// C emits WC_PIPE wordcodes per command; same flow.
1490    fn parse_pipe(&mut self) -> Option<ZshPipe> {
1491        self.recursion_depth += 1;
1492        if self.check_recursion() {
1493            self.error("parse_pipe: max recursion depth exceeded");
1494            self.recursion_depth -= 1;
1495            return None;
1496        }
1497
1498        let lineno = self.lexer.toklineno;
1499        let cmd = match self.parse_cmd() {
1500            Some(c) => c,
1501            None => {
1502                self.recursion_depth -= 1;
1503                return None;
1504            }
1505        };
1506
1507        // Check for | or |&
1508        let mut merge_stderr = false;
1509        let next = match self.lexer.tok {
1510            LexTok::Bar | LexTok::Baramp => {
1511                merge_stderr = self.lexer.tok == LexTok::Baramp;
1512                self.lexer.zshlex();
1513                self.skip_separators();
1514                self.parse_pipe().map(Box::new)
1515            }
1516            _ => None,
1517        };
1518
1519        self.recursion_depth -= 1;
1520        Some(ZshPipe {
1521            cmd,
1522            next,
1523            lineno,
1524            merge_stderr,
1525        })
1526    }
1527
1528    /// Parse a command
1529    /// Parse a command — dispatches by leading token (FOR / CASE /
1530    /// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1531    /// INPAR subshell / INBRACE current-shell / TIME / NOCORRECT,
1532    /// else simple). Direct port of zsh/Src/parse.c:958-1085 `par_cmd`.
1533    fn parse_cmd(&mut self) -> Option<ZshCommand> {
1534        // Parse leading redirections
1535        let mut redirs = Vec::new();
1536        while self.lexer.tok.is_redirop() {
1537            if let Some(redir) = self.parse_redir() {
1538                redirs.push(redir);
1539            }
1540        }
1541
1542        let cmd = match self.lexer.tok {
1543            LexTok::For | LexTok::Foreach => self.parse_for(),
1544            LexTok::Select => self.parse_select(),
1545            LexTok::Case => self.parse_case(),
1546            LexTok::If => self.parse_if(),
1547            LexTok::While => self.parse_while(false),
1548            LexTok::Until => self.parse_while(true),
1549            LexTok::Repeat => self.parse_repeat(),
1550            LexTok::Inpar => self.parse_subsh(),
1551            LexTok::Inoutpar => self.parse_anon_funcdef(),
1552            LexTok::Inbrace => self.parse_cursh(),
1553            LexTok::Func => self.parse_funcdef(),
1554            LexTok::Dinbrack => self.parse_cond(),
1555            LexTok::Dinpar => self.parse_arith(),
1556            LexTok::Time => self.parse_time(),
1557            _ => self.parse_simple(redirs),
1558        };
1559
1560        // Parse trailing redirections. For Simple commands the redirs were
1561        // already captured inside parse_simple; for compound forms (Cursh,
1562        // Subsh, If, While, etc.) we collect them here and wrap in
1563        // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1564        if let Some(inner) = cmd {
1565            let mut trailing: Vec<ZshRedir> = Vec::new();
1566            while self.lexer.tok.is_redirop() {
1567                if let Some(redir) = self.parse_redir() {
1568                    trailing.push(redir);
1569                }
1570            }
1571            if trailing.is_empty() {
1572                return Some(inner);
1573            }
1574            // Simple already absorbed its own redirs (compile path expects
1575            // them on ZshSimple), so don't double-wrap.
1576            if matches!(inner, ZshCommand::Simple(_)) {
1577                if let ZshCommand::Simple(mut s) = inner {
1578                    s.redirs.extend(trailing);
1579                    return Some(ZshCommand::Simple(s));
1580                }
1581                unreachable!()
1582            }
1583            return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1584        }
1585
1586        None
1587    }
1588
1589    /// Parse a simple command
1590    /// Parse a simple command (assignments + words + redirections).
1591    /// Direct port of zsh/Src/parse.c:1836-2228 `par_simple` —
1592    /// the largest single function in parse.c. Handles ENVSTRING/
1593    /// ENVARRAY assignments at command head, intermixed redirs,
1594    /// typeset-style multi-assignment commands, and the trailing
1595    /// inout-par `()` that converts a simple command into an inline
1596    /// function definition.
1597    fn parse_simple(&mut self, mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
1598        let mut assigns = Vec::new();
1599        let mut words = Vec::new();
1600        const MAX_ITERATIONS: usize = 10_000;
1601        let mut iterations = 0;
1602
1603        // Parse leading assignments
1604        while self.lexer.tok == LexTok::Envstring || self.lexer.tok == LexTok::Envarray {
1605            iterations += 1;
1606            if iterations > MAX_ITERATIONS {
1607                self.error("parse_simple: exceeded max iterations in assignments");
1608                return None;
1609            }
1610            if let Some(assign) = self.parse_assign() {
1611                assigns.push(assign);
1612            }
1613            self.lexer.zshlex();
1614        }
1615
1616        // Parse words and redirections
1617        loop {
1618            iterations += 1;
1619            if iterations > MAX_ITERATIONS {
1620                self.error("parse_simple: exceeded max iterations");
1621                return None;
1622            }
1623            match self.lexer.tok {
1624                LexTok::String | LexTok::Typeset => {
1625                    let s = self.lexer.tokstr.clone();
1626                    if let Some(s) = s {
1627                        words.push(s);
1628                    }
1629                    self.lexer.zshlex();
1630                    // Check for function definition foo() { ... }
1631                    if words.len() == 1 && self.peek_inoutpar() {
1632                        return self.parse_inline_funcdef(words.pop().unwrap());
1633                    }
1634                    // `{name}>file` named-fd redirect: the lexer doesn't
1635                    // recognize this shape, so the bare word `{name}`
1636                    // arrives as a String. If it matches `{IDENT}` and
1637                    // the NEXT token is a redirop, pop it off as the
1638                    // varid for that redir.
1639                    if !words.is_empty() && self.lexer.tok.is_redirop() {
1640                        let last = words.last().unwrap();
1641                        let untoked = crate::lexer::untokenize(last);
1642                        if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
1643                            let name = &untoked[1..untoked.len() - 1];
1644                            if !name.is_empty()
1645                                && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
1646                                && name
1647                                    .chars()
1648                                    .next()
1649                                    .map(|c| c == '_' || c.is_ascii_alphabetic())
1650                                    .unwrap_or(false)
1651                            {
1652                                let varid = name.to_string();
1653                                words.pop();
1654                                if let Some(mut redir) = self.parse_redir() {
1655                                    redir.varid = Some(varid);
1656                                    redirs.push(redir);
1657                                }
1658                                continue;
1659                            }
1660                        }
1661                    }
1662                }
1663                _ if self.lexer.tok.is_redirop() => {
1664                    match self.parse_redir() {
1665                        Some(redir) => redirs.push(redir),
1666                        None => break, // Error in redir parsing, stop
1667                    }
1668                }
1669                LexTok::Inoutpar if !words.is_empty() => {
1670                    // foo() { ... } style function
1671                    return self.parse_inline_funcdef(words.pop().unwrap());
1672                }
1673                _ => break,
1674            }
1675        }
1676
1677        if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
1678            return None;
1679        }
1680
1681        Some(ZshCommand::Simple(ZshSimple {
1682            assigns,
1683            words,
1684            redirs,
1685        }))
1686    }
1687
1688    /// Parse an assignment
1689    /// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
1690    /// Sub-routine of parse_simple. The C source handles assignments
1691    /// inline in par_simple via the ENVSTRING/ENVARRAY token paths
1692    /// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
1693    /// helper for clarity.
1694    fn parse_assign(&mut self) -> Option<ZshAssign> {
1695        use crate::tokens::char_tokens;
1696
1697        let tokstr = self.lexer.tokstr.as_ref()?;
1698
1699        // Parse name=value or name+=value.
1700        let (name, value_str, append) = if self.lexer.tok == LexTok::Envarray {
1701            let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
1702                (stripped, true)
1703            } else {
1704                (tokstr.as_str(), false)
1705            };
1706            (name.to_string(), String::new(), append)
1707        } else if let Some(pos) = tokstr.find(char_tokens::EQUALS) {
1708            let name_part = &tokstr[..pos];
1709            let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
1710                (stripped, true)
1711            } else {
1712                (name_part, false)
1713            };
1714            (
1715                name.to_string(),
1716                tokstr[pos + char_tokens::EQUALS.len_utf8()..].to_string(),
1717                append,
1718            )
1719        } else if let Some(pos) = tokstr.find('=') {
1720            // Fallback to literal '=' for compatibility
1721            let name_part = &tokstr[..pos];
1722            let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
1723                (stripped, true)
1724            } else {
1725                (name_part, false)
1726            };
1727            (name.to_string(), tokstr[pos + 1..].to_string(), append)
1728        } else {
1729            return None;
1730        };
1731
1732        let value = if self.lexer.tok == LexTok::Envarray {
1733            // Array assignment: name=(...)
1734            let mut elements = Vec::new();
1735            self.lexer.zshlex(); // skip past token
1736
1737            let mut arr_iters = 0;
1738            const MAX_ARRAY_ELEMENTS: usize = 10_000;
1739            while matches!(
1740                self.lexer.tok,
1741                LexTok::String | LexTok::Seper | LexTok::Newlin
1742            ) {
1743                arr_iters += 1;
1744                if arr_iters > MAX_ARRAY_ELEMENTS {
1745                    self.error("array assignment exceeded maximum elements");
1746                    break;
1747                }
1748                if self.lexer.tok == LexTok::String {
1749                    if let Some(ref s) = self.lexer.tokstr {
1750                        elements.push(s.clone());
1751                    }
1752                }
1753                self.lexer.zshlex();
1754            }
1755
1756            // The closing OUTPAR is consumed here. The outer parse_simple
1757            // loop will then `zshlex()` past whatever follows (typically
1758            // a separator or the next word) — calling zshlex twice in
1759            // tandem (here AND in parse_simple) over-advances and merges
1760            // a following `name() { … }` funcdef into the same Simple.
1761            // We only consume Outpar; let the caller handle the rest.
1762            // Without this guard `g=(o1); f() { :; }` parsed as one
1763            // Simple with assigns=[g] and words=["f()"] (one token).
1764            if self.lexer.tok == LexTok::Outpar {
1765                // Note: do NOT zshlex() here. parse_simple's `self.lexer
1766                // .zshlex()` after `parse_assign` returns advances past
1767                // the Outpar onto the next significant token.
1768                //
1769                // Force `incmdpos=true` so the next zshlex() recognizes
1770                // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
1771                // The lexer flips incmdpos to false on bare Outpar (which
1772                // is correct for subshell-close context), but for an
1773                // array-assignment close more assigns/words may follow.
1774                self.lexer.incmdpos = true;
1775            }
1776
1777            ZshAssignValue::Array(elements)
1778        } else {
1779            ZshAssignValue::Scalar(value_str)
1780        };
1781
1782        Some(ZshAssign {
1783            name,
1784            value,
1785            append,
1786        })
1787    }
1788
1789    /// Parse a redirection
1790    /// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
1791    /// Direct port of zsh/Src/parse.c:2229-2346 `par_redir`. Returns
1792    /// a ZshRedir node carrying the operator type, fd, target word
1793    /// (or here-doc body / pipe-redir command), and any `{var}` style
1794    /// fd-binding parameter.
1795    fn parse_redir(&mut self) -> Option<ZshRedir> {
1796        let rtype = match self.lexer.tok {
1797            LexTok::Outang => RedirType::Write,
1798            LexTok::Outangbang => RedirType::Writenow,
1799            LexTok::Doutang => RedirType::Append,
1800            LexTok::Doutangbang => RedirType::Appendnow,
1801            LexTok::Inang => RedirType::Read,
1802            LexTok::Inoutang => RedirType::ReadWrite,
1803            LexTok::Dinang => RedirType::Heredoc,
1804            LexTok::Dinangdash => RedirType::HeredocDash,
1805            LexTok::Trinang => RedirType::Herestr,
1806            LexTok::Inangamp => RedirType::MergeIn,
1807            LexTok::Outangamp => RedirType::MergeOut,
1808            LexTok::Ampoutang => RedirType::ErrWrite,
1809            LexTok::Outangampbang => RedirType::ErrWritenow,
1810            LexTok::Doutangamp => RedirType::ErrAppend,
1811            LexTok::Doutangampbang => RedirType::ErrAppendnow,
1812            _ => return None,
1813        };
1814
1815        let fd = if self.lexer.tokfd >= 0 {
1816            self.lexer.tokfd
1817        } else if matches!(
1818            rtype,
1819            RedirType::Read
1820                | RedirType::ReadWrite
1821                | RedirType::MergeIn
1822                | RedirType::Heredoc
1823                | RedirType::HeredocDash
1824                | RedirType::Herestr
1825        ) {
1826            0
1827        } else {
1828            1
1829        };
1830
1831        self.lexer.zshlex();
1832
1833        let name = match self.lexer.tok {
1834            LexTok::String | LexTok::Envstring => {
1835                let n = self.lexer.tokstr.clone().unwrap_or_default();
1836                self.lexer.zshlex();
1837                n
1838            }
1839            _ => {
1840                self.error("expected word after redirection");
1841                return None;
1842            }
1843        };
1844
1845        // Heredoc body capture: when reading the terminator above, the
1846        // lexer pushed a HereDoc to self.lexer.heredocs[]. Record the
1847        // index so fill_heredoc_bodies() can wire content back after
1848        // process_heredocs() has run.
1849        let heredoc_idx = if matches!(rtype, RedirType::Heredoc | RedirType::HeredocDash) {
1850            if !self.lexer.heredocs.is_empty() {
1851                Some(self.lexer.heredocs.len() - 1)
1852            } else {
1853                None
1854            }
1855        } else {
1856            None
1857        };
1858
1859        Some(ZshRedir {
1860            rtype,
1861            fd,
1862            name,
1863            heredoc: None,
1864            varid: None,
1865            heredoc_idx,
1866        })
1867    }
1868
1869    /// Parse for/foreach loop
1870    /// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1871    /// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1872    /// of zsh/Src/parse.c:1087-1207 `par_for`. parse_for_cstyle is the
1873    /// inner branch for the `((...))` arithmetic-header variant
1874    /// (parse.c:1100-1140 inside par_for).
1875    fn parse_for(&mut self) -> Option<ZshCommand> {
1876        let is_foreach = self.lexer.tok == LexTok::Foreach;
1877        self.lexer.zshlex();
1878
1879        // Check for C-style: for (( init; cond; step ))
1880        if self.lexer.tok == LexTok::Dinpar {
1881            return self.parse_for_cstyle();
1882        }
1883
1884        // Get variable name(s). zsh parse.c par_for accepts multiple
1885        // identifier tokens before `in`/`(`/newline — `for k v in ...`
1886        // assigns each iteration's pair of values to k and v in turn.
1887        // We store the names space-joined since variable identifiers
1888        // can't contain whitespace.
1889        let mut names: Vec<String> = Vec::new();
1890        while self.lexer.tok == LexTok::String {
1891            let v = self.lexer.tokstr.clone().unwrap_or_default();
1892            if v == "in" {
1893                break;
1894            }
1895            names.push(v);
1896            self.lexer.zshlex();
1897        }
1898        if names.is_empty() {
1899            self.error("expected variable name in for");
1900            return None;
1901        }
1902        let var = names.join(" ");
1903
1904        // Skip newlines
1905        self.skip_separators();
1906
1907        // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1908        // single String token with the parens lexed-as-content
1909        // (`<INPAR>a b c<OUTPAR>`) instead of as separate Inpar/String/
1910        // Outpar tokens. Detect that shape and split it manually.
1911        let list = if self.lexer.tok == LexTok::String
1912            && self
1913                .lexer
1914                .tokstr
1915                .as_ref()
1916                .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1917                .unwrap_or(false)
1918        {
1919            let raw = self.lexer.tokstr.clone().unwrap_or_default();
1920            // Strip leading INPAR + trailing OUTPAR, then untokenize the
1921            // inner content and split on whitespace for the word list.
1922            let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1923                ..raw
1924                    .char_indices()
1925                    .last()
1926                    .map(|(i, _)| i)
1927                    .unwrap_or(raw.len())];
1928            let cleaned = crate::lexer::untokenize(inner);
1929            let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
1930            self.lexer.zshlex();
1931            ForList::Words(words)
1932        } else if self.lexer.tok == LexTok::String {
1933            let s = self.lexer.tokstr.as_ref();
1934            if s.map(|s| s == "in").unwrap_or(false) {
1935                self.lexer.zshlex();
1936                let mut words = Vec::new();
1937                let mut word_count = 0;
1938                while self.lexer.tok == LexTok::String {
1939                    word_count += 1;
1940                    if word_count > 500 || self.check_limit() {
1941                        self.error("for: too many words");
1942                        return None;
1943                    }
1944                    if let Some(ref s) = self.lexer.tokstr {
1945                        words.push(s.clone());
1946                    }
1947                    self.lexer.zshlex();
1948                }
1949                ForList::Words(words)
1950            } else {
1951                ForList::Positional
1952            }
1953        } else if self.lexer.tok == LexTok::Inpar {
1954            // for var (...)
1955            self.lexer.zshlex();
1956            let mut words = Vec::new();
1957            let mut word_count = 0;
1958            while self.lexer.tok == LexTok::String || self.lexer.tok == LexTok::Seper {
1959                word_count += 1;
1960                if word_count > 500 || self.check_limit() {
1961                    self.error("for: too many words in parens");
1962                    return None;
1963                }
1964                if self.lexer.tok == LexTok::String {
1965                    if let Some(ref s) = self.lexer.tokstr {
1966                        words.push(s.clone());
1967                    }
1968                }
1969                self.lexer.zshlex();
1970            }
1971            if self.lexer.tok == LexTok::Outpar {
1972                // After the `)` of a for-list, the next token is the
1973                // body opener — `do`/`{`. zsh's lexer needs incmdpos
1974                // set so `{` lexes as Inbrace (not as a literal). C
1975                // analogue: parse.c::par_for sets `incmdpos = 1`
1976                // after consuming the OUTPAR before the body parse.
1977                self.lexer.incmdpos = true;
1978                self.lexer.zshlex();
1979            }
1980            ForList::Words(words)
1981        } else {
1982            ForList::Positional
1983        };
1984
1985        // Skip to body
1986        self.skip_separators();
1987
1988        // Parse body
1989        let body = self.parse_loop_body(is_foreach)?;
1990
1991        Some(ZshCommand::For(ZshFor {
1992            var,
1993            list,
1994            body: Box::new(body),
1995            is_select: false,
1996        }))
1997    }
1998
1999    /// Parse C-style for loop: for (( init; cond; step ))
2000    /// Parse the c-style `for ((init; cond; incr)) do BODY done`.
2001    /// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
2002    /// Recognized when the token after FOR is DINPAR (the `((`
2003    /// detected by gettok via dbparens setup).
2004    fn parse_for_cstyle(&mut self) -> Option<ZshCommand> {
2005        // We're at (( (Dinpar None) - the opening ((
2006        // Lexer returns:
2007        //   Dinpar None     - opening ((
2008        //   Dinpar "init"   - init expression, semicolon consumed
2009        //   Dinpar "cond"   - cond expression, semicolon consumed
2010        //   Doutpar "step"  - step expression, closing )) consumed
2011
2012        self.lexer.zshlex(); // Get init: Dinpar "i=0"
2013
2014        if self.lexer.tok != LexTok::Dinpar {
2015            self.error("expected init expression in for ((");
2016            return None;
2017        }
2018        let init = self.lexer.tokstr.clone().unwrap_or_default();
2019
2020        self.lexer.zshlex(); // Get cond: Dinpar "i<10"
2021
2022        if self.lexer.tok != LexTok::Dinpar {
2023            self.error("expected condition in for ((");
2024            return None;
2025        }
2026        let cond = self.lexer.tokstr.clone().unwrap_or_default();
2027
2028        self.lexer.zshlex(); // Get step: Doutpar "i++"
2029
2030        if self.lexer.tok != LexTok::Doutpar {
2031            self.error("expected )) in for");
2032            return None;
2033        }
2034        let step = self.lexer.tokstr.clone().unwrap_or_default();
2035
2036        self.lexer.zshlex(); // Move past ))
2037
2038        self.skip_separators();
2039        let body = self.parse_loop_body(false)?;
2040
2041        Some(ZshCommand::For(ZshFor {
2042            var: String::new(),
2043            list: ForList::CStyle { init, cond, step },
2044            body: Box::new(body),
2045            is_select: false,
2046        }))
2047    }
2048
2049    /// Parse select loop (same syntax as for)
2050    /// Parse `select NAME in WORDS; do BODY; done`. Same shape as
2051    /// `for NAME in WORDS; do ...` but with menu-prompt semantics in
2052    /// the executor. C equivalent: the SELECT case in par_for at
2053    /// parse.c:1087-1207 (selects share parser flow with foreach).
2054    fn parse_select(&mut self) -> Option<ZshCommand> {
2055        // `select` shares parse_for's grammar (var, words, body) but the
2056        // compile path is different (interactive prompt loop).
2057        match self.parse_for()? {
2058            ZshCommand::For(mut f) => {
2059                f.is_select = true;
2060                Some(ZshCommand::For(f))
2061            }
2062            other => Some(other),
2063        }
2064    }
2065
    /// Parse `case WORD in PATTERN) BODY ;; ... esac`, or the brace
    /// form `case WORD { PATTERN) BODY ;; ... }`. Port of `par_case`
    /// in zsh/Src/parse.c.
    ///
    /// Each arm becomes a `CaseArm` of (patterns, body, terminator),
    /// where the terminator is `;;` (default), `;&` (fallthrough), or
    /// `;|` (continue testing). An arm with no terminator token is
    /// recorded as `CaseTerm::Break`; an arm for which no pattern was
    /// collected is not recorded at all.
    ///
    /// The lexer's `incasepat` flag is switched on while patterns are
    /// being read and off again before a body is parsed, and `incmdpos`
    /// is forced on when an arm's `)` is consumed.
    ///
    /// Returns `None` after reporting an error when the word, the
    /// `in` / `{`, or an arm's closing `)` is missing, or when the
    /// pattern-iteration guard trips. Hitting end of input, a lexer
    /// error, or the arm limit ends the loop and still returns the arms
    /// collected so far.
    fn parse_case(&mut self) -> Option<ZshCommand> {
        self.lexer.zshlex(); // skip 'case'

        let word = match self.lexer.tok {
            LexTok::String => {
                let w = self.lexer.tokstr.clone().unwrap_or_default();
                self.lexer.zshlex();
                w
            }
            _ => {
                self.error("expected word after case");
                return None;
            }
        };

        self.skip_separators();

        // Expect 'in' or {
        let use_brace = self.lexer.tok == LexTok::Inbrace;
        if self.lexer.tok == LexTok::String {
            let s = self.lexer.tokstr.as_ref();
            if s.map(|s| s != "in").unwrap_or(true) {
                self.error("expected 'in' in case");
                return None;
            }
        } else if !use_brace {
            self.error("expected 'in' or '{' in case");
            return None;
        }
        // Set incasepat=1 BEFORE consuming "in" so the next token (which
        // could be a leading `(` of a paren-prefixed pattern like
        // `case foo in (a|b) …`) is lexed as Inpar, not as a glob-token.
        // Without this the `(` got swallowed into a gettokstr('(', false)
        // call and produced a String like "(foo)" — the parser then saw
        // the `)` inside a string instead of as a separate Outpar.
        self.lexer.incasepat = 1;
        self.lexer.zshlex();

        let mut arms = Vec::new();
        const MAX_ARMS: usize = 10_000;

        loop {
            // Safety limit on the number of arms; the arms parsed so far
            // are still returned.
            if arms.len() > MAX_ARMS {
                self.error("parse_case: too many arms");
                break;
            }

            // Set incasepat BEFORE skipping separators so the lexer knows
            // we are in case-pattern context. This affects how [ and |
            // are lexed.
            self.lexer.incasepat = 1;

            self.skip_separators();

            // Check for end.
            // Note: 'esac' might arrive as String "esac" if incasepat > 0
            // prevents reserved-word recognition.
            let is_esac = self.lexer.tok == LexTok::Esac
                || (self.lexer.tok == LexTok::String
                    && self
                        .lexer
                        .tokstr
                        .as_ref()
                        .map(|s| s == "esac")
                        .unwrap_or(false));
            // The closer must match the opener: `}` for the brace form,
            // `esac` for the `in` form.
            if (use_brace && self.lexer.tok == LexTok::Outbrace) || (!use_brace && is_esac) {
                self.lexer.incasepat = 0;
                self.lexer.zshlex();
                break;
            }

            // Also break on EOF or a lexer error.
            if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
                self.lexer.incasepat = 0;
                break;
            }

            // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
            // The leading `(` is paired with a matching `)` that closes
            // the pattern itself; the arm-close `)` follows separately.
            // Track whether we consumed it so we can skip the matching
            // `)` after pattern parsing — otherwise the arm-close would
            // be interpreted as the pattern-close and the actual body
            // would get the leftover `)`.
            let had_leading_paren = self.lexer.tok == LexTok::Inpar;
            if had_leading_paren {
                self.lexer.zshlex();
            }

            // Collect the `|`-separated patterns of this arm
            // (incasepat is already set above).
            let mut patterns = Vec::new();
            let mut pattern_iterations = 0;
            loop {
                pattern_iterations += 1;
                if pattern_iterations > 1000 {
                    self.error("parse_case: too many pattern iterations");
                    self.lexer.incasepat = 0;
                    return None;
                }

                if self.lexer.tok == LexTok::String {
                    let s = self.lexer.tokstr.as_ref();
                    // A bare "esac" is left for the end-of-case check at
                    // the top of the outer loop rather than taken as a
                    // pattern.
                    if s.map(|s| s == "esac").unwrap_or(false) {
                        break;
                    }
                    patterns.push(self.lexer.tokstr.clone().unwrap_or_default());
                    // After the first pattern token, set incasepat=2 so (
                    // is treated as part of the pattern.
                    self.lexer.incasepat = 2;
                    self.lexer.zshlex();
                } else if self.lexer.tok != LexTok::Bar {
                    break;
                }

                if self.lexer.tok == LexTok::Bar {
                    // Reset to 1 (start of next alternative pattern)
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                } else {
                    break;
                }
            }
            self.lexer.incasepat = 0;

            // zsh's `(P)` form (parse.c:1320-1360 hack) treats the entire
            // parenthesized contents as ONE zsh pattern with internal `|`
            // as the literal alternation operator — NOT as multiple
            // case-arm alternatives. Without a leading `(`, the bare
            // `P1|P2)` form splits into multiple alts. Mirror that here:
            // when a leading `(` was consumed, fold the |-separated
            // pieces back into a single pattern string.
            if had_leading_paren && patterns.len() > 1 {
                let joined = patterns.join("|");
                patterns = vec![joined];
            }

            // Expect ).  Also handle the `(P))` wrapped-pattern form:
            // when a leading `(` was consumed, accept an extra `)` —
            // the inner `)` closes the optional-paren wrapper, the
            // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
            // (bare pattern, leading-paren is just the opt-marker, the
            // close is arm-close) and `(P)) BODY` (paren-wrapped
            // pattern, then arm-close). The first form is unambiguous
            // when the bare pattern was simple; the second is needed
            // when the body starts with `(`.
            if self.lexer.tok != LexTok::Outpar {
                self.error("expected ')' in case pattern");
                return None;
            }
            // Port of Src/parse.c:1310-1313 — when the case pattern
            // closes with `)`, set `incmdpos = 1` BEFORE consuming
            // the token so the first word of the arm body is lexed
            // in command position. Without this, `case X in X) c1=v ;;`
            // lexes `c1=v` as a plain STRING rather than an assignment
            // word, and exec treats it as a command name (yielding
            // "command not found: c1=v"). Subsequent statements after
            // `;` parse correctly because the `;` separator restores
            // command position; only the FIRST body word was broken.
            self.lexer.incmdpos = true;
            self.lexer.zshlex();
            // Second `)` of the `(P))` form, if present.
            if had_leading_paren && self.lexer.tok == LexTok::Outpar {
                self.lexer.incmdpos = true;
                self.lexer.zshlex();
            }

            // Parse body
            let body = self.parse_program();

            // Get terminator. Set incasepat=1 BEFORE the zshlex
            // advance so the next token (the next arm's pattern, like
            // `[a-z]`) gets tokenized in pattern context. Without
            // this, a `[`-prefixed pattern after the FIRST arm became
            // Inbrack instead of String and the pattern-loop bailed
            // out with "expected ')' in case pattern".
            let terminator = match self.lexer.tok {
                LexTok::Dsemi => {
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                    CaseTerm::Break
                }
                LexTok::Semiamp => {
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                    CaseTerm::Continue
                }
                LexTok::Semibar => {
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                    CaseTerm::TestNext
                }
                // No terminator token: nothing is consumed and the arm
                // is recorded as Break.
                _ => CaseTerm::Break,
            };

            // Arms for which no pattern was collected are dropped.
            if !patterns.is_empty() {
                arms.push(CaseArm {
                    patterns,
                    body,
                    terminator,
                });
            }
        }

        Some(ZshCommand::Case(ZshCase { word, arms }))
    }
2272
2273    /// Parse if statement
2274    /// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
2275    /// Direct port of zsh/Src/parse.c:1411-1519 `par_if`. The C source
2276    /// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
2277    /// (cond, then_body) tuples plus an optional else_body.
2278    fn parse_if(&mut self) -> Option<ZshCommand> {
2279        self.lexer.zshlex(); // skip 'if'
2280
2281        // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
2282        let cond = Box::new(self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace])));
2283
2284        self.skip_separators();
2285
2286        // Expect 'then' or {
2287        let use_brace = self.lexer.tok == LexTok::Inbrace;
2288        if self.lexer.tok != LexTok::Then && !use_brace {
2289            self.error("expected 'then' or '{' after if condition");
2290            return None;
2291        }
2292        self.lexer.zshlex();
2293
2294        // Parse then-body - stops at else/elif/fi, or } if using brace syntax
2295        let then = if use_brace {
2296            let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2297            if self.lexer.tok == LexTok::Outbrace {
2298                self.lexer.zshlex();
2299            }
2300            Box::new(body)
2301        } else {
2302            Box::new(self.parse_program_until(Some(&[LexTok::Else, LexTok::Elif, LexTok::Fi])))
2303        };
2304
2305        // Parse elif and else. zsh accepts the SAME elif/else
2306        // continuations for both classic `then/fi` AND the brace
2307        // form `{ ... } elif ... { ... } else { ... }`. Direct port
2308        // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
2309        // arms are checked AFTER the body close regardless of which
2310        // delimiter style opened the block. Without this, zinit's
2311        //   if [[ -z $sel ]] { ... } else { ... }
2312        // hung the parser — `else` was treated as an external
2313        // command following the if-statement, which the lexer state
2314        // mis-classified inside the still-open function body.
2315        //
2316        // For brace-form: skip the `fi` consumption at the end of
2317        // the loop (no `fi` after a brace block), and `else` may
2318        // arrive after a `}` close. Skip-separators between the
2319        // body close and the elif/else token.
2320        let mut elif = Vec::new();
2321        let mut else_ = None;
2322
2323        {
2324            loop {
2325                self.skip_separators();
2326
2327                match self.lexer.tok {
2328                    LexTok::Elif => {
2329                        self.lexer.zshlex();
2330                        // elif condition stops at 'then' or '{'
2331                        let econd =
2332                            self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace]));
2333                        self.skip_separators();
2334
2335                        let elif_use_brace = self.lexer.tok == LexTok::Inbrace;
2336                        if self.lexer.tok != LexTok::Then && !elif_use_brace {
2337                            self.error("expected 'then' after elif");
2338                            return None;
2339                        }
2340                        self.lexer.zshlex();
2341
2342                        // elif body stops at else/elif/fi or } if using braces
2343                        let ebody = if elif_use_brace {
2344                            let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2345                            if self.lexer.tok == LexTok::Outbrace {
2346                                self.lexer.zshlex();
2347                            }
2348                            body
2349                        } else {
2350                            self.parse_program_until(Some(&[
2351                                LexTok::Else,
2352                                LexTok::Elif,
2353                                LexTok::Fi,
2354                            ]))
2355                        };
2356
2357                        elif.push((econd, ebody));
2358                    }
2359                    LexTok::Else => {
2360                        self.lexer.zshlex();
2361                        self.skip_separators();
2362
2363                        let else_use_brace = self.lexer.tok == LexTok::Inbrace;
2364                        if else_use_brace {
2365                            self.lexer.zshlex();
2366                        }
2367
2368                        // else body stops at 'fi' or '}'
2369                        else_ = Some(Box::new(if else_use_brace {
2370                            let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2371                            if self.lexer.tok == LexTok::Outbrace {
2372                                self.lexer.zshlex();
2373                            }
2374                            body
2375                        } else {
2376                            self.parse_program_until(Some(&[LexTok::Fi]))
2377                        }));
2378
2379                        // Consume the 'fi' if present (not for brace syntax)
2380                        if !else_use_brace && self.lexer.tok == LexTok::Fi {
2381                            self.lexer.zshlex();
2382                        }
2383                        break;
2384                    }
2385                    LexTok::Fi => {
2386                        self.lexer.zshlex();
2387                        break;
2388                    }
2389                    _ => break,
2390                }
2391            }
2392        }
2393
2394        Some(ZshCommand::If(ZshIf {
2395            cond,
2396            then,
2397            elif,
2398            else_,
2399        }))
2400    }
2401
2402    /// Parse while/until loop
2403    /// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
2404    /// Direct port of zsh/Src/parse.c:1521-1563 `par_while`. The
2405    /// `until` variant is the same loop with the condition negated.
2406    fn parse_while(&mut self, until: bool) -> Option<ZshCommand> {
2407        self.lexer.zshlex(); // skip while/until
2408
2409        let cond = Box::new(self.parse_program());
2410
2411        self.skip_separators();
2412        let body = self.parse_loop_body(false)?;
2413
2414        Some(ZshCommand::While(ZshWhile {
2415            cond,
2416            body: Box::new(body),
2417            until,
2418        }))
2419    }
2420
2421    /// Parse repeat loop
2422    /// Parse `repeat N; do BODY; done`. Direct port of
2423    /// zsh/Src/parse.c:1565-1617 `par_repeat`. The C source supports
2424    /// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
2425    /// parser doesn't yet special-case that variant.
2426    fn parse_repeat(&mut self) -> Option<ZshCommand> {
2427        self.lexer.zshlex(); // skip 'repeat'
2428
2429        let count = match self.lexer.tok {
2430            LexTok::String => {
2431                let c = self.lexer.tokstr.clone().unwrap_or_default();
2432                self.lexer.zshlex();
2433                c
2434            }
2435            _ => {
2436                self.error("expected count after repeat");
2437                return None;
2438            }
2439        };
2440
2441        self.skip_separators();
2442        let body = self.parse_loop_body(false)?;
2443
2444        Some(ZshCommand::Repeat(ZshRepeat {
2445            count,
2446            body: Box::new(body),
2447        }))
2448    }
2449
2450    /// Parse loop body (do...done, {...}, or shortloop)
2451    /// Parse the `do BODY done` body of a for/while/until/select/
2452    /// repeat loop. Direct equivalent of zsh's parse.c handling
2453    /// inside the loop builders — they all consume DOLOOP, parse a
2454    /// list until DONE, and return the list. The `foreach_style`
2455    /// flag signals foreach (where short-form `for NAME in WORDS;
2456    /// CMD` may skip do/done) vs c-style (which always requires
2457    /// do/done).
2458    fn parse_loop_body(&mut self, foreach_style: bool) -> Option<ZshProgram> {
2459        if self.lexer.tok == LexTok::Doloop {
2460            self.lexer.zshlex();
2461            let body = self.parse_program();
2462            if self.lexer.tok == LexTok::Done {
2463                self.lexer.zshlex();
2464            }
2465            Some(body)
2466        } else if self.lexer.tok == LexTok::Inbrace {
2467            self.lexer.zshlex();
2468            let body = self.parse_program();
2469            if self.lexer.tok == LexTok::Outbrace {
2470                self.lexer.zshlex();
2471            }
2472            Some(body)
2473        } else if foreach_style {
2474            // foreach allows 'end' terminator
2475            let body = self.parse_program();
2476            if self.lexer.tok == LexTok::Zend {
2477                self.lexer.zshlex();
2478            }
2479            Some(body)
2480        } else {
2481            // Short loop - single command
2482            self.parse_list()
2483                .map(|list| ZshProgram { lists: vec![list] })
2484        }
2485    }
2486
2487    /// Parse (...) subshell
2488    /// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619-1670
2489    /// `par_subsh`. Body parses as a normal list; the subshell wrapper
2490    /// fork-isolates execution in the executor.
2491    fn parse_subsh(&mut self) -> Option<ZshCommand> {
2492        self.lexer.zshlex(); // skip (
2493        let prog = self.parse_program();
2494        if self.lexer.tok == LexTok::Outpar {
2495            self.lexer.zshlex();
2496        }
2497        Some(ZshCommand::Subsh(Box::new(prog)))
2498    }
2499
2500    /// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
2501    /// function named `_zshrs_anon_N`, invokes it with the args, and the
2502    /// body runs with positional params set. Implemented as the desugared
2503    /// pair (FuncDef + Simple call) so the compile path doesn't need new
2504    /// machinery.
2505    /// Parse an anonymous function definition `() { BODY }` followed
2506    /// by call args. zsh treats `() { echo hi; } a b c` as defining
2507    /// and immediately calling an anon fn with args a/b/c. C
2508    /// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
2509    /// triggers an anon-funcdef path.
2510    fn parse_anon_funcdef(&mut self) -> Option<ZshCommand> {
2511        self.lexer.zshlex(); // skip ()
2512        self.skip_separators();
2513        // No `{` after `()` → bare empty subshell shape `()`. Fall back
2514        // to a Subsh with an empty program so the status is 0 (matches
2515        // zsh's `()` no-op behavior).
2516        if self.lexer.tok != LexTok::Inbrace {
2517            return Some(ZshCommand::Subsh(Box::new(ZshProgram {
2518                lists: Vec::new(),
2519            })));
2520        }
2521        self.lexer.zshlex(); // skip {
2522        let body = self.parse_program();
2523        if self.lexer.tok == LexTok::Outbrace {
2524            self.lexer.zshlex();
2525        }
2526        // Collect any trailing args until a separator. zsh's anon-fn form
2527        // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
2528        let mut args = Vec::new();
2529        while self.lexer.tok == LexTok::String {
2530            if let Some(s) = self.lexer.tokstr.clone() {
2531                args.push(s);
2532            }
2533            self.lexer.zshlex();
2534        }
2535
2536        // Generate a unique name. Module-level static would be cleaner but
2537        // a thread-local atomic is enough — anonymous functions are
2538        // ephemeral and the name isn't user-visible.
2539        use std::sync::atomic::{AtomicUsize, Ordering};
2540        static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2541        let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2542        let name = format!("_zshrs_anon_{}", n);
2543        Some(ZshCommand::FuncDef(ZshFuncDef {
2544            names: vec![name],
2545            body: Box::new(body),
2546            tracing: false,
2547            auto_call_args: Some(args),
2548            body_source: None,
2549        }))
2550    }
2551
2552    /// Parse {...} cursh
2553    /// Parse a current-shell brace block `{ BODY }`. C source
2554    /// par_cmd at parse.c:958-1085 handles INBRACE → emit WC_CURSH
2555    /// and recurses into the list. zshrs's parse_cursh extracts that
2556    /// arm into a dedicated method.
2557    fn parse_cursh(&mut self) -> Option<ZshCommand> {
2558        self.lexer.zshlex(); // skip {
2559        let prog = self.parse_program();
2560
2561        // Check for { ... } always { ... }. Direct port of zsh's
2562        // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
2563        // forces (parse.c:1632, 1637): after consuming the closing
2564        // OUTBRACE AND after matching the `always` keyword, the parser
2565        // explicitly resets command position so the next `{` lexes as
2566        // INBRACE. Without these resets the lexer's String-clears-cmdpos
2567        // rule (lex.rs:976-983) leaves the second `{` in word position,
2568        // turning `always { ... }` into a Simple `{` `echo` … and the
2569        // try/always pairing is silently lost.
2570        if self.lexer.tok == LexTok::Outbrace {
2571            self.lexer.incmdpos = true; // parse.c:1632 incmdpos = !zsh_construct
2572            self.lexer.zshlex();
2573
2574            // Check for 'always'
2575            if self.lexer.tok == LexTok::String {
2576                let s = self.lexer.tokstr.as_ref();
2577                if s.map(|s| s == "always").unwrap_or(false) {
2578                    self.lexer.incmdpos = true; // parse.c:1637 incmdpos = 1
2579                    self.lexer.zshlex();
2580                    self.skip_separators();
2581
2582                    if self.lexer.tok == LexTok::Inbrace {
2583                        self.lexer.zshlex();
2584                        let always = self.parse_program();
2585                        if self.lexer.tok == LexTok::Outbrace {
2586                            self.lexer.zshlex();
2587                        }
2588                        return Some(ZshCommand::Try(ZshTry {
2589                            try_block: Box::new(prog),
2590                            always: Box::new(always),
2591                        }));
2592                    }
2593                }
2594            }
2595        }
2596
2597        Some(ZshCommand::Cursh(Box::new(prog)))
2598    }
2599
2600    /// Parse function definition
2601    /// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
2602    /// port of zsh/Src/parse.c:1672-1785 `par_funcdef`. zsh handles
2603    /// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
2604    /// the optional `[fname1 fname2 ...]` for multi-name function defs,
2605    /// and the `function FOO () { ... }` traditional/POSIX hybrid form.
2606    fn parse_funcdef(&mut self) -> Option<ZshCommand> {
2607        self.lexer.zshlex(); // skip 'function'
2608
2609        let mut names = Vec::new();
2610        let mut tracing = false;
2611
2612        // Handle options like -T and function names. Two subtleties:
2613        //
2614        //   1. Flags: zsh's lexer encodes a leading `-` as
2615        //      `char_tokens::DASH` (\u{9b}) inside the String tokstr.
2616        //      The previous `s.starts_with('-')` check failed for
2617        //      `\u{9b}T`, so `function -T NAME { body }` slipped the
2618        //      `-T` token into `names` and the function got registered
2619        //      as `T` plus the intended `NAME`.
2620        //
2621        //   2. Body opener: zsh's lexer emits the opening `{` as a
2622        //      String (not LexTok::Inbrace) when it follows the String
2623        //      NAME — the preceding name token resets incmdpos to
2624        //      false, and only `{` immediately followed by `}` (the
2625        //      empty-body case) gets promoted to Inbrace. The funcdef
2626        //      parser must recognise the bare-`{` String as the body
2627        //      opener; otherwise `function NAME { body }` falls through
2628        //      to `_ => break`, no body parses, and the FuncDef never
2629        //      lands in the AST. This is consistent with C zsh's
2630        //      par_funcdef which knows it's in funcdef-header context
2631        //      and accepts the brace either way.
2632        loop {
2633            match self.lexer.tok {
2634                LexTok::String => {
2635                    let s = self.lexer.tokstr.as_ref()?;
2636                    if s == "{" {
2637                        // Funcdef body opener — break, body-parser branch handles it.
2638                        break;
2639                    }
2640                    let first = s.chars().next();
2641                    if matches!(first, Some('-') | Some('+'))
2642                        || matches!(first, Some(c) if c == crate::tokens::char_tokens::DASH)
2643                    {
2644                        if s.contains('T') {
2645                            tracing = true;
2646                        }
2647                        self.lexer.zshlex();
2648                        continue;
2649                    }
2650                    names.push(s.clone());
2651                    self.lexer.zshlex();
2652                }
2653                LexTok::Inbrace | LexTok::Inoutpar | LexTok::Seper | LexTok::Newlin => break,
2654                _ => break,
2655            }
2656        }
2657
2658        // Optional ()
2659        let saw_paren = self.lexer.tok == LexTok::Inoutpar;
2660        if saw_paren {
2661            self.lexer.zshlex();
2662        }
2663
2664        self.skip_separators();
2665
2666        // Body opener: real Inbrace OR a String("{") (the lexer emits
2667        // the latter after a String NAME — see comment above).
2668        let body_opener_is_string_brace = self.lexer.tok == LexTok::String
2669            && self.lexer.tokstr.as_deref() == Some("{");
2670        if self.lexer.tok == LexTok::Inbrace || body_opener_is_string_brace {
2671            // Capture body_start BEFORE the lexer advances past the
2672            // first body token. After the previous zshlex consumed
2673            // `{`, lexer.pos points just past `{` (which is where the
2674            // body source starts). The next `zshlex()` would advance
2675            // past the first token (`echo`), making body_start land
2676            // mid-body and lose the first word — `typeset -f f` would
2677            // print `a; echo b` for `{ echo a; echo b }`.
2678            let body_start = self.lexer.pos;
2679            self.lexer.zshlex();
2680            let body = self.parse_program();
2681            let body_end = if self.lexer.tok == LexTok::Outbrace {
2682                // Lexer has just consumed `}`; pos is past it. Body content
2683                // ends one byte before pos.
2684                self.lexer.pos.saturating_sub(1)
2685            } else {
2686                self.lexer.pos
2687            };
2688            let body_source = self
2689                .lexer
2690                .input
2691                .get(body_start..body_end)
2692                .map(|s| s.trim().to_string())
2693                .filter(|s| !s.is_empty());
2694            if self.lexer.tok == LexTok::Outbrace {
2695                self.lexer.zshlex();
2696            }
2697
2698            // Anonymous form `function () { body } a b c` (with `()`) or
2699            // `function { body } a b c` (zsh-only shorthand, no `()`). No
2700            // name was collected. Mirror parse_anon_funcdef: synthesize
2701            // `_zshrs_anon_N`, collect trailing args, set auto_call_args
2702            // so compile_funcdef registers + immediately calls the
2703            // function with the args as positional params.
2704            if names.is_empty() {
2705                let mut args = Vec::new();
2706                while self.lexer.tok == LexTok::String {
2707                    if let Some(s) = self.lexer.tokstr.clone() {
2708                        args.push(s);
2709                    }
2710                    self.lexer.zshlex();
2711                }
2712                use std::sync::atomic::{AtomicUsize, Ordering};
2713                static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2714                let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2715                let name = format!("_zshrs_anon_kw_{}", n);
2716                return Some(ZshCommand::FuncDef(ZshFuncDef {
2717                    names: vec![name],
2718                    body: Box::new(body),
2719                    tracing,
2720                    auto_call_args: Some(args),
2721                    body_source,
2722                }));
2723            }
2724
2725            Some(ZshCommand::FuncDef(ZshFuncDef {
2726                names,
2727                body: Box::new(body),
2728                tracing,
2729                auto_call_args: None,
2730                body_source,
2731            }))
2732        } else {
2733            // Short form
2734            self.parse_list().map(|list| {
2735                ZshCommand::FuncDef(ZshFuncDef {
2736                    names,
2737                    body: Box::new(ZshProgram { lists: vec![list] }),
2738                    tracing,
2739                    auto_call_args: None,
2740                    body_source: None,
2741                })
2742            })
2743        }
2744    }
2745
2746    /// Parse inline function definition: name() { ... }
2747    /// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
2748    /// without the `function` keyword). The name has already been
2749    /// consumed and pushed by parse_simple before this method fires.
2750    /// C source: handled inline in par_simple's INOUTPAR-after-name
2751    /// arm (parse.c:1836-2228).
2752    fn parse_inline_funcdef(&mut self, name: String) -> Option<ZshCommand> {
2753        // Skip ()
2754        if self.lexer.tok == LexTok::Inoutpar {
2755            self.lexer.zshlex();
2756        }
2757
2758        self.skip_separators();
2759
2760        // Parse body
2761        if self.lexer.tok == LexTok::Inbrace {
2762            // Same body_start-before-zshlex fix as parse_funcdef.
2763            let body_start = self.lexer.pos;
2764            self.lexer.zshlex();
2765            let body = self.parse_program();
2766            let body_end = if self.lexer.tok == LexTok::Outbrace {
2767                self.lexer.pos.saturating_sub(1)
2768            } else {
2769                self.lexer.pos
2770            };
2771            let body_source = self
2772                .lexer
2773                .input
2774                .get(body_start..body_end)
2775                .map(|s| s.trim().to_string())
2776                .filter(|s| !s.is_empty());
2777            if self.lexer.tok == LexTok::Outbrace {
2778                self.lexer.zshlex();
2779            }
2780            Some(ZshCommand::FuncDef(ZshFuncDef {
2781                names: vec![name],
2782                body: Box::new(body),
2783                tracing: false,
2784                auto_call_args: None,
2785                body_source,
2786            }))
2787        } else {
2788            match self.parse_cmd() {
2789                Some(cmd) => {
2790                    let list = ZshList {
2791                        sublist: ZshSublist {
2792                            pipe: ZshPipe {
2793                                cmd,
2794                                next: None,
2795                                lineno: self.lexer.lineno,
2796                                merge_stderr: false,
2797                            },
2798                            next: None,
2799                            flags: SublistFlags::default(),
2800                        },
2801                        flags: ListFlags::default(),
2802                    };
2803                    Some(ZshCommand::FuncDef(ZshFuncDef {
2804                        names: vec![name],
2805                        body: Box::new(ZshProgram { lists: vec![list] }),
2806                        tracing: false,
2807                        auto_call_args: None,
2808                        body_source: None,
2809                    }))
2810                }
2811                None => None,
2812            }
2813        }
2814    }
2815
2816    /// Parse [[ ... ]] conditional
2817    /// Parse `[[ EXPR ]]` conditional expression. Direct port of
2818    /// zsh/Src/parse.c:2409-2731 `par_cond` (and helpers par_cond_1,
2819    /// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2820    /// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2821    /// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2822    ///   <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2823    fn parse_cond(&mut self) -> Option<ZshCommand> {
2824        self.lexer.zshlex(); // skip [[
2825                             // Empty cond `[[ ]]` is a parse error in zsh — emit the
2826                             // diagnostic and return None so the caller produces a
2827                             // non-zero exit. Without this, `[[ ]]` silently passed and
2828                             // returned exit 0.
2829        if self.lexer.tok == LexTok::Doutbrack {
2830            self.error("parse error near `]]'");
2831            self.lexer.zshlex();
2832            return None;
2833        }
2834        let cond = self.parse_cond_expr();
2835
2836        if self.lexer.tok == LexTok::Doutbrack {
2837            self.lexer.zshlex();
2838        }
2839
2840        cond.map(ZshCommand::Cond)
2841    }
2842
2843    /// Parse conditional expression
2844    /// Top of `[[ ]]` cond-expression parsing — entry to recursive
2845    /// descent (or → and → not → primary). Direct port of zsh's
2846    /// par_cond_1 at parse.c:2434-2475.
2847    fn parse_cond_expr(&mut self) -> Option<ZshCond> {
2848        self.parse_cond_or()
2849    }
2850
2851    /// Cond-expression `||` level. C: inside par_cond_1 at
2852    /// parse.c:2434-2475 (the `cond_or` ladder).
2853    fn parse_cond_or(&mut self) -> Option<ZshCond> {
2854        self.recursion_depth += 1;
2855        if self.check_recursion() {
2856            self.error("parse_cond_or: max recursion depth exceeded");
2857            self.recursion_depth -= 1;
2858            return None;
2859        }
2860
2861        let left = match self.parse_cond_and() {
2862            Some(l) => l,
2863            None => {
2864                self.recursion_depth -= 1;
2865                return None;
2866            }
2867        };
2868
2869        self.skip_cond_separators();
2870
2871        let result = if self.lexer.tok == LexTok::Dbar {
2872            self.lexer.zshlex();
2873            self.skip_cond_separators();
2874            self.parse_cond_or()
2875                .map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
2876        } else {
2877            Some(left)
2878        };
2879
2880        self.recursion_depth -= 1;
2881        result
2882    }
2883
2884    /// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
2885    fn parse_cond_and(&mut self) -> Option<ZshCond> {
2886        self.recursion_depth += 1;
2887        if self.check_recursion() {
2888            self.error("parse_cond_and: max recursion depth exceeded");
2889            self.recursion_depth -= 1;
2890            return None;
2891        }
2892
2893        let left = match self.parse_cond_not() {
2894            Some(l) => l,
2895            None => {
2896                self.recursion_depth -= 1;
2897                return None;
2898            }
2899        };
2900
2901        self.skip_cond_separators();
2902
2903        let result = if self.lexer.tok == LexTok::Damper {
2904            self.lexer.zshlex();
2905            self.skip_cond_separators();
2906            self.parse_cond_and()
2907                .map(|right| ZshCond::And(Box::new(left), Box::new(right)))
2908        } else {
2909            Some(left)
2910        };
2911
2912        self.recursion_depth -= 1;
2913        result
2914    }
2915
2916    /// Cond-expression `!` negation level. C: handled inside
2917    /// par_cond_2 at parse.c:2476-2625 via the BANG token check.
2918    fn parse_cond_not(&mut self) -> Option<ZshCond> {
2919        self.recursion_depth += 1;
2920        if self.check_recursion() {
2921            self.error("parse_cond_not: max recursion depth exceeded");
2922            self.recursion_depth -= 1;
2923            return None;
2924        }
2925
2926        self.skip_cond_separators();
2927
2928        // ! can be either LexTok::Bang or String "!"
2929        let is_not = self.lexer.tok == LexTok::Bang
2930            || (self.lexer.tok == LexTok::String
2931                && self
2932                    .lexer
2933                    .tokstr
2934                    .as_ref()
2935                    .map(|s| s == "!")
2936                    .unwrap_or(false));
2937        if is_not {
2938            self.lexer.zshlex();
2939            let inner = match self.parse_cond_not() {
2940                Some(i) => i,
2941                None => {
2942                    self.recursion_depth -= 1;
2943                    return None;
2944                }
2945            };
2946            self.recursion_depth -= 1;
2947            return Some(ZshCond::Not(Box::new(inner)));
2948        }
2949
2950        if self.lexer.tok == LexTok::Inpar {
2951            self.lexer.zshlex();
2952            self.skip_cond_separators();
2953            let inner = match self.parse_cond_expr() {
2954                Some(i) => i,
2955                None => {
2956                    self.recursion_depth -= 1;
2957                    return None;
2958                }
2959            };
2960            self.skip_cond_separators();
2961            if self.lexer.tok == LexTok::Outpar {
2962                self.lexer.zshlex();
2963            }
2964            self.recursion_depth -= 1;
2965            return Some(inner);
2966        }
2967
2968        let result = self.parse_cond_primary();
2969        self.recursion_depth -= 1;
2970        result
2971    }
2972
2973    /// Cond-expression primary: unary tests (-f, -d, ...), binary
2974    /// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
2975    /// sub-expressions. Direct port of par_cond_double / par_cond_triple
2976    /// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
2977    fn parse_cond_primary(&mut self) -> Option<ZshCond> {
2978        let s1 = match self.lexer.tok {
2979            LexTok::String => {
2980                let s = self.lexer.tokstr.clone().unwrap_or_default();
2981                self.lexer.zshlex();
2982                s
2983            }
2984            _ => return None,
2985        };
2986
2987        self.skip_cond_separators();
2988
2989        // Check for unary operator. zsh's lexer tokenizes leading `-` as
2990        // `char_tokens::DASH` (\u{9b}) inside gettokstr (lex.c:1390-1400
2991        // LX2_DASH — `-` always becomes Dash, untokenized later). Match
2992        // either form here, and use char-count not byte-count since DASH
2993        // is 2 UTF-8 bytes (`\xc2\x9b`).
2994        let s1_chars: Vec<char> = s1.chars().collect();
2995        if s1_chars.len() == 2 && crate::tokens::is_dash(s1_chars[0]) {
2996            let s2 = match self.lexer.tok {
2997                LexTok::String => {
2998                    let s = self.lexer.tokstr.clone().unwrap_or_default();
2999                    self.lexer.zshlex();
3000                    s
3001                }
3002                _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
3003            };
3004            return Some(ZshCond::Unary(s1, s2));
3005        }
3006
3007        // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
3008        //   incond++;  /* parentheses do globbing */
3009        //   do condlex(); while (COND_SEP());
3010        //   incond--;  /* parentheses do grouping */
3011        // The bump makes the lexer treat `(` as a literal character inside
3012        // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning INPAR
3013        // and splitting the regex into multiple tokens.
3014        let op = match self.lexer.tok {
3015            LexTok::String => {
3016                let s = self.lexer.tokstr.clone().unwrap_or_default();
3017                self.lexer.incond += 1;
3018                self.lexer.zshlex();
3019                self.lexer.incond -= 1;
3020                s
3021            }
3022            LexTok::Inang => {
3023                self.lexer.incond += 1;
3024                self.lexer.zshlex();
3025                self.lexer.incond -= 1;
3026                "<".to_string()
3027            }
3028            LexTok::Outang => {
3029                self.lexer.incond += 1;
3030                self.lexer.zshlex();
3031                self.lexer.incond -= 1;
3032                ">".to_string()
3033            }
3034            _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
3035        };
3036
3037        self.skip_cond_separators();
3038
3039        let s2 = match self.lexer.tok {
3040            LexTok::String => {
3041                let s = self.lexer.tokstr.clone().unwrap_or_default();
3042                self.lexer.zshlex();
3043                s
3044            }
3045            _ => return Some(ZshCond::Binary(s1, op, String::new())),
3046        };
3047
3048        if op == "=~" {
3049            Some(ZshCond::Regex(s1, s2))
3050        } else {
3051            Some(ZshCond::Binary(s1, op, s2))
3052        }
3053    }
3054
3055    fn skip_cond_separators(&mut self) {
3056        while self.lexer.tok == LexTok::Seper && {
3057            let s = self.lexer.tokstr.as_ref();
3058            s.map(|s| !s.contains(';')).unwrap_or(true)
3059        } {
3060            self.lexer.zshlex();
3061        }
3062    }
3063
3064    /// Parse (( ... )) arithmetic command
3065    /// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
3066    /// `par_dinbrack` (despite the name; the function actually handles
3067    /// DINPAR `(( ))` blocks too).
3068    fn parse_arith(&mut self) -> Option<ZshCommand> {
3069        let expr = self.lexer.tokstr.clone().unwrap_or_default();
3070        self.lexer.zshlex();
3071        Some(ZshCommand::Arith(expr))
3072    }
3073
3074    /// Parse time command
3075    /// Parse `time CMD` (POSIX time keyword). Direct port of
3076    /// zsh/Src/parse.c:1787-1808 `par_time`. The `time` keyword
3077    /// times the execution of the following pipeline / cmd.
3078    fn parse_time(&mut self) -> Option<ZshCommand> {
3079        self.lexer.zshlex(); // skip 'time'
3080
3081        // Check if there's a pipeline to time
3082        if self.lexer.tok == LexTok::Seper
3083            || self.lexer.tok == LexTok::Newlin
3084            || self.lexer.tok == LexTok::Endinput
3085        {
3086            Some(ZshCommand::Time(None))
3087        } else {
3088            let sublist = self.parse_sublist();
3089            Some(ZshCommand::Time(sublist.map(Box::new)))
3090        }
3091    }
3092
3093    /// Check if next token is ()
3094    fn peek_inoutpar(&mut self) -> bool {
3095        self.lexer.tok == LexTok::Inoutpar
3096    }
3097
3098    /// Skip separator tokens
3099    fn skip_separators(&mut self) {
3100        let mut iterations = 0;
3101        while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
3102            iterations += 1;
3103            if iterations > 100_000 {
3104                self.error("skip_separators: too many iterations");
3105                return;
3106            }
3107            self.lexer.zshlex();
3108        }
3109    }
3110
3111    /// Record an error
3112    fn error(&mut self, msg: &str) {
3113        self.errors.push(ParseError {
3114            message: msg.to_string(),
3115            line: self.lexer.lineno,
3116        });
3117    }
3118}
3119
3120#[cfg(test)]
3121mod tests {
3122    use super::*;
3123
3124    fn parse(input: &str) -> Result<ZshProgram, Vec<ParseError>> {
3125        let mut parser = ZshParser::new(input);
3126        parser.parse()
3127    }
3128
3129    #[test]
3130    fn test_simple_command() {
3131        let prog = parse("echo hello world").unwrap();
3132        assert_eq!(prog.lists.len(), 1);
3133        match &prog.lists[0].sublist.pipe.cmd {
3134            ZshCommand::Simple(s) => {
3135                assert_eq!(s.words, vec!["echo", "hello", "world"]);
3136            }
3137            _ => panic!("expected simple command"),
3138        }
3139    }
3140
3141    #[test]
3142    fn test_pipeline() {
3143        let prog = parse("ls | grep foo | wc -l").unwrap();
3144        assert_eq!(prog.lists.len(), 1);
3145
3146        let pipe = &prog.lists[0].sublist.pipe;
3147        assert!(pipe.next.is_some());
3148
3149        let pipe2 = pipe.next.as_ref().unwrap();
3150        assert!(pipe2.next.is_some());
3151    }
3152
3153    #[test]
3154    fn test_and_or() {
3155        let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
3156        let sublist = &prog.lists[0].sublist;
3157
3158        assert!(sublist.next.is_some());
3159        let (op, _) = sublist.next.as_ref().unwrap();
3160        assert_eq!(*op, SublistOp::And);
3161    }
3162
3163    #[test]
3164    fn test_if_then() {
3165        let prog = parse("if test -f foo; then echo yes; fi").unwrap();
3166        match &prog.lists[0].sublist.pipe.cmd {
3167            ZshCommand::If(_) => {}
3168            _ => panic!("expected if command"),
3169        }
3170    }
3171
3172    #[test]
3173    fn test_for_loop() {
3174        let prog = parse("for i in a b c; do echo $i; done").unwrap();
3175        match &prog.lists[0].sublist.pipe.cmd {
3176            ZshCommand::For(f) => {
3177                assert_eq!(f.var, "i");
3178                match &f.list {
3179                    ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
3180                    _ => panic!("expected word list"),
3181                }
3182            }
3183            _ => panic!("expected for command"),
3184        }
3185    }
3186
3187    #[test]
3188    fn test_case() {
3189        let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
3190        match &prog.lists[0].sublist.pipe.cmd {
3191            ZshCommand::Case(c) => {
3192                assert_eq!(c.arms.len(), 2);
3193            }
3194            _ => panic!("expected case command"),
3195        }
3196    }
3197
3198    #[test]
3199    fn test_function() {
3200        // First test just parsing "function foo" to see what happens
3201        let prog = parse("function foo { }").unwrap();
3202        match &prog.lists[0].sublist.pipe.cmd {
3203            ZshCommand::FuncDef(f) => {
3204                assert_eq!(f.names, vec!["foo"]);
3205            }
3206            _ => panic!(
3207                "expected function, got {:?}",
3208                prog.lists[0].sublist.pipe.cmd
3209            ),
3210        }
3211    }
3212
3213    #[test]
3214    fn test_redirection() {
3215        let prog = parse("echo hello > file.txt").unwrap();
3216        match &prog.lists[0].sublist.pipe.cmd {
3217            ZshCommand::Simple(s) => {
3218                assert_eq!(s.redirs.len(), 1);
3219                assert_eq!(s.redirs[0].rtype, RedirType::Write);
3220            }
3221            _ => panic!("expected simple command"),
3222        }
3223    }
3224
3225    #[test]
3226    fn test_assignment() {
3227        let prog = parse("FOO=bar echo $FOO").unwrap();
3228        match &prog.lists[0].sublist.pipe.cmd {
3229            ZshCommand::Simple(s) => {
3230                assert_eq!(s.assigns.len(), 1);
3231                assert_eq!(s.assigns[0].name, "FOO");
3232            }
3233            _ => panic!("expected simple command"),
3234        }
3235    }
3236
3237    #[test]
3238    fn test_parse_completion_function() {
3239        let input = r#"_2to3_fixes() {
3240  local -a fixes
3241  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
3242  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
3243}"#;
3244        let result = parse(input);
3245        assert!(
3246            result.is_ok(),
3247            "Failed to parse completion function: {:?}",
3248            result.err()
3249        );
3250        let prog = result.unwrap();
3251        assert!(
3252            !prog.lists.is_empty(),
3253            "Expected at least one list in program"
3254        );
3255    }
3256
3257    #[test]
3258    fn test_parse_array_with_complex_elements() {
3259        let input = r#"arguments=(
3260  '(- * :)'{-h,--help}'[show this help message and exit]'
3261  {-d,--doctests_only}'[fix up doctests only]'
3262  '*:filename:_files'
3263)"#;
3264        let result = parse(input);
3265        assert!(
3266            result.is_ok(),
3267            "Failed to parse array assignment: {:?}",
3268            result.err()
3269        );
3270    }
3271
3272    #[test]
3273    fn test_parse_full_completion_file() {
3274        let input = r##"#compdef 2to3
3275
3276# zsh completions for '2to3'
3277
3278_2to3_fixes() {
3279  local -a fixes
3280  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
3281  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
3282}
3283
3284local -a arguments
3285
3286arguments=(
3287  '(- * :)'{-h,--help}'[show this help message and exit]'
3288  {-d,--doctests_only}'[fix up doctests only]'
3289  {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
3290  {-j,--processes}'[run 2to3 concurrently]:number: '
3291  {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
3292  {-l,--list-fixes}'[list available transformations]'
3293  {-p,--print-function}'[modify the grammar so that print() is a function]'
3294  {-v,--verbose}'[more verbose logging]'
3295  '--no-diffs[do not show diffs of the refactoring]'
3296  {-w,--write}'[write back modified files]'
3297  {-n,--nobackups}'[do not write backups for modified files]'
3298  {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
3299  {-W,--write-unchanged-files}'[also write files even if no changes were required]'
3300  '--add-suffix[append this string to all output filenames]:suffix: '
3301  '*:filename:_files'
3302)
3303
3304_arguments -s -S $arguments
3305"##;
3306        let result = parse(input);
3307        assert!(
3308            result.is_ok(),
3309            "Failed to parse full completion file: {:?}",
3310            result.err()
3311        );
3312        let prog = result.unwrap();
3313        // Should have parsed successfully with at least one statement
3314        assert!(!prog.lists.is_empty(), "Expected at least one list");
3315    }
3316
3317    #[test]
3318    fn test_parse_logs_sh() {
3319        let input = r#"#!/usr/bin/env bash
3320shopt -s globstar
3321
3322if [[ $(uname) == Darwin ]]; then
3323    tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
3324else
3325    if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
3326        tail -f /var/log/**/*.log | lolcat
3327    else
3328        printf "Unsupported...\n" >&2
3329    fi
3330fi
3331"#;
3332        let result = parse(input);
3333        assert!(
3334            result.is_ok(),
3335            "Failed to parse logs.sh: {:?}",
3336            result.err()
3337        );
3338    }
3339
3340    #[test]
3341    fn test_parse_case_with_glob() {
3342        let input = r#"case "$ZPWR_OS_TYPE" in
3343    darwin*)  open_cmd='open'
3344      ;;
3345    cygwin*)  open_cmd='cygstart'
3346      ;;
3347    linux*)
3348        open_cmd='xdg-open'
3349      ;;
3350esac"#;
3351        let result = parse(input);
3352        assert!(
3353            result.is_ok(),
3354            "Failed to parse case with glob: {:?}",
3355            result.err()
3356        );
3357    }
3358
3359    #[test]
3360    fn test_parse_case_with_nested_if() {
3361        // Test case with nested if and glob patterns
3362        let input = r##"function zpwrGetOpenCommand(){
3363    local open_cmd
3364    case "$ZPWR_OS_TYPE" in
3365        darwin*)  open_cmd='open' ;;
3366        cygwin*)  open_cmd='cygstart' ;;
3367        linux*)
3368            if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
3369                open_cmd='nohup xdg-open'
3370            fi
3371            ;;
3372    esac
3373}"##;
3374        let result = parse(input);
3375        assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
3376    }
3377
3378    #[test]
3379    fn test_parse_zpwr_scripts() {
3380        use std::fs;
3381        use std::path::Path;
3382        use std::sync::mpsc;
3383        use std::thread;
3384        use std::time::{Duration, Instant};
3385
3386        let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
3387        if !scripts_dir.exists() {
3388            eprintln!("Skipping test: scripts directory not found");
3389            return;
3390        }
3391
3392        let mut total = 0;
3393        let mut passed = 0;
3394        let mut failed_files = Vec::new();
3395        let mut timeout_files = Vec::new();
3396
3397        for ext in &["sh", "zsh"] {
3398            let pattern = scripts_dir.join(format!("*.{}", ext));
3399            if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
3400                for entry in entries.flatten() {
3401                    total += 1;
3402                    let file_path = entry.display().to_string();
3403                    let content = match fs::read_to_string(&entry) {
3404                        Ok(c) => c,
3405                        Err(e) => {
3406                            failed_files.push((file_path, format!("read error: {}", e)));
3407                            continue;
3408                        }
3409                    };
3410
3411                    // Parse with timeout
3412                    let content_clone = content.clone();
3413                    let (tx, rx) = mpsc::channel();
3414                    let handle = thread::spawn(move || {
3415                        let result = parse(&content_clone);
3416                        let _ = tx.send(result);
3417                    });
3418
3419                    match rx.recv_timeout(Duration::from_secs(2)) {
3420                        Ok(Ok(_)) => passed += 1,
3421                        Ok(Err(errors)) => {
3422                            let first_err = errors
3423                                .first()
3424                                .map(|e| format!("line {}: {}", e.line, e.message))
3425                                .unwrap_or_default();
3426                            failed_files.push((file_path, first_err));
3427                        }
3428                        Err(_) => {
3429                            timeout_files.push(file_path);
3430                            // Thread will be abandoned
3431                        }
3432                    }
3433                }
3434            }
3435        }
3436
3437        eprintln!("\n=== ZPWR Scripts Parse Results ===");
3438        eprintln!("Passed: {}/{}", passed, total);
3439
3440        if !timeout_files.is_empty() {
3441            eprintln!("\nTimeout files (>2s):");
3442            for file in &timeout_files {
3443                eprintln!("  {}", file);
3444            }
3445        }
3446
3447        if !failed_files.is_empty() {
3448            eprintln!("\nFailed files:");
3449            for (file, err) in &failed_files {
3450                eprintln!("  {} - {}", file, err);
3451            }
3452        }
3453
3454        // Allow some failures initially, but track progress
3455        let pass_rate = if total > 0 {
3456            (passed as f64 / total as f64) * 100.0
3457        } else {
3458            0.0
3459        };
3460        eprintln!("Pass rate: {:.1}%", pass_rate);
3461
3462        // Require at least 50% pass rate for now
3463        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
3464    }
3465
3466    #[test]
3467    #[ignore] // Uses threads that can't be killed on timeout; use integration test instead
3468    fn test_parse_zsh_stdlib_functions() {
3469        use std::fs;
3470        use std::path::Path;
3471        use std::sync::mpsc;
3472        use std::thread;
3473        use std::time::Duration;
3474
3475        let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
3476        if !functions_dir.exists() {
3477            eprintln!(
3478                "Skipping test: zsh_functions directory not found at {:?}",
3479                functions_dir
3480            );
3481            return;
3482        }
3483
3484        let mut total = 0;
3485        let mut passed = 0;
3486        let mut failed_files = Vec::new();
3487        let mut timeout_files = Vec::new();
3488
3489        if let Ok(entries) = fs::read_dir(&functions_dir) {
3490            for entry in entries.flatten() {
3491                let path = entry.path();
3492                if !path.is_file() {
3493                    continue;
3494                }
3495
3496                total += 1;
3497                let file_path = path.display().to_string();
3498                let content = match fs::read_to_string(&path) {
3499                    Ok(c) => c,
3500                    Err(e) => {
3501                        failed_files.push((file_path, format!("read error: {}", e)));
3502                        continue;
3503                    }
3504                };
3505
3506                // Parse with timeout
3507                let content_clone = content.clone();
3508                let (tx, rx) = mpsc::channel();
3509                thread::spawn(move || {
3510                    let result = parse(&content_clone);
3511                    let _ = tx.send(result);
3512                });
3513
3514                match rx.recv_timeout(Duration::from_secs(2)) {
3515                    Ok(Ok(_)) => passed += 1,
3516                    Ok(Err(errors)) => {
3517                        let first_err = errors
3518                            .first()
3519                            .map(|e| format!("line {}: {}", e.line, e.message))
3520                            .unwrap_or_default();
3521                        failed_files.push((file_path, first_err));
3522                    }
3523                    Err(_) => {
3524                        timeout_files.push(file_path);
3525                    }
3526                }
3527            }
3528        }
3529
3530        eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
3531        eprintln!("Passed: {}/{}", passed, total);
3532
3533        if !timeout_files.is_empty() {
3534            eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
3535            for file in timeout_files.iter().take(10) {
3536                eprintln!("  {}", file);
3537            }
3538            if timeout_files.len() > 10 {
3539                eprintln!("  ... and {} more", timeout_files.len() - 10);
3540            }
3541        }
3542
3543        if !failed_files.is_empty() {
3544            eprintln!("\nFailed files: {}", failed_files.len());
3545            for (file, err) in failed_files.iter().take(20) {
3546                let filename = Path::new(file)
3547                    .file_name()
3548                    .unwrap_or_default()
3549                    .to_string_lossy();
3550                eprintln!("  {} - {}", filename, err);
3551            }
3552            if failed_files.len() > 20 {
3553                eprintln!("  ... and {} more", failed_files.len() - 20);
3554            }
3555        }
3556
3557        let pass_rate = if total > 0 {
3558            (passed as f64 / total as f64) * 100.0
3559        } else {
3560            0.0
3561        };
3562        eprintln!("Pass rate: {:.1}%", pass_rate);
3563
3564        // Require at least 50% pass rate
3565        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
3566    }
3567}
zshrs_parse/parser.rs

zshrs_parse/
parser.rs