zshrs_parse/
parser.rs

1//! Zsh parser - Direct port from zsh/Src/parse.c
2//!
3//! This parser takes tokens from the ZshLexer and builds an AST.
4//! It follows the zsh grammar closely, producing structures that
5//! can be executed by the shell executor.
6
7use crate::lexer::ZshLexer;
8use crate::tokens::LexTok;
9use serde::{Deserialize, Serialize};
10
11/// AST node for a complete program (list of commands)
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ZshProgram {
14    pub lists: Vec<ZshList>,
15}
16
17/// A list is a sequence of sublists separated by ; or & or newline
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct ZshList {
20    pub sublist: ZshSublist,
21    pub flags: ListFlags,
22}
23
24#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
25pub struct ListFlags {
26    /// Run asynchronously (&)
27    pub async_: bool,
28    /// Disown after running (&| or &!)
29    pub disown: bool,
30}
31
32/// A sublist is pipelines connected by && or ||
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct ZshSublist {
35    pub pipe: ZshPipe,
36    pub next: Option<(SublistOp, Box<ZshSublist>)>,
37    pub flags: SublistFlags,
38}
39
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
41pub enum SublistOp {
42    And, // &&
43    Or,  // ||
44}
45
46#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
47pub struct SublistFlags {
48    /// Coproc
49    pub coproc: bool,
50    /// Negated with !
51    pub not: bool,
52}
53
54/// A pipeline is commands connected by |
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct ZshPipe {
57    pub cmd: ZshCommand,
58    pub next: Option<Box<ZshPipe>>,
59    pub lineno: u64,
60    /// `|&` between this stage and the next — merge stderr into the
61    /// pipe so the next stage's stdin sees both stdout AND stderr from
62    /// this stage. When `next` is None this flag is meaningless.
63    #[serde(default)]
64    pub merge_stderr: bool,
65}
66
67/// A command
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub enum ZshCommand {
70    Simple(ZshSimple),
71    Subsh(Box<ZshProgram>), // (list)
72    Cursh(Box<ZshProgram>), // {list}
73    For(ZshFor),
74    Case(ZshCase),
75    If(ZshIf),
76    While(ZshWhile),
77    Until(ZshWhile),
78    Repeat(ZshRepeat),
79    FuncDef(ZshFuncDef),
80    Time(Option<Box<ZshSublist>>),
81    Cond(ZshCond), // [[ ... ]]
82    Arith(String), // (( ... ))
83    Try(ZshTry),   // { ... } always { ... }
84    /// Compound command with trailing redirects:
85    /// `{ cmd } 2>&1`, `(...) >file`, `if ...; fi >file`, etc.
86    /// Simple commands carry redirects in their own struct; this wrapper
87    /// is only used for compound forms.
88    Redirected(Box<ZshCommand>, Vec<ZshRedir>),
89}
90
91/// A simple command (assignments, words, redirections)
92#[derive(Debug, Clone, Serialize, Deserialize)]
93pub struct ZshSimple {
94    pub assigns: Vec<ZshAssign>,
95    pub words: Vec<String>,
96    pub redirs: Vec<ZshRedir>,
97}
98
99/// An assignment
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct ZshAssign {
102    pub name: String,
103    pub value: ZshAssignValue,
104    pub append: bool, // +=
105}
106
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub enum ZshAssignValue {
109    Scalar(String),
110    Array(Vec<String>),
111}
112
113/// A redirection
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct ZshRedir {
116    pub rtype: RedirType,
117    pub fd: i32,
118    pub name: String,
119    pub heredoc: Option<HereDocInfo>,
120    pub varid: Option<String>, // {var}>file
121    /// Index into ZshLexer.heredocs[] for body lookup. Filled in by
122    /// `parse_redirection` for Heredoc/HeredocDash, then resolved into
123    /// `heredoc.content` by `fill_heredoc_bodies` after process_heredocs
124    /// has run for the line.
125    #[serde(skip)]
126    pub heredoc_idx: Option<usize>,
127}
128
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct HereDocInfo {
131    pub content: String,
132    pub terminator: String,
133    /// Originally-quoted terminator (`<<'EOF'`, `<<"EOF"`). When true the
134    /// body is passed verbatim — no `$var` / `$(cmd)` / `$((expr))`
135    /// expansion. Plain `<<EOF` runs all expansions.
136    #[serde(default)]
137    pub quoted: bool,
138}
139
140/// Redirection type
141#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
142pub enum RedirType {
143    Write,        // >
144    Writenow,     // >|
145    Append,       // >>
146    Appendnow,    // >>|
147    Read,         // <
148    ReadWrite,    // <>
149    Heredoc,      // <<
150    HeredocDash,  // <<-
151    Herestr,      // <<<
152    MergeIn,      // <&
153    MergeOut,     // >&
154    ErrWrite,     // &>
155    ErrWritenow,  // &>|
156    ErrAppend,    // >>&
157    ErrAppendnow, // >>&|
158    InPipe,       // < <(...)
159    OutPipe,      // > >(...)
160}
161
162/// For loop
163#[derive(Debug, Clone, Serialize, Deserialize)]
164pub struct ZshFor {
165    pub var: String,
166    pub list: ForList,
167    pub body: Box<ZshProgram>,
168    /// True if this was parsed as `select` rather than `for`. Both share
169    /// the same parser, so the compiler routes on this flag.
170    #[serde(default)]
171    pub is_select: bool,
172}
173
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub enum ForList {
176    Words(Vec<String>),
177    CStyle {
178        init: String,
179        cond: String,
180        step: String,
181    },
182    Positional,
183}
184
185/// Case statement
186#[derive(Debug, Clone, Serialize, Deserialize)]
187pub struct ZshCase {
188    pub word: String,
189    pub arms: Vec<CaseArm>,
190}
191
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct CaseArm {
194    pub patterns: Vec<String>,
195    pub body: ZshProgram,
196    pub terminator: CaseTerm,
197}
198
199#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
200pub enum CaseTerm {
201    Break,    // ;;
202    Continue, // ;&
203    TestNext, // ;|
204}
205
206/// If statement
207#[derive(Debug, Clone, Serialize, Deserialize)]
208pub struct ZshIf {
209    pub cond: Box<ZshProgram>,
210    pub then: Box<ZshProgram>,
211    pub elif: Vec<(ZshProgram, ZshProgram)>,
212    pub else_: Option<Box<ZshProgram>>,
213}
214
215/// While/Until loop
216#[derive(Debug, Clone, Serialize, Deserialize)]
217pub struct ZshWhile {
218    pub cond: Box<ZshProgram>,
219    pub body: Box<ZshProgram>,
220    pub until: bool,
221}
222
223/// Repeat loop
224#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct ZshRepeat {
226    pub count: String,
227    pub body: Box<ZshProgram>,
228}
229
230/// Function definition
231#[derive(Debug, Clone, Serialize, Deserialize)]
232pub struct ZshFuncDef {
233    pub names: Vec<String>,
234    pub body: Box<ZshProgram>,
235    pub tracing: bool,
236    /// Anonymous-function call args. `() { body } a b` parses as a
237    /// FuncDef (auto-named) with `auto_call_args = Some(vec!["a", "b"])`.
238    /// compile_funcdef registers the function then emits a Simple call
239    /// with these args.
240    #[serde(default)]
241    pub auto_call_args: Option<Vec<String>>,
242    /// Original source text of the function body (the bytes between
243    /// `{` and `}`, without the braces themselves), captured at parse
244    /// time. Populated for `function name { body }` and `function name() { body }`
245    /// forms; left None for the synthesized inline-funcdef recovery
246    /// path. ZshCompiler::compile_funcdef forwards it to
247    /// `BUILTIN_REGISTER_COMPILED_FN` so introspection (`whence`, `which`,
248    /// `${functions[name]}`) has canonical source text.
249    #[serde(default)]
250    pub body_source: Option<String>,
251}
252
253/// Conditional expression [[ ... ]]
254#[derive(Debug, Clone, Serialize, Deserialize)]
255pub enum ZshCond {
256    Not(Box<ZshCond>),
257    And(Box<ZshCond>, Box<ZshCond>),
258    Or(Box<ZshCond>, Box<ZshCond>),
259    Unary(String, String),          // -f file, -n str, etc.
260    Binary(String, String, String), // str = pat, a -eq b, etc.
261    Regex(String, String),          // str =~ regex
262}
263
264/// Try/always block
265#[derive(Debug, Clone, Serialize, Deserialize)]
266pub struct ZshTry {
267    pub try_block: Box<ZshProgram>,
268    pub always: Box<ZshProgram>,
269}
270
271/// Zsh parameter expansion flags
272#[derive(Debug, Clone, Serialize, Deserialize)]
273pub enum ZshParamFlag {
274    Lower,                 // L - lowercase
275    Upper,                 // U - uppercase
276    Capitalize,            // C - capitalize words
277    Join(String),          // j:sep: - join array with separator
278    JoinNewline,           // F - join with newlines
279    Split(String),         // s:sep: - split string into array
280    SplitLines,            // f - split on newlines
281    SplitWords,            // z - split into words (shell parsing)
282    Type,                  // t - type of variable
283    Words,                 // w - word splitting
284    Quote,                 // qq - single-quote always
285    QuoteIfNeeded,         // q+ - single-quote only if needed
286    DoubleQuote,           // qqq - double-quote
287    DollarQuote,           // qqqq - $'...' style
288    QuoteBackslash,        // q / b / B - backslash-escape special chars
289    Unique,                // u - unique elements only
290    Reverse,               // O - reverse sort
291    Sort,                  // o - sort
292    NumericSort,           // n - numeric sort
293    IndexSort,             // a - sort in array index order
294    Keys,                  // k - associative array keys
295    Values,                // v - associative array values
296    Length,                // # - length (character codes)
297    CountChars,            // c - count total characters
298    Expand,                // e - perform shell expansions
299    PromptExpand,          // % - expand prompt escapes
300    PromptExpandFull,      // %% - full prompt expansion
301    Visible,               // V - make non-printable chars visible
302    Directory,             // D - substitute directory names
303    Head(usize),           // [1,n] - first n elements
304    Tail(usize),           // [-n,-1] - last n elements
305    PadLeft(usize, char),  // l:len:fill: - pad left
306    PadRight(usize, char), // r:len:fill: - pad right
307    Width(usize),          // m - use width for padding
308    Match,                 // M - include matched portion
309    Remove,                // R - include non-matched portion (complement of M)
310    Subscript,             // S - subscript scanning
311    Parameter,             // P - use value as parameter name (indirection)
312    Glob,                  // ~ - glob patterns in pattern
313    /// `@` flag — force array-context behavior even inside DQ. zsh's
314    /// `"${(@o)arr}"` keeps the sort active and splices each element as
315    /// its own word. Without this, the array-only flags became no-ops
316    /// in DQ.
317    At,
318}
319
320/// List operator (for shell command lists)
321#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
322pub enum ListOp {
323    And,     // &&
324    Or,      // ||
325    Semi,    // ;
326    Amp,     // &
327    Newline, // \n
328}
329
330/// Shell word - can be simple literal or complex expansion
331#[derive(Debug, Clone, Serialize, Deserialize)]
332pub enum ShellWord {
333    /// Plain text token. Most ZWC-decoded words land here. Goes through
334    /// `expand_string` (plus glob/tilde/etc. as text-level transforms) for
335    /// final output.
336    Literal(String),
337    /// Concatenation of sub-words. ZWC array decoding produces this with
338    /// child Literals; nothing else constructs it now that the legacy
339    /// hand-rolled parser is gone.
340    Concat(Vec<ShellWord>),
341}
342
343/// Variable modifier for parameter expansion
344#[derive(Debug, Clone, Serialize, Deserialize)]
345pub enum VarModifier {
346    Default(ShellWord),
347    DefaultAssign(ShellWord),
348    Error(ShellWord),
349    Alternate(ShellWord),
350    Length,
351    Substring(i64, Option<i64>),
352    RemovePrefix(ShellWord),
353    RemovePrefixLong(ShellWord),
354    RemoveSuffix(ShellWord),
355    RemoveSuffixLong(ShellWord),
356    Replace(ShellWord, ShellWord),
357    ReplaceAll(ShellWord, ShellWord),
358    Upper,
359    Lower,
360}
361
362/// Shell command - the old shell_ast compatible type
363#[derive(Debug, Clone, Serialize, Deserialize)]
364pub enum ShellCommand {
365    Simple(SimpleCommand),
366    Pipeline(Vec<ShellCommand>, bool),
367    List(Vec<(ShellCommand, ListOp)>),
368    Compound(CompoundCommand),
369    FunctionDef(String, Box<ShellCommand>),
370}
371
372/// Simple command with assignments, words, and redirects
373#[derive(Debug, Clone, Serialize, Deserialize)]
374pub struct SimpleCommand {
375    pub assignments: Vec<(String, ShellWord, bool)>,
376    pub words: Vec<ShellWord>,
377    pub redirects: Vec<Redirect>,
378}
379
380/// Redirect
381#[derive(Debug, Clone, Serialize, Deserialize)]
382pub struct Redirect {
383    pub fd: Option<i32>,
384    pub op: RedirectOp,
385    pub target: ShellWord,
386    pub heredoc_content: Option<String>,
387    pub fd_var: Option<String>,
388}
389
390/// Redirect operator
391#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
392pub enum RedirectOp {
393    Write,
394    Append,
395    Read,
396    ReadWrite,
397    Clobber,
398    DupRead,
399    DupWrite,
400    HereDoc,
401    HereString,
402    WriteBoth,
403    AppendBoth,
404}
405
406/// Compound command
407#[derive(Debug, Clone, Serialize, Deserialize)]
408pub enum CompoundCommand {
409    BraceGroup(Vec<ShellCommand>),
410    Subshell(Vec<ShellCommand>),
411    If {
412        conditions: Vec<(Vec<ShellCommand>, Vec<ShellCommand>)>,
413        else_part: Option<Vec<ShellCommand>>,
414    },
415    For {
416        var: String,
417        words: Option<Vec<ShellWord>>,
418        body: Vec<ShellCommand>,
419    },
420    ForArith {
421        init: String,
422        cond: String,
423        step: String,
424        body: Vec<ShellCommand>,
425    },
426    While {
427        condition: Vec<ShellCommand>,
428        body: Vec<ShellCommand>,
429    },
430    Until {
431        condition: Vec<ShellCommand>,
432        body: Vec<ShellCommand>,
433    },
434    Case {
435        word: ShellWord,
436        cases: Vec<(Vec<ShellWord>, Vec<ShellCommand>, CaseTerminator)>,
437    },
438    Select {
439        var: String,
440        words: Option<Vec<ShellWord>>,
441        body: Vec<ShellCommand>,
442    },
443    Coproc {
444        name: Option<String>,
445        body: Box<ShellCommand>,
446    },
447    /// repeat N do ... done
448    Repeat {
449        count: String,
450        body: Vec<ShellCommand>,
451    },
452    /// { try-block } always { always-block }
453    Try {
454        try_body: Vec<ShellCommand>,
455        always_body: Vec<ShellCommand>,
456    },
457    Arith(String),
458    WithRedirects(Box<ShellCommand>, Vec<Redirect>),
459}
460
461/// Case terminator
462#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
463pub enum CaseTerminator {
464    Break,
465    Fallthrough,
466    Continue,
467}
468
469/// Parse errors
470#[derive(Debug, Clone, Serialize, Deserialize)]
471pub struct ParseError {
472    pub message: String,
473    pub line: u64,
474}
475
476impl std::fmt::Display for ParseError {
477    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
478        write!(f, "parse error at line {}: {}", self.line, self.message)
479    }
480}
481
482impl std::error::Error for ParseError {}
483
484/// The Zsh Parser
485pub struct ZshParser<'a> {
486    lexer: ZshLexer<'a>,
487    errors: Vec<ParseError>,
488    /// Global iteration counter to prevent infinite loops
489    global_iterations: usize,
490    /// Recursion depth counter to prevent stack overflow
491    recursion_depth: usize,
492}
493
/// Deepest recursion the parser tolerates; `check_recursion` reports
/// failure once `recursion_depth` is strictly greater than this.
const MAX_RECURSION_DEPTH: usize = 500;
495
/// Saved parse context. Port of zsh's `struct parse_stack` (declared in
/// zsh/Src/zsh.h, used by parse.c:295-355 `parse_context_save` /
/// `parse_context_restore`). It holds per-parse-call state so that a
/// nested parse (e.g. inside a command substitution) does not clobber
/// the outer one.
///
/// zshrs port note: zsh's parse_stack tracks wordcode-buffer state
/// (ecbuf, eclen, ecused, ecnpats, ecstrs, ecsoffs, ecssub, ecnfunc).
/// zshrs builds AST trees instead, so those fields collapse to the
/// `recursion_depth` + `global_iterations` pair. The lexer-side fields
/// (incmdpos, incond, etc.) live on `ZshLexer` and are saved through the
/// lexer's own `LexStack` rather than being duplicated here.
///
/// `PartialEq`/`Eq` are derived so a saved context can be compared
/// directly (for example to assert a save/restore round trip).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ParseStack {
    /// Saved value of the parser's recursion depth counter.
    pub recursion_depth: usize,
    /// Saved value of the parser's global iteration counter.
    pub global_iterations: usize,
}
513
514/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
515/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
516/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
517/// during scanning (in source order).
518fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
519    for list in &mut prog.lists {
520        fill_in_sublist(&mut list.sublist, bodies);
521    }
522}
523
524fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
525    fill_in_pipe(&mut sub.pipe, bodies);
526    if let Some(next) = &mut sub.next {
527        fill_in_sublist(&mut next.1, bodies);
528    }
529}
530
531fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
532    fill_in_command(&mut pipe.cmd, bodies);
533    if let Some(next) = &mut pipe.next {
534        fill_in_pipe(next, bodies);
535    }
536}
537
538fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
539    match cmd {
540        ZshCommand::Simple(s) => {
541            for r in &mut s.redirs {
542                resolve_redir(r, bodies);
543            }
544        }
545        ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
546        ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
547        ZshCommand::If(i) => {
548            fill_heredoc_bodies(&mut i.cond, bodies);
549            fill_heredoc_bodies(&mut i.then, bodies);
550            for (c, b) in &mut i.elif {
551                fill_heredoc_bodies(c, bodies);
552                fill_heredoc_bodies(b, bodies);
553            }
554            if let Some(e) = &mut i.else_ {
555                fill_heredoc_bodies(e, bodies);
556            }
557        }
558        ZshCommand::While(w) | ZshCommand::Until(w) => {
559            fill_heredoc_bodies(&mut w.cond, bodies);
560            fill_heredoc_bodies(&mut w.body, bodies);
561        }
562        ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
563        ZshCommand::Case(c) => {
564            for arm in &mut c.arms {
565                fill_heredoc_bodies(&mut arm.body, bodies);
566            }
567        }
568        ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
569        ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
570        ZshCommand::Try(t) => {
571            fill_heredoc_bodies(&mut t.try_block, bodies);
572            fill_heredoc_bodies(&mut t.always, bodies);
573        }
574        ZshCommand::Redirected(inner, redirs) => {
575            for r in redirs {
576                resolve_redir(r, bodies);
577            }
578            fill_in_command(inner, bodies);
579        }
580        ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
581    }
582}
583
584fn resolve_redir(r: &mut ZshRedir, bodies: &[HereDocInfo]) {
585    if let Some(idx) = r.heredoc_idx {
586        if let Some(info) = bodies.get(idx) {
587            r.heredoc = Some(info.clone());
588        }
589    }
590}
591
592/// If `list` is a Simple containing one word that ends in the
593/// `<INPAR><OUTPAR>` token pair (the lexer-port encoding of `()`),
594/// return the bare name. Used by `parse_program_until` to detect
595/// `name() {body}` style function definitions where the lexer
596/// hasn't split the `()` from the name.
597/// Detect the `name() …` shape inside a Simple. Returns the function
598/// name and (when the body was already inlined into the same Simple,
599/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
600/// Returns None for non-funcdef shapes.
601fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
602    if list.flags.async_ || list.sublist.next.is_some() {
603        return None;
604    }
605    let pipe = &list.sublist.pipe;
606    if pipe.next.is_some() {
607        return None;
608    }
609    let simple = match &pipe.cmd {
610        ZshCommand::Simple(s) => s,
611        _ => return None,
612    };
613    if simple.words.is_empty() || !simple.assigns.is_empty() {
614        return None;
615    }
616    let suffix = "\u{88}\u{8a}"; // INPAR + OUTPAR
617                                 // Find the FIRST word ending in `()`. zsh accepts the
618                                 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
619                                 // par_funcdef wordlist) — words[0..i-1] are extra names,
620                                 // words[i] is `lastname()`. Words after are the body argv
621                                 // (one-line shorthand, `name() cmd args`).
622    let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
623    let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
624    for w in &simple.words[..par_idx] {
625        // Earlier names must be bare identifiers, NOT contain
626        // tokens that imply they're not function names (no `()`,
627        // no quotes, no expansions). zsh's lexer enforces this
628        // at the wordlist level; we approximate by requiring the
629        // word be an identifier-shaped token after untokenize.
630        let bare = crate::lexer::untokenize(w);
631        let valid = !bare.is_empty()
632            && bare
633                .chars()
634                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
635        if !valid {
636            return None;
637        }
638        names.push(bare);
639    }
640    let last = &simple.words[par_idx];
641    let bare = &last[..last.len() - suffix.len()];
642    if bare.is_empty() {
643        return None;
644    }
645    names.push(crate::lexer::untokenize(bare));
646    let rest = simple.words[par_idx + 1..].to_vec();
647    Some((names, rest))
648}
649
650impl<'a> ZshParser<'a> {
651    /// Create a new parser
652    pub fn new(input: &'a str) -> Self {
653        ZshParser {
654            lexer: ZshLexer::new(input),
655            errors: Vec::new(),
656            global_iterations: 0,
657            recursion_depth: 0,
658        }
659    }
660
661    /// Check iteration limit; returns true if exceeded
662    #[inline]
663    fn check_limit(&mut self) -> bool {
664        self.global_iterations += 1;
665        self.global_iterations > 10_000
666    }
667
668    /// Check recursion depth; returns true if exceeded
669    #[inline]
670    fn check_recursion(&mut self) -> bool {
671        self.recursion_depth > MAX_RECURSION_DEPTH
672    }
673
674    /// Save parse context onto a `ParseStack`. Direct port of
675    /// zsh/Src/parse.c:295-320 `parse_context_save`. Pushes
676    /// recursion_depth + global_iterations and resets to zero so
677    /// a nested parse can't trigger the outer parse's limits.
678    /// Lexer-side state (incmdpos / incond / etc.) saves via the
679    /// lexer's own `LexStack` since those fields live on ZshLexer.
680    pub fn parse_context_save(&mut self, ps: &mut ParseStack) {
681        // parse.c:299-317 — save parser state. zshrs collapses zsh's
682        // wordcode-buffer fields (ecbuf/eclen/ecused/ecnpats/ecstrs/
683        // ecsoffs/ecssub/ecnfunc) into the recursion+iteration pair
684        // since the AST builder doesn't use a flat wordcode buffer.
685        ps.recursion_depth = self.recursion_depth;
686        ps.global_iterations = self.global_iterations;
687        // parse.c:318-319 — clear the buffer + heredoc list so a
688        // nested parse starts from a clean slate.
689        self.recursion_depth = 0;
690        self.global_iterations = 0;
691    }
692
693    /// Restore parse context from a `ParseStack`. Direct port of
694    /// zsh/Src/parse.c:326-355 `parse_context_restore`. Inverse of
695    /// `parse_context_save`. Also clears any half-built AST state
696    /// to prevent leaking into the outer parse.
697    pub fn parse_context_restore(&mut self, ps: &ParseStack) {
698        // parse.c:330-331 — free any in-progress wordcode buffer.
699        // zshrs has no equivalent — AST nodes are owned by their
700        // parent so dropping the parser frees them.
701
702        // parse.c:333-352 — restore saved state.
703        self.recursion_depth = ps.recursion_depth;
704        self.global_iterations = ps.global_iterations;
705
706        // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
707        // error flag so the outer parse sees a clean state. zshrs
708        // tracks errors per-parser; clearing means dropping any
709        // partial errors collected during the nested parse.
710        self.errors.clear();
711    }
712
713    /// Initialize parser status. Direct port of zsh/Src/parse.c:489-503
714    /// `init_parse_status`. Clears the per-parse-call lexer flags
715    /// so a fresh parse starts from cmd-position with no nesting
716    /// state inherited from a prior parse.
717    pub fn init_parse_status(&mut self) {
718        // parse.c:500-502 — `incasepat = incond = inredir = infor =
719        // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
720        self.lexer.incasepat = 0;
721        self.lexer.incond = 0;
722        self.lexer.inredir = false;
723        self.lexer.infor = 0;
724        self.lexer.intypeset = false;
725        self.lexer.incmdpos = true;
726    }
727
728    /// Initialize parser for a fresh parse. Direct port of
729    /// zsh/Src/parse.c:507-525 `init_parse`. C source allocates a
730    /// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
731    /// per-parse-call counters, and calls init_parse_status. zshrs
732    /// has no flat wordcode buffer (AST is built inline) so this
733    /// function reduces to init_parse_status + recursion_depth/
734    /// global_iterations clear.
735    pub fn init_parse(&mut self) {
736        // parse.c:513-520 — init wordcode buffer. zshrs no-op.
737        self.recursion_depth = 0;
738        self.global_iterations = 0;
739        // parse.c:522 — `init_parse_status();`
740        self.init_parse_status();
741    }
742
743    /// Check whether the parsed program is empty. Direct port of
744    /// zsh/Src/parse.c:583-587 `empty_eprog`. C version checks
745    /// `*p->prog == WCB_END()` (single end-of-wordcode marker).
746    /// zshrs version checks the AST node count.
747    pub fn empty_eprog(prog: &ZshProgram) -> bool {
748        prog.lists.is_empty()
749    }
750
751    /// Clear pending here-document list. Direct port of
752    /// zsh/Src/parse.c:589-600 `clear_hdocs`. The C version walks
753    /// the global `hdocs` linked list and frees each node. zshrs
754    /// stores pending heredocs on the lexer's `heredocs` Vec —
755    /// truncating it has the same effect.
756    pub fn clear_hdocs(&mut self) {
757        self.lexer.heredocs.clear();
758    }
759
760    /// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
761    /// 612-631 `parse_event`. Reads one event from the lexer (a
762    /// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
763    /// returns the resulting ZshProgram.
764    ///
765    /// `endtok` is the token that terminates the event — usually
766    /// ENDINPUT, but for command-style substitutions the closing
767    /// `)` (zsh's CMD_SUBST_CLOSE).
768    ///
769    /// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
770    /// allocated wordcode program). zshrs returns a `ZshProgram`
771    /// (AST root). Same role at the parse-output boundary.
772    pub fn parse_event(&mut self, endtok: LexTok) -> Option<ZshProgram> {
773        // parse.c:616-619 — reset state and prime the lexer.
774        self.lexer.tok = LexTok::Endinput;
775        self.lexer.incmdpos = true;
776        self.lexer.zshlex();
777        // parse.c:620 — `init_parse();`
778        self.init_parse();
779
780        // parse.c:622-625 — drive par_event; on failure clear hdocs.
781        if !self.par_event(endtok) {
782            self.clear_hdocs();
783            return None;
784        }
785        // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
786        // parse for a substitution that doesn't need its own eprog.
787        // zshrs returns an empty program in that case (caller
788        // discards).
789        if endtok != LexTok::Endinput {
790            return Some(ZshProgram { lists: Vec::new() });
791        }
792        // parse.c:630 — `bld_eprog(1);` — build the final eprog.
793        // zshrs has already built the AST via parse_program_until,
794        // but parse_event uses par_event directly so we need to
795        // collect what par_event accumulated.
796        Some(self.parse_program_until(None))
797    }
798
799    /// Parse one event (sublist with optional separator). Direct
800    /// port of zsh/Src/parse.c:633-695 `par_event`. Returns true if
801    /// an event was successfully parsed, false on EOF / endtok.
802    ///
803    /// zshrs port note: the C version emits wordcodes via ecadd/
804    /// set_list_code; zshrs's parser builds AST nodes via
805    /// parse_sublist + parse_list. Same flow, different output.
806    pub fn par_event(&mut self, endtok: LexTok) -> bool {
807        // parse.c:639-643 — skip leading SEPERs.
808        while self.lexer.tok == LexTok::Seper {
809            // parse.c:640-641 — at top-level (endtok == ENDINPUT),
810            // a SEPER on a fresh line ends the event.
811            if self.lexer.isnewlin > 0 && endtok == LexTok::Endinput {
812                return false;
813            }
814            self.lexer.zshlex();
815        }
816        // parse.c:644-647 — terminate on EOF or matching close-token.
817        if self.lexer.tok == LexTok::Endinput {
818            return false;
819        }
820        if self.lexer.tok == endtok {
821            return true;
822        }
823        // parse.c:649-... — drive parse_sublist + handle terminator.
824        // zshrs's parse_sublist already builds the AST node directly.
825        match self.parse_sublist() {
826            Some(_) => {
827                // parse.c:651-693 — terminator handling. zshrs's
828                // parse_list wraps this; for parse_event we just
829                // confirm the sublist parsed.
830                true
831            }
832            None => false,
833        }
834    }
835
836    /// Parse one list — non-recursing variant. Direct port of
837    /// zsh/Src/parse.c:807-817 `par_list1`. Like par_list but
838    /// doesn't recurse on the trailing-separator path; used by
839    /// callers that only want one statement (e.g. each arm of a
840    /// case body).
841    pub fn par_list1(&mut self) -> Option<ZshSublist> {
842        // parse.c:810-816 — body is a single par_sublist call wrapped
843        // in the eu/ecused tracking that zshrs doesn't need (no
844        // wordcode buffer).
845        self.parse_sublist()
846    }
847
848    /// Wire a here-document body onto the redirection token that
849    /// requested it. Direct port of zsh/Src/parse.c:2347-2361
850    /// `setheredoc`. Called when a heredoc terminator has been
851    /// matched and the body is ready to be attached to the redir.
852    ///
853    /// zshrs port note: zsh's setheredoc patches the wordcode
854    /// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
855    /// zshrs threads heredoc bodies through `HereDocInfo` structs
856    /// that resolve_redir applies during the post-parse fill_in pass.
857    /// This method is the AST-side equivalent: writes back to the
858    /// matching redir node by index.
859    pub fn setheredoc(
860        &mut self,
861        _pc: usize,
862        _redir_type: i32,
863        _doc: &str,
864        _term: &str,
865        _munged_term: &str,
866    ) {
867        // zshrs's heredoc resolution happens in fill_in_command /
868        // resolve_redir at parser.rs top. This stub exists for API
869        // parity with the C signature; live wiring happens via
870        // self.lexer.heredocs which the post-parse pass consumes.
871    }
872
873    /// Parse a wordlist for `for ... in WORDS;`. Direct port of
874    /// zsh/Src/parse.c:2362-2378 `par_wordlist`. Reads STRING tokens
875    /// until the next SEPER / SEMI / NEWLIN.
876    pub fn par_wordlist(&mut self) -> Vec<String> {
877        let mut out = Vec::new();
878        // parse.c:2362-2378 — collect STRINGs into the wordlist.
879        while self.lexer.tok == LexTok::String {
880            if let Some(text) = self.lexer.tokstr.clone() {
881                out.push(text);
882            }
883            self.lexer.zshlex();
884        }
885        out
886    }
887
888    /// Parse a newline-separated wordlist. Direct port of
889    /// zsh/Src/parse.c:2379-2398 `par_nl_wordlist`. Like
890    /// par_wordlist but tolerates leading/trailing newlines.
891    pub fn par_nl_wordlist(&mut self) -> Vec<String> {
892        // parse.c:2380-2381 — skip leading newlines.
893        while self.lexer.tok == LexTok::Newlin {
894            self.lexer.zshlex();
895        }
896        let out = self.par_wordlist();
897        // parse.c:2395-2397 — skip trailing newlines.
898        while self.lexer.tok == LexTok::Newlin {
899            self.lexer.zshlex();
900        }
901        out
902    }
903
904    /// Get the integer value of the next token in a cond expression.
905    /// Direct port of zsh/Src/parse.c:2643-2658 `get_cond_num`.
906    /// Used for `[[ N OP M ]]` numeric tests where N/M are integer
907    /// literals or variable references.
908    pub fn get_cond_num(&mut self) -> Option<i64> {
909        if self.lexer.tok != LexTok::String {
910            return None;
911        }
912        let text = self.lexer.tokstr.as_ref()?.clone();
913        // parse.c:2647-2655 — parse as integer with optional sign.
914        let parsed = text.parse::<i64>().ok()?;
915        self.lexer.zshlex();
916        Some(parsed)
917    }
918
919    /// Emit a parser-level error. Direct port of zsh/Src/parse.c:
920    /// 2733-2766 `yyerror`. C version fills a per-event error buffer
921    /// + sets errflag. zshrs pushes onto self.errors which the
922    /// caller drains via parse()'s Result return.
923    pub fn yyerror(&mut self, msg: &str) {
924        // parse.c:2735-2765 — zsh's yyerror collects the offending
925        // token's literal text + line number. zshrs already does
926        // this via self.error() with the lexer's toklineno.
927        self.error(msg);
928    }
929
930    // ============================================================
931    // Wordcode emission stubs (parse.c private helpers)
932    //
933    // The following functions are direct counterparts of zsh's
934    // private wordcode-emission helpers in parse.c. zsh uses these
935    // to write u32 opcodes into a flat `ecbuf` array; zshrs builds
936    // an AST tree and never emits wordcode at the parse layer.
937    // The implementations are documented stubs that preserve the
938    // function signatures + cite the C source. Real wordcode would
939    // be emitted later by compile_zsh.rs walking the AST.
940    //
941    // Listed for port-surface completeness so every parse.c symbol
942    // has a Rust counterpart even when the algorithm is moot in the
943    // AST architecture.
944    // ============================================================
945
946    /// Patch a list-placeholder wordcode with its actual opcode +
947    /// jump distance. Direct port of zsh/Src/parse.c:736-749
948    /// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
949    /// par_sublist runs, then comes back through set_list_code to
950    /// rewrite the slot with WCB_LIST(type, distance) once the
951    /// sublist's final length is known.
952    ///
953    /// zshrs port note: zshrs builds AST nodes inline so there's
954    /// no placeholder to patch. The ZshList { sublist, flags }
955    /// node is created with the right flags from the start.
956    /// Stub provided for port-surface completeness.
957    pub fn set_list_code(_p: usize, _type_code: i32, _cmplx: bool) {
958        // parse.c:740-748 — wordcode patching. zshrs no-op.
959    }
960
961    /// Patch a sublist-placeholder wordcode with its actual opcode.
962    /// Direct port of zsh/Src/parse.c:753-763 `set_sublist_code`.
963    /// Same role as set_list_code at the sublist level.
964    pub fn set_sublist_code(_p: usize, _type_code: i32, _flags: i32, _skip: i32, _cmplx: bool) {
965        // parse.c:757-762 — wordcode patching. zshrs no-op.
966    }
967
968    /// Add one wordcode opcode to the buffer. Direct port of
969    /// zsh/Src/parse.c:396-408 `ecadd`. Returns the index of the
970    /// new opcode. zshrs no-op since the AST is built inline.
971    pub fn ecadd(_c: u32) -> usize {
972        // parse.c:399-407 — append to ecbuf with grow-on-demand.
973        // zshrs no-op.
974        0
975    }
976
977    /// Delete a wordcode at position p. Direct port of
978    /// zsh/Src/parse.c:412-421 `ecdel`. zshrs no-op.
979    pub fn ecdel(_p: usize) {
980        // parse.c:415-420 — memmove + decrement ecused. zshrs no-op.
981    }
982
983    /// Encode a string into a wordcode value. Direct port of
984    /// zsh/Src/parse.c:425-471 `ecstrcode`. C source packs short
985    /// strings (≤4 chars) into a single wordcode + uses a binary
986    /// tree (Eccstr) for longer strings; long-string slots are
987    /// de-duplicated via hasher + strcmp. zshrs no-op since the
988    /// AST stores strings directly.
989    pub fn ecstrcode(_s: &str) -> u32 {
990        // parse.c:432-470 — the actual encoding logic. zshrs no-op.
991        0
992    }
993
994    /// Insert N empty wordcode slots at position p. Direct port of
995    /// zsh/Src/parse.c:371-388 `ecispace`. Used to reserve space
996    /// for a forward-jump opcode that will be patched once the
997    /// jump target is known. zshrs no-op since AST jumps are
998    /// resolved at compile_zsh time.
999    pub fn ecispace(_p: usize, _n: usize) {
1000        // parse.c:376-387 — grow + memmove + adjust hdocs. zshrs no-op.
1001    }
1002
1003    /// Adjust pending heredoc pointers when wordcodes shift. Direct
1004    /// port of zsh/Src/parse.c:359-367 `ecadjusthere`. Called
1005    /// internally by ecispace / ecdel after they shift the buffer.
1006    /// zshrs no-op since heredocs are tracked by index in the
1007    /// lexer's Vec, not by absolute wordcode offset.
1008    pub fn ecadjusthere(_p: usize, _d: i32) {
1009        // parse.c:362-366 — walk hdocs list, bump pc by d. zshrs no-op.
1010    }
1011
1012    // ============================================================
1013    // Eprog runtime ops (parse.c:2767-2853)
1014    //
1015    // dupeprog / useeprog / freeeprog are zsh's reference-counting
1016    // helpers for executable programs. zshrs's AST is owned by
1017    // value (Rust ownership); cloning is a tree-deep copy via
1018    // Clone, "use" is a no-op (the executor borrows the AST), and
1019    // "free" is automatic on drop.
1020    // ============================================================
1021
1022    /// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2767-2812
1023    /// `dupeprog`. C version deep-copies the wordcode array + string
1024    /// table + pattern progs. zshrs uses Clone on the AST.
1025    pub fn dupeprog(prog: &ZshProgram) -> ZshProgram {
1026        prog.clone()
1027    }
1028
1029    /// Increment an Eprog's reference count. Direct port of
1030    /// zsh/Src/parse.c:2813-2822 `useeprog`. zshrs no-op (Rust
1031    /// ownership).
1032    pub fn useeprog(_prog: &ZshProgram) {
1033        // parse.c:2815-2821 — `prog->nref++` if not heap-allocated.
1034        // zshrs no-op.
1035    }
1036
1037    /// Decrement / free an Eprog. Direct port of
1038    /// zsh/Src/parse.c:2823-2854 `freeeprog`. zshrs no-op (drop on
1039    /// scope-exit).
1040    pub fn freeeprog(_prog: ZshProgram) {
1041        // parse.c:2825-2853 — decrement nref, free if zero. zshrs
1042        // drops via Rust ownership.
1043    }
1044
1045    // ============================================================
1046    // Wordcode runtime getters (parse.c:2853-3060)
1047    //
1048    // These read packed wordcode out of a running Eprog at execution
1049    // time. zshrs's executor walks the AST directly so these are
1050    // stubs that preserve the C signatures + cite the source.
1051    // ============================================================
1052
1053    /// Read a packed string from the wordcode stream. Direct port of
1054    /// zsh/Src/parse.c:2853-2887 `ecgetstr`. C version unpacks
1055    /// 4-char inline strings + indexes into the strs table for
1056    /// longer ones. zshrs no-op (AST stores strings directly).
1057    pub fn ecgetstr(_dup: bool) -> String {
1058        // parse.c:2858-2886 — wordcode unpack logic. zshrs no-op.
1059        String::new()
1060    }
1061
1062    /// Read a packed string without consuming the wordcode pointer.
1063    /// Direct port of zsh/Src/parse.c:2890-2913 `ecrawstr`. zshrs
1064    /// no-op.
1065    pub fn ecrawstr() -> String {
1066        String::new()
1067    }
1068
1069    /// Read a NUL-terminated string array from wordcode. Direct port
1070    /// of zsh/Src/parse.c:2916-2933 `ecgetarr`. zshrs no-op.
1071    pub fn ecgetarr(_num: usize, _dup: bool) -> Vec<String> {
1072        Vec::new()
1073    }
1074
1075    /// Read a linked-list of strings from wordcode. Direct port of
1076    /// zsh/Src/parse.c:2936-2955 `ecgetlist`. zshrs no-op.
1077    pub fn ecgetlist(_num: usize, _dup: bool) -> Vec<String> {
1078        Vec::new()
1079    }
1080
1081    /// Read a sequence of redirection wordcodes. Direct port of
1082    /// zsh/Src/parse.c:2958-2991 `ecgetredirs`. zshrs no-op
1083    /// (redirections live as AST ZshRedir nodes).
1084    pub fn ecgetredirs() -> Vec<ZshRedir> {
1085        Vec::new()
1086    }
1087
1088    /// Copy consecutive redirection wordcodes into a new Eprog.
1089    /// Direct port of zsh/Src/parse.c:3001-3060 `eccopyredirs`.
1090    /// zshrs no-op.
1091    pub fn eccopyredirs() -> Option<ZshProgram> {
1092        None
1093    }
1094
1095    /// Initialize the dummy Eprog used as a placeholder. Direct port
1096    /// of zsh/Src/parse.c:3068-3075 `init_eprog`. zshrs no-op since
1097    /// the AST has no equivalent dummy node — empty programs are
1098    /// just `ZshProgram { lists: vec![] }`.
1099    pub fn init_eprog() {
1100        // parse.c:3071-3074 — set up dummy_eprog_code = WCB_END().
1101        // zshrs no-op.
1102    }
1103
1104    /// Parse the complete input
1105    pub fn parse(&mut self) -> Result<ZshProgram, Vec<ParseError>> {
1106        self.lexer.zshlex();
1107
1108        let mut program = self.parse_program_until(None);
1109
1110        if !self.errors.is_empty() {
1111            return Err(std::mem::take(&mut self.errors));
1112        }
1113        // Surface lexer-level errors (unmatched quote/heredoc/etc.)
1114        // that the parser silently rolls past. zsh aborts with a
1115        // diagnostic in this case; mirror it.
1116        if let Some(msg) = self.lexer.error.clone() {
1117            return Err(vec![ParseError {
1118                message: msg,
1119                line: 1,
1120            }]);
1121        }
1122
1123        // Post-pass: wire heredoc bodies (collected by lexer.process_heredocs)
1124        // back into ZshRedir.heredoc fields via heredoc_idx.
1125        let bodies: Vec<HereDocInfo> = self
1126            .lexer
1127            .heredocs
1128            .iter()
1129            .map(|h| HereDocInfo {
1130                content: h.content.clone(),
1131                terminator: h.terminator.clone(),
1132                quoted: h.quoted,
1133            })
1134            .collect();
1135        if !bodies.is_empty() {
1136            fill_heredoc_bodies(&mut program, &bodies);
1137        }
1138
1139        Ok(program)
1140    }
1141
1142    /// Parse a program (list of lists)
1143    /// Parse a complete program (top-level entry). Calls
1144    /// parse_program_until with no end-token sentinel. Direct port of
1145    /// zsh/Src/parse.c:614-720 `parse_event` / `parse_list` /
1146    /// `par_event` flow. C distinguishes COND_EVENT (single command
1147    /// for here-string) from full event parse; zshrs's parse_program
1148    /// is the full-event entry.
1149    fn parse_program(&mut self) -> ZshProgram {
1150        self.parse_program_until(None)
1151    }
1152
1153    /// Parse a program until we hit an end token
1154    /// Parse a program until one of `end_tokens` is seen (or EOF).
1155    /// Drives parse_list in a loop. C equivalent: the body of par_event
1156    /// (parse.c:635-695) iterating par_list against the lexer.
1157    fn parse_program_until(&mut self, end_tokens: Option<&[LexTok]>) -> ZshProgram {
1158        let mut lists = Vec::new();
1159
1160        loop {
1161            if self.check_limit() {
1162                self.error("parser exceeded global iteration limit");
1163                break;
1164            }
1165
1166            // Skip separators
1167            while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1168                if self.check_limit() {
1169                    self.error("parser exceeded global iteration limit");
1170                    return ZshProgram { lists };
1171                }
1172                self.lexer.zshlex();
1173            }
1174
1175            if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
1176                break;
1177            }
1178
1179            // Check for end tokens
1180            if let Some(end_toks) = end_tokens {
1181                if end_toks.contains(&self.lexer.tok) {
1182                    break;
1183                }
1184            }
1185
1186            // Also stop at these tokens when not explicitly looking for them
1187            // Note: Else/Elif/Then are NOT here - they're handled by parse_if
1188            // to allow nested if statements inside case arms, loops, etc.
1189            match self.lexer.tok {
1190                LexTok::Outbrace
1191                | LexTok::Dsemi
1192                | LexTok::Semiamp
1193                | LexTok::Semibar
1194                | LexTok::Done
1195                | LexTok::Fi
1196                | LexTok::Esac
1197                | LexTok::Zend => break,
1198                _ => {}
1199            }
1200
1201            match self.parse_list() {
1202                Some(list) => {
1203                    let detected = simple_name_with_inoutpar(&list);
1204                    lists.push(list);
1205                    // Synthesize a FuncDef for the `name() { body }` shape
1206                    // at parse time so body_source is captured while the
1207                    // lexer still has the input. The lexer port emits
1208                    // `name(` as a single Word ending in `<INPAR><OUTPAR>`,
1209                    // so the Simple list is followed by an Inbrace once
1210                    // separators are skipped. For `name() cmd args` the
1211                    // body has already been swallowed into the same
1212                    // Simple's words tail — synthesize directly from there.
1213                    if let Some((names, body_argv)) = detected {
1214                        if !body_argv.is_empty() {
1215                            // One-line body already in the Simple. Build
1216                            // a Simple from body_argv as the function body.
1217                            lists.pop();
1218                            let body_simple = ZshCommand::Simple(ZshSimple {
1219                                assigns: Vec::new(),
1220                                words: body_argv,
1221                                redirs: Vec::new(),
1222                            });
1223                            let body_list = ZshList {
1224                                sublist: ZshSublist {
1225                                    pipe: ZshPipe {
1226                                        cmd: body_simple,
1227                                        next: None,
1228                                        lineno: self.lexer.lineno,
1229                                        merge_stderr: false,
1230                                    },
1231                                    next: None,
1232                                    flags: SublistFlags::default(),
1233                                },
1234                                flags: ListFlags::default(),
1235                            };
1236                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1237                                names,
1238                                body: Box::new(ZshProgram {
1239                                    lists: vec![body_list],
1240                                }),
1241                                tracing: false,
1242                                auto_call_args: None,
1243                                body_source: None,
1244                            });
1245                            let synthetic = ZshList {
1246                                sublist: ZshSublist {
1247                                    pipe: ZshPipe {
1248                                        cmd: funcdef,
1249                                        next: None,
1250                                        lineno: self.lexer.lineno,
1251                                        merge_stderr: false,
1252                                    },
1253                                    next: None,
1254                                    flags: SublistFlags::default(),
1255                                },
1256                                flags: ListFlags::default(),
1257                            };
1258                            lists.push(synthetic);
1259                            continue;
1260                        }
1261                        // Else: words.len() == 1 (only the trailing `name()`
1262                        // word), brace body follows. `names` may carry
1263                        // multiple identifiers from the `fna fnb fnc()`
1264                        // shorthand — all share the same brace body per
1265                        // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
1266                        // Skip separators on the real lexer; safe because
1267                        // parse_program's next iteration would also skip them.
1268                        while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1269                            self.lexer.zshlex();
1270                        }
1271                        if self.lexer.tok == LexTok::Inbrace {
1272                            // Capture body_start BEFORE the lexer
1273                            // advances past the first body token. The
1274                            // outer zshlex() consumed `{`; lexer.pos
1275                            // is now right after `{`. The next
1276                            // `zshlex()` would advance past `echo`,
1277                            // making body_start land mid-body and
1278                            // lose the first word — `typeset -f f`
1279                            // printed `a; echo b` instead of
1280                            // `echo a; echo b` for `f() { echo a;
1281                            // echo b }`.
1282                            let body_start = self.lexer.pos;
1283                            self.lexer.zshlex();
1284                            let body = self.parse_program();
1285                            let body_end = if self.lexer.tok == LexTok::Outbrace {
1286                                self.lexer.pos.saturating_sub(1)
1287                            } else {
1288                                self.lexer.pos
1289                            };
1290                            let body_source = self
1291                                .lexer
1292                                .input
1293                                .get(body_start..body_end)
1294                                .map(|s| s.trim().to_string())
1295                                .filter(|s| !s.is_empty());
1296                            if self.lexer.tok == LexTok::Outbrace {
1297                                self.lexer.zshlex();
1298                            }
1299                            // Replace the Simple list with a FuncDef list.
1300                            lists.pop();
1301                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1302                                names,
1303                                body: Box::new(body),
1304                                tracing: false,
1305                                auto_call_args: None,
1306                                body_source,
1307                            });
1308                            let synthetic = ZshList {
1309                                sublist: ZshSublist {
1310                                    pipe: ZshPipe {
1311                                        cmd: funcdef,
1312                                        next: None,
1313                                        lineno: self.lexer.lineno,
1314                                        merge_stderr: false,
1315                                    },
1316                                    next: None,
1317                                    flags: SublistFlags::default(),
1318                                },
1319                                flags: ListFlags::default(),
1320                            };
1321                            lists.push(synthetic);
1322                        } else if !matches!(
1323                            self.lexer.tok,
1324                            LexTok::Endinput | LexTok::Outbrace | LexTok::Seper | LexTok::Newlin
1325                        ) {
1326                            // No-brace one-line body: `foo() echo hello`.
1327                            // Parse a single command for the body.
1328                            let body_cmd = self.parse_cmd();
1329                            if let Some(cmd) = body_cmd {
1330                                let body_list = ZshList {
1331                                    sublist: ZshSublist {
1332                                        pipe: ZshPipe {
1333                                            cmd,
1334                                            next: None,
1335                                            lineno: self.lexer.lineno,
1336                                            merge_stderr: false,
1337                                        },
1338                                        next: None,
1339                                        flags: SublistFlags::default(),
1340                                    },
1341                                    flags: ListFlags::default(),
1342                                };
1343                                lists.pop();
1344                                let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1345                                    names: names.clone(),
1346                                    body: Box::new(ZshProgram {
1347                                        lists: vec![body_list],
1348                                    }),
1349                                    tracing: false,
1350                                    auto_call_args: None,
1351                                    body_source: None,
1352                                });
1353                                let synthetic = ZshList {
1354                                    sublist: ZshSublist {
1355                                        pipe: ZshPipe {
1356                                            cmd: funcdef,
1357                                            next: None,
1358                                            lineno: self.lexer.lineno,
1359                                            merge_stderr: false,
1360                                        },
1361                                        next: None,
1362                                        flags: SublistFlags::default(),
1363                                    },
1364                                    flags: ListFlags::default(),
1365                                };
1366                                lists.push(synthetic);
1367                            }
1368                        }
1369                    }
1370                }
1371                None => break,
1372            }
1373        }
1374
1375        ZshProgram { lists }
1376    }
1377
1378    /// Parse a list (sublist with optional & or ;).
1379    ///
1380    /// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
1381    /// par_list1 wrapper at parse.c:807-817).
1382    ///
1383    /// **Structural divergence**: zsh's parse.c emits flat wordcode
1384    /// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
1385    /// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
1386    /// builds an AST node `ZshList { sublist, flags }` instead. The
1387    /// async/sync/disown discrimination at parse.c:785-790 maps to
1388    /// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
1389    /// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
1390    /// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
1391    /// representation. This divergence is repository-wide: every
1392    /// `par_*` function emits wordcode in C, every `parse_*` builds
1393    /// AST in Rust. The compile_zsh module then traverses the AST to
1394    /// emit fusevm bytecode, which serves the same role as zsh's
1395    /// wordcode but with a different opcode set and execution model.
1396    fn parse_list(&mut self) -> Option<ZshList> {
1397        let sublist = self.parse_sublist()?;
1398
1399        let flags = match self.lexer.tok {
1400            LexTok::Amper => {
1401                self.lexer.zshlex();
1402                ListFlags {
1403                    async_: true,
1404                    disown: false,
1405                }
1406            }
1407            LexTok::Amperbang => {
1408                self.lexer.zshlex();
1409                ListFlags {
1410                    async_: true,
1411                    disown: true,
1412                }
1413            }
1414            LexTok::Seper | LexTok::Semi | LexTok::Newlin => {
1415                self.lexer.zshlex();
1416                ListFlags::default()
1417            }
1418            _ => ListFlags::default(),
1419        };
1420
1421        Some(ZshList { sublist, flags })
1422    }
1423
1424    /// Parse a sublist (pipelines connected by && or ||).
1425    ///
1426    /// Direct port of zsh/Src/parse.c:825-867 `par_sublist` and
1427    /// par_sublist2 at parse.c:869-892. par_sublist handles the
1428    /// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
1429    /// handles the leading `!` negation and `coproc` keyword.
1430    ///
1431    /// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
1432    /// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
1433    /// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
1434    fn parse_sublist(&mut self) -> Option<ZshSublist> {
1435        self.recursion_depth += 1;
1436        if self.check_recursion() {
1437            self.error("parse_sublist: max recursion depth exceeded");
1438            self.recursion_depth -= 1;
1439            return None;
1440        }
1441
1442        let mut flags = SublistFlags::default();
1443
1444        // Handle coproc and !
1445        if self.lexer.tok == LexTok::Coproc {
1446            flags.coproc = true;
1447            self.lexer.zshlex();
1448        } else if self.lexer.tok == LexTok::Bang {
1449            flags.not = true;
1450            self.lexer.zshlex();
1451        }
1452
1453        let pipe = match self.parse_pipe() {
1454            Some(p) => p,
1455            None => {
1456                self.recursion_depth -= 1;
1457                return None;
1458            }
1459        };
1460
1461        // Check for && or ||
1462        let next = match self.lexer.tok {
1463            LexTok::Damper => {
1464                self.lexer.zshlex();
1465                self.skip_separators();
1466                self.parse_sublist().map(|s| (SublistOp::And, Box::new(s)))
1467            }
1468            LexTok::Dbar => {
1469                self.lexer.zshlex();
1470                self.skip_separators();
1471                self.parse_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1472            }
1473            _ => None,
1474        };
1475
1476        self.recursion_depth -= 1;
1477        Some(ZshSublist { pipe, next, flags })
1478    }
1479
1480    /// Parse a pipeline
1481    /// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1482    /// zsh/Src/parse.c:894-956 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1483    /// C emits WC_PIPE wordcodes per command; same flow.
1484    fn parse_pipe(&mut self) -> Option<ZshPipe> {
1485        self.recursion_depth += 1;
1486        if self.check_recursion() {
1487            self.error("parse_pipe: max recursion depth exceeded");
1488            self.recursion_depth -= 1;
1489            return None;
1490        }
1491
1492        let lineno = self.lexer.toklineno;
1493        let cmd = match self.parse_cmd() {
1494            Some(c) => c,
1495            None => {
1496                self.recursion_depth -= 1;
1497                return None;
1498            }
1499        };
1500
1501        // Check for | or |&
1502        let mut merge_stderr = false;
1503        let next = match self.lexer.tok {
1504            LexTok::Bar | LexTok::Baramp => {
1505                merge_stderr = self.lexer.tok == LexTok::Baramp;
1506                self.lexer.zshlex();
1507                self.skip_separators();
1508                self.parse_pipe().map(Box::new)
1509            }
1510            _ => None,
1511        };
1512
1513        self.recursion_depth -= 1;
1514        Some(ZshPipe {
1515            cmd,
1516            next,
1517            lineno,
1518            merge_stderr,
1519        })
1520    }
1521
1522    /// Parse a command
1523    /// Parse a command — dispatches by leading token (FOR / CASE /
1524    /// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1525    /// INPAR subshell / INBRACE current-shell / TIME / NOCORRECT,
1526    /// else simple). Direct port of zsh/Src/parse.c:958-1085 `par_cmd`.
1527    fn parse_cmd(&mut self) -> Option<ZshCommand> {
1528        // Parse leading redirections
1529        let mut redirs = Vec::new();
1530        while self.lexer.tok.is_redirop() {
1531            if let Some(redir) = self.parse_redir() {
1532                redirs.push(redir);
1533            }
1534        }
1535
1536        let cmd = match self.lexer.tok {
1537            LexTok::For | LexTok::Foreach => self.parse_for(),
1538            LexTok::Select => self.parse_select(),
1539            LexTok::Case => self.parse_case(),
1540            LexTok::If => self.parse_if(),
1541            LexTok::While => self.parse_while(false),
1542            LexTok::Until => self.parse_while(true),
1543            LexTok::Repeat => self.parse_repeat(),
1544            LexTok::Inpar => self.parse_subsh(),
1545            LexTok::Inoutpar => self.parse_anon_funcdef(),
1546            LexTok::Inbrace => self.parse_cursh(),
1547            LexTok::Func => self.parse_funcdef(),
1548            LexTok::Dinbrack => self.parse_cond(),
1549            LexTok::Dinpar => self.parse_arith(),
1550            LexTok::Time => self.parse_time(),
1551            _ => self.parse_simple(redirs),
1552        };
1553
1554        // Parse trailing redirections. For Simple commands the redirs were
1555        // already captured inside parse_simple; for compound forms (Cursh,
1556        // Subsh, If, While, etc.) we collect them here and wrap in
1557        // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1558        if let Some(inner) = cmd {
1559            let mut trailing: Vec<ZshRedir> = Vec::new();
1560            while self.lexer.tok.is_redirop() {
1561                if let Some(redir) = self.parse_redir() {
1562                    trailing.push(redir);
1563                }
1564            }
1565            if trailing.is_empty() {
1566                return Some(inner);
1567            }
1568            // Simple already absorbed its own redirs (compile path expects
1569            // them on ZshSimple), so don't double-wrap.
1570            if matches!(inner, ZshCommand::Simple(_)) {
1571                if let ZshCommand::Simple(mut s) = inner {
1572                    s.redirs.extend(trailing);
1573                    return Some(ZshCommand::Simple(s));
1574                }
1575                unreachable!()
1576            }
1577            return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1578        }
1579
1580        None
1581    }
1582
1583    /// Parse a simple command
1584    /// Parse a simple command (assignments + words + redirections).
1585    /// Direct port of zsh/Src/parse.c:1836-2228 `par_simple` —
1586    /// the largest single function in parse.c. Handles ENVSTRING/
1587    /// ENVARRAY assignments at command head, intermixed redirs,
1588    /// typeset-style multi-assignment commands, and the trailing
1589    /// inout-par `()` that converts a simple command into an inline
1590    /// function definition.
1591    fn parse_simple(&mut self, mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
1592        let mut assigns = Vec::new();
1593        let mut words = Vec::new();
1594        const MAX_ITERATIONS: usize = 10_000;
1595        let mut iterations = 0;
1596
1597        // Parse leading assignments
1598        while self.lexer.tok == LexTok::Envstring || self.lexer.tok == LexTok::Envarray {
1599            iterations += 1;
1600            if iterations > MAX_ITERATIONS {
1601                self.error("parse_simple: exceeded max iterations in assignments");
1602                return None;
1603            }
1604            if let Some(assign) = self.parse_assign() {
1605                assigns.push(assign);
1606            }
1607            self.lexer.zshlex();
1608        }
1609
1610        // Parse words and redirections
1611        loop {
1612            iterations += 1;
1613            if iterations > MAX_ITERATIONS {
1614                self.error("parse_simple: exceeded max iterations");
1615                return None;
1616            }
1617            match self.lexer.tok {
1618                LexTok::String | LexTok::Typeset => {
1619                    let s = self.lexer.tokstr.clone();
1620                    if let Some(s) = s {
1621                        words.push(s);
1622                    }
1623                    self.lexer.zshlex();
1624                    // Check for function definition foo() { ... }
1625                    if words.len() == 1 && self.peek_inoutpar() {
1626                        return self.parse_inline_funcdef(words.pop().unwrap());
1627                    }
1628                    // `{name}>file` named-fd redirect: the lexer doesn't
1629                    // recognize this shape, so the bare word `{name}`
1630                    // arrives as a String. If it matches `{IDENT}` and
1631                    // the NEXT token is a redirop, pop it off as the
1632                    // varid for that redir.
1633                    if !words.is_empty() && self.lexer.tok.is_redirop() {
1634                        let last = words.last().unwrap();
1635                        let untoked = crate::lexer::untokenize(last);
1636                        if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
1637                            let name = &untoked[1..untoked.len() - 1];
1638                            if !name.is_empty()
1639                                && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
1640                                && name
1641                                    .chars()
1642                                    .next()
1643                                    .map(|c| c == '_' || c.is_ascii_alphabetic())
1644                                    .unwrap_or(false)
1645                            {
1646                                let varid = name.to_string();
1647                                words.pop();
1648                                if let Some(mut redir) = self.parse_redir() {
1649                                    redir.varid = Some(varid);
1650                                    redirs.push(redir);
1651                                }
1652                                continue;
1653                            }
1654                        }
1655                    }
1656                }
1657                _ if self.lexer.tok.is_redirop() => {
1658                    match self.parse_redir() {
1659                        Some(redir) => redirs.push(redir),
1660                        None => break, // Error in redir parsing, stop
1661                    }
1662                }
1663                LexTok::Inoutpar if !words.is_empty() => {
1664                    // foo() { ... } style function
1665                    return self.parse_inline_funcdef(words.pop().unwrap());
1666                }
1667                _ => break,
1668            }
1669        }
1670
1671        if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
1672            return None;
1673        }
1674
1675        Some(ZshCommand::Simple(ZshSimple {
1676            assigns,
1677            words,
1678            redirs,
1679        }))
1680    }
1681
1682    /// Parse an assignment
1683    /// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
1684    /// Sub-routine of parse_simple. The C source handles assignments
1685    /// inline in par_simple via the ENVSTRING/ENVARRAY token paths
1686    /// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
1687    /// helper for clarity.
1688    fn parse_assign(&mut self) -> Option<ZshAssign> {
1689        use crate::tokens::char_tokens;
1690
1691        let tokstr = self.lexer.tokstr.as_ref()?;
1692
1693        // Parse name=value or name+=value.
1694        let (name, value_str, append) = if self.lexer.tok == LexTok::Envarray {
1695            let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
1696                (stripped, true)
1697            } else {
1698                (tokstr.as_str(), false)
1699            };
1700            (name.to_string(), String::new(), append)
1701        } else if let Some(pos) = tokstr.find(char_tokens::EQUALS) {
1702            let name_part = &tokstr[..pos];
1703            let (name, append) = if name_part.ends_with('+') {
1704                (&name_part[..name_part.len() - 1], true)
1705            } else {
1706                (name_part, false)
1707            };
1708            (
1709                name.to_string(),
1710                tokstr[pos + char_tokens::EQUALS.len_utf8()..].to_string(),
1711                append,
1712            )
1713        } else if let Some(pos) = tokstr.find('=') {
1714            // Fallback to literal '=' for compatibility
1715            let name_part = &tokstr[..pos];
1716            let (name, append) = if name_part.ends_with('+') {
1717                (&name_part[..name_part.len() - 1], true)
1718            } else {
1719                (name_part, false)
1720            };
1721            (name.to_string(), tokstr[pos + 1..].to_string(), append)
1722        } else {
1723            return None;
1724        };
1725
1726        let value = if self.lexer.tok == LexTok::Envarray {
1727            // Array assignment: name=(...)
1728            let mut elements = Vec::new();
1729            self.lexer.zshlex(); // skip past token
1730
1731            let mut arr_iters = 0;
1732            const MAX_ARRAY_ELEMENTS: usize = 10_000;
1733            while matches!(
1734                self.lexer.tok,
1735                LexTok::String | LexTok::Seper | LexTok::Newlin
1736            ) {
1737                arr_iters += 1;
1738                if arr_iters > MAX_ARRAY_ELEMENTS {
1739                    self.error("array assignment exceeded maximum elements");
1740                    break;
1741                }
1742                if self.lexer.tok == LexTok::String {
1743                    if let Some(ref s) = self.lexer.tokstr {
1744                        elements.push(s.clone());
1745                    }
1746                }
1747                self.lexer.zshlex();
1748            }
1749
1750            // The closing OUTPAR is consumed here. The outer parse_simple
1751            // loop will then `zshlex()` past whatever follows (typically
1752            // a separator or the next word) — calling zshlex twice in
1753            // tandem (here AND in parse_simple) over-advances and merges
1754            // a following `name() { … }` funcdef into the same Simple.
1755            // We only consume Outpar; let the caller handle the rest.
1756            // Without this guard `g=(o1); f() { :; }` parsed as one
1757            // Simple with assigns=[g] and words=["f()"] (one token).
1758            if self.lexer.tok == LexTok::Outpar {
1759                // Note: do NOT zshlex() here. parse_simple's `self.lexer
1760                // .zshlex()` after `parse_assign` returns advances past
1761                // the Outpar onto the next significant token.
1762                //
1763                // Force `incmdpos=true` so the next zshlex() recognizes
1764                // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
1765                // The lexer flips incmdpos to false on bare Outpar (which
1766                // is correct for subshell-close context), but for an
1767                // array-assignment close more assigns/words may follow.
1768                self.lexer.incmdpos = true;
1769            }
1770
1771            ZshAssignValue::Array(elements)
1772        } else {
1773            ZshAssignValue::Scalar(value_str)
1774        };
1775
1776        Some(ZshAssign {
1777            name,
1778            value,
1779            append,
1780        })
1781    }
1782
1783    /// Parse a redirection
1784    /// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
1785    /// Direct port of zsh/Src/parse.c:2229-2346 `par_redir`. Returns
1786    /// a ZshRedir node carrying the operator type, fd, target word
1787    /// (or here-doc body / pipe-redir command), and any `{var}` style
1788    /// fd-binding parameter.
1789    fn parse_redir(&mut self) -> Option<ZshRedir> {
1790        let rtype = match self.lexer.tok {
1791            LexTok::Outang => RedirType::Write,
1792            LexTok::Outangbang => RedirType::Writenow,
1793            LexTok::Doutang => RedirType::Append,
1794            LexTok::Doutangbang => RedirType::Appendnow,
1795            LexTok::Inang => RedirType::Read,
1796            LexTok::Inoutang => RedirType::ReadWrite,
1797            LexTok::Dinang => RedirType::Heredoc,
1798            LexTok::Dinangdash => RedirType::HeredocDash,
1799            LexTok::Trinang => RedirType::Herestr,
1800            LexTok::Inangamp => RedirType::MergeIn,
1801            LexTok::Outangamp => RedirType::MergeOut,
1802            LexTok::Ampoutang => RedirType::ErrWrite,
1803            LexTok::Outangampbang => RedirType::ErrWritenow,
1804            LexTok::Doutangamp => RedirType::ErrAppend,
1805            LexTok::Doutangampbang => RedirType::ErrAppendnow,
1806            _ => return None,
1807        };
1808
1809        let fd = if self.lexer.tokfd >= 0 {
1810            self.lexer.tokfd
1811        } else if matches!(
1812            rtype,
1813            RedirType::Read
1814                | RedirType::ReadWrite
1815                | RedirType::MergeIn
1816                | RedirType::Heredoc
1817                | RedirType::HeredocDash
1818                | RedirType::Herestr
1819        ) {
1820            0
1821        } else {
1822            1
1823        };
1824
1825        self.lexer.zshlex();
1826
1827        let name = match self.lexer.tok {
1828            LexTok::String | LexTok::Envstring => {
1829                let n = self.lexer.tokstr.clone().unwrap_or_default();
1830                self.lexer.zshlex();
1831                n
1832            }
1833            _ => {
1834                self.error("expected word after redirection");
1835                return None;
1836            }
1837        };
1838
1839        // Heredoc body capture: when reading the terminator above, the
1840        // lexer pushed a HereDoc to self.lexer.heredocs[]. Record the
1841        // index so fill_heredoc_bodies() can wire content back after
1842        // process_heredocs() has run.
1843        let heredoc_idx = if matches!(rtype, RedirType::Heredoc | RedirType::HeredocDash) {
1844            if !self.lexer.heredocs.is_empty() {
1845                Some(self.lexer.heredocs.len() - 1)
1846            } else {
1847                None
1848            }
1849        } else {
1850            None
1851        };
1852
1853        Some(ZshRedir {
1854            rtype,
1855            fd,
1856            name,
1857            heredoc: None,
1858            varid: None,
1859            heredoc_idx,
1860        })
1861    }
1862
1863    /// Parse for/foreach loop
1864    /// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1865    /// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1866    /// of zsh/Src/parse.c:1087-1207 `par_for`. parse_for_cstyle is the
1867    /// inner branch for the `((...))` arithmetic-header variant
1868    /// (parse.c:1100-1140 inside par_for).
1869    fn parse_for(&mut self) -> Option<ZshCommand> {
1870        let is_foreach = self.lexer.tok == LexTok::Foreach;
1871        self.lexer.zshlex();
1872
1873        // Check for C-style: for (( init; cond; step ))
1874        if self.lexer.tok == LexTok::Dinpar {
1875            return self.parse_for_cstyle();
1876        }
1877
1878        // Get variable name(s). zsh parse.c par_for accepts multiple
1879        // identifier tokens before `in`/`(`/newline — `for k v in ...`
1880        // assigns each iteration's pair of values to k and v in turn.
1881        // We store the names space-joined since variable identifiers
1882        // can't contain whitespace.
1883        let mut names: Vec<String> = Vec::new();
1884        loop {
1885            match self.lexer.tok {
1886                LexTok::String => {
1887                    let v = self.lexer.tokstr.clone().unwrap_or_default();
1888                    if v == "in" {
1889                        break;
1890                    }
1891                    names.push(v);
1892                    self.lexer.zshlex();
1893                }
1894                _ => break,
1895            }
1896        }
1897        if names.is_empty() {
1898            self.error("expected variable name in for");
1899            return None;
1900        }
1901        let var = names.join(" ");
1902
1903        // Skip newlines
1904        self.skip_separators();
1905
1906        // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1907        // single String token with the parens lexed-as-content
1908        // (`<INPAR>a b c<OUTPAR>`) instead of as separate Inpar/String/
1909        // Outpar tokens. Detect that shape and split it manually.
1910        let list = if self.lexer.tok == LexTok::String
1911            && self
1912                .lexer
1913                .tokstr
1914                .as_ref()
1915                .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1916                .unwrap_or(false)
1917        {
1918            let raw = self.lexer.tokstr.clone().unwrap_or_default();
1919            // Strip leading INPAR + trailing OUTPAR, then untokenize the
1920            // inner content and split on whitespace for the word list.
1921            let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1922                ..raw
1923                    .char_indices()
1924                    .last()
1925                    .map(|(i, _)| i)
1926                    .unwrap_or(raw.len())];
1927            let cleaned = crate::lexer::untokenize(inner);
1928            let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
1929            self.lexer.zshlex();
1930            ForList::Words(words)
1931        } else if self.lexer.tok == LexTok::String {
1932            let s = self.lexer.tokstr.as_ref();
1933            if s.map(|s| s == "in").unwrap_or(false) {
1934                self.lexer.zshlex();
1935                let mut words = Vec::new();
1936                let mut word_count = 0;
1937                while self.lexer.tok == LexTok::String {
1938                    word_count += 1;
1939                    if word_count > 500 || self.check_limit() {
1940                        self.error("for: too many words");
1941                        return None;
1942                    }
1943                    if let Some(ref s) = self.lexer.tokstr {
1944                        words.push(s.clone());
1945                    }
1946                    self.lexer.zshlex();
1947                }
1948                ForList::Words(words)
1949            } else {
1950                ForList::Positional
1951            }
1952        } else if self.lexer.tok == LexTok::Inpar {
1953            // for var (...)
1954            self.lexer.zshlex();
1955            let mut words = Vec::new();
1956            let mut word_count = 0;
1957            while self.lexer.tok == LexTok::String || self.lexer.tok == LexTok::Seper {
1958                word_count += 1;
1959                if word_count > 500 || self.check_limit() {
1960                    self.error("for: too many words in parens");
1961                    return None;
1962                }
1963                if self.lexer.tok == LexTok::String {
1964                    if let Some(ref s) = self.lexer.tokstr {
1965                        words.push(s.clone());
1966                    }
1967                }
1968                self.lexer.zshlex();
1969            }
1970            if self.lexer.tok == LexTok::Outpar {
1971                self.lexer.zshlex();
1972            }
1973            ForList::Words(words)
1974        } else {
1975            ForList::Positional
1976        };
1977
1978        // Skip to body
1979        self.skip_separators();
1980
1981        // Parse body
1982        let body = self.parse_loop_body(is_foreach)?;
1983
1984        Some(ZshCommand::For(ZshFor {
1985            var,
1986            list,
1987            body: Box::new(body),
1988            is_select: false,
1989        }))
1990    }
1991
1992    /// Parse C-style for loop: for (( init; cond; step ))
1993    /// Parse the c-style `for ((init; cond; incr)) do BODY done`.
1994    /// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
1995    /// Recognized when the token after FOR is DINPAR (the `((`
1996    /// detected by gettok via dbparens setup).
1997    fn parse_for_cstyle(&mut self) -> Option<ZshCommand> {
1998        // We're at (( (Dinpar None) - the opening ((
1999        // Lexer returns:
2000        //   Dinpar None     - opening ((
2001        //   Dinpar "init"   - init expression, semicolon consumed
2002        //   Dinpar "cond"   - cond expression, semicolon consumed
2003        //   Doutpar "step"  - step expression, closing )) consumed
2004
2005        self.lexer.zshlex(); // Get init: Dinpar "i=0"
2006
2007        if self.lexer.tok != LexTok::Dinpar {
2008            self.error("expected init expression in for ((");
2009            return None;
2010        }
2011        let init = self.lexer.tokstr.clone().unwrap_or_default();
2012
2013        self.lexer.zshlex(); // Get cond: Dinpar "i<10"
2014
2015        if self.lexer.tok != LexTok::Dinpar {
2016            self.error("expected condition in for ((");
2017            return None;
2018        }
2019        let cond = self.lexer.tokstr.clone().unwrap_or_default();
2020
2021        self.lexer.zshlex(); // Get step: Doutpar "i++"
2022
2023        if self.lexer.tok != LexTok::Doutpar {
2024            self.error("expected )) in for");
2025            return None;
2026        }
2027        let step = self.lexer.tokstr.clone().unwrap_or_default();
2028
2029        self.lexer.zshlex(); // Move past ))
2030
2031        self.skip_separators();
2032        let body = self.parse_loop_body(false)?;
2033
2034        Some(ZshCommand::For(ZshFor {
2035            var: String::new(),
2036            list: ForList::CStyle { init, cond, step },
2037            body: Box::new(body),
2038            is_select: false,
2039        }))
2040    }
2041
2042    /// Parse select loop (same syntax as for)
2043    /// Parse `select NAME in WORDS; do BODY; done`. Same shape as
2044    /// `for NAME in WORDS; do ...` but with menu-prompt semantics in
2045    /// the executor. C equivalent: the SELECT case in par_for at
2046    /// parse.c:1087-1207 (selects share parser flow with foreach).
2047    fn parse_select(&mut self) -> Option<ZshCommand> {
2048        // `select` shares parse_for's grammar (var, words, body) but the
2049        // compile path is different (interactive prompt loop).
2050        match self.parse_for()? {
2051            ZshCommand::For(mut f) => {
2052                f.is_select = true;
2053                Some(ZshCommand::For(f))
2054            }
2055            other => Some(other),
2056        }
2057    }
2058
    /// Parse a case statement: `case WORD in PATTERN) BODY ;; ... esac`,
    /// or the brace form `case WORD { PATTERN) BODY ;; ... }`.
    ///
    /// Direct port of zsh/Src/parse.c:1209-1409 `par_case`. Each case
    /// arm is a (pattern_list, body, terminator) tuple where terminator
    /// is `;;` (`CaseTerm::Break`, also used when no terminator token
    /// follows the body), `;&` (`CaseTerm::Continue`, fallthrough), or
    /// `;|` (`CaseTerm::TestNext`, continue testing).
    ///
    /// The lexer's `incasepat` field is driven from here: 1 at the
    /// start of a pattern, 2 after a pattern token has been read, 0
    /// outside patterns. The order of those writes relative to each
    /// `zshlex()` call matters, because the value in effect when
    /// `zshlex()` runs decides how the NEXT token is lexed.
    ///
    /// Returns `None` after reporting an error when the subject word,
    /// the `in` / `{` opener, or an arm's closing `)` is missing, or
    /// when a single arm needs more than 1000 pattern-loop iterations.
    fn parse_case(&mut self) -> Option<ZshCommand> {
        self.lexer.zshlex(); // skip 'case'

        // The subject word being matched.
        let word = match self.lexer.tok {
            LexTok::String => {
                let w = self.lexer.tokstr.clone().unwrap_or_default();
                self.lexer.zshlex();
                w
            }
            _ => {
                self.error("expected word after case");
                return None;
            }
        };

        self.skip_separators();

        // Expect 'in' or {. `use_brace` also selects which token ends the
        // arm list further down (`}` instead of `esac`).
        let use_brace = self.lexer.tok == LexTok::Inbrace;
        if self.lexer.tok == LexTok::String {
            let s = self.lexer.tokstr.as_ref();
            if s.map(|s| s != "in").unwrap_or(true) {
                self.error("expected 'in' in case");
                return None;
            }
        } else if !use_brace {
            self.error("expected 'in' or '{' in case");
            return None;
        }
        // Set incasepat=1 BEFORE consuming "in" so the next token (which
        // could be a leading `(` of a paren-prefixed pattern like
        // `case foo in (a|b) …`) is lexed as Inpar, not as a glob-token.
        // Without this the `(` got swallowed into a gettokstr('(', false)
        // call and produced a String like "(foo)" — the parser then saw
        // the `)` inside a string instead of as a separate Outpar.
        self.lexer.incasepat = 1;
        self.lexer.zshlex();

        let mut arms = Vec::new();
        const MAX_ARMS: usize = 10_000;

        // One iteration per case arm.
        loop {
            // Safety cap: report and stop collecting once more than
            // MAX_ARMS arms exist; the arms gathered so far are kept.
            if arms.len() > MAX_ARMS {
                self.error("parse_case: too many arms");
                break;
            }

            // Set incasepat BEFORE skipping separators so the lexer knows
            // we're in case pattern context. This affects how [ and | are
            // lexed.
            self.lexer.incasepat = 1;

            self.skip_separators();

            // Check for end.
            // Note: 'esac' might be String "esac" if incasepat > 0
            // prevents reserved word recognition.
            let is_esac = self.lexer.tok == LexTok::Esac
                || (self.lexer.tok == LexTok::String
                    && self
                        .lexer
                        .tokstr
                        .as_ref()
                        .map(|s| s == "esac")
                        .unwrap_or(false));
            if (use_brace && self.lexer.tok == LexTok::Outbrace) || (!use_brace && is_esac) {
                self.lexer.incasepat = 0;
                self.lexer.zshlex();
                break;
            }

            // Also break on EOF or a lexer error; the terminator is not
            // consumed and no error is reported here.
            if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
                self.lexer.incasepat = 0;
                break;
            }

            // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
            // The leading `(` is paired with a matching `)` that closes
            // the pattern itself; the arm-close `)` follows separately.
            // Track whether we consumed it so we can skip the matching
            // `)` after pattern parsing — otherwise the arm-close would
            // be interpreted as the pattern-close and the actual body
            // would get the leftover `)`.
            let had_leading_paren = self.lexer.tok == LexTok::Inpar;
            if had_leading_paren {
                self.lexer.zshlex();
            }

            // Collect the `|`-separated alternatives of this arm.
            // incasepat is already set above.
            let mut patterns = Vec::new();
            let mut pattern_iterations = 0;
            loop {
                pattern_iterations += 1;
                if pattern_iterations > 1000 {
                    self.error("parse_case: too many pattern iterations");
                    self.lexer.incasepat = 0;
                    return None;
                }

                if self.lexer.tok == LexTok::String {
                    let s = self.lexer.tokstr.as_ref();
                    // A String "esac" is the end marker, not a pattern.
                    if s.map(|s| s == "esac").unwrap_or(false) {
                        break;
                    }
                    patterns.push(self.lexer.tokstr.clone().unwrap_or_default());
                    // After first pattern token, set incasepat=2 so ( is
                    // treated as part of pattern.
                    self.lexer.incasepat = 2;
                    self.lexer.zshlex();
                } else if self.lexer.tok != LexTok::Bar {
                    break;
                }

                if self.lexer.tok == LexTok::Bar {
                    // Reset to 1 (start of next alternative pattern)
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                } else {
                    break;
                }
            }
            self.lexer.incasepat = 0;

            // Expect ).  Also handle the `(P))` wrapped-pattern form:
            // when a leading `(` was consumed, accept an extra `)` —
            // the inner `)` closes the optional-paren wrapper, the
            // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
            // (bare pattern, leading-paren is just the opt-marker, the
            // close is arm-close) and `(P)) BODY` (paren-wrapped
            // pattern, then arm-close). The first form is unambiguous
            // when the bare pattern was simple; the second is needed
            // when the body starts with `(`.
            if self.lexer.tok != LexTok::Outpar {
                self.error("expected ')' in case pattern");
                return None;
            }
            self.lexer.zshlex();
            if had_leading_paren && self.lexer.tok == LexTok::Outpar {
                self.lexer.zshlex();
            }

            // Parse body
            let body = self.parse_program();

            // Get terminator. Set incasepat=1 BEFORE the zshlex
            // advance so the next token (the next arm's pattern, like
            // `[a-z]`) gets tokenized in pattern context. Without
            // this, a `[`-prefixed pattern after the FIRST arm became
            // Inbrack instead of String and the pattern-loop bailed
            // out with "expected ')' in case pattern".
            let terminator = match self.lexer.tok {
                LexTok::Dsemi => {
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                    CaseTerm::Break
                }
                LexTok::Semiamp => {
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                    CaseTerm::Continue
                }
                LexTok::Semibar => {
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                    CaseTerm::TestNext
                }
                // No terminator token: nothing is consumed and the arm is
                // recorded as if it ended with `;;`.
                _ => CaseTerm::Break,
            };

            // An arm for which no pattern was collected is dropped.
            if !patterns.is_empty() {
                arms.push(CaseArm {
                    patterns,
                    body,
                    terminator,
                });
            }
        }

        Some(ZshCommand::Case(ZshCase { word, arms }))
    }
2242
2243    /// Parse if statement
2244    /// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
2245    /// Direct port of zsh/Src/parse.c:1411-1519 `par_if`. The C source
2246    /// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
2247    /// (cond, then_body) tuples plus an optional else_body.
2248    fn parse_if(&mut self) -> Option<ZshCommand> {
2249        self.lexer.zshlex(); // skip 'if'
2250
2251        // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
2252        let cond = Box::new(self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace])));
2253
2254        self.skip_separators();
2255
2256        // Expect 'then' or {
2257        let use_brace = self.lexer.tok == LexTok::Inbrace;
2258        if self.lexer.tok != LexTok::Then && !use_brace {
2259            self.error("expected 'then' or '{' after if condition");
2260            return None;
2261        }
2262        self.lexer.zshlex();
2263
2264        // Parse then-body - stops at else/elif/fi, or } if using brace syntax
2265        let then = if use_brace {
2266            let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2267            if self.lexer.tok == LexTok::Outbrace {
2268                self.lexer.zshlex();
2269            }
2270            Box::new(body)
2271        } else {
2272            Box::new(self.parse_program_until(Some(&[LexTok::Else, LexTok::Elif, LexTok::Fi])))
2273        };
2274
2275        // Parse elif and else (only for then/fi syntax, not brace syntax)
2276        let mut elif = Vec::new();
2277        let mut else_ = None;
2278
2279        if !use_brace {
2280            loop {
2281                self.skip_separators();
2282
2283                match self.lexer.tok {
2284                    LexTok::Elif => {
2285                        self.lexer.zshlex();
2286                        // elif condition stops at 'then' or '{'
2287                        let econd =
2288                            self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace]));
2289                        self.skip_separators();
2290
2291                        let elif_use_brace = self.lexer.tok == LexTok::Inbrace;
2292                        if self.lexer.tok != LexTok::Then && !elif_use_brace {
2293                            self.error("expected 'then' after elif");
2294                            return None;
2295                        }
2296                        self.lexer.zshlex();
2297
2298                        // elif body stops at else/elif/fi or } if using braces
2299                        let ebody = if elif_use_brace {
2300                            let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2301                            if self.lexer.tok == LexTok::Outbrace {
2302                                self.lexer.zshlex();
2303                            }
2304                            body
2305                        } else {
2306                            self.parse_program_until(Some(&[
2307                                LexTok::Else,
2308                                LexTok::Elif,
2309                                LexTok::Fi,
2310                            ]))
2311                        };
2312
2313                        elif.push((econd, ebody));
2314                    }
2315                    LexTok::Else => {
2316                        self.lexer.zshlex();
2317                        self.skip_separators();
2318
2319                        let else_use_brace = self.lexer.tok == LexTok::Inbrace;
2320                        if else_use_brace {
2321                            self.lexer.zshlex();
2322                        }
2323
2324                        // else body stops at 'fi' or '}'
2325                        else_ = Some(Box::new(if else_use_brace {
2326                            let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2327                            if self.lexer.tok == LexTok::Outbrace {
2328                                self.lexer.zshlex();
2329                            }
2330                            body
2331                        } else {
2332                            self.parse_program_until(Some(&[LexTok::Fi]))
2333                        }));
2334
2335                        // Consume the 'fi' if present (not for brace syntax)
2336                        if !else_use_brace && self.lexer.tok == LexTok::Fi {
2337                            self.lexer.zshlex();
2338                        }
2339                        break;
2340                    }
2341                    LexTok::Fi => {
2342                        self.lexer.zshlex();
2343                        break;
2344                    }
2345                    _ => break,
2346                }
2347            }
2348        }
2349
2350        Some(ZshCommand::If(ZshIf {
2351            cond,
2352            then,
2353            elif,
2354            else_,
2355        }))
2356    }
2357
2358    /// Parse while/until loop
2359    /// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
2360    /// Direct port of zsh/Src/parse.c:1521-1563 `par_while`. The
2361    /// `until` variant is the same loop with the condition negated.
2362    fn parse_while(&mut self, until: bool) -> Option<ZshCommand> {
2363        self.lexer.zshlex(); // skip while/until
2364
2365        let cond = Box::new(self.parse_program());
2366
2367        self.skip_separators();
2368        let body = self.parse_loop_body(false)?;
2369
2370        Some(ZshCommand::While(ZshWhile {
2371            cond,
2372            body: Box::new(body),
2373            until,
2374        }))
2375    }
2376
2377    /// Parse repeat loop
2378    /// Parse `repeat N; do BODY; done`. Direct port of
2379    /// zsh/Src/parse.c:1565-1617 `par_repeat`. The C source supports
2380    /// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
2381    /// parser doesn't yet special-case that variant.
2382    fn parse_repeat(&mut self) -> Option<ZshCommand> {
2383        self.lexer.zshlex(); // skip 'repeat'
2384
2385        let count = match self.lexer.tok {
2386            LexTok::String => {
2387                let c = self.lexer.tokstr.clone().unwrap_or_default();
2388                self.lexer.zshlex();
2389                c
2390            }
2391            _ => {
2392                self.error("expected count after repeat");
2393                return None;
2394            }
2395        };
2396
2397        self.skip_separators();
2398        let body = self.parse_loop_body(false)?;
2399
2400        Some(ZshCommand::Repeat(ZshRepeat {
2401            count,
2402            body: Box::new(body),
2403        }))
2404    }
2405
2406    /// Parse loop body (do...done, {...}, or shortloop)
2407    /// Parse the `do BODY done` body of a for/while/until/select/
2408    /// repeat loop. Direct equivalent of zsh's parse.c handling
2409    /// inside the loop builders — they all consume DOLOOP, parse a
2410    /// list until DONE, and return the list. The `foreach_style`
2411    /// flag signals foreach (where short-form `for NAME in WORDS;
2412    /// CMD` may skip do/done) vs c-style (which always requires
2413    /// do/done).
2414    fn parse_loop_body(&mut self, foreach_style: bool) -> Option<ZshProgram> {
2415        if self.lexer.tok == LexTok::Doloop {
2416            self.lexer.zshlex();
2417            let body = self.parse_program();
2418            if self.lexer.tok == LexTok::Done {
2419                self.lexer.zshlex();
2420            }
2421            Some(body)
2422        } else if self.lexer.tok == LexTok::Inbrace {
2423            self.lexer.zshlex();
2424            let body = self.parse_program();
2425            if self.lexer.tok == LexTok::Outbrace {
2426                self.lexer.zshlex();
2427            }
2428            Some(body)
2429        } else if foreach_style {
2430            // foreach allows 'end' terminator
2431            let body = self.parse_program();
2432            if self.lexer.tok == LexTok::Zend {
2433                self.lexer.zshlex();
2434            }
2435            Some(body)
2436        } else {
2437            // Short loop - single command
2438            self.parse_list()
2439                .map(|list| ZshProgram { lists: vec![list] })
2440        }
2441    }
2442
2443    /// Parse (...) subshell
2444    /// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619-1670
2445    /// `par_subsh`. Body parses as a normal list; the subshell wrapper
2446    /// fork-isolates execution in the executor.
2447    fn parse_subsh(&mut self) -> Option<ZshCommand> {
2448        self.lexer.zshlex(); // skip (
2449        let prog = self.parse_program();
2450        if self.lexer.tok == LexTok::Outpar {
2451            self.lexer.zshlex();
2452        }
2453        Some(ZshCommand::Subsh(Box::new(prog)))
2454    }
2455
2456    /// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
2457    /// function named `_zshrs_anon_N`, invokes it with the args, and the
2458    /// body runs with positional params set. Implemented as the desugared
2459    /// pair (FuncDef + Simple call) so the compile path doesn't need new
2460    /// machinery.
2461    /// Parse an anonymous function definition `() { BODY }` followed
2462    /// by call args. zsh treats `() { echo hi; } a b c` as defining
2463    /// and immediately calling an anon fn with args a/b/c. C
2464    /// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
2465    /// triggers an anon-funcdef path.
2466    fn parse_anon_funcdef(&mut self) -> Option<ZshCommand> {
2467        self.lexer.zshlex(); // skip ()
2468        self.skip_separators();
2469        // No `{` after `()` → bare empty subshell shape `()`. Fall back
2470        // to a Subsh with an empty program so the status is 0 (matches
2471        // zsh's `()` no-op behavior).
2472        if self.lexer.tok != LexTok::Inbrace {
2473            return Some(ZshCommand::Subsh(Box::new(ZshProgram {
2474                lists: Vec::new(),
2475            })));
2476        }
2477        self.lexer.zshlex(); // skip {
2478        let body = self.parse_program();
2479        if self.lexer.tok == LexTok::Outbrace {
2480            self.lexer.zshlex();
2481        }
2482        // Collect any trailing args until a separator. zsh's anon-fn form
2483        // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
2484        let mut args = Vec::new();
2485        while self.lexer.tok == LexTok::String {
2486            if let Some(s) = self.lexer.tokstr.clone() {
2487                args.push(s);
2488            }
2489            self.lexer.zshlex();
2490        }
2491
2492        // Generate a unique name. Module-level static would be cleaner but
2493        // a thread-local atomic is enough — anonymous functions are
2494        // ephemeral and the name isn't user-visible.
2495        use std::sync::atomic::{AtomicUsize, Ordering};
2496        static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2497        let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2498        let name = format!("_zshrs_anon_{}", n);
2499        Some(ZshCommand::FuncDef(ZshFuncDef {
2500            names: vec![name],
2501            body: Box::new(body),
2502            tracing: false,
2503            auto_call_args: Some(args),
2504            body_source: None,
2505        }))
2506    }
2507
2508    /// Parse {...} cursh
2509    /// Parse a current-shell brace block `{ BODY }`. C source:
2510    /// par_cmd at parse.c:958-1085 handles INBRACE → emit WC_CURSH
2511    /// + recurse into list. zshrs's parse_cursh extracts that arm
2512    /// into a dedicated method.
2513    fn parse_cursh(&mut self) -> Option<ZshCommand> {
2514        self.lexer.zshlex(); // skip {
2515        let prog = self.parse_program();
2516
2517        // Check for { ... } always { ... }
2518        if self.lexer.tok == LexTok::Outbrace {
2519            self.lexer.zshlex();
2520
2521            // Check for 'always'
2522            if self.lexer.tok == LexTok::String {
2523                let s = self.lexer.tokstr.as_ref();
2524                if s.map(|s| s == "always").unwrap_or(false) {
2525                    self.lexer.zshlex();
2526                    self.skip_separators();
2527
2528                    if self.lexer.tok == LexTok::Inbrace {
2529                        self.lexer.zshlex();
2530                        let always = self.parse_program();
2531                        if self.lexer.tok == LexTok::Outbrace {
2532                            self.lexer.zshlex();
2533                        }
2534                        return Some(ZshCommand::Try(ZshTry {
2535                            try_block: Box::new(prog),
2536                            always: Box::new(always),
2537                        }));
2538                    }
2539                }
2540            }
2541        }
2542
2543        Some(ZshCommand::Cursh(Box::new(prog)))
2544    }
2545
2546    /// Parse function definition
2547    /// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
2548    /// port of zsh/Src/parse.c:1672-1785 `par_funcdef`. zsh handles
2549    /// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
2550    /// the optional `[fname1 fname2 ...]` for multi-name function defs,
2551    /// and the `function FOO () { ... }` traditional/POSIX hybrid form.
2552    fn parse_funcdef(&mut self) -> Option<ZshCommand> {
2553        self.lexer.zshlex(); // skip 'function'
2554
2555        let mut names = Vec::new();
2556        let mut tracing = false;
2557
2558        // Handle options like -T and function names. Two subtleties:
2559        //
2560        //   1. Flags: zsh's lexer encodes a leading `-` as
2561        //      `char_tokens::DASH` (\u{9b}) inside the String tokstr.
2562        //      The previous `s.starts_with('-')` check failed for
2563        //      `\u{9b}T`, so `function -T NAME { body }` slipped the
2564        //      `-T` token into `names` and the function got registered
2565        //      as `T` plus the intended `NAME`.
2566        //
2567        //   2. Body opener: zsh's lexer emits the opening `{` as a
2568        //      String (not LexTok::Inbrace) when it follows the String
2569        //      NAME — the preceding name token resets incmdpos to
2570        //      false, and only `{` immediately followed by `}` (the
2571        //      empty-body case) gets promoted to Inbrace. The funcdef
2572        //      parser must recognise the bare-`{` String as the body
2573        //      opener; otherwise `function NAME { body }` falls through
2574        //      to `_ => break`, no body parses, and the FuncDef never
2575        //      lands in the AST. This is consistent with C zsh's
2576        //      par_funcdef which knows it's in funcdef-header context
2577        //      and accepts the brace either way.
2578        loop {
2579            match self.lexer.tok {
2580                LexTok::String => {
2581                    let s = self.lexer.tokstr.as_ref()?;
2582                    if s == "{" {
2583                        // Funcdef body opener — break, body-parser branch handles it.
2584                        break;
2585                    }
2586                    let first = s.chars().next();
2587                    if matches!(first, Some('-') | Some('+'))
2588                        || matches!(first, Some(c) if c == crate::tokens::char_tokens::DASH)
2589                    {
2590                        if s.contains('T') {
2591                            tracing = true;
2592                        }
2593                        self.lexer.zshlex();
2594                        continue;
2595                    }
2596                    names.push(s.clone());
2597                    self.lexer.zshlex();
2598                }
2599                LexTok::Inbrace | LexTok::Inoutpar | LexTok::Seper | LexTok::Newlin => break,
2600                _ => break,
2601            }
2602        }
2603
2604        // Optional ()
2605        let saw_paren = self.lexer.tok == LexTok::Inoutpar;
2606        if saw_paren {
2607            self.lexer.zshlex();
2608        }
2609
2610        self.skip_separators();
2611
2612        // Body opener: real Inbrace OR a String("{") (the lexer emits
2613        // the latter after a String NAME — see comment above).
2614        let body_opener_is_string_brace = self.lexer.tok == LexTok::String
2615            && self.lexer.tokstr.as_deref() == Some("{");
2616        if self.lexer.tok == LexTok::Inbrace || body_opener_is_string_brace {
2617            // Capture body_start BEFORE the lexer advances past the
2618            // first body token. After the previous zshlex consumed
2619            // `{`, lexer.pos points just past `{` (which is where the
2620            // body source starts). The next `zshlex()` would advance
2621            // past the first token (`echo`), making body_start land
2622            // mid-body and lose the first word — `typeset -f f` would
2623            // print `a; echo b` for `{ echo a; echo b }`.
2624            let body_start = self.lexer.pos;
2625            self.lexer.zshlex();
2626            let body = self.parse_program();
2627            let body_end = if self.lexer.tok == LexTok::Outbrace {
2628                // Lexer has just consumed `}`; pos is past it. Body content
2629                // ends one byte before pos.
2630                self.lexer.pos.saturating_sub(1)
2631            } else {
2632                self.lexer.pos
2633            };
2634            let body_source = self
2635                .lexer
2636                .input
2637                .get(body_start..body_end)
2638                .map(|s| s.trim().to_string())
2639                .filter(|s| !s.is_empty());
2640            if self.lexer.tok == LexTok::Outbrace {
2641                self.lexer.zshlex();
2642            }
2643
2644            // Anonymous form `function () { body } a b c` (with `()`) or
2645            // `function { body } a b c` (zsh-only shorthand, no `()`). No
2646            // name was collected. Mirror parse_anon_funcdef: synthesize
2647            // `_zshrs_anon_N`, collect trailing args, set auto_call_args
2648            // so compile_funcdef registers + immediately calls the
2649            // function with the args as positional params.
2650            if names.is_empty() {
2651                let mut args = Vec::new();
2652                while self.lexer.tok == LexTok::String {
2653                    if let Some(s) = self.lexer.tokstr.clone() {
2654                        args.push(s);
2655                    }
2656                    self.lexer.zshlex();
2657                }
2658                use std::sync::atomic::{AtomicUsize, Ordering};
2659                static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2660                let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2661                let name = format!("_zshrs_anon_kw_{}", n);
2662                return Some(ZshCommand::FuncDef(ZshFuncDef {
2663                    names: vec![name],
2664                    body: Box::new(body),
2665                    tracing,
2666                    auto_call_args: Some(args),
2667                    body_source,
2668                }));
2669            }
2670
2671            Some(ZshCommand::FuncDef(ZshFuncDef {
2672                names,
2673                body: Box::new(body),
2674                tracing,
2675                auto_call_args: None,
2676                body_source,
2677            }))
2678        } else {
2679            // Short form
2680            self.parse_list().map(|list| {
2681                ZshCommand::FuncDef(ZshFuncDef {
2682                    names,
2683                    body: Box::new(ZshProgram { lists: vec![list] }),
2684                    tracing,
2685                    auto_call_args: None,
2686                    body_source: None,
2687                })
2688            })
2689        }
2690    }
2691
2692    /// Parse inline function definition: name() { ... }
2693    /// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
2694    /// without the `function` keyword). The name has already been
2695    /// consumed and pushed by parse_simple before this method fires.
2696    /// C source: handled inline in par_simple's INOUTPAR-after-name
2697    /// arm (parse.c:1836-2228).
2698    fn parse_inline_funcdef(&mut self, name: String) -> Option<ZshCommand> {
2699        // Skip ()
2700        if self.lexer.tok == LexTok::Inoutpar {
2701            self.lexer.zshlex();
2702        }
2703
2704        self.skip_separators();
2705
2706        // Parse body
2707        if self.lexer.tok == LexTok::Inbrace {
2708            // Same body_start-before-zshlex fix as parse_funcdef.
2709            let body_start = self.lexer.pos;
2710            self.lexer.zshlex();
2711            let body = self.parse_program();
2712            let body_end = if self.lexer.tok == LexTok::Outbrace {
2713                self.lexer.pos.saturating_sub(1)
2714            } else {
2715                self.lexer.pos
2716            };
2717            let body_source = self
2718                .lexer
2719                .input
2720                .get(body_start..body_end)
2721                .map(|s| s.trim().to_string())
2722                .filter(|s| !s.is_empty());
2723            if self.lexer.tok == LexTok::Outbrace {
2724                self.lexer.zshlex();
2725            }
2726            Some(ZshCommand::FuncDef(ZshFuncDef {
2727                names: vec![name],
2728                body: Box::new(body),
2729                tracing: false,
2730                auto_call_args: None,
2731                body_source,
2732            }))
2733        } else {
2734            match self.parse_cmd() {
2735                Some(cmd) => {
2736                    let list = ZshList {
2737                        sublist: ZshSublist {
2738                            pipe: ZshPipe {
2739                                cmd,
2740                                next: None,
2741                                lineno: self.lexer.lineno,
2742                                merge_stderr: false,
2743                            },
2744                            next: None,
2745                            flags: SublistFlags::default(),
2746                        },
2747                        flags: ListFlags::default(),
2748                    };
2749                    Some(ZshCommand::FuncDef(ZshFuncDef {
2750                        names: vec![name],
2751                        body: Box::new(ZshProgram { lists: vec![list] }),
2752                        tracing: false,
2753                        auto_call_args: None,
2754                        body_source: None,
2755                    }))
2756                }
2757                None => None,
2758            }
2759        }
2760    }
2761
2762    /// Parse [[ ... ]] conditional
2763    /// Parse `[[ EXPR ]]` conditional expression. Direct port of
2764    /// zsh/Src/parse.c:2409-2731 `par_cond` (and helpers par_cond_1,
2765    /// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2766    /// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2767    /// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2768    /// <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2769    fn parse_cond(&mut self) -> Option<ZshCommand> {
2770        self.lexer.zshlex(); // skip [[
2771                             // Empty cond `[[ ]]` is a parse error in zsh — emit the
2772                             // diagnostic and return None so the caller produces a
2773                             // non-zero exit. Without this, `[[ ]]` silently passed and
2774                             // returned exit 0.
2775        if self.lexer.tok == LexTok::Doutbrack {
2776            self.error("parse error near `]]'");
2777            self.lexer.zshlex();
2778            return None;
2779        }
2780        let cond = self.parse_cond_expr();
2781
2782        if self.lexer.tok == LexTok::Doutbrack {
2783            self.lexer.zshlex();
2784        }
2785
2786        cond.map(ZshCommand::Cond)
2787    }
2788
2789    /// Parse conditional expression
2790    /// Top of `[[ ]]` cond-expression parsing — entry to recursive
2791    /// descent (or → and → not → primary). Direct port of zsh's
2792    /// par_cond_1 at parse.c:2434-2475.
2793    fn parse_cond_expr(&mut self) -> Option<ZshCond> {
2794        self.parse_cond_or()
2795    }
2796
2797    /// Cond-expression `||` level. C: inside par_cond_1 at
2798    /// parse.c:2434-2475 (the `cond_or` ladder).
2799    fn parse_cond_or(&mut self) -> Option<ZshCond> {
2800        self.recursion_depth += 1;
2801        if self.check_recursion() {
2802            self.error("parse_cond_or: max recursion depth exceeded");
2803            self.recursion_depth -= 1;
2804            return None;
2805        }
2806
2807        let left = match self.parse_cond_and() {
2808            Some(l) => l,
2809            None => {
2810                self.recursion_depth -= 1;
2811                return None;
2812            }
2813        };
2814
2815        self.skip_cond_separators();
2816
2817        let result = if self.lexer.tok == LexTok::Dbar {
2818            self.lexer.zshlex();
2819            self.skip_cond_separators();
2820            self.parse_cond_or()
2821                .map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
2822        } else {
2823            Some(left)
2824        };
2825
2826        self.recursion_depth -= 1;
2827        result
2828    }
2829
2830    /// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
2831    fn parse_cond_and(&mut self) -> Option<ZshCond> {
2832        self.recursion_depth += 1;
2833        if self.check_recursion() {
2834            self.error("parse_cond_and: max recursion depth exceeded");
2835            self.recursion_depth -= 1;
2836            return None;
2837        }
2838
2839        let left = match self.parse_cond_not() {
2840            Some(l) => l,
2841            None => {
2842                self.recursion_depth -= 1;
2843                return None;
2844            }
2845        };
2846
2847        self.skip_cond_separators();
2848
2849        let result = if self.lexer.tok == LexTok::Damper {
2850            self.lexer.zshlex();
2851            self.skip_cond_separators();
2852            self.parse_cond_and()
2853                .map(|right| ZshCond::And(Box::new(left), Box::new(right)))
2854        } else {
2855            Some(left)
2856        };
2857
2858        self.recursion_depth -= 1;
2859        result
2860    }
2861
2862    /// Cond-expression `!` negation level. C: handled inside
2863    /// par_cond_2 at parse.c:2476-2625 via the BANG token check.
2864    fn parse_cond_not(&mut self) -> Option<ZshCond> {
2865        self.recursion_depth += 1;
2866        if self.check_recursion() {
2867            self.error("parse_cond_not: max recursion depth exceeded");
2868            self.recursion_depth -= 1;
2869            return None;
2870        }
2871
2872        self.skip_cond_separators();
2873
2874        // ! can be either LexTok::Bang or String "!"
2875        let is_not = self.lexer.tok == LexTok::Bang
2876            || (self.lexer.tok == LexTok::String
2877                && self
2878                    .lexer
2879                    .tokstr
2880                    .as_ref()
2881                    .map(|s| s == "!")
2882                    .unwrap_or(false));
2883        if is_not {
2884            self.lexer.zshlex();
2885            let inner = match self.parse_cond_not() {
2886                Some(i) => i,
2887                None => {
2888                    self.recursion_depth -= 1;
2889                    return None;
2890                }
2891            };
2892            self.recursion_depth -= 1;
2893            return Some(ZshCond::Not(Box::new(inner)));
2894        }
2895
2896        if self.lexer.tok == LexTok::Inpar {
2897            self.lexer.zshlex();
2898            self.skip_cond_separators();
2899            let inner = match self.parse_cond_expr() {
2900                Some(i) => i,
2901                None => {
2902                    self.recursion_depth -= 1;
2903                    return None;
2904                }
2905            };
2906            self.skip_cond_separators();
2907            if self.lexer.tok == LexTok::Outpar {
2908                self.lexer.zshlex();
2909            }
2910            self.recursion_depth -= 1;
2911            return Some(inner);
2912        }
2913
2914        let result = self.parse_cond_primary();
2915        self.recursion_depth -= 1;
2916        result
2917    }
2918
2919    /// Cond-expression primary: unary tests (-f, -d, ...), binary
2920    /// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
2921    /// sub-expressions. Direct port of par_cond_double / par_cond_triple
2922    /// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
2923    fn parse_cond_primary(&mut self) -> Option<ZshCond> {
2924        let s1 = match self.lexer.tok {
2925            LexTok::String => {
2926                let s = self.lexer.tokstr.clone().unwrap_or_default();
2927                self.lexer.zshlex();
2928                s
2929            }
2930            _ => return None,
2931        };
2932
2933        self.skip_cond_separators();
2934
2935        // Check for unary operator
2936        if s1.starts_with('-') && s1.len() == 2 {
2937            let s2 = match self.lexer.tok {
2938                LexTok::String => {
2939                    let s = self.lexer.tokstr.clone().unwrap_or_default();
2940                    self.lexer.zshlex();
2941                    s
2942                }
2943                _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
2944            };
2945            return Some(ZshCond::Unary(s1, s2));
2946        }
2947
2948        // Check for binary operator
2949        let op = match self.lexer.tok {
2950            LexTok::String => {
2951                let s = self.lexer.tokstr.clone().unwrap_or_default();
2952                self.lexer.zshlex();
2953                s
2954            }
2955            LexTok::Inang => {
2956                self.lexer.zshlex();
2957                "<".to_string()
2958            }
2959            LexTok::Outang => {
2960                self.lexer.zshlex();
2961                ">".to_string()
2962            }
2963            _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
2964        };
2965
2966        self.skip_cond_separators();
2967
2968        let s2 = match self.lexer.tok {
2969            LexTok::String => {
2970                let s = self.lexer.tokstr.clone().unwrap_or_default();
2971                self.lexer.zshlex();
2972                s
2973            }
2974            _ => return Some(ZshCond::Binary(s1, op, String::new())),
2975        };
2976
2977        if op == "=~" {
2978            Some(ZshCond::Regex(s1, s2))
2979        } else {
2980            Some(ZshCond::Binary(s1, op, s2))
2981        }
2982    }
2983
2984    fn skip_cond_separators(&mut self) {
2985        while self.lexer.tok == LexTok::Seper && {
2986            let s = self.lexer.tokstr.as_ref();
2987            s.map(|s| !s.contains(';')).unwrap_or(true)
2988        } {
2989            self.lexer.zshlex();
2990        }
2991    }
2992
2993    /// Parse (( ... )) arithmetic command
2994    /// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
2995    /// `par_dinbrack` (despite the name; the function actually handles
2996    /// DINPAR `(( ))` blocks too).
2997    fn parse_arith(&mut self) -> Option<ZshCommand> {
2998        let expr = self.lexer.tokstr.clone().unwrap_or_default();
2999        self.lexer.zshlex();
3000        Some(ZshCommand::Arith(expr))
3001    }
3002
3003    /// Parse time command
3004    /// Parse `time CMD` (POSIX time keyword). Direct port of
3005    /// zsh/Src/parse.c:1787-1808 `par_time`. The `time` keyword
3006    /// times the execution of the following pipeline / cmd.
3007    fn parse_time(&mut self) -> Option<ZshCommand> {
3008        self.lexer.zshlex(); // skip 'time'
3009
3010        // Check if there's a pipeline to time
3011        if self.lexer.tok == LexTok::Seper
3012            || self.lexer.tok == LexTok::Newlin
3013            || self.lexer.tok == LexTok::Endinput
3014        {
3015            Some(ZshCommand::Time(None))
3016        } else {
3017            let sublist = self.parse_sublist();
3018            Some(ZshCommand::Time(sublist.map(Box::new)))
3019        }
3020    }
3021
3022    /// Check if next token is ()
3023    fn peek_inoutpar(&mut self) -> bool {
3024        self.lexer.tok == LexTok::Inoutpar
3025    }
3026
3027    /// Skip separator tokens
3028    fn skip_separators(&mut self) {
3029        let mut iterations = 0;
3030        while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
3031            iterations += 1;
3032            if iterations > 100_000 {
3033                self.error("skip_separators: too many iterations");
3034                return;
3035            }
3036            self.lexer.zshlex();
3037        }
3038    }
3039
3040    /// Record an error
3041    fn error(&mut self, msg: &str) {
3042        self.errors.push(ParseError {
3043            message: msg.to_string(),
3044            line: self.lexer.lineno,
3045        });
3046    }
3047}
3048
3049#[cfg(test)]
3050mod tests {
3051    use super::*;
3052
3053    fn parse(input: &str) -> Result<ZshProgram, Vec<ParseError>> {
3054        let mut parser = ZshParser::new(input);
3055        parser.parse()
3056    }
3057
3058    #[test]
3059    fn test_simple_command() {
3060        let prog = parse("echo hello world").unwrap();
3061        assert_eq!(prog.lists.len(), 1);
3062        match &prog.lists[0].sublist.pipe.cmd {
3063            ZshCommand::Simple(s) => {
3064                assert_eq!(s.words, vec!["echo", "hello", "world"]);
3065            }
3066            _ => panic!("expected simple command"),
3067        }
3068    }
3069
3070    #[test]
3071    fn test_pipeline() {
3072        let prog = parse("ls | grep foo | wc -l").unwrap();
3073        assert_eq!(prog.lists.len(), 1);
3074
3075        let pipe = &prog.lists[0].sublist.pipe;
3076        assert!(pipe.next.is_some());
3077
3078        let pipe2 = pipe.next.as_ref().unwrap();
3079        assert!(pipe2.next.is_some());
3080    }
3081
3082    #[test]
3083    fn test_and_or() {
3084        let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
3085        let sublist = &prog.lists[0].sublist;
3086
3087        assert!(sublist.next.is_some());
3088        let (op, _) = sublist.next.as_ref().unwrap();
3089        assert_eq!(*op, SublistOp::And);
3090    }
3091
3092    #[test]
3093    fn test_if_then() {
3094        let prog = parse("if test -f foo; then echo yes; fi").unwrap();
3095        match &prog.lists[0].sublist.pipe.cmd {
3096            ZshCommand::If(_) => {}
3097            _ => panic!("expected if command"),
3098        }
3099    }
3100
3101    #[test]
3102    fn test_for_loop() {
3103        let prog = parse("for i in a b c; do echo $i; done").unwrap();
3104        match &prog.lists[0].sublist.pipe.cmd {
3105            ZshCommand::For(f) => {
3106                assert_eq!(f.var, "i");
3107                match &f.list {
3108                    ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
3109                    _ => panic!("expected word list"),
3110                }
3111            }
3112            _ => panic!("expected for command"),
3113        }
3114    }
3115
3116    #[test]
3117    fn test_case() {
3118        let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
3119        match &prog.lists[0].sublist.pipe.cmd {
3120            ZshCommand::Case(c) => {
3121                assert_eq!(c.arms.len(), 2);
3122            }
3123            _ => panic!("expected case command"),
3124        }
3125    }
3126
3127    #[test]
3128    fn test_function() {
3129        // First test just parsing "function foo" to see what happens
3130        let prog = parse("function foo { }").unwrap();
3131        match &prog.lists[0].sublist.pipe.cmd {
3132            ZshCommand::FuncDef(f) => {
3133                assert_eq!(f.names, vec!["foo"]);
3134            }
3135            _ => panic!(
3136                "expected function, got {:?}",
3137                prog.lists[0].sublist.pipe.cmd
3138            ),
3139        }
3140    }
3141
3142    #[test]
3143    fn test_redirection() {
3144        let prog = parse("echo hello > file.txt").unwrap();
3145        match &prog.lists[0].sublist.pipe.cmd {
3146            ZshCommand::Simple(s) => {
3147                assert_eq!(s.redirs.len(), 1);
3148                assert_eq!(s.redirs[0].rtype, RedirType::Write);
3149            }
3150            _ => panic!("expected simple command"),
3151        }
3152    }
3153
3154    #[test]
3155    fn test_assignment() {
3156        let prog = parse("FOO=bar echo $FOO").unwrap();
3157        match &prog.lists[0].sublist.pipe.cmd {
3158            ZshCommand::Simple(s) => {
3159                assert_eq!(s.assigns.len(), 1);
3160                assert_eq!(s.assigns[0].name, "FOO");
3161            }
3162            _ => panic!("expected simple command"),
3163        }
3164    }
3165
3166    #[test]
3167    fn test_parse_completion_function() {
3168        let input = r#"_2to3_fixes() {
3169  local -a fixes
3170  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
3171  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
3172}"#;
3173        let result = parse(input);
3174        assert!(
3175            result.is_ok(),
3176            "Failed to parse completion function: {:?}",
3177            result.err()
3178        );
3179        let prog = result.unwrap();
3180        assert!(
3181            !prog.lists.is_empty(),
3182            "Expected at least one list in program"
3183        );
3184    }
3185
3186    #[test]
3187    fn test_parse_array_with_complex_elements() {
3188        let input = r#"arguments=(
3189  '(- * :)'{-h,--help}'[show this help message and exit]'
3190  {-d,--doctests_only}'[fix up doctests only]'
3191  '*:filename:_files'
3192)"#;
3193        let result = parse(input);
3194        assert!(
3195            result.is_ok(),
3196            "Failed to parse array assignment: {:?}",
3197            result.err()
3198        );
3199    }
3200
3201    #[test]
3202    fn test_parse_full_completion_file() {
3203        let input = r##"#compdef 2to3
3204
3205# zsh completions for '2to3'
3206
3207_2to3_fixes() {
3208  local -a fixes
3209  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
3210  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
3211}
3212
3213local -a arguments
3214
3215arguments=(
3216  '(- * :)'{-h,--help}'[show this help message and exit]'
3217  {-d,--doctests_only}'[fix up doctests only]'
3218  {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
3219  {-j,--processes}'[run 2to3 concurrently]:number: '
3220  {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
3221  {-l,--list-fixes}'[list available transformations]'
3222  {-p,--print-function}'[modify the grammar so that print() is a function]'
3223  {-v,--verbose}'[more verbose logging]'
3224  '--no-diffs[do not show diffs of the refactoring]'
3225  {-w,--write}'[write back modified files]'
3226  {-n,--nobackups}'[do not write backups for modified files]'
3227  {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
3228  {-W,--write-unchanged-files}'[also write files even if no changes were required]'
3229  '--add-suffix[append this string to all output filenames]:suffix: '
3230  '*:filename:_files'
3231)
3232
3233_arguments -s -S $arguments
3234"##;
3235        let result = parse(input);
3236        assert!(
3237            result.is_ok(),
3238            "Failed to parse full completion file: {:?}",
3239            result.err()
3240        );
3241        let prog = result.unwrap();
3242        // Should have parsed successfully with at least one statement
3243        assert!(!prog.lists.is_empty(), "Expected at least one list");
3244    }
3245
3246    #[test]
3247    fn test_parse_logs_sh() {
3248        let input = r#"#!/usr/bin/env bash
3249shopt -s globstar
3250
3251if [[ $(uname) == Darwin ]]; then
3252    tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
3253else
3254    if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
3255        tail -f /var/log/**/*.log | lolcat
3256    else
3257        printf "Unsupported...\n" >&2
3258    fi
3259fi
3260"#;
3261        let result = parse(input);
3262        assert!(
3263            result.is_ok(),
3264            "Failed to parse logs.sh: {:?}",
3265            result.err()
3266        );
3267    }
3268
3269    #[test]
3270    fn test_parse_case_with_glob() {
3271        let input = r#"case "$ZPWR_OS_TYPE" in
3272    darwin*)  open_cmd='open'
3273      ;;
3274    cygwin*)  open_cmd='cygstart'
3275      ;;
3276    linux*)
3277        open_cmd='xdg-open'
3278      ;;
3279esac"#;
3280        let result = parse(input);
3281        assert!(
3282            result.is_ok(),
3283            "Failed to parse case with glob: {:?}",
3284            result.err()
3285        );
3286    }
3287
3288    #[test]
3289    fn test_parse_case_with_nested_if() {
3290        // Test case with nested if and glob patterns
3291        let input = r##"function zpwrGetOpenCommand(){
3292    local open_cmd
3293    case "$ZPWR_OS_TYPE" in
3294        darwin*)  open_cmd='open' ;;
3295        cygwin*)  open_cmd='cygstart' ;;
3296        linux*)
3297            if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
3298                open_cmd='nohup xdg-open'
3299            fi
3300            ;;
3301    esac
3302}"##;
3303        let result = parse(input);
3304        assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
3305    }
3306
3307    #[test]
3308    fn test_parse_zpwr_scripts() {
3309        use std::fs;
3310        use std::path::Path;
3311        use std::sync::mpsc;
3312        use std::thread;
3313        use std::time::{Duration, Instant};
3314
3315        let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
3316        if !scripts_dir.exists() {
3317            eprintln!("Skipping test: scripts directory not found");
3318            return;
3319        }
3320
3321        let mut total = 0;
3322        let mut passed = 0;
3323        let mut failed_files = Vec::new();
3324        let mut timeout_files = Vec::new();
3325
3326        for ext in &["sh", "zsh"] {
3327            let pattern = scripts_dir.join(format!("*.{}", ext));
3328            if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
3329                for entry in entries.flatten() {
3330                    total += 1;
3331                    let file_path = entry.display().to_string();
3332                    let content = match fs::read_to_string(&entry) {
3333                        Ok(c) => c,
3334                        Err(e) => {
3335                            failed_files.push((file_path, format!("read error: {}", e)));
3336                            continue;
3337                        }
3338                    };
3339
3340                    // Parse with timeout
3341                    let content_clone = content.clone();
3342                    let (tx, rx) = mpsc::channel();
3343                    let handle = thread::spawn(move || {
3344                        let result = parse(&content_clone);
3345                        let _ = tx.send(result);
3346                    });
3347
3348                    match rx.recv_timeout(Duration::from_secs(2)) {
3349                        Ok(Ok(_)) => passed += 1,
3350                        Ok(Err(errors)) => {
3351                            let first_err = errors
3352                                .first()
3353                                .map(|e| format!("line {}: {}", e.line, e.message))
3354                                .unwrap_or_default();
3355                            failed_files.push((file_path, first_err));
3356                        }
3357                        Err(_) => {
3358                            timeout_files.push(file_path);
3359                            // Thread will be abandoned
3360                        }
3361                    }
3362                }
3363            }
3364        }
3365
3366        eprintln!("\n=== ZPWR Scripts Parse Results ===");
3367        eprintln!("Passed: {}/{}", passed, total);
3368
3369        if !timeout_files.is_empty() {
3370            eprintln!("\nTimeout files (>2s):");
3371            for file in &timeout_files {
3372                eprintln!("  {}", file);
3373            }
3374        }
3375
3376        if !failed_files.is_empty() {
3377            eprintln!("\nFailed files:");
3378            for (file, err) in &failed_files {
3379                eprintln!("  {} - {}", file, err);
3380            }
3381        }
3382
3383        // Allow some failures initially, but track progress
3384        let pass_rate = if total > 0 {
3385            (passed as f64 / total as f64) * 100.0
3386        } else {
3387            0.0
3388        };
3389        eprintln!("Pass rate: {:.1}%", pass_rate);
3390
3391        // Require at least 50% pass rate for now
3392        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
3393    }
3394
3395    #[test]
3396    #[ignore] // Uses threads that can't be killed on timeout; use integration test instead
3397    fn test_parse_zsh_stdlib_functions() {
3398        use std::fs;
3399        use std::path::Path;
3400        use std::sync::mpsc;
3401        use std::thread;
3402        use std::time::Duration;
3403
3404        let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
3405        if !functions_dir.exists() {
3406            eprintln!(
3407                "Skipping test: zsh_functions directory not found at {:?}",
3408                functions_dir
3409            );
3410            return;
3411        }
3412
3413        let mut total = 0;
3414        let mut passed = 0;
3415        let mut failed_files = Vec::new();
3416        let mut timeout_files = Vec::new();
3417
3418        if let Ok(entries) = fs::read_dir(&functions_dir) {
3419            for entry in entries.flatten() {
3420                let path = entry.path();
3421                if !path.is_file() {
3422                    continue;
3423                }
3424
3425                total += 1;
3426                let file_path = path.display().to_string();
3427                let content = match fs::read_to_string(&path) {
3428                    Ok(c) => c,
3429                    Err(e) => {
3430                        failed_files.push((file_path, format!("read error: {}", e)));
3431                        continue;
3432                    }
3433                };
3434
3435                // Parse with timeout
3436                let content_clone = content.clone();
3437                let (tx, rx) = mpsc::channel();
3438                thread::spawn(move || {
3439                    let result = parse(&content_clone);
3440                    let _ = tx.send(result);
3441                });
3442
3443                match rx.recv_timeout(Duration::from_secs(2)) {
3444                    Ok(Ok(_)) => passed += 1,
3445                    Ok(Err(errors)) => {
3446                        let first_err = errors
3447                            .first()
3448                            .map(|e| format!("line {}: {}", e.line, e.message))
3449                            .unwrap_or_default();
3450                        failed_files.push((file_path, first_err));
3451                    }
3452                    Err(_) => {
3453                        timeout_files.push(file_path);
3454                    }
3455                }
3456            }
3457        }
3458
3459        eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
3460        eprintln!("Passed: {}/{}", passed, total);
3461
3462        if !timeout_files.is_empty() {
3463            eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
3464            for file in timeout_files.iter().take(10) {
3465                eprintln!("  {}", file);
3466            }
3467            if timeout_files.len() > 10 {
3468                eprintln!("  ... and {} more", timeout_files.len() - 10);
3469            }
3470        }
3471
3472        if !failed_files.is_empty() {
3473            eprintln!("\nFailed files: {}", failed_files.len());
3474            for (file, err) in failed_files.iter().take(20) {
3475                let filename = Path::new(file)
3476                    .file_name()
3477                    .unwrap_or_default()
3478                    .to_string_lossy();
3479                eprintln!("  {} - {}", filename, err);
3480            }
3481            if failed_files.len() > 20 {
3482                eprintln!("  ... and {} more", failed_files.len() - 20);
3483            }
3484        }
3485
3486        let pass_rate = if total > 0 {
3487            (passed as f64 / total as f64) * 100.0
3488        } else {
3489            0.0
3490        };
3491        eprintln!("Pass rate: {:.1}%", pass_rate);
3492
3493        // Require at least 50% pass rate
3494        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
3495    }
3496}
zshrs_parse/parser.rs

zshrs_parse/
parser.rs