zshrs_parse/
parser.rs

1//! Zsh parser - Direct port from zsh/Src/parse.c
2//!
3//! This parser takes tokens from the ZshLexer and builds an AST.
4//! It follows the zsh grammar closely, producing structures that
5//! can be executed by the shell executor.
6
7use crate::lexer::ZshLexer;
8use crate::tokens::LexTok;
9use serde::{Deserialize, Serialize};
10
/// AST node for a complete program (list of commands).
///
/// This is the root type handed around by the parser; compound commands
/// (`ZshCommand::Subsh`, `ZshIf`, loops, …) embed nested `ZshProgram`s for
/// their bodies.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshProgram {
    /// The lists that make up this program.
    pub lists: Vec<ZshList>,
}
16
/// One entry of a command list: a single sublist together with the flags
/// that describe how it is run. Entries are separated in the source by
/// `;`, `&` or newline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshList {
    /// The `&&` / `||` chain carried by this entry.
    pub sublist: ZshSublist,
    /// Background / disown markers for this entry.
    pub flags: ListFlags,
}
23
/// Execution flags attached to a `ZshList`. `Default` yields both flags
/// cleared.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct ListFlags {
    /// Run asynchronously (&)
    pub async_: bool,
    /// Disown after running (&| or &!)
    pub disown: bool,
}
31
/// A sublist is pipelines connected by && or ||.
///
/// Represented as a singly linked chain: `pipe` is the first pipeline and
/// `next` holds the connecting operator plus the remainder of the chain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshSublist {
    /// First pipeline of the chain.
    pub pipe: ZshPipe,
    /// Operator and following sublist, or `None` at the end of the chain.
    pub next: Option<(SublistOp, Box<ZshSublist>)>,
    /// Coproc / negation markers for this sublist.
    pub flags: SublistFlags,
}
39
/// Operator joining two pipelines inside a `ZshSublist`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SublistOp {
    And, // &&
    Or,  // ||
}
45
/// Modifier flags attached to a `ZshSublist`. `Default` yields both flags
/// cleared.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct SublistFlags {
    /// Coproc
    pub coproc: bool,
    /// Negated with !
    pub not: bool,
}
53
/// A pipeline is commands connected by |.
///
/// Stored as a linked chain of stages: `cmd` is this stage and `next` is
/// the stage reading from it, if any.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshPipe {
    /// Command executed by this stage.
    pub cmd: ZshCommand,
    /// Following stage, or `None` for the last stage of the pipeline.
    pub next: Option<Box<ZshPipe>>,
    /// Line number associated with this stage (presumably the source line
    /// on which it starts — confirm against the code that fills it in).
    pub lineno: u64,
    /// `|&` between this stage and the next — merge stderr into the
    /// pipe so the next stage's stdin sees both stdout AND stderr from
    /// this stage. When `next` is None this flag is meaningless.
    #[serde(default)]
    pub merge_stderr: bool,
}
66
/// A command: either a simple command or one of the compound forms.
///
/// Variants that hold a nested `ZshProgram` box it, as does the
/// self-referential `Redirected` wrapper.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshCommand {
    Simple(ZshSimple),
    Subsh(Box<ZshProgram>), // (list)
    Cursh(Box<ZshProgram>), // {list}
    For(ZshFor),
    Case(ZshCase),
    If(ZshIf),
    While(ZshWhile),
    Until(ZshWhile),
    Repeat(ZshRepeat),
    FuncDef(ZshFuncDef),
    /// `time`, optionally followed by the sublist being timed.
    Time(Option<Box<ZshSublist>>),
    Cond(ZshCond), // [[ ... ]]
    Arith(String), // (( ... ))
    Try(ZshTry),   // { ... } always { ... }
    /// Compound command with trailing redirects:
    /// `{ cmd } 2>&1`, `(...) >file`, `if ...; fi >file`, etc.
    /// Simple commands carry redirects in their own struct; this wrapper
    /// is only used for compound forms.
    Redirected(Box<ZshCommand>, Vec<ZshRedir>),
}
90
/// A simple command (assignments, words, redirections).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshSimple {
    /// Assignments attached to the command.
    pub assigns: Vec<ZshAssign>,
    /// Command words as raw strings (still in the lexer's token encoding
    /// where applicable — `simple_name_with_inoutpar` relies on that).
    pub words: Vec<String>,
    /// Redirections attached to the command.
    pub redirs: Vec<ZshRedir>,
}
98
/// An assignment (`name=value` or `name+=value`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshAssign {
    /// Name on the left-hand side.
    pub name: String,
    /// Scalar or array right-hand side.
    pub value: ZshAssignValue,
    pub append: bool, // +=
}
106
/// Right-hand side of a `ZshAssign`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshAssignValue {
    /// Single string value.
    Scalar(String),
    /// List of element strings.
    Array(Vec<String>),
}
112
/// A redirection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshRedir {
    /// Which redirection operator was used.
    pub rtype: RedirType,
    /// File descriptor number for the redirection (presumably the fd being
    /// redirected — confirm against `parse_redirection`).
    pub fd: i32,
    /// Word following the operator (presumably the target file, fd, or
    /// heredoc terminator — confirm against `parse_redirection`).
    pub name: String,
    /// Heredoc body and terminator; filled in by `resolve_redir` when
    /// `heredoc_idx` is set.
    pub heredoc: Option<HereDocInfo>,
    pub varid: Option<String>, // {var}>file
    /// Index into ZshLexer.heredocs[] for body lookup. Filled in by
    /// `parse_redirection` for Heredoc/HeredocDash, then resolved into
    /// `heredoc.content` by `fill_heredoc_bodies` after process_heredocs
    /// has run for the line.
    ///
    /// Skipped by serde, so it is not part of the serialized form.
    #[serde(skip)]
    pub heredoc_idx: Option<usize>,
}
128
/// Body and terminator of a here-document, attached to a `ZshRedir`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HereDocInfo {
    /// Text of the here-document body.
    pub content: String,
    /// Terminator word that ends the body.
    pub terminator: String,
    /// Originally-quoted terminator (`<<'EOF'`, `<<"EOF"`). When true the
    /// body is passed verbatim — no `$var` / `$(cmd)` / `$((expr))`
    /// expansion. Plain `<<EOF` runs all expansions.
    #[serde(default)]
    pub quoted: bool,
}
139
/// Redirection type. Each variant corresponds to the operator shown in
/// its trailing comment.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RedirType {
    Write,        // >
    Writenow,     // >|
    Append,       // >>
    Appendnow,    // >>|
    Read,         // <
    ReadWrite,    // <>
    Heredoc,      // <<
    HeredocDash,  // <<-
    Herestr,      // <<<
    MergeIn,      // <&
    MergeOut,     // >&
    ErrWrite,     // &>
    ErrWritenow,  // &>|
    ErrAppend,    // >>&
    ErrAppendnow, // >>&|
    InPipe,       // < <(...)
    OutPipe,      // > >(...)
}
161
/// For loop (also used for `select`, see `is_select`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshFor {
    /// Loop variable name.
    pub var: String,
    /// What the loop iterates over.
    pub list: ForList,
    /// Loop body.
    pub body: Box<ZshProgram>,
    /// True if this was parsed as `select` rather than `for`. Both share
    /// the same parser, so the compiler routes on this flag.
    #[serde(default)]
    pub is_select: bool,
}
173
/// Iteration source of a `ZshFor`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ForList {
    /// Explicit word list.
    Words(Vec<String>),
    /// Three-expression arithmetic form, each expression kept as raw text.
    CStyle {
        init: String,
        cond: String,
        step: String,
    },
    /// No list given (presumably iterates the positional parameters —
    /// confirm against the executor).
    Positional,
}
184
/// Case statement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshCase {
    /// Word being matched.
    pub word: String,
    /// Arms of the statement.
    pub arms: Vec<CaseArm>,
}
191
/// One arm of a `ZshCase`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CaseArm {
    /// Patterns attached to this arm.
    pub patterns: Vec<String>,
    /// Commands belonging to this arm.
    pub body: ZshProgram,
    /// Token that closed the arm (`;;`, `;&` or `;|`).
    pub terminator: CaseTerm,
}
198
/// Terminator token of a `CaseArm`; the trailing comment on each variant
/// gives the source spelling.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CaseTerm {
    Break,    // ;;
    Continue, // ;&
    TestNext, // ;|
}
205
/// If statement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshIf {
    /// Condition of the leading `if`.
    pub cond: Box<ZshProgram>,
    /// Body of the leading `if`.
    pub then: Box<ZshProgram>,
    /// `elif` branches as `(condition, body)` pairs (pair order as consumed
    /// by `fill_in_command`).
    pub elif: Vec<(ZshProgram, ZshProgram)>,
    /// Optional `else` body.
    pub else_: Option<Box<ZshProgram>>,
}
214
/// While/Until loop. Shared payload of `ZshCommand::While` and
/// `ZshCommand::Until`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshWhile {
    /// Loop condition.
    pub cond: Box<ZshProgram>,
    /// Loop body.
    pub body: Box<ZshProgram>,
    /// Whether this is an `until` loop.
    /// NOTE(review): duplicates the While/Until distinction already made by
    /// the `ZshCommand` variant — confirm which one consumers trust.
    pub until: bool,
}
222
/// Repeat loop.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshRepeat {
    /// Repetition count, kept as the raw word text.
    pub count: String,
    /// Loop body.
    pub body: Box<ZshProgram>,
}
229
/// Function definition.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshFuncDef {
    /// Names being defined; more than one for the multi-name form.
    pub names: Vec<String>,
    /// Function body.
    pub body: Box<ZshProgram>,
    /// Tracing flag for the function (semantics are defined by the
    /// consumer; not visible in this part of the file).
    pub tracing: bool,
    /// Anonymous-function call args. `() { body } a b` parses as a
    /// FuncDef (auto-named) with `auto_call_args = Some(vec!["a", "b"])`.
    /// compile_funcdef registers the function then emits a Simple call
    /// with these args.
    #[serde(default)]
    pub auto_call_args: Option<Vec<String>>,
    /// Original source text of the function body (the bytes between
    /// `{` and `}`, without the braces themselves), captured at parse
    /// time. Populated for `function name { body }` and `function name() { body }`
    /// forms; left None for the synthesized inline-funcdef recovery
    /// path. ZshCompiler::compile_funcdef forwards it to
    /// `BUILTIN_REGISTER_COMPILED_FN` so introspection (`whence`, `which`,
    /// `${functions[name]}`) has canonical source text.
    #[serde(default)]
    pub body_source: Option<String>,
}
252
/// Conditional expression [[ ... ]], as a tree of logical connectives over
/// unary, binary and regex tests. Operands and operators are kept as raw
/// strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshCond {
    Not(Box<ZshCond>),
    And(Box<ZshCond>, Box<ZshCond>),
    Or(Box<ZshCond>, Box<ZshCond>),
    Unary(String, String),          // -f file, -n str, etc.
    Binary(String, String, String), // str = pat, a -eq b, etc.
    Regex(String, String),          // str =~ regex
}
263
/// Try/always block: `{ try-block } always { always-block }`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshTry {
    /// The block before `always`.
    pub try_block: Box<ZshProgram>,
    /// The block after `always`.
    pub always: Box<ZshProgram>,
}
270
/// Zsh parameter expansion flags (the `(...)` part of `${(flags)name}`).
///
/// The trailing comment on each variant gives the flag spelling it stands
/// for. Nothing in this part of the file constructs or consumes these
/// variants, so the descriptions are the original author's.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshParamFlag {
    Lower,                 // L - lowercase
    Upper,                 // U - uppercase
    Capitalize,            // C - capitalize words
    Join(String),          // j:sep: - join array with separator
    JoinNewline,           // F - join with newlines
    Split(String),         // s:sep: - split string into array
    SplitLines,            // f - split on newlines
    SplitWords,            // z - split into words (shell parsing)
    Type,                  // t - type of variable
    Words,                 // w - word splitting
    Quote,                 // qq - single-quote always
    QuoteIfNeeded,         // q+ - single-quote only if needed
    DoubleQuote,           // qqq - double-quote
    DollarQuote,           // qqqq - $'...' style
    QuoteBackslash,        // q / b / B - backslash-escape special chars
    Unique,                // u - unique elements only
    Reverse,               // O - reverse sort
    Sort,                  // o - sort
    NumericSort,           // n - numeric sort
    IndexSort,             // a - sort in array index order
    Keys,                  // k - associative array keys
    Values,                // v - associative array values
    // NOTE(review): the zsh manual describes `(#)` as evaluating the words
    // as numeric expressions and emitting the characters with those codes,
    // not as a length — confirm what this variant is meant to do.
    Length,                // # - length (character codes)
    CountChars,            // c - count total characters
    Expand,                // e - perform shell expansions
    PromptExpand,          // % - expand prompt escapes
    PromptExpandFull,      // %% - full prompt expansion
    Visible,               // V - make non-printable chars visible
    Directory,             // D - substitute directory names
    Head(usize),           // [1,n] - first n elements
    Tail(usize),           // [-n,-1] - last n elements
    PadLeft(usize, char),  // l:len:fill: - pad left
    PadRight(usize, char), // r:len:fill: - pad right
    Width(usize),          // m - use width for padding
    Match,                 // M - include matched portion
    Remove,                // R - include non-matched portion (complement of M)
    // NOTE(review): the zsh manual describes `(S)` as substring matching for
    // the `#`, `%` and `/` pattern operators — "subscript scanning" may be
    // a misnomer; confirm.
    Subscript,             // S - subscript scanning
    Parameter,             // P - use value as parameter name (indirection)
    Glob,                  // ~ - glob patterns in pattern
    /// `@` flag — force array-context behavior even inside DQ. zsh's
    /// `"${(@o)arr}"` keeps the sort active and splices each element as
    /// its own word. Without this, the array-only flags became no-ops
    /// in DQ.
    At,
}
319
/// List operator (for shell command lists). Used as the separator half of
/// the `(ShellCommand, ListOp)` pairs in `ShellCommand::List`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ListOp {
    And,     // &&
    Or,      // ||
    Semi,    // ;
    Amp,     // &
    Newline, // \n
}
329
/// Shell word - either a plain literal or a concatenation of sub-words.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ShellWord {
    /// Plain text token. Most ZWC-decoded words land here. Goes through
    /// `expand_string` (plus glob/tilde/etc. as text-level transforms) for
    /// final output.
    Literal(String),
    /// Concatenation of sub-words. ZWC array decoding produces this with
    /// child Literals; nothing else constructs it now that the legacy
    /// hand-rolled parser is gone.
    Concat(Vec<ShellWord>),
}
342
/// Variable modifier for parameter expansion.
///
/// The variants carry their operand(s) as `ShellWord`s. Which source
/// syntax maps to which variant is decided by code outside this part of
/// the file; the names suggest the usual `${name:-w}`, `${name#pat}`,
/// `${name/pat/repl}` families — confirm before relying on that.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VarModifier {
    Default(ShellWord),
    DefaultAssign(ShellWord),
    Error(ShellWord),
    Alternate(ShellWord),
    Length,
    /// Offset and optional length.
    /// NOTE(review): field meaning inferred from the variant name — confirm.
    Substring(i64, Option<i64>),
    RemovePrefix(ShellWord),
    RemovePrefixLong(ShellWord),
    RemoveSuffix(ShellWord),
    RemoveSuffixLong(ShellWord),
    Replace(ShellWord, ShellWord),
    ReplaceAll(ShellWord, ShellWord),
    Upper,
    Lower,
}
361
/// Shell command - the old shell_ast compatible type.
///
/// Parallel representation to `ZshCommand`, built from `SimpleCommand`,
/// `CompoundCommand`, `ShellWord` and `Redirect`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ShellCommand {
    Simple(SimpleCommand),
    /// Pipeline stages plus a flag.
    /// NOTE(review): the bool's meaning is not visible here (possibly
    /// "negated") — confirm against the code that builds it.
    Pipeline(Vec<ShellCommand>, bool),
    /// Commands paired with a `ListOp` each.
    List(Vec<(ShellCommand, ListOp)>),
    Compound(CompoundCommand),
    /// Function name and body.
    FunctionDef(String, Box<ShellCommand>),
}
371
/// Simple command with assignments, words, and redirects.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimpleCommand {
    /// Assignments as `(name, value, flag)` triples.
    /// NOTE(review): the bool is presumably the `+=` append marker (compare
    /// `ZshAssign::append`) — confirm.
    pub assignments: Vec<(String, ShellWord, bool)>,
    /// Command words.
    pub words: Vec<ShellWord>,
    /// Redirections attached to the command.
    pub redirects: Vec<Redirect>,
}
379
/// Redirect, as used by the `ShellCommand` representation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Redirect {
    /// Explicit file descriptor, if one was given.
    pub fd: Option<i32>,
    /// Redirection operator.
    pub op: RedirectOp,
    /// Word following the operator.
    pub target: ShellWord,
    /// Here-document body, when there is one.
    pub heredoc_content: Option<String>,
    /// Variable name for a `{var}`-style fd (presumably the counterpart of
    /// `ZshRedir::varid` — confirm).
    pub fd_var: Option<String>,
}
389
/// Redirect operator for `Redirect`.
///
/// Smaller set than `RedirType`; the mapping between the two is not part
/// of this section of the file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RedirectOp {
    Write,
    Append,
    Read,
    ReadWrite,
    Clobber,
    DupRead,
    DupWrite,
    HereDoc,
    HereString,
    WriteBoth,
    AppendBoth,
}
405
/// Compound command of the `ShellCommand` representation. Bodies are plain
/// `Vec<ShellCommand>` sequences.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CompoundCommand {
    BraceGroup(Vec<ShellCommand>),
    Subshell(Vec<ShellCommand>),
    /// `conditions` holds `(condition, body)` pairs (presumably the leading
    /// `if` followed by each `elif` — confirm against the builder).
    If {
        conditions: Vec<(Vec<ShellCommand>, Vec<ShellCommand>)>,
        else_part: Option<Vec<ShellCommand>>,
    },
    /// `words` is `None` when no word list was given.
    For {
        var: String,
        words: Option<Vec<ShellWord>>,
        body: Vec<ShellCommand>,
    },
    /// Arithmetic `for`, with the three expressions kept as raw text.
    ForArith {
        init: String,
        cond: String,
        step: String,
        body: Vec<ShellCommand>,
    },
    While {
        condition: Vec<ShellCommand>,
        body: Vec<ShellCommand>,
    },
    Until {
        condition: Vec<ShellCommand>,
        body: Vec<ShellCommand>,
    },
    /// Each case entry is `(patterns, body, terminator)`.
    Case {
        word: ShellWord,
        cases: Vec<(Vec<ShellWord>, Vec<ShellCommand>, CaseTerminator)>,
    },
    Select {
        var: String,
        words: Option<Vec<ShellWord>>,
        body: Vec<ShellCommand>,
    },
    Coproc {
        name: Option<String>,
        body: Box<ShellCommand>,
    },
    /// repeat N do ... done
    Repeat {
        count: String,
        body: Vec<ShellCommand>,
    },
    /// { try-block } always { always-block }
    Try {
        try_body: Vec<ShellCommand>,
        always_body: Vec<ShellCommand>,
    },
    /// Arithmetic command, expression kept as raw text.
    Arith(String),
    /// A command together with redirects applied to it.
    WithRedirects(Box<ShellCommand>, Vec<Redirect>),
}
460
/// Case terminator used by `CompoundCommand::Case`.
///
/// NOTE(review): unlike `CaseTerm`, the variants carry no token comments.
/// Presumably they correspond to `;;`, `;&` and `;|` in some order — the
/// mapping to `CaseTerm::{Break, Continue, TestNext}` should be confirmed
/// against the conversion code, since `Continue` exists in both enums.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum CaseTerminator {
    Break,
    Fallthrough,
    Continue,
}
468
/// A parse error: a message plus the line it refers to.
///
/// Rendered by the `Display` impl as `parse error at line {line}: {message}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParseError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Line number reported with the message.
    pub line: u64,
}
475
476impl std::fmt::Display for ParseError {
477    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
478        write!(f, "parse error at line {}: {}", self.line, self.message)
479    }
480}
481
// Marker impl with no overridden methods: lets `ParseError` be used
// wherever a `std::error::Error` is expected.
impl std::error::Error for ParseError {}
483
/// The Zsh Parser.
///
/// Owns the lexer for the input being parsed plus the bookkeeping used to
/// bound the amount of work a parse may perform.
pub struct ZshParser<'a> {
    /// Token source; borrows the input string for `'a`.
    lexer: ZshLexer<'a>,
    /// Errors collected so far. Cleared by `parse_context_restore`.
    errors: Vec<ParseError>,
    /// Global iteration counter to prevent infinite loops
    /// (bumped and tested by `check_limit`).
    global_iterations: usize,
    /// Recursion depth counter to prevent stack overflow
    /// (tested against `MAX_RECURSION_DEPTH` by `check_recursion`).
    recursion_depth: usize,
}
493
/// Largest `recursion_depth` that `ZshParser::check_recursion` still
/// accepts; anything greater is reported as exceeded.
const MAX_RECURSION_DEPTH: usize = 500;
495
/// Saved parse context. Direct port of zsh's `struct parse_stack`
/// declared in zsh/Src/zsh.h and used by parse.c:295-355
/// (`parse_context_save` / `parse_context_restore`). Pushes per-
/// parse-call state so a nested parse (e.g. inside command
/// substitution) doesn't clobber the outer parse.
///
/// zshrs port note: zsh's parse_stack tracks wordcode-buffer state
/// (ecbuf, eclen, ecused, ecnpats, ecstrs, ecsoffs, ecssub, ecnfunc).
/// zshrs builds AST trees instead so those fields collapse to a
/// recursion_depth + global_iterations save. The lexer-side fields
/// (incmdpos, incond, etc.) live on ZshLexer here so they get saved
/// via the lexer's own `LexStack` rather than being duplicated here.
///
/// Note that the parser's collected `errors` are not part of the saved
/// state.
#[derive(Debug, Default, Clone)]
pub struct ParseStack {
    /// Value of `ZshParser::recursion_depth` at save time.
    pub recursion_depth: usize,
    /// Value of `ZshParser::global_iterations` at save time.
    pub global_iterations: usize,
}
513
514/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
515/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
516/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
517/// during scanning (in source order).
518fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
519    for list in &mut prog.lists {
520        fill_in_sublist(&mut list.sublist, bodies);
521    }
522}
523
524fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
525    fill_in_pipe(&mut sub.pipe, bodies);
526    if let Some(next) = &mut sub.next {
527        fill_in_sublist(&mut next.1, bodies);
528    }
529}
530
531fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
532    fill_in_command(&mut pipe.cmd, bodies);
533    if let Some(next) = &mut pipe.next {
534        fill_in_pipe(next, bodies);
535    }
536}
537
538fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
539    match cmd {
540        ZshCommand::Simple(s) => {
541            for r in &mut s.redirs {
542                resolve_redir(r, bodies);
543            }
544        }
545        ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
546        ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
547        ZshCommand::If(i) => {
548            fill_heredoc_bodies(&mut i.cond, bodies);
549            fill_heredoc_bodies(&mut i.then, bodies);
550            for (c, b) in &mut i.elif {
551                fill_heredoc_bodies(c, bodies);
552                fill_heredoc_bodies(b, bodies);
553            }
554            if let Some(e) = &mut i.else_ {
555                fill_heredoc_bodies(e, bodies);
556            }
557        }
558        ZshCommand::While(w) | ZshCommand::Until(w) => {
559            fill_heredoc_bodies(&mut w.cond, bodies);
560            fill_heredoc_bodies(&mut w.body, bodies);
561        }
562        ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
563        ZshCommand::Case(c) => {
564            for arm in &mut c.arms {
565                fill_heredoc_bodies(&mut arm.body, bodies);
566            }
567        }
568        ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
569        ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
570        ZshCommand::Try(t) => {
571            fill_heredoc_bodies(&mut t.try_block, bodies);
572            fill_heredoc_bodies(&mut t.always, bodies);
573        }
574        ZshCommand::Redirected(inner, redirs) => {
575            for r in redirs {
576                resolve_redir(r, bodies);
577            }
578            fill_in_command(inner, bodies);
579        }
580        ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
581    }
582}
583
584fn resolve_redir(r: &mut ZshRedir, bodies: &[HereDocInfo]) {
585    if let Some(idx) = r.heredoc_idx {
586        if let Some(info) = bodies.get(idx) {
587            r.heredoc = Some(info.clone());
588        }
589    }
590}
591
592/// If `list` is a Simple containing one word that ends in the
593/// `<INPAR><OUTPAR>` token pair (the lexer-port encoding of `()`),
594/// return the bare name. Used by `parse_program_until` to detect
595/// `name() {body}` style function definitions where the lexer
596/// hasn't split the `()` from the name.
597/// Detect the `name() …` shape inside a Simple. Returns the function
598/// name and (when the body was already inlined into the same Simple,
599/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
600/// Returns None for non-funcdef shapes.
601fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
602    if list.flags.async_ || list.sublist.next.is_some() {
603        return None;
604    }
605    let pipe = &list.sublist.pipe;
606    if pipe.next.is_some() {
607        return None;
608    }
609    let simple = match &pipe.cmd {
610        ZshCommand::Simple(s) => s,
611        _ => return None,
612    };
613    if simple.words.is_empty() || !simple.assigns.is_empty() {
614        return None;
615    }
616    let suffix = "\u{88}\u{8a}"; // INPAR + OUTPAR
617                                 // Find the FIRST word ending in `()`. zsh accepts the
618                                 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
619                                 // par_funcdef wordlist) — words[0..i-1] are extra names,
620                                 // words[i] is `lastname()`. Words after are the body argv
621                                 // (one-line shorthand, `name() cmd args`).
622    let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
623    let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
624    for w in &simple.words[..par_idx] {
625        // Earlier names must be bare identifiers, NOT contain
626        // tokens that imply they're not function names (no `()`,
627        // no quotes, no expansions). zsh's lexer enforces this
628        // at the wordlist level; we approximate by requiring the
629        // word be an identifier-shaped token after untokenize.
630        let bare = crate::lexer::untokenize(w);
631        let valid = !bare.is_empty()
632            && bare
633                .chars()
634                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
635        if !valid {
636            return None;
637        }
638        names.push(bare);
639    }
640    let last = &simple.words[par_idx];
641    let bare = &last[..last.len() - suffix.len()];
642    if bare.is_empty() {
643        return None;
644    }
645    names.push(crate::lexer::untokenize(bare));
646    let rest = simple.words[par_idx + 1..].to_vec();
647    Some((names, rest))
648}
649
650impl<'a> ZshParser<'a> {
651    /// Create a new parser
652    pub fn new(input: &'a str) -> Self {
653        ZshParser {
654            lexer: ZshLexer::new(input),
655            errors: Vec::new(),
656            global_iterations: 0,
657            recursion_depth: 0,
658        }
659    }
660
661    /// Check iteration limit; returns true if exceeded
662    #[inline]
663    fn check_limit(&mut self) -> bool {
664        self.global_iterations += 1;
665        self.global_iterations > 10_000
666    }
667
    /// Check recursion depth; returns true if exceeded.
    ///
    /// Pure comparison of `recursion_depth` against `MAX_RECURSION_DEPTH`:
    /// unlike `check_limit`, this does not modify the counter, so callers
    /// are responsible for adjusting `recursion_depth` themselves.
    #[inline]
    fn check_recursion(&mut self) -> bool {
        self.recursion_depth > MAX_RECURSION_DEPTH
    }
673
674    /// Save parse context onto a `ParseStack`. Direct port of
675    /// zsh/Src/parse.c:295-320 `parse_context_save`. Pushes
676    /// recursion_depth + global_iterations and resets to zero so
677    /// a nested parse can't trigger the outer parse's limits.
678    /// Lexer-side state (incmdpos / incond / etc.) saves via the
679    /// lexer's own `LexStack` since those fields live on ZshLexer.
680    pub fn parse_context_save(&mut self, ps: &mut ParseStack) {
681        // parse.c:299-317 — save parser state. zshrs collapses zsh's
682        // wordcode-buffer fields (ecbuf/eclen/ecused/ecnpats/ecstrs/
683        // ecsoffs/ecssub/ecnfunc) into the recursion+iteration pair
684        // since the AST builder doesn't use a flat wordcode buffer.
685        ps.recursion_depth = self.recursion_depth;
686        ps.global_iterations = self.global_iterations;
687        // parse.c:318-319 — clear the buffer + heredoc list so a
688        // nested parse starts from a clean slate.
689        self.recursion_depth = 0;
690        self.global_iterations = 0;
691    }
692
693    /// Restore parse context from a `ParseStack`. Direct port of
694    /// zsh/Src/parse.c:326-355 `parse_context_restore`. Inverse of
695    /// `parse_context_save`. Also clears any half-built AST state
696    /// to prevent leaking into the outer parse.
697    pub fn parse_context_restore(&mut self, ps: &ParseStack) {
698        // parse.c:330-331 — free any in-progress wordcode buffer.
699        // zshrs has no equivalent — AST nodes are owned by their
700        // parent so dropping the parser frees them.
701
702        // parse.c:333-352 — restore saved state.
703        self.recursion_depth = ps.recursion_depth;
704        self.global_iterations = ps.global_iterations;
705
706        // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
707        // error flag so the outer parse sees a clean state. zshrs
708        // tracks errors per-parser; clearing means dropping any
709        // partial errors collected during the nested parse.
710        self.errors.clear();
711    }
712
713    /// Initialize parser status. Direct port of zsh/Src/parse.c:489-503
714    /// `init_parse_status`. Clears the per-parse-call lexer flags
715    /// so a fresh parse starts from cmd-position with no nesting
716    /// state inherited from a prior parse.
717    pub fn init_parse_status(&mut self) {
718        // parse.c:500-502 — `incasepat = incond = inredir = infor =
719        // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
720        self.lexer.incasepat = 0;
721        self.lexer.incond = 0;
722        self.lexer.inredir = false;
723        self.lexer.infor = 0;
724        self.lexer.intypeset = false;
725        self.lexer.incmdpos = true;
726    }
727
    /// Initialize parser for a fresh parse. Direct port of
    /// zsh/Src/parse.c:507-525 `init_parse`. C source allocates a
    /// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
    /// per-parse-call counters, and calls init_parse_status. zshrs
    /// has no flat wordcode buffer (AST is built inline) so this
    /// function reduces to init_parse_status + recursion_depth/
    /// global_iterations clear.
    ///
    /// The collected `errors` are left untouched.
    pub fn init_parse(&mut self) {
        // parse.c:513-520 — init wordcode buffer. zshrs has no buffer, so
        // only the two work counters are zeroed.
        self.recursion_depth = 0;
        self.global_iterations = 0;
        // parse.c:522 — `init_parse_status();`
        self.init_parse_status();
    }
742
    /// Check whether the parsed program is empty. Direct port of
    /// zsh/Src/parse.c:583-587 `empty_eprog`. C version checks
    /// `*p->prog == WCB_END()` (single end-of-wordcode marker).
    /// zshrs version reports true exactly when `prog.lists` has no
    /// entries; nested content is not inspected.
    ///
    /// Associated function (no `self`): call as `ZshParser::empty_eprog(&p)`.
    pub fn empty_eprog(prog: &ZshProgram) -> bool {
        prog.lists.is_empty()
    }
750
    /// Clear pending here-document list. Direct port of
    /// zsh/Src/parse.c:589-600 `clear_hdocs`. The C version walks
    /// the global `hdocs` linked list and frees each node. zshrs
    /// stores pending heredocs on the lexer's `heredocs` Vec —
    /// truncating it has the same effect.
    ///
    /// Called by `parse_event` when `par_event` reports failure.
    pub fn clear_hdocs(&mut self) {
        self.lexer.heredocs.clear();
    }
759
    /// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
    /// 612-631 `parse_event`. Reads one event from the lexer (a
    /// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
    /// returns the resulting ZshProgram.
    ///
    /// `endtok` is the token that terminates the event — usually
    /// ENDINPUT, but for command-style substitutions the closing
    /// `)` (zsh's CMD_SUBST_CLOSE).
    ///
    /// Returns `None` when `par_event` reports that no event was parsed
    /// (pending heredocs are cleared first), an empty program when `endtok`
    /// is not `Endinput`, and otherwise whatever `parse_program_until(None)`
    /// produces.
    ///
    /// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
    /// allocated wordcode program). zshrs returns a `ZshProgram`
    /// (AST root). Same role at the parse-output boundary.
    ///
    /// NOTE(review): `par_event` calls `parse_sublist` and discards the
    /// node it returns, so the tokens consumed by that call do not appear
    /// in the program built by the trailing `parse_program_until(None)`.
    /// Nothing is "accumulated" by `par_event` for this function to
    /// collect — confirm whether the first sublist is being dropped.
    pub fn parse_event(&mut self, endtok: LexTok) -> Option<ZshProgram> {
        // parse.c:616-619 — reset state and prime the lexer.
        self.lexer.tok = LexTok::Endinput;
        self.lexer.incmdpos = true;
        self.lexer.zshlex();
        // parse.c:620 — `init_parse();`
        self.init_parse();

        // parse.c:622-625 — drive par_event; on failure clear hdocs.
        if !self.par_event(endtok) {
            self.clear_hdocs();
            return None;
        }
        // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
        // parse for a substitution that doesn't need its own eprog.
        // zshrs returns an empty program in that case (caller
        // discards).
        if endtok != LexTok::Endinput {
            return Some(ZshProgram { lists: Vec::new() });
        }
        // parse.c:630 — `bld_eprog(1);` — build the final eprog.
        // zshrs has no wordcode buffer to finalize, so the program is
        // obtained by running parse_program_until from the lexer's
        // current position (see the NOTE(review) in the doc comment).
        Some(self.parse_program_until(None))
    }
798
799    /// Parse one event (sublist with optional separator). Direct
800    /// port of zsh/Src/parse.c:633-695 `par_event`. Returns true if
801    /// an event was successfully parsed, false on EOF / endtok.
802    ///
803    /// zshrs port note: the C version emits wordcodes via ecadd/
804    /// set_list_code; zshrs's parser builds AST nodes via
805    /// parse_sublist + parse_list. Same flow, different output.
806    pub fn par_event(&mut self, endtok: LexTok) -> bool {
807        // parse.c:639-643 — skip leading SEPERs.
808        while self.lexer.tok == LexTok::Seper {
809            // parse.c:640-641 — at top-level (endtok == ENDINPUT),
810            // a SEPER on a fresh line ends the event.
811            if self.lexer.isnewlin > 0 && endtok == LexTok::Endinput {
812                return false;
813            }
814            self.lexer.zshlex();
815        }
816        // parse.c:644-647 — terminate on EOF or matching close-token.
817        if self.lexer.tok == LexTok::Endinput {
818            return false;
819        }
820        if self.lexer.tok == endtok {
821            return true;
822        }
823        // parse.c:649-... — drive parse_sublist + handle terminator.
824        // zshrs's parse_sublist already builds the AST node directly.
825        match self.parse_sublist() {
826            Some(_) => {
827                // parse.c:651-693 — terminator handling. zshrs's
828                // parse_list wraps this; for parse_event we just
829                // confirm the sublist parsed.
830                true
831            }
832            None => false,
833        }
834    }
835
    /// Parse one list — non-recursing variant. Direct port of
    /// zsh/Src/parse.c:807-817 `par_list1`. Like par_list but
    /// doesn't recurse on the trailing-separator path; used by
    /// callers that only want one statement (e.g. each arm of a
    /// case body).
    ///
    /// In zshrs this is a straight delegation: the result is exactly what
    /// `parse_sublist` returns.
    pub fn par_list1(&mut self) -> Option<ZshSublist> {
        // parse.c:810-816 — body is a single par_sublist call wrapped
        // in the eu/ecused tracking that zshrs doesn't need (no
        // wordcode buffer).
        self.parse_sublist()
    }
847
    /// Placeholder for zsh/Src/parse.c:2347-2361 `setheredoc`, which wires
    /// a here-document body onto the redirection that requested it once
    /// the terminator has been matched.
    ///
    /// **This method is currently a no-op**: every parameter is ignored
    /// and no parser or lexer state is changed. It exists only so the API
    /// mirrors the C function.
    ///
    /// zshrs port note: zsh's setheredoc patches the wordcode
    /// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
    /// zshrs threads heredoc bodies through `HereDocInfo` structs
    /// that `resolve_redir` applies during the post-parse
    /// `fill_heredoc_bodies` pass, so nothing needs patching here.
    pub fn setheredoc(
        &mut self,
        _pc: usize,
        _redir_type: i32,
        _doc: &str,
        _term: &str,
        _munged_term: &str,
    ) {
        // Intentionally empty. Heredoc resolution happens in
        // fill_in_command / resolve_redir (free functions near the top of
        // this file), fed from self.lexer.heredocs by the post-parse pass.
    }
872
873    /// Parse a wordlist for `for ... in WORDS;`. Direct port of
874    /// zsh/Src/parse.c:2362-2378 `par_wordlist`. Reads STRING tokens
875    /// until the next SEPER / SEMI / NEWLIN.
876    pub fn par_wordlist(&mut self) -> Vec<String> {
877        let mut out = Vec::new();
878        // parse.c:2362-2378 — collect STRINGs into the wordlist.
879        while self.lexer.tok == LexTok::String {
880            if let Some(text) = self.lexer.tokstr.clone() {
881                out.push(text);
882            }
883            self.lexer.zshlex();
884        }
885        out
886    }
887
888    /// Parse a newline-separated wordlist. Direct port of
889    /// zsh/Src/parse.c:2379-2398 `par_nl_wordlist`. Like
890    /// par_wordlist but tolerates leading/trailing newlines.
891    pub fn par_nl_wordlist(&mut self) -> Vec<String> {
892        // parse.c:2380-2381 — skip leading newlines.
893        while self.lexer.tok == LexTok::Newlin {
894            self.lexer.zshlex();
895        }
896        let out = self.par_wordlist();
897        // parse.c:2395-2397 — skip trailing newlines.
898        while self.lexer.tok == LexTok::Newlin {
899            self.lexer.zshlex();
900        }
901        out
902    }
903
904    /// Get the integer value of the next token in a cond expression.
905    /// Direct port of zsh/Src/parse.c:2643-2658 `get_cond_num`.
906    /// Used for `[[ N OP M ]]` numeric tests where N/M are integer
907    /// literals or variable references.
908    pub fn get_cond_num(&mut self) -> Option<i64> {
909        if self.lexer.tok != LexTok::String {
910            return None;
911        }
912        let text = self.lexer.tokstr.as_ref()?.clone();
913        // parse.c:2647-2655 — parse as integer with optional sign.
914        let parsed = text.parse::<i64>().ok()?;
915        self.lexer.zshlex();
916        Some(parsed)
917    }
918
919    /// Emit a parser-level error. Direct port of zsh/Src/parse.c:
920    /// 2733-2766 `yyerror`. C version fills a per-event error buffer
921    /// + sets errflag. zshrs pushes onto self.errors which the
922    /// caller drains via parse()'s Result return.
923    pub fn yyerror(&mut self, msg: &str) {
924        // parse.c:2735-2765 — zsh's yyerror collects the offending
925        // token's literal text + line number. zshrs already does
926        // this via self.error() with the lexer's toklineno.
927        self.error(msg);
928    }
929
930    // ============================================================
931    // Wordcode emission stubs (parse.c private helpers)
932    //
933    // The following functions are direct counterparts of zsh's
934    // private wordcode-emission helpers in parse.c. zsh uses these
935    // to write u32 opcodes into a flat `ecbuf` array; zshrs builds
936    // an AST tree and never emits wordcode at the parse layer.
937    // The implementations are documented stubs that preserve the
938    // function signatures + cite the C source. Real wordcode would
939    // be emitted later by compile_zsh.rs walking the AST.
940    //
941    // Listed for port-surface completeness so every parse.c symbol
942    // has a Rust counterpart even when the algorithm is moot in the
943    // AST architecture.
944    // ============================================================
945
946    /// Patch a list-placeholder wordcode with its actual opcode +
947    /// jump distance. Direct port of zsh/Src/parse.c:736-749
948    /// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
949    /// par_sublist runs, then comes back through set_list_code to
950    /// rewrite the slot with WCB_LIST(type, distance) once the
951    /// sublist's final length is known.
952    ///
953    /// zshrs port note: zshrs builds AST nodes inline so there's
954    /// no placeholder to patch. The ZshList { sublist, flags }
955    /// node is created with the right flags from the start.
956    /// Stub provided for port-surface completeness.
957    pub fn set_list_code(_p: usize, _type_code: i32, _cmplx: bool) {
958        // parse.c:740-748 — wordcode patching. zshrs no-op.
959    }
960
961    /// Patch a sublist-placeholder wordcode with its actual opcode.
962    /// Direct port of zsh/Src/parse.c:753-763 `set_sublist_code`.
963    /// Same role as set_list_code at the sublist level.
964    pub fn set_sublist_code(_p: usize, _type_code: i32, _flags: i32, _skip: i32, _cmplx: bool) {
965        // parse.c:757-762 — wordcode patching. zshrs no-op.
966    }
967
968    /// Add one wordcode opcode to the buffer. Direct port of
969    /// zsh/Src/parse.c:396-408 `ecadd`. Returns the index of the
970    /// new opcode. zshrs no-op since the AST is built inline.
971    pub fn ecadd(_c: u32) -> usize {
972        // parse.c:399-407 — append to ecbuf with grow-on-demand.
973        // zshrs no-op.
974        0
975    }
976
977    /// Delete a wordcode at position p. Direct port of
978    /// zsh/Src/parse.c:412-421 `ecdel`. zshrs no-op.
979    pub fn ecdel(_p: usize) {
980        // parse.c:415-420 — memmove + decrement ecused. zshrs no-op.
981    }
982
983    /// Encode a string into a wordcode value. Direct port of
984    /// zsh/Src/parse.c:425-471 `ecstrcode`. C source packs short
985    /// strings (≤4 chars) into a single wordcode + uses a binary
986    /// tree (Eccstr) for longer strings; long-string slots are
987    /// de-duplicated via hasher + strcmp. zshrs no-op since the
988    /// AST stores strings directly.
989    pub fn ecstrcode(_s: &str) -> u32 {
990        // parse.c:432-470 — the actual encoding logic. zshrs no-op.
991        0
992    }
993
994    /// Insert N empty wordcode slots at position p. Direct port of
995    /// zsh/Src/parse.c:371-388 `ecispace`. Used to reserve space
996    /// for a forward-jump opcode that will be patched once the
997    /// jump target is known. zshrs no-op since AST jumps are
998    /// resolved at compile_zsh time.
999    pub fn ecispace(_p: usize, _n: usize) {
1000        // parse.c:376-387 — grow + memmove + adjust hdocs. zshrs no-op.
1001    }
1002
1003    /// Adjust pending heredoc pointers when wordcodes shift. Direct
1004    /// port of zsh/Src/parse.c:359-367 `ecadjusthere`. Called
1005    /// internally by ecispace / ecdel after they shift the buffer.
1006    /// zshrs no-op since heredocs are tracked by index in the
1007    /// lexer's Vec, not by absolute wordcode offset.
1008    pub fn ecadjusthere(_p: usize, _d: i32) {
1009        // parse.c:362-366 — walk hdocs list, bump pc by d. zshrs no-op.
1010    }
1011
1012    // ============================================================
1013    // Eprog runtime ops (parse.c:2767-2853)
1014    //
1015    // dupeprog / useeprog / freeeprog are zsh's reference-counting
1016    // helpers for executable programs. zshrs's AST is owned by
1017    // value (Rust ownership); cloning is a tree-deep copy via
1018    // Clone, "use" is a no-op (the executor borrows the AST), and
1019    // "free" is automatic on drop.
1020    // ============================================================
1021
1022    /// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2767-2812
1023    /// `dupeprog`. C version deep-copies the wordcode array + string
1024    /// table + pattern progs. zshrs uses Clone on the AST.
1025    pub fn dupeprog(prog: &ZshProgram) -> ZshProgram {
1026        prog.clone()
1027    }
1028
1029    /// Increment an Eprog's reference count. Direct port of
1030    /// zsh/Src/parse.c:2813-2822 `useeprog`. zshrs no-op (Rust
1031    /// ownership).
1032    pub fn useeprog(_prog: &ZshProgram) {
1033        // parse.c:2815-2821 — `prog->nref++` if not heap-allocated.
1034        // zshrs no-op.
1035    }
1036
1037    /// Decrement / free an Eprog. Direct port of
1038    /// zsh/Src/parse.c:2823-2854 `freeeprog`. zshrs no-op (drop on
1039    /// scope-exit).
1040    pub fn freeeprog(_prog: ZshProgram) {
1041        // parse.c:2825-2853 — decrement nref, free if zero. zshrs
1042        // drops via Rust ownership.
1043    }
1044
1045    // ============================================================
1046    // Wordcode runtime getters (parse.c:2853-3060)
1047    //
1048    // These read packed wordcode out of a running Eprog at execution
1049    // time. zshrs's executor walks the AST directly so these are
1050    // stubs that preserve the C signatures + cite the source.
1051    // ============================================================
1052
1053    /// Read a packed string from the wordcode stream. Direct port of
1054    /// zsh/Src/parse.c:2853-2887 `ecgetstr`. C version unpacks
1055    /// 4-char inline strings + indexes into the strs table for
1056    /// longer ones. zshrs no-op (AST stores strings directly).
1057    pub fn ecgetstr(_dup: bool) -> String {
1058        // parse.c:2858-2886 — wordcode unpack logic. zshrs no-op.
1059        String::new()
1060    }
1061
1062    /// Read a packed string without consuming the wordcode pointer.
1063    /// Direct port of zsh/Src/parse.c:2890-2913 `ecrawstr`. zshrs
1064    /// no-op.
1065    pub fn ecrawstr() -> String {
1066        String::new()
1067    }
1068
1069    /// Read a NUL-terminated string array from wordcode. Direct port
1070    /// of zsh/Src/parse.c:2916-2933 `ecgetarr`. zshrs no-op.
1071    pub fn ecgetarr(_num: usize, _dup: bool) -> Vec<String> {
1072        Vec::new()
1073    }
1074
1075    /// Read a linked-list of strings from wordcode. Direct port of
1076    /// zsh/Src/parse.c:2936-2955 `ecgetlist`. zshrs no-op.
1077    pub fn ecgetlist(_num: usize, _dup: bool) -> Vec<String> {
1078        Vec::new()
1079    }
1080
1081    /// Read a sequence of redirection wordcodes. Direct port of
1082    /// zsh/Src/parse.c:2958-2991 `ecgetredirs`. zshrs no-op
1083    /// (redirections live as AST ZshRedir nodes).
1084    pub fn ecgetredirs() -> Vec<ZshRedir> {
1085        Vec::new()
1086    }
1087
1088    /// Copy consecutive redirection wordcodes into a new Eprog.
1089    /// Direct port of zsh/Src/parse.c:3001-3060 `eccopyredirs`.
1090    /// zshrs no-op.
1091    pub fn eccopyredirs() -> Option<ZshProgram> {
1092        None
1093    }
1094
1095    /// Initialize the dummy Eprog used as a placeholder. Direct port
1096    /// of zsh/Src/parse.c:3068-3075 `init_eprog`. zshrs no-op since
1097    /// the AST has no equivalent dummy node — empty programs are
1098    /// just `ZshProgram { lists: vec![] }`.
1099    pub fn init_eprog() {
1100        // parse.c:3071-3074 — set up dummy_eprog_code = WCB_END().
1101        // zshrs no-op.
1102    }
1103
1104    /// Parse the complete input
1105    pub fn parse(&mut self) -> Result<ZshProgram, Vec<ParseError>> {
1106        self.lexer.zshlex();
1107
1108        let mut program = self.parse_program_until(None);
1109
1110        if !self.errors.is_empty() {
1111            return Err(std::mem::take(&mut self.errors));
1112        }
1113        // Surface lexer-level errors (unmatched quote/heredoc/etc.)
1114        // that the parser silently rolls past. zsh aborts with a
1115        // diagnostic in this case; mirror it.
1116        if let Some(msg) = self.lexer.error.clone() {
1117            return Err(vec![ParseError {
1118                message: msg,
1119                line: 1,
1120            }]);
1121        }
1122
1123        // Post-pass: wire heredoc bodies (collected by lexer.process_heredocs)
1124        // back into ZshRedir.heredoc fields via heredoc_idx.
1125        let bodies: Vec<HereDocInfo> = self
1126            .lexer
1127            .heredocs
1128            .iter()
1129            .map(|h| HereDocInfo {
1130                content: h.content.clone(),
1131                terminator: h.terminator.clone(),
1132                quoted: h.quoted,
1133            })
1134            .collect();
1135        if !bodies.is_empty() {
1136            fill_heredoc_bodies(&mut program, &bodies);
1137        }
1138
1139        Ok(program)
1140    }
1141
1142    /// Parse a program (list of lists)
1143    /// Parse a complete program (top-level entry). Calls
1144    /// parse_program_until with no end-token sentinel. Direct port of
1145    /// zsh/Src/parse.c:614-720 `parse_event` / `parse_list` /
1146    /// `par_event` flow. C distinguishes COND_EVENT (single command
1147    /// for here-string) from full event parse; zshrs's parse_program
1148    /// is the full-event entry.
1149    fn parse_program(&mut self) -> ZshProgram {
1150        self.parse_program_until(None)
1151    }
1152
1153    /// Parse a program until we hit an end token
1154    /// Parse a program until one of `end_tokens` is seen (or EOF).
1155    /// Drives parse_list in a loop. C equivalent: the body of par_event
1156    /// (parse.c:635-695) iterating par_list against the lexer.
1157    fn parse_program_until(&mut self, end_tokens: Option<&[LexTok]>) -> ZshProgram {
1158        let mut lists = Vec::new();
1159
1160        loop {
1161            if self.check_limit() {
1162                self.error("parser exceeded global iteration limit");
1163                break;
1164            }
1165
1166            // Skip separators
1167            while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1168                if self.check_limit() {
1169                    self.error("parser exceeded global iteration limit");
1170                    return ZshProgram { lists };
1171                }
1172                self.lexer.zshlex();
1173            }
1174
1175            if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
1176                break;
1177            }
1178
1179            // Check for end tokens
1180            if let Some(end_toks) = end_tokens {
1181                if end_toks.contains(&self.lexer.tok) {
1182                    break;
1183                }
1184            }
1185
1186            // Also stop at these tokens when not explicitly looking for them
1187            // Note: Else/Elif/Then are NOT here - they're handled by parse_if
1188            // to allow nested if statements inside case arms, loops, etc.
1189            match self.lexer.tok {
1190                LexTok::Outbrace
1191                | LexTok::Dsemi
1192                | LexTok::Semiamp
1193                | LexTok::Semibar
1194                | LexTok::Done
1195                | LexTok::Fi
1196                | LexTok::Esac
1197                | LexTok::Zend => break,
1198                _ => {}
1199            }
1200
1201            match self.parse_list() {
1202                Some(list) => {
1203                    let detected = simple_name_with_inoutpar(&list);
1204                    lists.push(list);
1205                    // Synthesize a FuncDef for the `name() { body }` shape
1206                    // at parse time so body_source is captured while the
1207                    // lexer still has the input. The lexer port emits
1208                    // `name(` as a single Word ending in `<INPAR><OUTPAR>`,
1209                    // so the Simple list is followed by an Inbrace once
1210                    // separators are skipped. For `name() cmd args` the
1211                    // body has already been swallowed into the same
1212                    // Simple's words tail — synthesize directly from there.
1213                    if let Some((names, body_argv)) = detected {
1214                        if !body_argv.is_empty() {
1215                            // One-line body already in the Simple. Build
1216                            // a Simple from body_argv as the function body.
1217                            lists.pop();
1218                            let body_simple = ZshCommand::Simple(ZshSimple {
1219                                assigns: Vec::new(),
1220                                words: body_argv,
1221                                redirs: Vec::new(),
1222                            });
1223                            let body_list = ZshList {
1224                                sublist: ZshSublist {
1225                                    pipe: ZshPipe {
1226                                        cmd: body_simple,
1227                                        next: None,
1228                                        lineno: self.lexer.lineno,
1229                                        merge_stderr: false,
1230                                    },
1231                                    next: None,
1232                                    flags: SublistFlags::default(),
1233                                },
1234                                flags: ListFlags::default(),
1235                            };
1236                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1237                                names,
1238                                body: Box::new(ZshProgram {
1239                                    lists: vec![body_list],
1240                                }),
1241                                tracing: false,
1242                                auto_call_args: None,
1243                                body_source: None,
1244                            });
1245                            let synthetic = ZshList {
1246                                sublist: ZshSublist {
1247                                    pipe: ZshPipe {
1248                                        cmd: funcdef,
1249                                        next: None,
1250                                        lineno: self.lexer.lineno,
1251                                        merge_stderr: false,
1252                                    },
1253                                    next: None,
1254                                    flags: SublistFlags::default(),
1255                                },
1256                                flags: ListFlags::default(),
1257                            };
1258                            lists.push(synthetic);
1259                            continue;
1260                        }
1261                        // Else: words.len() == 1 (only the trailing `name()`
1262                        // word), brace body follows. `names` may carry
1263                        // multiple identifiers from the `fna fnb fnc()`
1264                        // shorthand — all share the same brace body per
1265                        // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
1266                        // Skip separators on the real lexer; safe because
1267                        // parse_program's next iteration would also skip them.
1268                        while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1269                            self.lexer.zshlex();
1270                        }
1271                        if self.lexer.tok == LexTok::Inbrace {
1272                            // Capture body_start BEFORE the lexer
1273                            // advances past the first body token. The
1274                            // outer zshlex() consumed `{`; lexer.pos
1275                            // is now right after `{`. The next
1276                            // `zshlex()` would advance past `echo`,
1277                            // making body_start land mid-body and
1278                            // lose the first word — `typeset -f f`
1279                            // printed `a; echo b` instead of
1280                            // `echo a; echo b` for `f() { echo a;
1281                            // echo b }`.
1282                            let body_start = self.lexer.pos;
1283                            self.lexer.zshlex();
1284                            let body = self.parse_program();
1285                            let body_end = if self.lexer.tok == LexTok::Outbrace {
1286                                self.lexer.pos.saturating_sub(1)
1287                            } else {
1288                                self.lexer.pos
1289                            };
1290                            let body_source = self
1291                                .lexer
1292                                .input
1293                                .get(body_start..body_end)
1294                                .map(|s| s.trim().to_string())
1295                                .filter(|s| !s.is_empty());
1296                            if self.lexer.tok == LexTok::Outbrace {
1297                                self.lexer.zshlex();
1298                            }
1299                            // Replace the Simple list with a FuncDef list.
1300                            lists.pop();
1301                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1302                                names,
1303                                body: Box::new(body),
1304                                tracing: false,
1305                                auto_call_args: None,
1306                                body_source,
1307                            });
1308                            let synthetic = ZshList {
1309                                sublist: ZshSublist {
1310                                    pipe: ZshPipe {
1311                                        cmd: funcdef,
1312                                        next: None,
1313                                        lineno: self.lexer.lineno,
1314                                        merge_stderr: false,
1315                                    },
1316                                    next: None,
1317                                    flags: SublistFlags::default(),
1318                                },
1319                                flags: ListFlags::default(),
1320                            };
1321                            lists.push(synthetic);
1322                        } else if !matches!(
1323                            self.lexer.tok,
1324                            LexTok::Endinput | LexTok::Outbrace | LexTok::Seper | LexTok::Newlin
1325                        ) {
1326                            // No-brace one-line body: `foo() echo hello`.
1327                            // Parse a single command for the body.
1328                            let body_cmd = self.parse_cmd();
1329                            if let Some(cmd) = body_cmd {
1330                                let body_list = ZshList {
1331                                    sublist: ZshSublist {
1332                                        pipe: ZshPipe {
1333                                            cmd,
1334                                            next: None,
1335                                            lineno: self.lexer.lineno,
1336                                            merge_stderr: false,
1337                                        },
1338                                        next: None,
1339                                        flags: SublistFlags::default(),
1340                                    },
1341                                    flags: ListFlags::default(),
1342                                };
1343                                lists.pop();
1344                                let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1345                                    names: names.clone(),
1346                                    body: Box::new(ZshProgram {
1347                                        lists: vec![body_list],
1348                                    }),
1349                                    tracing: false,
1350                                    auto_call_args: None,
1351                                    body_source: None,
1352                                });
1353                                let synthetic = ZshList {
1354                                    sublist: ZshSublist {
1355                                        pipe: ZshPipe {
1356                                            cmd: funcdef,
1357                                            next: None,
1358                                            lineno: self.lexer.lineno,
1359                                            merge_stderr: false,
1360                                        },
1361                                        next: None,
1362                                        flags: SublistFlags::default(),
1363                                    },
1364                                    flags: ListFlags::default(),
1365                                };
1366                                lists.push(synthetic);
1367                            }
1368                        }
1369                    }
1370                }
1371                None => break,
1372            }
1373        }
1374
1375        ZshProgram { lists }
1376    }
1377
1378    /// Parse a list (sublist with optional & or ;).
1379    ///
1380    /// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
1381    /// par_list1 wrapper at parse.c:807-817).
1382    ///
1383    /// **Structural divergence**: zsh's parse.c emits flat wordcode
1384    /// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
1385    /// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
1386    /// builds an AST node `ZshList { sublist, flags }` instead. The
1387    /// async/sync/disown discrimination at parse.c:785-790 maps to
1388    /// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
1389    /// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
1390    /// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
1391    /// representation. This divergence is repository-wide: every
1392    /// `par_*` function emits wordcode in C, every `parse_*` builds
1393    /// AST in Rust. The compile_zsh module then traverses the AST to
1394    /// emit fusevm bytecode, which serves the same role as zsh's
1395    /// wordcode but with a different opcode set and execution model.
1396    fn parse_list(&mut self) -> Option<ZshList> {
1397        let sublist = self.parse_sublist()?;
1398
1399        let flags = match self.lexer.tok {
1400            LexTok::Amper => {
1401                self.lexer.zshlex();
1402                ListFlags {
1403                    async_: true,
1404                    disown: false,
1405                }
1406            }
1407            LexTok::Amperbang => {
1408                self.lexer.zshlex();
1409                ListFlags {
1410                    async_: true,
1411                    disown: true,
1412                }
1413            }
1414            LexTok::Seper | LexTok::Semi | LexTok::Newlin => {
1415                self.lexer.zshlex();
1416                ListFlags::default()
1417            }
1418            _ => ListFlags::default(),
1419        };
1420
1421        Some(ZshList { sublist, flags })
1422    }
1423
1424    /// Parse a sublist (pipelines connected by && or ||).
1425    ///
1426    /// Direct port of zsh/Src/parse.c:825-867 `par_sublist` and
1427    /// par_sublist2 at parse.c:869-892. par_sublist handles the
1428    /// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
1429    /// handles the leading `!` negation and `coproc` keyword.
1430    ///
1431    /// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
1432    /// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
1433    /// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
1434    fn parse_sublist(&mut self) -> Option<ZshSublist> {
1435        self.recursion_depth += 1;
1436        if self.check_recursion() {
1437            self.error("parse_sublist: max recursion depth exceeded");
1438            self.recursion_depth -= 1;
1439            return None;
1440        }
1441
1442        let mut flags = SublistFlags::default();
1443
1444        // Handle coproc and !
1445        if self.lexer.tok == LexTok::Coproc {
1446            flags.coproc = true;
1447            self.lexer.zshlex();
1448        } else if self.lexer.tok == LexTok::Bang {
1449            flags.not = true;
1450            self.lexer.zshlex();
1451        }
1452
1453        let pipe = match self.parse_pipe() {
1454            Some(p) => p,
1455            None => {
1456                self.recursion_depth -= 1;
1457                return None;
1458            }
1459        };
1460
1461        // Check for && or ||
1462        let next = match self.lexer.tok {
1463            LexTok::Damper => {
1464                self.lexer.zshlex();
1465                self.skip_separators();
1466                self.parse_sublist().map(|s| (SublistOp::And, Box::new(s)))
1467            }
1468            LexTok::Dbar => {
1469                self.lexer.zshlex();
1470                self.skip_separators();
1471                self.parse_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1472            }
1473            _ => None,
1474        };
1475
1476        self.recursion_depth -= 1;
1477        Some(ZshSublist { pipe, next, flags })
1478    }
1479
1480    /// Parse a pipeline
1481    /// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1482    /// zsh/Src/parse.c:894-956 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1483    /// C emits WC_PIPE wordcodes per command; same flow.
1484    fn parse_pipe(&mut self) -> Option<ZshPipe> {
1485        self.recursion_depth += 1;
1486        if self.check_recursion() {
1487            self.error("parse_pipe: max recursion depth exceeded");
1488            self.recursion_depth -= 1;
1489            return None;
1490        }
1491
1492        let lineno = self.lexer.toklineno;
1493        let cmd = match self.parse_cmd() {
1494            Some(c) => c,
1495            None => {
1496                self.recursion_depth -= 1;
1497                return None;
1498            }
1499        };
1500
1501        // Check for | or |&
1502        let mut merge_stderr = false;
1503        let next = match self.lexer.tok {
1504            LexTok::Bar | LexTok::Baramp => {
1505                merge_stderr = self.lexer.tok == LexTok::Baramp;
1506                self.lexer.zshlex();
1507                self.skip_separators();
1508                self.parse_pipe().map(Box::new)
1509            }
1510            _ => None,
1511        };
1512
1513        self.recursion_depth -= 1;
1514        Some(ZshPipe {
1515            cmd,
1516            next,
1517            lineno,
1518            merge_stderr,
1519        })
1520    }
1521
1522    /// Parse a command
1523    /// Parse a command — dispatches by leading token (FOR / CASE /
1524    /// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1525    /// INPAR subshell / INBRACE current-shell / TIME / NOCORRECT,
1526    /// else simple). Direct port of zsh/Src/parse.c:958-1085 `par_cmd`.
1527    fn parse_cmd(&mut self) -> Option<ZshCommand> {
1528        // Parse leading redirections
1529        let mut redirs = Vec::new();
1530        while self.lexer.tok.is_redirop() {
1531            if let Some(redir) = self.parse_redir() {
1532                redirs.push(redir);
1533            }
1534        }
1535
1536        let cmd = match self.lexer.tok {
1537            LexTok::For | LexTok::Foreach => self.parse_for(),
1538            LexTok::Select => self.parse_select(),
1539            LexTok::Case => self.parse_case(),
1540            LexTok::If => self.parse_if(),
1541            LexTok::While => self.parse_while(false),
1542            LexTok::Until => self.parse_while(true),
1543            LexTok::Repeat => self.parse_repeat(),
1544            LexTok::Inpar => self.parse_subsh(),
1545            LexTok::Inoutpar => self.parse_anon_funcdef(),
1546            LexTok::Inbrace => self.parse_cursh(),
1547            LexTok::Func => self.parse_funcdef(),
1548            LexTok::Dinbrack => self.parse_cond(),
1549            LexTok::Dinpar => self.parse_arith(),
1550            LexTok::Time => self.parse_time(),
1551            _ => self.parse_simple(redirs),
1552        };
1553
1554        // Parse trailing redirections. For Simple commands the redirs were
1555        // already captured inside parse_simple; for compound forms (Cursh,
1556        // Subsh, If, While, etc.) we collect them here and wrap in
1557        // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1558        if let Some(inner) = cmd {
1559            let mut trailing: Vec<ZshRedir> = Vec::new();
1560            while self.lexer.tok.is_redirop() {
1561                if let Some(redir) = self.parse_redir() {
1562                    trailing.push(redir);
1563                }
1564            }
1565            if trailing.is_empty() {
1566                return Some(inner);
1567            }
1568            // Simple already absorbed its own redirs (compile path expects
1569            // them on ZshSimple), so don't double-wrap.
1570            if matches!(inner, ZshCommand::Simple(_)) {
1571                if let ZshCommand::Simple(mut s) = inner {
1572                    s.redirs.extend(trailing);
1573                    return Some(ZshCommand::Simple(s));
1574                }
1575                unreachable!()
1576            }
1577            return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1578        }
1579
1580        None
1581    }
1582
1583    /// Parse a simple command
1584    /// Parse a simple command (assignments + words + redirections).
1585    /// Direct port of zsh/Src/parse.c:1836-2228 `par_simple` —
1586    /// the largest single function in parse.c. Handles ENVSTRING/
1587    /// ENVARRAY assignments at command head, intermixed redirs,
1588    /// typeset-style multi-assignment commands, and the trailing
1589    /// inout-par `()` that converts a simple command into an inline
1590    /// function definition.
1591    fn parse_simple(&mut self, mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
1592        let mut assigns = Vec::new();
1593        let mut words = Vec::new();
1594        const MAX_ITERATIONS: usize = 10_000;
1595        let mut iterations = 0;
1596
1597        // Parse leading assignments
1598        while self.lexer.tok == LexTok::Envstring || self.lexer.tok == LexTok::Envarray {
1599            iterations += 1;
1600            if iterations > MAX_ITERATIONS {
1601                self.error("parse_simple: exceeded max iterations in assignments");
1602                return None;
1603            }
1604            if let Some(assign) = self.parse_assign() {
1605                assigns.push(assign);
1606            }
1607            self.lexer.zshlex();
1608        }
1609
1610        // Parse words and redirections
1611        loop {
1612            iterations += 1;
1613            if iterations > MAX_ITERATIONS {
1614                self.error("parse_simple: exceeded max iterations");
1615                return None;
1616            }
1617            match self.lexer.tok {
1618                LexTok::String | LexTok::Typeset => {
1619                    let s = self.lexer.tokstr.clone();
1620                    if let Some(s) = s {
1621                        words.push(s);
1622                    }
1623                    self.lexer.zshlex();
1624                    // Check for function definition foo() { ... }
1625                    if words.len() == 1 && self.peek_inoutpar() {
1626                        return self.parse_inline_funcdef(words.pop().unwrap());
1627                    }
1628                    // `{name}>file` named-fd redirect: the lexer doesn't
1629                    // recognize this shape, so the bare word `{name}`
1630                    // arrives as a String. If it matches `{IDENT}` and
1631                    // the NEXT token is a redirop, pop it off as the
1632                    // varid for that redir.
1633                    if !words.is_empty() && self.lexer.tok.is_redirop() {
1634                        let last = words.last().unwrap();
1635                        let untoked = crate::lexer::untokenize(last);
1636                        if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
1637                            let name = &untoked[1..untoked.len() - 1];
1638                            if !name.is_empty()
1639                                && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
1640                                && name
1641                                    .chars()
1642                                    .next()
1643                                    .map(|c| c == '_' || c.is_ascii_alphabetic())
1644                                    .unwrap_or(false)
1645                            {
1646                                let varid = name.to_string();
1647                                words.pop();
1648                                if let Some(mut redir) = self.parse_redir() {
1649                                    redir.varid = Some(varid);
1650                                    redirs.push(redir);
1651                                }
1652                                continue;
1653                            }
1654                        }
1655                    }
1656                }
1657                _ if self.lexer.tok.is_redirop() => {
1658                    match self.parse_redir() {
1659                        Some(redir) => redirs.push(redir),
1660                        None => break, // Error in redir parsing, stop
1661                    }
1662                }
1663                LexTok::Inoutpar if !words.is_empty() => {
1664                    // foo() { ... } style function
1665                    return self.parse_inline_funcdef(words.pop().unwrap());
1666                }
1667                _ => break,
1668            }
1669        }
1670
1671        if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
1672            return None;
1673        }
1674
1675        Some(ZshCommand::Simple(ZshSimple {
1676            assigns,
1677            words,
1678            redirs,
1679        }))
1680    }
1681
1682    /// Parse an assignment
1683    /// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
1684    /// Sub-routine of parse_simple. The C source handles assignments
1685    /// inline in par_simple via the ENVSTRING/ENVARRAY token paths
1686    /// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
1687    /// helper for clarity.
1688    fn parse_assign(&mut self) -> Option<ZshAssign> {
1689        use crate::tokens::char_tokens;
1690
1691        let tokstr = self.lexer.tokstr.as_ref()?;
1692
1693        // Parse name=value or name+=value
1694        // The '=' is encoded as char_tokens::EQUALS in the token string
1695        let (name, value_str, append) = if let Some(pos) = tokstr.find(char_tokens::EQUALS) {
1696            let name_part = &tokstr[..pos];
1697            let (name, append) = if name_part.ends_with('+') {
1698                (&name_part[..name_part.len() - 1], true)
1699            } else {
1700                (name_part, false)
1701            };
1702            (
1703                name.to_string(),
1704                tokstr[pos + char_tokens::EQUALS.len_utf8()..].to_string(),
1705                append,
1706            )
1707        } else if let Some(pos) = tokstr.find('=') {
1708            // Fallback to literal '=' for compatibility
1709            let name_part = &tokstr[..pos];
1710            let (name, append) = if name_part.ends_with('+') {
1711                (&name_part[..name_part.len() - 1], true)
1712            } else {
1713                (name_part, false)
1714            };
1715            (name.to_string(), tokstr[pos + 1..].to_string(), append)
1716        } else {
1717            return None;
1718        };
1719
1720        let value = if self.lexer.tok == LexTok::Envarray {
1721            // Array assignment: name=(...)
1722            let mut elements = Vec::new();
1723            self.lexer.zshlex(); // skip past token
1724
1725            let mut arr_iters = 0;
1726            const MAX_ARRAY_ELEMENTS: usize = 10_000;
1727            while matches!(
1728                self.lexer.tok,
1729                LexTok::String | LexTok::Seper | LexTok::Newlin
1730            ) {
1731                arr_iters += 1;
1732                if arr_iters > MAX_ARRAY_ELEMENTS {
1733                    self.error("array assignment exceeded maximum elements");
1734                    break;
1735                }
1736                if self.lexer.tok == LexTok::String {
1737                    if let Some(ref s) = self.lexer.tokstr {
1738                        elements.push(s.clone());
1739                    }
1740                }
1741                self.lexer.zshlex();
1742            }
1743
1744            // The closing OUTPAR is consumed here. The outer parse_simple
1745            // loop will then `zshlex()` past whatever follows (typically
1746            // a separator or the next word) — calling zshlex twice in
1747            // tandem (here AND in parse_simple) over-advances and merges
1748            // a following `name() { … }` funcdef into the same Simple.
1749            // We only consume Outpar; let the caller handle the rest.
1750            // Without this guard `g=(o1); f() { :; }` parsed as one
1751            // Simple with assigns=[g] and words=["f()"] (one token).
1752            if self.lexer.tok == LexTok::Outpar {
1753                // Note: do NOT zshlex() here. parse_simple's `self.lexer
1754                // .zshlex()` after `parse_assign` returns advances past
1755                // the Outpar onto the next significant token.
1756                //
1757                // Force `incmdpos=true` so the next zshlex() recognizes
1758                // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
1759                // The lexer flips incmdpos to false on bare Outpar (which
1760                // is correct for subshell-close context), but for an
1761                // array-assignment close more assigns/words may follow.
1762                self.lexer.incmdpos = true;
1763            }
1764
1765            ZshAssignValue::Array(elements)
1766        } else {
1767            ZshAssignValue::Scalar(value_str)
1768        };
1769
1770        Some(ZshAssign {
1771            name,
1772            value,
1773            append,
1774        })
1775    }
1776
1777    /// Parse a redirection
1778    /// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
1779    /// Direct port of zsh/Src/parse.c:2229-2346 `par_redir`. Returns
1780    /// a ZshRedir node carrying the operator type, fd, target word
1781    /// (or here-doc body / pipe-redir command), and any `{var}` style
1782    /// fd-binding parameter.
1783    fn parse_redir(&mut self) -> Option<ZshRedir> {
1784        let rtype = match self.lexer.tok {
1785            LexTok::Outang => RedirType::Write,
1786            LexTok::Outangbang => RedirType::Writenow,
1787            LexTok::Doutang => RedirType::Append,
1788            LexTok::Doutangbang => RedirType::Appendnow,
1789            LexTok::Inang => RedirType::Read,
1790            LexTok::Inoutang => RedirType::ReadWrite,
1791            LexTok::Dinang => RedirType::Heredoc,
1792            LexTok::Dinangdash => RedirType::HeredocDash,
1793            LexTok::Trinang => RedirType::Herestr,
1794            LexTok::Inangamp => RedirType::MergeIn,
1795            LexTok::Outangamp => RedirType::MergeOut,
1796            LexTok::Ampoutang => RedirType::ErrWrite,
1797            LexTok::Outangampbang => RedirType::ErrWritenow,
1798            LexTok::Doutangamp => RedirType::ErrAppend,
1799            LexTok::Doutangampbang => RedirType::ErrAppendnow,
1800            _ => return None,
1801        };
1802
1803        let fd = if self.lexer.tokfd >= 0 {
1804            self.lexer.tokfd
1805        } else if matches!(
1806            rtype,
1807            RedirType::Read
1808                | RedirType::ReadWrite
1809                | RedirType::MergeIn
1810                | RedirType::Heredoc
1811                | RedirType::HeredocDash
1812                | RedirType::Herestr
1813        ) {
1814            0
1815        } else {
1816            1
1817        };
1818
1819        self.lexer.zshlex();
1820
1821        let name = match self.lexer.tok {
1822            LexTok::String | LexTok::Envstring => {
1823                let n = self.lexer.tokstr.clone().unwrap_or_default();
1824                self.lexer.zshlex();
1825                n
1826            }
1827            _ => {
1828                self.error("expected word after redirection");
1829                return None;
1830            }
1831        };
1832
1833        // Heredoc body capture: when reading the terminator above, the
1834        // lexer pushed a HereDoc to self.lexer.heredocs[]. Record the
1835        // index so fill_heredoc_bodies() can wire content back after
1836        // process_heredocs() has run.
1837        let heredoc_idx = if matches!(rtype, RedirType::Heredoc | RedirType::HeredocDash) {
1838            if !self.lexer.heredocs.is_empty() {
1839                Some(self.lexer.heredocs.len() - 1)
1840            } else {
1841                None
1842            }
1843        } else {
1844            None
1845        };
1846
1847        Some(ZshRedir {
1848            rtype,
1849            fd,
1850            name,
1851            heredoc: None,
1852            varid: None,
1853            heredoc_idx,
1854        })
1855    }
1856
1857    /// Parse for/foreach loop
1858    /// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1859    /// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1860    /// of zsh/Src/parse.c:1087-1207 `par_for`. parse_for_cstyle is the
1861    /// inner branch for the `((...))` arithmetic-header variant
1862    /// (parse.c:1100-1140 inside par_for).
1863    fn parse_for(&mut self) -> Option<ZshCommand> {
1864        let is_foreach = self.lexer.tok == LexTok::Foreach;
1865        self.lexer.zshlex();
1866
1867        // Check for C-style: for (( init; cond; step ))
1868        if self.lexer.tok == LexTok::Dinpar {
1869            return self.parse_for_cstyle();
1870        }
1871
1872        // Get variable name(s). zsh parse.c par_for accepts multiple
1873        // identifier tokens before `in`/`(`/newline — `for k v in ...`
1874        // assigns each iteration's pair of values to k and v in turn.
1875        // We store the names space-joined since variable identifiers
1876        // can't contain whitespace.
1877        let mut names: Vec<String> = Vec::new();
1878        loop {
1879            match self.lexer.tok {
1880                LexTok::String => {
1881                    let v = self.lexer.tokstr.clone().unwrap_or_default();
1882                    if v == "in" {
1883                        break;
1884                    }
1885                    names.push(v);
1886                    self.lexer.zshlex();
1887                }
1888                _ => break,
1889            }
1890        }
1891        if names.is_empty() {
1892            self.error("expected variable name in for");
1893            return None;
1894        }
1895        let var = names.join(" ");
1896
1897        // Skip newlines
1898        self.skip_separators();
1899
1900        // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1901        // single String token with the parens lexed-as-content
1902        // (`<INPAR>a b c<OUTPAR>`) instead of as separate Inpar/String/
1903        // Outpar tokens. Detect that shape and split it manually.
1904        let list = if self.lexer.tok == LexTok::String
1905            && self
1906                .lexer
1907                .tokstr
1908                .as_ref()
1909                .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1910                .unwrap_or(false)
1911        {
1912            let raw = self.lexer.tokstr.clone().unwrap_or_default();
1913            // Strip leading INPAR + trailing OUTPAR, then untokenize the
1914            // inner content and split on whitespace for the word list.
1915            let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1916                ..raw
1917                    .char_indices()
1918                    .last()
1919                    .map(|(i, _)| i)
1920                    .unwrap_or(raw.len())];
1921            let cleaned = crate::lexer::untokenize(inner);
1922            let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
1923            self.lexer.zshlex();
1924            ForList::Words(words)
1925        } else if self.lexer.tok == LexTok::String {
1926            let s = self.lexer.tokstr.as_ref();
1927            if s.map(|s| s == "in").unwrap_or(false) {
1928                self.lexer.zshlex();
1929                let mut words = Vec::new();
1930                let mut word_count = 0;
1931                while self.lexer.tok == LexTok::String {
1932                    word_count += 1;
1933                    if word_count > 500 || self.check_limit() {
1934                        self.error("for: too many words");
1935                        return None;
1936                    }
1937                    if let Some(ref s) = self.lexer.tokstr {
1938                        words.push(s.clone());
1939                    }
1940                    self.lexer.zshlex();
1941                }
1942                ForList::Words(words)
1943            } else {
1944                ForList::Positional
1945            }
1946        } else if self.lexer.tok == LexTok::Inpar {
1947            // for var (...)
1948            self.lexer.zshlex();
1949            let mut words = Vec::new();
1950            let mut word_count = 0;
1951            while self.lexer.tok == LexTok::String || self.lexer.tok == LexTok::Seper {
1952                word_count += 1;
1953                if word_count > 500 || self.check_limit() {
1954                    self.error("for: too many words in parens");
1955                    return None;
1956                }
1957                if self.lexer.tok == LexTok::String {
1958                    if let Some(ref s) = self.lexer.tokstr {
1959                        words.push(s.clone());
1960                    }
1961                }
1962                self.lexer.zshlex();
1963            }
1964            if self.lexer.tok == LexTok::Outpar {
1965                self.lexer.zshlex();
1966            }
1967            ForList::Words(words)
1968        } else {
1969            ForList::Positional
1970        };
1971
1972        // Skip to body
1973        self.skip_separators();
1974
1975        // Parse body
1976        let body = self.parse_loop_body(is_foreach)?;
1977
1978        Some(ZshCommand::For(ZshFor {
1979            var,
1980            list,
1981            body: Box::new(body),
1982            is_select: false,
1983        }))
1984    }
1985
1986    /// Parse C-style for loop: for (( init; cond; step ))
1987    /// Parse the c-style `for ((init; cond; incr)) do BODY done`.
1988    /// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
1989    /// Recognized when the token after FOR is DINPAR (the `((`
1990    /// detected by gettok via dbparens setup).
1991    fn parse_for_cstyle(&mut self) -> Option<ZshCommand> {
1992        // We're at (( (Dinpar None) - the opening ((
1993        // Lexer returns:
1994        //   Dinpar None     - opening ((
1995        //   Dinpar "init"   - init expression, semicolon consumed
1996        //   Dinpar "cond"   - cond expression, semicolon consumed
1997        //   Doutpar "step"  - step expression, closing )) consumed
1998
1999        self.lexer.zshlex(); // Get init: Dinpar "i=0"
2000
2001        if self.lexer.tok != LexTok::Dinpar {
2002            self.error("expected init expression in for ((");
2003            return None;
2004        }
2005        let init = self.lexer.tokstr.clone().unwrap_or_default();
2006
2007        self.lexer.zshlex(); // Get cond: Dinpar "i<10"
2008
2009        if self.lexer.tok != LexTok::Dinpar {
2010            self.error("expected condition in for ((");
2011            return None;
2012        }
2013        let cond = self.lexer.tokstr.clone().unwrap_or_default();
2014
2015        self.lexer.zshlex(); // Get step: Doutpar "i++"
2016
2017        if self.lexer.tok != LexTok::Doutpar {
2018            self.error("expected )) in for");
2019            return None;
2020        }
2021        let step = self.lexer.tokstr.clone().unwrap_or_default();
2022
2023        self.lexer.zshlex(); // Move past ))
2024
2025        self.skip_separators();
2026        let body = self.parse_loop_body(false)?;
2027
2028        Some(ZshCommand::For(ZshFor {
2029            var: String::new(),
2030            list: ForList::CStyle { init, cond, step },
2031            body: Box::new(body),
2032            is_select: false,
2033        }))
2034    }
2035
2036    /// Parse select loop (same syntax as for)
2037    /// Parse `select NAME in WORDS; do BODY; done`. Same shape as
2038    /// `for NAME in WORDS; do ...` but with menu-prompt semantics in
2039    /// the executor. C equivalent: the SELECT case in par_for at
2040    /// parse.c:1087-1207 (selects share parser flow with foreach).
2041    fn parse_select(&mut self) -> Option<ZshCommand> {
2042        // `select` shares parse_for's grammar (var, words, body) but the
2043        // compile path is different (interactive prompt loop).
2044        match self.parse_for()? {
2045            ZshCommand::For(mut f) => {
2046                f.is_select = true;
2047                Some(ZshCommand::For(f))
2048            }
2049            other => Some(other),
2050        }
2051    }
2052
    /// Parse `case WORD in PATTERN) BODY ;; ... esac`, or the brace form
    /// `case WORD { PATTERN) BODY ;; ... }`. Port of zsh/Src/parse.c
    /// `par_case`. Each case arm is a (pattern_list, body, terminator)
    /// tuple where terminator is `;;` (default), `;&` (fallthrough), or
    /// `;|` (continue testing).
    ///
    /// Returns `None` after reporting an error when the word, the
    /// `in`/`{` introducer, or the `)` closing a pattern list is missing,
    /// or when a single pattern list needs more than 1000 iterations.
    /// Reaching end of input (or a lexer error) before the closing
    /// `esac`/`}` ends the loop and returns the arms collected so far.
    ///
    /// The function drives `self.lexer.incasepat` by hand (1 = at the
    /// start of a pattern, 2 = inside a pattern, 0 = not in a pattern);
    /// the assignments are order-sensitive relative to the `zshlex()`
    /// calls that follow them.
    fn parse_case(&mut self) -> Option<ZshCommand> {
        self.lexer.zshlex(); // skip 'case'

        // The word being matched.
        let word = match self.lexer.tok {
            LexTok::String => {
                let w = self.lexer.tokstr.clone().unwrap_or_default();
                self.lexer.zshlex();
                w
            }
            _ => {
                self.error("expected word after case");
                return None;
            }
        };

        self.skip_separators();

        // Expect 'in' or {
        let use_brace = self.lexer.tok == LexTok::Inbrace;
        if self.lexer.tok == LexTok::String {
            let s = self.lexer.tokstr.as_ref();
            if s.map(|s| s != "in").unwrap_or(true) {
                self.error("expected 'in' in case");
                return None;
            }
        } else if !use_brace {
            self.error("expected 'in' or '{' in case");
            return None;
        }
        // Set incasepat=1 BEFORE consuming "in" so the next token (which
        // could be a leading `(` of a paren-prefixed pattern like
        // `case foo in (a|b) …`) is lexed as Inpar, not as a glob-token.
        // Without this the `(` got swallowed into a gettokstr('(', false)
        // call and produced a String like "(foo)" — the parser then saw
        // the `)` inside a string instead of as a separate Outpar.
        self.lexer.incasepat = 1;
        self.lexer.zshlex();

        let mut arms = Vec::new();
        const MAX_ARMS: usize = 10_000;

        // One iteration per case arm.
        loop {
            // Safety valve against runaway input.
            if arms.len() > MAX_ARMS {
                self.error("parse_case: too many arms");
                break;
            }

            // Set incasepat BEFORE skipping separators so the lexer knows
            // we are in case-pattern context. This affects how [ and |
            // are lexed.
            self.lexer.incasepat = 1;

            self.skip_separators();

            // Check for end.
            // Note: 'esac' might arrive as String "esac" if incasepat > 0
            // prevents reserved-word recognition.
            let is_esac = self.lexer.tok == LexTok::Esac
                || (self.lexer.tok == LexTok::String
                    && self
                        .lexer
                        .tokstr
                        .as_ref()
                        .map(|s| s == "esac")
                        .unwrap_or(false));
            // The closer must match the opener: `}` for the brace form,
            // `esac` for the `in` form.
            if (use_brace && self.lexer.tok == LexTok::Outbrace) || (!use_brace && is_esac) {
                self.lexer.incasepat = 0;
                self.lexer.zshlex();
                break;
            }

            // Also break on EOF or a lexer error; no error is reported
            // here for the missing closer.
            if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
                self.lexer.incasepat = 0;
                break;
            }

            // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
            // The leading `(` is paired with a matching `)` that closes
            // the pattern itself; the arm-close `)` follows separately.
            // Track whether we consumed it so we can skip the matching
            // `)` after pattern parsing — otherwise the arm-close would
            // be interpreted as the pattern-close and the actual body
            // would get the leftover `)`.
            let had_leading_paren = self.lexer.tok == LexTok::Inpar;
            if had_leading_paren {
                self.lexer.zshlex();
            }

            // Collect the `|`-separated alternatives of this arm.
            // incasepat is already set above.
            let mut patterns = Vec::new();
            let mut pattern_iterations = 0;
            loop {
                pattern_iterations += 1;
                if pattern_iterations > 1000 {
                    self.error("parse_case: too many pattern iterations");
                    self.lexer.incasepat = 0;
                    return None;
                }

                if self.lexer.tok == LexTok::String {
                    let s = self.lexer.tokstr.as_ref();
                    // A bare `esac` where a pattern is expected is not
                    // taken as a pattern.
                    if s.map(|s| s == "esac").unwrap_or(false) {
                        break;
                    }
                    patterns.push(self.lexer.tokstr.clone().unwrap_or_default());
                    // After first pattern token, set incasepat=2 so ( is
                    // treated as part of the pattern.
                    self.lexer.incasepat = 2;
                    self.lexer.zshlex();
                } else if self.lexer.tok != LexTok::Bar {
                    break;
                }

                if self.lexer.tok == LexTok::Bar {
                    // Reset to 1 (start of next alternative pattern)
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                } else {
                    break;
                }
            }
            // Pattern list finished: leave pattern context.
            self.lexer.incasepat = 0;

            // Expect ).  Also handle the `(P))` wrapped-pattern form:
            // when a leading `(` was consumed, accept an extra `)` —
            // the inner `)` closes the optional-paren wrapper, the
            // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
            // (bare pattern, leading-paren is just the opt-marker, the
            // close is arm-close) and `(P)) BODY` (paren-wrapped
            // pattern, then arm-close). The first form is unambiguous
            // when the bare pattern was simple; the second is needed
            // when the body starts with `(`.
            if self.lexer.tok != LexTok::Outpar {
                self.error("expected ')' in case pattern");
                return None;
            }
            self.lexer.zshlex();
            if had_leading_paren && self.lexer.tok == LexTok::Outpar {
                self.lexer.zshlex();
            }

            // Parse body
            let body = self.parse_program();

            // Get terminator. Set incasepat=1 BEFORE the zshlex
            // advance so the next token (the next arm's pattern, like
            // `[a-z]`) gets tokenized in pattern context. Without
            // this, a `[`-prefixed pattern after the FIRST arm became
            // Inbrack instead of String and the pattern-loop bailed
            // out with "expected ')' in case pattern".
            // When no terminator token is present (e.g. the last arm
            // directly before `esac`), the arm defaults to
            // CaseTerm::Break and nothing is consumed.
            let terminator = match self.lexer.tok {
                LexTok::Dsemi => {
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                    CaseTerm::Break
                }
                LexTok::Semiamp => {
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                    CaseTerm::Continue
                }
                LexTok::Semibar => {
                    self.lexer.incasepat = 1;
                    self.lexer.zshlex();
                    CaseTerm::TestNext
                }
                _ => CaseTerm::Break,
            };

            // An arm with no patterns is dropped rather than recorded.
            if !patterns.is_empty() {
                arms.push(CaseArm {
                    patterns,
                    body,
                    terminator,
                });
            }
        }

        Some(ZshCommand::Case(ZshCase { word, arms }))
    }
2236
2237    /// Parse if statement
2238    /// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
2239    /// Direct port of zsh/Src/parse.c:1411-1519 `par_if`. The C source
2240    /// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
2241    /// (cond, then_body) tuples plus an optional else_body.
2242    fn parse_if(&mut self) -> Option<ZshCommand> {
2243        self.lexer.zshlex(); // skip 'if'
2244
2245        // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
2246        let cond = Box::new(self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace])));
2247
2248        self.skip_separators();
2249
2250        // Expect 'then' or {
2251        let use_brace = self.lexer.tok == LexTok::Inbrace;
2252        if self.lexer.tok != LexTok::Then && !use_brace {
2253            self.error("expected 'then' or '{' after if condition");
2254            return None;
2255        }
2256        self.lexer.zshlex();
2257
2258        // Parse then-body - stops at else/elif/fi, or } if using brace syntax
2259        let then = if use_brace {
2260            let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2261            if self.lexer.tok == LexTok::Outbrace {
2262                self.lexer.zshlex();
2263            }
2264            Box::new(body)
2265        } else {
2266            Box::new(self.parse_program_until(Some(&[LexTok::Else, LexTok::Elif, LexTok::Fi])))
2267        };
2268
2269        // Parse elif and else (only for then/fi syntax, not brace syntax)
2270        let mut elif = Vec::new();
2271        let mut else_ = None;
2272
2273        if !use_brace {
2274            loop {
2275                self.skip_separators();
2276
2277                match self.lexer.tok {
2278                    LexTok::Elif => {
2279                        self.lexer.zshlex();
2280                        // elif condition stops at 'then' or '{'
2281                        let econd =
2282                            self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace]));
2283                        self.skip_separators();
2284
2285                        let elif_use_brace = self.lexer.tok == LexTok::Inbrace;
2286                        if self.lexer.tok != LexTok::Then && !elif_use_brace {
2287                            self.error("expected 'then' after elif");
2288                            return None;
2289                        }
2290                        self.lexer.zshlex();
2291
2292                        // elif body stops at else/elif/fi or } if using braces
2293                        let ebody = if elif_use_brace {
2294                            let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2295                            if self.lexer.tok == LexTok::Outbrace {
2296                                self.lexer.zshlex();
2297                            }
2298                            body
2299                        } else {
2300                            self.parse_program_until(Some(&[
2301                                LexTok::Else,
2302                                LexTok::Elif,
2303                                LexTok::Fi,
2304                            ]))
2305                        };
2306
2307                        elif.push((econd, ebody));
2308                    }
2309                    LexTok::Else => {
2310                        self.lexer.zshlex();
2311                        self.skip_separators();
2312
2313                        let else_use_brace = self.lexer.tok == LexTok::Inbrace;
2314                        if else_use_brace {
2315                            self.lexer.zshlex();
2316                        }
2317
2318                        // else body stops at 'fi' or '}'
2319                        else_ = Some(Box::new(if else_use_brace {
2320                            let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2321                            if self.lexer.tok == LexTok::Outbrace {
2322                                self.lexer.zshlex();
2323                            }
2324                            body
2325                        } else {
2326                            self.parse_program_until(Some(&[LexTok::Fi]))
2327                        }));
2328
2329                        // Consume the 'fi' if present (not for brace syntax)
2330                        if !else_use_brace && self.lexer.tok == LexTok::Fi {
2331                            self.lexer.zshlex();
2332                        }
2333                        break;
2334                    }
2335                    LexTok::Fi => {
2336                        self.lexer.zshlex();
2337                        break;
2338                    }
2339                    _ => break,
2340                }
2341            }
2342        }
2343
2344        Some(ZshCommand::If(ZshIf {
2345            cond,
2346            then,
2347            elif,
2348            else_,
2349        }))
2350    }
2351
2352    /// Parse while/until loop
2353    /// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
2354    /// Direct port of zsh/Src/parse.c:1521-1563 `par_while`. The
2355    /// `until` variant is the same loop with the condition negated.
2356    fn parse_while(&mut self, until: bool) -> Option<ZshCommand> {
2357        self.lexer.zshlex(); // skip while/until
2358
2359        let cond = Box::new(self.parse_program());
2360
2361        self.skip_separators();
2362        let body = self.parse_loop_body(false)?;
2363
2364        Some(ZshCommand::While(ZshWhile {
2365            cond,
2366            body: Box::new(body),
2367            until,
2368        }))
2369    }
2370
2371    /// Parse repeat loop
2372    /// Parse `repeat N; do BODY; done`. Direct port of
2373    /// zsh/Src/parse.c:1565-1617 `par_repeat`. The C source supports
2374    /// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
2375    /// parser doesn't yet special-case that variant.
2376    fn parse_repeat(&mut self) -> Option<ZshCommand> {
2377        self.lexer.zshlex(); // skip 'repeat'
2378
2379        let count = match self.lexer.tok {
2380            LexTok::String => {
2381                let c = self.lexer.tokstr.clone().unwrap_or_default();
2382                self.lexer.zshlex();
2383                c
2384            }
2385            _ => {
2386                self.error("expected count after repeat");
2387                return None;
2388            }
2389        };
2390
2391        self.skip_separators();
2392        let body = self.parse_loop_body(false)?;
2393
2394        Some(ZshCommand::Repeat(ZshRepeat {
2395            count,
2396            body: Box::new(body),
2397        }))
2398    }
2399
2400    /// Parse loop body (do...done, {...}, or shortloop)
2401    /// Parse the `do BODY done` body of a for/while/until/select/
2402    /// repeat loop. Direct equivalent of zsh's parse.c handling
2403    /// inside the loop builders — they all consume DOLOOP, parse a
2404    /// list until DONE, and return the list. The `foreach_style`
2405    /// flag signals foreach (where short-form `for NAME in WORDS;
2406    /// CMD` may skip do/done) vs c-style (which always requires
2407    /// do/done).
2408    fn parse_loop_body(&mut self, foreach_style: bool) -> Option<ZshProgram> {
2409        if self.lexer.tok == LexTok::Doloop {
2410            self.lexer.zshlex();
2411            let body = self.parse_program();
2412            if self.lexer.tok == LexTok::Done {
2413                self.lexer.zshlex();
2414            }
2415            Some(body)
2416        } else if self.lexer.tok == LexTok::Inbrace {
2417            self.lexer.zshlex();
2418            let body = self.parse_program();
2419            if self.lexer.tok == LexTok::Outbrace {
2420                self.lexer.zshlex();
2421            }
2422            Some(body)
2423        } else if foreach_style {
2424            // foreach allows 'end' terminator
2425            let body = self.parse_program();
2426            if self.lexer.tok == LexTok::Zend {
2427                self.lexer.zshlex();
2428            }
2429            Some(body)
2430        } else {
2431            // Short loop - single command
2432            self.parse_list()
2433                .map(|list| ZshProgram { lists: vec![list] })
2434        }
2435    }
2436
2437    /// Parse (...) subshell
2438    /// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619-1670
2439    /// `par_subsh`. Body parses as a normal list; the subshell wrapper
2440    /// fork-isolates execution in the executor.
2441    fn parse_subsh(&mut self) -> Option<ZshCommand> {
2442        self.lexer.zshlex(); // skip (
2443        let prog = self.parse_program();
2444        if self.lexer.tok == LexTok::Outpar {
2445            self.lexer.zshlex();
2446        }
2447        Some(ZshCommand::Subsh(Box::new(prog)))
2448    }
2449
2450    /// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
2451    /// function named `_zshrs_anon_N`, invokes it with the args, and the
2452    /// body runs with positional params set. Implemented as the desugared
2453    /// pair (FuncDef + Simple call) so the compile path doesn't need new
2454    /// machinery.
2455    /// Parse an anonymous function definition `() { BODY }` followed
2456    /// by call args. zsh treats `() { echo hi; } a b c` as defining
2457    /// and immediately calling an anon fn with args a/b/c. C
2458    /// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
2459    /// triggers an anon-funcdef path.
2460    fn parse_anon_funcdef(&mut self) -> Option<ZshCommand> {
2461        self.lexer.zshlex(); // skip ()
2462        self.skip_separators();
2463        // No `{` after `()` → bare empty subshell shape `()`. Fall back
2464        // to a Subsh with an empty program so the status is 0 (matches
2465        // zsh's `()` no-op behavior).
2466        if self.lexer.tok != LexTok::Inbrace {
2467            return Some(ZshCommand::Subsh(Box::new(ZshProgram {
2468                lists: Vec::new(),
2469            })));
2470        }
2471        self.lexer.zshlex(); // skip {
2472        let body = self.parse_program();
2473        if self.lexer.tok == LexTok::Outbrace {
2474            self.lexer.zshlex();
2475        }
2476        // Collect any trailing args until a separator. zsh's anon-fn form
2477        // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
2478        let mut args = Vec::new();
2479        while self.lexer.tok == LexTok::String {
2480            if let Some(s) = self.lexer.tokstr.clone() {
2481                args.push(s);
2482            }
2483            self.lexer.zshlex();
2484        }
2485
2486        // Generate a unique name. Module-level static would be cleaner but
2487        // a thread-local atomic is enough — anonymous functions are
2488        // ephemeral and the name isn't user-visible.
2489        use std::sync::atomic::{AtomicUsize, Ordering};
2490        static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2491        let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2492        let name = format!("_zshrs_anon_{}", n);
2493        Some(ZshCommand::FuncDef(ZshFuncDef {
2494            names: vec![name],
2495            body: Box::new(body),
2496            tracing: false,
2497            auto_call_args: Some(args),
2498            body_source: None,
2499        }))
2500    }
2501
2502    /// Parse {...} cursh
2503    /// Parse a current-shell brace block `{ BODY }`. C source:
2504    /// par_cmd at parse.c:958-1085 handles INBRACE → emit WC_CURSH
2505    /// + recurse into list. zshrs's parse_cursh extracts that arm
2506    /// into a dedicated method.
2507    fn parse_cursh(&mut self) -> Option<ZshCommand> {
2508        self.lexer.zshlex(); // skip {
2509        let prog = self.parse_program();
2510
2511        // Check for { ... } always { ... }
2512        if self.lexer.tok == LexTok::Outbrace {
2513            self.lexer.zshlex();
2514
2515            // Check for 'always'
2516            if self.lexer.tok == LexTok::String {
2517                let s = self.lexer.tokstr.as_ref();
2518                if s.map(|s| s == "always").unwrap_or(false) {
2519                    self.lexer.zshlex();
2520                    self.skip_separators();
2521
2522                    if self.lexer.tok == LexTok::Inbrace {
2523                        self.lexer.zshlex();
2524                        let always = self.parse_program();
2525                        if self.lexer.tok == LexTok::Outbrace {
2526                            self.lexer.zshlex();
2527                        }
2528                        return Some(ZshCommand::Try(ZshTry {
2529                            try_block: Box::new(prog),
2530                            always: Box::new(always),
2531                        }));
2532                    }
2533                }
2534            }
2535        }
2536
2537        Some(ZshCommand::Cursh(Box::new(prog)))
2538    }
2539
2540    /// Parse function definition
2541    /// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
2542    /// port of zsh/Src/parse.c:1672-1785 `par_funcdef`. zsh handles
2543    /// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
2544    /// the optional `[fname1 fname2 ...]` for multi-name function defs,
2545    /// and the `function FOO () { ... }` traditional/POSIX hybrid form.
2546    fn parse_funcdef(&mut self) -> Option<ZshCommand> {
2547        self.lexer.zshlex(); // skip 'function'
2548
2549        let mut names = Vec::new();
2550        let mut tracing = false;
2551
2552        // Handle options like -T and function names
2553        loop {
2554            match self.lexer.tok {
2555                LexTok::String => {
2556                    let s = self.lexer.tokstr.as_ref()?;
2557                    if s.starts_with('-') {
2558                        if s.contains('T') {
2559                            tracing = true;
2560                        }
2561                        self.lexer.zshlex();
2562                        continue;
2563                    }
2564                    names.push(s.clone());
2565                    self.lexer.zshlex();
2566                }
2567                LexTok::Inbrace | LexTok::Inoutpar | LexTok::Seper | LexTok::Newlin => break,
2568                _ => break,
2569            }
2570        }
2571
2572        // Optional ()
2573        let saw_paren = self.lexer.tok == LexTok::Inoutpar;
2574        if saw_paren {
2575            self.lexer.zshlex();
2576        }
2577
2578        self.skip_separators();
2579
2580        // Parse body
2581        if self.lexer.tok == LexTok::Inbrace {
2582            // Capture body_start BEFORE the lexer advances past the
2583            // first body token. After the previous zshlex consumed
2584            // `{`, lexer.pos points just past `{` (which is where the
2585            // body source starts). The next `zshlex()` would advance
2586            // past the first token (`echo`), making body_start land
2587            // mid-body and lose the first word — `typeset -f f` would
2588            // print `a; echo b` for `{ echo a; echo b }`.
2589            let body_start = self.lexer.pos;
2590            self.lexer.zshlex();
2591            let body = self.parse_program();
2592            let body_end = if self.lexer.tok == LexTok::Outbrace {
2593                // Lexer has just consumed `}`; pos is past it. Body content
2594                // ends one byte before pos.
2595                self.lexer.pos.saturating_sub(1)
2596            } else {
2597                self.lexer.pos
2598            };
2599            let body_source = self
2600                .lexer
2601                .input
2602                .get(body_start..body_end)
2603                .map(|s| s.trim().to_string())
2604                .filter(|s| !s.is_empty());
2605            if self.lexer.tok == LexTok::Outbrace {
2606                self.lexer.zshlex();
2607            }
2608
2609            // Anonymous form `function () { body } a b c` (with `()`) or
2610            // `function { body } a b c` (zsh-only shorthand, no `()`). No
2611            // name was collected. Mirror parse_anon_funcdef: synthesize
2612            // `_zshrs_anon_N`, collect trailing args, set auto_call_args
2613            // so compile_funcdef registers + immediately calls the
2614            // function with the args as positional params.
2615            if names.is_empty() {
2616                let mut args = Vec::new();
2617                while self.lexer.tok == LexTok::String {
2618                    if let Some(s) = self.lexer.tokstr.clone() {
2619                        args.push(s);
2620                    }
2621                    self.lexer.zshlex();
2622                }
2623                use std::sync::atomic::{AtomicUsize, Ordering};
2624                static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2625                let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2626                let name = format!("_zshrs_anon_kw_{}", n);
2627                return Some(ZshCommand::FuncDef(ZshFuncDef {
2628                    names: vec![name],
2629                    body: Box::new(body),
2630                    tracing,
2631                    auto_call_args: Some(args),
2632                    body_source,
2633                }));
2634            }
2635
2636            Some(ZshCommand::FuncDef(ZshFuncDef {
2637                names,
2638                body: Box::new(body),
2639                tracing,
2640                auto_call_args: None,
2641                body_source,
2642            }))
2643        } else {
2644            // Short form
2645            self.parse_list().map(|list| {
2646                ZshCommand::FuncDef(ZshFuncDef {
2647                    names,
2648                    body: Box::new(ZshProgram { lists: vec![list] }),
2649                    tracing,
2650                    auto_call_args: None,
2651                    body_source: None,
2652                })
2653            })
2654        }
2655    }
2656
2657    /// Parse inline function definition: name() { ... }
2658    /// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
2659    /// without the `function` keyword). The name has already been
2660    /// consumed and pushed by parse_simple before this method fires.
2661    /// C source: handled inline in par_simple's INOUTPAR-after-name
2662    /// arm (parse.c:1836-2228).
2663    fn parse_inline_funcdef(&mut self, name: String) -> Option<ZshCommand> {
2664        // Skip ()
2665        if self.lexer.tok == LexTok::Inoutpar {
2666            self.lexer.zshlex();
2667        }
2668
2669        self.skip_separators();
2670
2671        // Parse body
2672        if self.lexer.tok == LexTok::Inbrace {
2673            // Same body_start-before-zshlex fix as parse_funcdef.
2674            let body_start = self.lexer.pos;
2675            self.lexer.zshlex();
2676            let body = self.parse_program();
2677            let body_end = if self.lexer.tok == LexTok::Outbrace {
2678                self.lexer.pos.saturating_sub(1)
2679            } else {
2680                self.lexer.pos
2681            };
2682            let body_source = self
2683                .lexer
2684                .input
2685                .get(body_start..body_end)
2686                .map(|s| s.trim().to_string())
2687                .filter(|s| !s.is_empty());
2688            if self.lexer.tok == LexTok::Outbrace {
2689                self.lexer.zshlex();
2690            }
2691            Some(ZshCommand::FuncDef(ZshFuncDef {
2692                names: vec![name],
2693                body: Box::new(body),
2694                tracing: false,
2695                auto_call_args: None,
2696                body_source,
2697            }))
2698        } else {
2699            match self.parse_cmd() {
2700                Some(cmd) => {
2701                    let list = ZshList {
2702                        sublist: ZshSublist {
2703                            pipe: ZshPipe {
2704                                cmd,
2705                                next: None,
2706                                lineno: self.lexer.lineno,
2707                                merge_stderr: false,
2708                            },
2709                            next: None,
2710                            flags: SublistFlags::default(),
2711                        },
2712                        flags: ListFlags::default(),
2713                    };
2714                    Some(ZshCommand::FuncDef(ZshFuncDef {
2715                        names: vec![name],
2716                        body: Box::new(ZshProgram { lists: vec![list] }),
2717                        tracing: false,
2718                        auto_call_args: None,
2719                        body_source: None,
2720                    }))
2721                }
2722                None => None,
2723            }
2724        }
2725    }
2726
2727    /// Parse [[ ... ]] conditional
2728    /// Parse `[[ EXPR ]]` conditional expression. Direct port of
2729    /// zsh/Src/parse.c:2409-2731 `par_cond` (and helpers par_cond_1,
2730    /// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2731    /// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2732    /// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2733    /// <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2734    fn parse_cond(&mut self) -> Option<ZshCommand> {
2735        self.lexer.zshlex(); // skip [[
2736                             // Empty cond `[[ ]]` is a parse error in zsh — emit the
2737                             // diagnostic and return None so the caller produces a
2738                             // non-zero exit. Without this, `[[ ]]` silently passed and
2739                             // returned exit 0.
2740        if self.lexer.tok == LexTok::Doutbrack {
2741            self.error("parse error near `]]'");
2742            self.lexer.zshlex();
2743            return None;
2744        }
2745        let cond = self.parse_cond_expr();
2746
2747        if self.lexer.tok == LexTok::Doutbrack {
2748            self.lexer.zshlex();
2749        }
2750
2751        cond.map(ZshCommand::Cond)
2752    }
2753
2754    /// Parse conditional expression
2755    /// Top of `[[ ]]` cond-expression parsing — entry to recursive
2756    /// descent (or → and → not → primary). Direct port of zsh's
2757    /// par_cond_1 at parse.c:2434-2475.
2758    fn parse_cond_expr(&mut self) -> Option<ZshCond> {
2759        self.parse_cond_or()
2760    }
2761
2762    /// Cond-expression `||` level. C: inside par_cond_1 at
2763    /// parse.c:2434-2475 (the `cond_or` ladder).
2764    fn parse_cond_or(&mut self) -> Option<ZshCond> {
2765        self.recursion_depth += 1;
2766        if self.check_recursion() {
2767            self.error("parse_cond_or: max recursion depth exceeded");
2768            self.recursion_depth -= 1;
2769            return None;
2770        }
2771
2772        let left = match self.parse_cond_and() {
2773            Some(l) => l,
2774            None => {
2775                self.recursion_depth -= 1;
2776                return None;
2777            }
2778        };
2779
2780        self.skip_cond_separators();
2781
2782        let result = if self.lexer.tok == LexTok::Dbar {
2783            self.lexer.zshlex();
2784            self.skip_cond_separators();
2785            self.parse_cond_or()
2786                .map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
2787        } else {
2788            Some(left)
2789        };
2790
2791        self.recursion_depth -= 1;
2792        result
2793    }
2794
2795    /// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
2796    fn parse_cond_and(&mut self) -> Option<ZshCond> {
2797        self.recursion_depth += 1;
2798        if self.check_recursion() {
2799            self.error("parse_cond_and: max recursion depth exceeded");
2800            self.recursion_depth -= 1;
2801            return None;
2802        }
2803
2804        let left = match self.parse_cond_not() {
2805            Some(l) => l,
2806            None => {
2807                self.recursion_depth -= 1;
2808                return None;
2809            }
2810        };
2811
2812        self.skip_cond_separators();
2813
2814        let result = if self.lexer.tok == LexTok::Damper {
2815            self.lexer.zshlex();
2816            self.skip_cond_separators();
2817            self.parse_cond_and()
2818                .map(|right| ZshCond::And(Box::new(left), Box::new(right)))
2819        } else {
2820            Some(left)
2821        };
2822
2823        self.recursion_depth -= 1;
2824        result
2825    }
2826
2827    /// Cond-expression `!` negation level. C: handled inside
2828    /// par_cond_2 at parse.c:2476-2625 via the BANG token check.
2829    fn parse_cond_not(&mut self) -> Option<ZshCond> {
2830        self.recursion_depth += 1;
2831        if self.check_recursion() {
2832            self.error("parse_cond_not: max recursion depth exceeded");
2833            self.recursion_depth -= 1;
2834            return None;
2835        }
2836
2837        self.skip_cond_separators();
2838
2839        // ! can be either LexTok::Bang or String "!"
2840        let is_not = self.lexer.tok == LexTok::Bang
2841            || (self.lexer.tok == LexTok::String
2842                && self
2843                    .lexer
2844                    .tokstr
2845                    .as_ref()
2846                    .map(|s| s == "!")
2847                    .unwrap_or(false));
2848        if is_not {
2849            self.lexer.zshlex();
2850            let inner = match self.parse_cond_not() {
2851                Some(i) => i,
2852                None => {
2853                    self.recursion_depth -= 1;
2854                    return None;
2855                }
2856            };
2857            self.recursion_depth -= 1;
2858            return Some(ZshCond::Not(Box::new(inner)));
2859        }
2860
2861        if self.lexer.tok == LexTok::Inpar {
2862            self.lexer.zshlex();
2863            self.skip_cond_separators();
2864            let inner = match self.parse_cond_expr() {
2865                Some(i) => i,
2866                None => {
2867                    self.recursion_depth -= 1;
2868                    return None;
2869                }
2870            };
2871            self.skip_cond_separators();
2872            if self.lexer.tok == LexTok::Outpar {
2873                self.lexer.zshlex();
2874            }
2875            self.recursion_depth -= 1;
2876            return Some(inner);
2877        }
2878
2879        let result = self.parse_cond_primary();
2880        self.recursion_depth -= 1;
2881        result
2882    }
2883
2884    /// Cond-expression primary: unary tests (-f, -d, ...), binary
2885    /// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
2886    /// sub-expressions. Direct port of par_cond_double / par_cond_triple
2887    /// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
2888    fn parse_cond_primary(&mut self) -> Option<ZshCond> {
2889        let s1 = match self.lexer.tok {
2890            LexTok::String => {
2891                let s = self.lexer.tokstr.clone().unwrap_or_default();
2892                self.lexer.zshlex();
2893                s
2894            }
2895            _ => return None,
2896        };
2897
2898        self.skip_cond_separators();
2899
2900        // Check for unary operator
2901        if s1.starts_with('-') && s1.len() == 2 {
2902            let s2 = match self.lexer.tok {
2903                LexTok::String => {
2904                    let s = self.lexer.tokstr.clone().unwrap_or_default();
2905                    self.lexer.zshlex();
2906                    s
2907                }
2908                _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
2909            };
2910            return Some(ZshCond::Unary(s1, s2));
2911        }
2912
2913        // Check for binary operator
2914        let op = match self.lexer.tok {
2915            LexTok::String => {
2916                let s = self.lexer.tokstr.clone().unwrap_or_default();
2917                self.lexer.zshlex();
2918                s
2919            }
2920            LexTok::Inang => {
2921                self.lexer.zshlex();
2922                "<".to_string()
2923            }
2924            LexTok::Outang => {
2925                self.lexer.zshlex();
2926                ">".to_string()
2927            }
2928            _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
2929        };
2930
2931        self.skip_cond_separators();
2932
2933        let s2 = match self.lexer.tok {
2934            LexTok::String => {
2935                let s = self.lexer.tokstr.clone().unwrap_or_default();
2936                self.lexer.zshlex();
2937                s
2938            }
2939            _ => return Some(ZshCond::Binary(s1, op, String::new())),
2940        };
2941
2942        if op == "=~" {
2943            Some(ZshCond::Regex(s1, s2))
2944        } else {
2945            Some(ZshCond::Binary(s1, op, s2))
2946        }
2947    }
2948
2949    fn skip_cond_separators(&mut self) {
2950        while self.lexer.tok == LexTok::Seper && {
2951            let s = self.lexer.tokstr.as_ref();
2952            s.map(|s| !s.contains(';')).unwrap_or(true)
2953        } {
2954            self.lexer.zshlex();
2955        }
2956    }
2957
2958    /// Parse (( ... )) arithmetic command
2959    /// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
2960    /// `par_dinbrack` (despite the name; the function actually handles
2961    /// DINPAR `(( ))` blocks too).
2962    fn parse_arith(&mut self) -> Option<ZshCommand> {
2963        let expr = self.lexer.tokstr.clone().unwrap_or_default();
2964        self.lexer.zshlex();
2965        Some(ZshCommand::Arith(expr))
2966    }
2967
2968    /// Parse time command
2969    /// Parse `time CMD` (POSIX time keyword). Direct port of
2970    /// zsh/Src/parse.c:1787-1808 `par_time`. The `time` keyword
2971    /// times the execution of the following pipeline / cmd.
2972    fn parse_time(&mut self) -> Option<ZshCommand> {
2973        self.lexer.zshlex(); // skip 'time'
2974
2975        // Check if there's a pipeline to time
2976        if self.lexer.tok == LexTok::Seper
2977            || self.lexer.tok == LexTok::Newlin
2978            || self.lexer.tok == LexTok::Endinput
2979        {
2980            Some(ZshCommand::Time(None))
2981        } else {
2982            let sublist = self.parse_sublist();
2983            Some(ZshCommand::Time(sublist.map(Box::new)))
2984        }
2985    }
2986
2987    /// Check if next token is ()
2988    fn peek_inoutpar(&mut self) -> bool {
2989        self.lexer.tok == LexTok::Inoutpar
2990    }
2991
2992    /// Skip separator tokens
2993    fn skip_separators(&mut self) {
2994        let mut iterations = 0;
2995        while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
2996            iterations += 1;
2997            if iterations > 100_000 {
2998                self.error("skip_separators: too many iterations");
2999                return;
3000            }
3001            self.lexer.zshlex();
3002        }
3003    }
3004
3005    /// Record an error
3006    fn error(&mut self, msg: &str) {
3007        self.errors.push(ParseError {
3008            message: msg.to_string(),
3009            line: self.lexer.lineno,
3010        });
3011    }
3012}
3013
3014#[cfg(test)]
3015mod tests {
3016    use super::*;
3017
3018    fn parse(input: &str) -> Result<ZshProgram, Vec<ParseError>> {
3019        let mut parser = ZshParser::new(input);
3020        parser.parse()
3021    }
3022
3023    #[test]
3024    fn test_simple_command() {
3025        let prog = parse("echo hello world").unwrap();
3026        assert_eq!(prog.lists.len(), 1);
3027        match &prog.lists[0].sublist.pipe.cmd {
3028            ZshCommand::Simple(s) => {
3029                assert_eq!(s.words, vec!["echo", "hello", "world"]);
3030            }
3031            _ => panic!("expected simple command"),
3032        }
3033    }
3034
3035    #[test]
3036    fn test_pipeline() {
3037        let prog = parse("ls | grep foo | wc -l").unwrap();
3038        assert_eq!(prog.lists.len(), 1);
3039
3040        let pipe = &prog.lists[0].sublist.pipe;
3041        assert!(pipe.next.is_some());
3042
3043        let pipe2 = pipe.next.as_ref().unwrap();
3044        assert!(pipe2.next.is_some());
3045    }
3046
3047    #[test]
3048    fn test_and_or() {
3049        let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
3050        let sublist = &prog.lists[0].sublist;
3051
3052        assert!(sublist.next.is_some());
3053        let (op, _) = sublist.next.as_ref().unwrap();
3054        assert_eq!(*op, SublistOp::And);
3055    }
3056
3057    #[test]
3058    fn test_if_then() {
3059        let prog = parse("if test -f foo; then echo yes; fi").unwrap();
3060        match &prog.lists[0].sublist.pipe.cmd {
3061            ZshCommand::If(_) => {}
3062            _ => panic!("expected if command"),
3063        }
3064    }
3065
/// A `for NAME in WORDS` loop records the loop variable and the word list.
#[test]
fn test_for_loop() {
    let program = parse("for i in a b c; do echo $i; done").unwrap();

    let for_cmd = match &program.lists[0].sublist.pipe.cmd {
        ZshCommand::For(inner) => inner,
        _ => panic!("expected for command"),
    };
    assert_eq!(for_cmd.var, "i");

    if let ForList::Words(words) = &for_cmd.list {
        assert_eq!(words, &vec!["a", "b", "c"]);
    } else {
        panic!("expected word list");
    }
}
3080
/// A `case` statement with two `pattern) … ;;` clauses produces two arms.
#[test]
fn test_case() {
    let program = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();

    if let ZshCommand::Case(case_cmd) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(case_cmd.arms.len(), 2);
    } else {
        panic!("expected case command");
    }
}
3091
/// `function NAME { }` parses into a function definition carrying that
/// single name; on mismatch the actual command is dumped for diagnosis.
#[test]
fn test_function() {
    let program = parse("function foo { }").unwrap();

    let command = &program.lists[0].sublist.pipe.cmd;
    if let ZshCommand::FuncDef(def) = command {
        assert_eq!(def.names, vec!["foo"]);
    } else {
        panic!("expected function, got {:?}", command);
    }
}
3106
/// `cmd > file` attaches exactly one redirection of type `Write` to the
/// simple command.
#[test]
fn test_redirection() {
    let program = parse("echo hello > file.txt").unwrap();

    let simple = match &program.lists[0].sublist.pipe.cmd {
        ZshCommand::Simple(simple) => simple,
        _ => panic!("expected simple command"),
    };
    assert_eq!(simple.redirs.len(), 1);
    assert_eq!(simple.redirs[0].rtype, RedirType::Write);
}
3118
/// A `NAME=value` prefix before a command is recorded as one assignment on
/// the simple command.
#[test]
fn test_assignment() {
    let program = parse("FOO=bar echo $FOO").unwrap();

    let simple = match &program.lists[0].sublist.pipe.cmd {
        ZshCommand::Simple(simple) => simple,
        _ => panic!("expected simple command"),
    };
    assert_eq!(simple.assigns.len(), 1);
    assert_eq!(simple.assigns[0].name, "FOO");
}
3130
/// A completion helper that combines nested parameter expansions inside an
/// array assignment with a `(( … )) &&` guard must parse and produce at
/// least one list.
#[test]
fn test_parse_completion_function() {
    const SCRIPT: &str = r#"_2to3_fixes() {
  local -a fixes
  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
}"#;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse completion function: {:?}",
        outcome.err()
    );

    let program = outcome.unwrap();
    assert!(
        !program.lists.is_empty(),
        "Expected at least one list in program"
    );
}
3150
/// A multi-line array assignment whose elements mix quoted strings with
/// brace expansions must parse without errors.
#[test]
fn test_parse_array_with_complex_elements() {
    const SCRIPT: &str = r#"arguments=(
  '(- * :)'{-h,--help}'[show this help message and exit]'
  {-d,--doctests_only}'[fix up doctests only]'
  '*:filename:_files'
)"#;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse array assignment: {:?}",
        outcome.err()
    );
}
3165
/// End-to-end check on a whole completion file: a `#compdef` header,
/// comments, a helper function, a `local` declaration, a large array
/// assignment and a final command must all parse into a non-empty program.
#[test]
fn test_parse_full_completion_file() {
    const SCRIPT: &str = r##"#compdef 2to3

# zsh completions for '2to3'

_2to3_fixes() {
  local -a fixes
  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
}

local -a arguments

arguments=(
  '(- * :)'{-h,--help}'[show this help message and exit]'
  {-d,--doctests_only}'[fix up doctests only]'
  {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
  {-j,--processes}'[run 2to3 concurrently]:number: '
  {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
  {-l,--list-fixes}'[list available transformations]'
  {-p,--print-function}'[modify the grammar so that print() is a function]'
  {-v,--verbose}'[more verbose logging]'
  '--no-diffs[do not show diffs of the refactoring]'
  {-w,--write}'[write back modified files]'
  {-n,--nobackups}'[do not write backups for modified files]'
  {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
  {-W,--write-unchanged-files}'[also write files even if no changes were required]'
  '--add-suffix[append this string to all output filenames]:suffix: '
  '*:filename:_files'
)

_arguments -s -S $arguments
"##;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse full completion file: {:?}",
        outcome.err()
    );

    // A successful parse must still have produced at least one statement.
    let program = outcome.unwrap();
    assert!(!program.lists.is_empty(), "Expected at least one list");
}
3210
/// A bash-flavoured script (shebang, `[[ … ]]` conditions, nested
/// `if`/`else`, recursive globs, a pipeline and an `>&2` redirect) must
/// parse without errors.
#[test]
fn test_parse_logs_sh() {
    const SCRIPT: &str = r#"#!/usr/bin/env bash
shopt -s globstar

if [[ $(uname) == Darwin ]]; then
    tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
else
    if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
        tail -f /var/log/**/*.log | lolcat
    else
        printf "Unsupported...\n" >&2
    fi
fi
"#;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse logs.sh: {:?}",
        outcome.err()
    );
}
3233
/// `case` arms whose patterns end in `*` and whose `;;` terminator sits on
/// a separate line must parse without errors.
#[test]
fn test_parse_case_with_glob() {
    const SCRIPT: &str = r#"case "$ZPWR_OS_TYPE" in
    darwin*)  open_cmd='open'
      ;;
    cygwin*)  open_cmd='cygstart'
      ;;
    linux*)
        open_cmd='xdg-open'
      ;;
esac"#;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse case with glob: {:?}",
        outcome.err()
    );
}
3252
/// A function body containing a `case` whose last arm wraps an
/// `if [[ … ]];then … fi` block (glob patterns on both the arms and inside
/// the condition) must parse without errors.
#[test]
fn test_parse_case_with_nested_if() {
    const SCRIPT: &str = r##"function zpwrGetOpenCommand(){
    local open_cmd
    case "$ZPWR_OS_TYPE" in
        darwin*)  open_cmd='open' ;;
        cygwin*)  open_cmd='cygstart' ;;
        linux*)
            if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
                open_cmd='nohup xdg-open'
            fi
            ;;
    esac
}"##;

    let outcome = parse(SCRIPT);
    assert!(outcome.is_ok(), "Failed to parse: {:?}", outcome.err());
}
3271
/// Corpus smoke test: parse every `*.sh` / `*.zsh` file in a local zpwr
/// scripts checkout and require that at least half of them parse.
///
/// The corpus lives outside the repository, so the test skips itself when
/// the directory is missing or contains no matching scripts. The directory
/// is taken from `ZPWR_SCRIPTS_DIR` if set, otherwise `$HOME/.zpwr/scripts`,
/// otherwise the historical hard-coded location.
///
/// Each file is parsed on its own worker thread with a 2 second budget so a
/// parser hang cannot wedge the test. A worker that overruns is abandoned
/// (threads cannot be killed) and the file is reported as a timeout.
#[test]
fn test_parse_zpwr_scripts() {
    use std::fs;
    use std::path::PathBuf;
    use std::sync::mpsc;
    use std::thread;
    use std::time::Duration;

    let scripts_dir = std::env::var_os("ZPWR_SCRIPTS_DIR")
        .map(PathBuf::from)
        .or_else(|| {
            std::env::var_os("HOME").map(|home| PathBuf::from(home).join(".zpwr").join("scripts"))
        })
        .unwrap_or_else(|| PathBuf::from("/Users/wizard/.zpwr/scripts"));
    if !scripts_dir.exists() {
        eprintln!("Skipping test: scripts directory not found");
        return;
    }

    let mut total = 0usize;
    let mut passed = 0usize;
    let mut failed_files: Vec<(String, String)> = Vec::new();
    let mut timeout_files: Vec<String> = Vec::new();

    for ext in &["sh", "zsh"] {
        let pattern = scripts_dir.join(format!("*.{}", ext));
        let entries = match pattern.to_str().map(glob::glob) {
            Some(Ok(entries)) => entries,
            // Non-UTF-8 directory or invalid pattern: nothing to scan for
            // this extension. Skip instead of panicking.
            _ => continue,
        };

        for entry in entries.flatten() {
            total += 1;
            let file_path = entry.display().to_string();
            let content = match fs::read_to_string(&entry) {
                Ok(c) => c,
                Err(e) => {
                    failed_files.push((file_path, format!("read error: {}", e)));
                    continue;
                }
            };

            // The worker takes ownership of `content`; nothing else needs it.
            let (tx, rx) = mpsc::channel();
            thread::spawn(move || {
                // After a timeout the receiver is gone; a failed send is fine.
                let _ = tx.send(parse(&content));
            });

            match rx.recv_timeout(Duration::from_secs(2)) {
                Ok(Ok(_)) => passed += 1,
                Ok(Err(errors)) => {
                    let first_err = errors
                        .first()
                        .map(|e| format!("line {}: {}", e.line, e.message))
                        .unwrap_or_default();
                    failed_files.push((file_path, first_err));
                }
                Err(mpsc::RecvTimeoutError::Timeout) => {
                    // The worker keeps running in the background.
                    timeout_files.push(file_path);
                }
                Err(mpsc::RecvTimeoutError::Disconnected) => {
                    // The sender was dropped without sending, i.e. the worker
                    // unwound before producing a result. That is not a timeout.
                    failed_files.push((
                        file_path,
                        "parser thread exited without a result (panic?)".to_string(),
                    ));
                }
            }
        }
    }

    // An existing but empty corpus is "nothing to test", not a 0% pass rate.
    if total == 0 {
        eprintln!(
            "Skipping test: no *.sh or *.zsh scripts found in {:?}",
            scripts_dir
        );
        return;
    }

    eprintln!("\n=== ZPWR Scripts Parse Results ===");
    eprintln!("Passed: {}/{}", passed, total);

    if !timeout_files.is_empty() {
        eprintln!("\nTimeout files (>2s):");
        for file in &timeout_files {
            eprintln!("  {}", file);
        }
    }

    if !failed_files.is_empty() {
        eprintln!("\nFailed files:");
        for (file, err) in &failed_files {
            eprintln!("  {} - {}", file, err);
        }
    }

    // `total` is non-zero here, so the division is well defined.
    let pass_rate = (passed as f64 / total as f64) * 100.0;
    eprintln!("Pass rate: {:.1}%", pass_rate);

    // Allow some failures for now, but require at least a 50% pass rate.
    assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
}
3359
/// Corpus test: parse every regular file under
/// `$CARGO_MANIFEST_DIR/test_data/zsh_functions` and require that at least
/// half of them parse.
///
/// The test skips itself when the directory is missing or contains no
/// files. Each file is parsed on its own worker thread with a 2 second
/// budget; a worker that overruns is abandoned and the file is reported as
/// a timeout. The report lists at most 10 timeouts and 20 failures.
#[test]
#[ignore] // Uses threads that can't be killed on timeout; use integration test instead
fn test_parse_zsh_stdlib_functions() {
    use std::fs;
    use std::path::Path;
    use std::sync::mpsc;
    use std::thread;
    use std::time::Duration;

    let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
    if !functions_dir.exists() {
        eprintln!(
            "Skipping test: zsh_functions directory not found at {:?}",
            functions_dir
        );
        return;
    }

    // The directory exists, so failing to list it is a real error. Report it
    // directly instead of letting it surface later as a 0% pass rate.
    let entries = match fs::read_dir(&functions_dir) {
        Ok(entries) => entries,
        Err(e) => panic!("Failed to read directory {:?}: {}", functions_dir, e),
    };

    let mut total = 0usize;
    let mut passed = 0usize;
    let mut failed_files: Vec<(String, String)> = Vec::new();
    let mut timeout_files: Vec<String> = Vec::new();

    for entry in entries.flatten() {
        let path = entry.path();
        if !path.is_file() {
            continue;
        }

        total += 1;
        let file_path = path.display().to_string();
        let content = match fs::read_to_string(&path) {
            Ok(c) => c,
            Err(e) => {
                failed_files.push((file_path, format!("read error: {}", e)));
                continue;
            }
        };

        // The worker takes ownership of `content`; nothing else needs it.
        let (tx, rx) = mpsc::channel();
        thread::spawn(move || {
            // After a timeout the receiver is gone; a failed send is fine.
            let _ = tx.send(parse(&content));
        });

        match rx.recv_timeout(Duration::from_secs(2)) {
            Ok(Ok(_)) => passed += 1,
            Ok(Err(errors)) => {
                let first_err = errors
                    .first()
                    .map(|e| format!("line {}: {}", e.line, e.message))
                    .unwrap_or_default();
                failed_files.push((file_path, first_err));
            }
            Err(mpsc::RecvTimeoutError::Timeout) => {
                // The worker keeps running in the background.
                timeout_files.push(file_path);
            }
            Err(mpsc::RecvTimeoutError::Disconnected) => {
                // The sender was dropped without sending, i.e. the worker
                // unwound before producing a result. That is not a timeout.
                failed_files.push((
                    file_path,
                    "parser thread exited without a result (panic?)".to_string(),
                ));
            }
        }
    }

    // An existing but empty corpus is "nothing to test", not a 0% pass rate.
    if total == 0 {
        eprintln!("Skipping test: no files found in {:?}", functions_dir);
        return;
    }

    eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
    eprintln!("Passed: {}/{}", passed, total);

    if !timeout_files.is_empty() {
        eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
        for file in timeout_files.iter().take(10) {
            eprintln!("  {}", file);
        }
        if timeout_files.len() > 10 {
            eprintln!("  ... and {} more", timeout_files.len() - 10);
        }
    }

    if !failed_files.is_empty() {
        eprintln!("\nFailed files: {}", failed_files.len());
        for (file, err) in failed_files.iter().take(20) {
            // Print only the file name to keep the report compact.
            let filename = Path::new(file)
                .file_name()
                .unwrap_or_default()
                .to_string_lossy();
            eprintln!("  {} - {}", filename, err);
        }
        if failed_files.len() > 20 {
            eprintln!("  ... and {} more", failed_files.len() - 20);
        }
    }

    // `total` is non-zero here, so the division is well defined.
    let pass_rate = (passed as f64 / total as f64) * 100.0;
    eprintln!("Pass rate: {:.1}%", pass_rate);

    // Require at least 50% pass rate
    assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
}
3461}
zshrs_parse/parser.rs

zshrs_parse/
parser.rs