Skip to main content

zsh/ported/
parse.rs

1//! Zsh parser — direct port from zsh/Src/parse.c.
2//!
3//! Pulls tokens via the lex.rs free fns (zshlex/tok/tokstr) and
4//! builds an AST tree (relocated to src/extensions/zsh_ast.rs as a
5//! Rust-only IR) plus emits wordcode into ECBUF via the P9b/P9c
6//! pipeline. Follows the zsh grammar closely; productions match
7//! `par_*` in Src/parse.c.
8
9use super::lex::{
10    lextok, set_tok, AMPER, AMPERBANG, AMPOUTANG, BANG_TOK, BARAMP, BAR_TOK, CASE, COPROC, DAMPER,
11    DBAR, DINANG, DINANGDASH, DINBRACK, DINPAR, DOLOOP, DONE, DOUTANG, DOUTANGAMP, DOUTANGAMPBANG,
12    DOUTANGBANG, DOUTBRACK, DOUTPAR, DSEMI, ELIF, ELSE, ENDINPUT, ENVARRAY, ENVSTRING, ESAC, FI,
13    FOR, FOREACH, FUNC, IF, INANGAMP, INANG_TOK, INBRACE_TOK, INOUTANG, INOUTPAR, INPAR_TOK,
14    IS_REDIROP, LEXERR, NEWLIN, NOCORRECT, NULLTOK, OUTANGAMP, OUTANGAMPBANG, OUTANGBANG,
15    OUTANG_TOK, OUTBRACE_TOK, OUTPAR_TOK, REPEAT, SELECT, SEMI, SEMIAMP, SEMIBAR, SEPER,
16    STRING_LEX, THEN, TIME, TRINANG, TYPESET, UNTIL, WHILE, ZEND,
17};
18use super::zsh_h::{
19    eprog, estate, isset, redir, unset, wc_code, wordcode, Bang, Dash, Equals, Inang, Inpar,
20    Outang, Outpar, Stringg, Tilde, ALIASFUNCDEF, COND_AND, COND_MOD, COND_MODI, COND_NOT, COND_NT,
21    COND_OR, COND_REGEX, COND_STRDEQ, COND_STREQ, COND_STRGTR, COND_STRLT, COND_STRNEQ,
22    CSHJUNKIELOOPS,
23    EC_DUP, EC_NODUP, EF_HEAP, EF_REAL, EXECOPT, IGNOREBRACES, IS_DASH, MULTIFUNCDEF, OPT_ISSET,
24    PM_UNDEFINED, POSIXBUILTINS, REDIRF_FROM_HEREDOC, REDIR_APP, REDIR_APPNOW,
25    REDIR_FROM_HEREDOC_MASK, REDIR_VARID_MASK, REDIR_ERRAPP,
26    REDIR_ERRAPPNOW, REDIR_ERRWRITE, REDIR_ERRWRITENOW, REDIR_HEREDOC, REDIR_HEREDOCDASH,
27    REDIR_HERESTR, REDIR_INPIPE, REDIR_MERGEIN, REDIR_MERGEOUT, REDIR_OUTPIPE, REDIR_READ,
28    REDIR_READWRITE, REDIR_WRITE, REDIR_WRITENOW, SHORTLOOPS, SHORTREPEAT, WCB_COND, WCB_SIMPLE,
29    WC_REDIR, WC_REDIR_FROM_HEREDOC, WC_REDIR_TYPE, WC_REDIR_VARID, WC_SUBLIST_COPROC,
30    WC_SUBLIST_NOT,
31};
32use crate::ported::utils::{zerr, zwarnnam};
33use serde::{Deserialize, Serialize};
34use std::fs::File;
35use std::io::{Read, Seek, SeekFrom, Write};
36use std::sync::atomic::{AtomicUsize, Ordering};
37
/// Port of C `struct eccstr` (zsh.h:836) — the long-string dedup BST
/// node. The dedup-walk and cmp logic in `ecstrcode` is faithful to
/// parse.c:447-453 including the conditional cmp chain
/// (nfunc → hashval → strcmp), so corpus inputs where C's tree-walk
/// finds-or-misses match get the same outcome on the Rust side.
///
/// Children are owned through `Option<Box<_>>` links, so dropping a
/// node drops its whole subtree.
struct EccstrNode {
    /// Left child, `None` when there is no subtree on that side.
    left: Option<Box<EccstrNode>>,
    /// Right child, `None` when there is no subtree on that side.
    right: Option<Box<EccstrNode>>,
    /// C-byte form of the string (single byte per char ≤ 0xff).
    /// Owned because Rust doesn't have C zsh's "stable pointers into
    /// the lexer's tokstr arena" — every tokstr lives as a fresh
    /// Rust String allocation.
    str: Vec<u8>,
    /// Wordcode-encoded offset: `(byte_offset << 2) | token_bit`.
    /// Same shape as `Eccstr::offs` (parse.c:459).
    offs: u32,
    /// `nfunc` snapshot at insert time. Per-function namespace key
    /// — top-level scripts use 0; each funcdef bumps it.
    nfunc: i32,
    /// Hash of `str` computed via zsh's `hasher` (hashtable.c:86).
    hashval: u32,
}
60
61// Wordcode-buffer thread-locals — direct port of `Src/parse.c:269-285`
62// file-statics. Per-evaluator (bucket-1 in PORT_PLAN.md): each worker
63// thread parsing a separate program needs its own wordcode buffer.
64//
65// ECBUF: the wordcode array being built. C `Wordcode ecbuf`
66// (parse.c:275).
67// ECLEN: allocated entries in ECBUF (parse.c:269).
68// ECUSED: entries actually used so far (parse.c:271).
69// ECNPATS: count of patterns referenced by ECBUF (parse.c:273).
70// ECSOFFS / ECSSUB: byte offsets into the deferred string region
71// (parse.c:279). ECSSUB subtracts substring overlap.
72// ECNFUNC: count of functions defined so far (parse.c:285).
73// ECSTRS_INDEX: dedup index for long strings — C uses a binary tree
74// of `struct eccstr` (zsh.h:836); the canonical Eccstr port exists
75// at zsh_h::eccstr but stays unused at runtime here. The HashMap
76// preserves the API contract (lookup by (nfunc, str) → offs) with
77// simpler ownership semantics.
thread_local! {
    // The wordcode array being built (C `Wordcode ecbuf`). Public so other
    // modules can read the emitted words.
    pub static ECBUF: std::cell::RefCell<Vec<u32>> = std::cell::RefCell::new(Vec::new());
    // Allocated entries in ECBUF (C `eclen`).
    static ECLEN: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    // Entries of ECBUF actually used so far (C `ecused`).
    static ECUSED: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    // Count of patterns referenced by ECBUF (C `ecnpats`).
    static ECNPATS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    // Byte offset into the deferred string region (C `ecsoffs`).
    static ECSOFFS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    // Substring-overlap correction applied to ECSOFFS (C `ecssub`).
    static ECSSUB: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    // Count of functions defined so far (C `ecnfunc`).
    static ECNFUNC: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    // Dedup index for long strings, keyed by (nfunc, string) and yielding
    // the wordcode-encoded offset.
    static ECSTRS_INDEX: std::cell::RefCell<std::collections::HashMap<(i32, String), u32>>
        = std::cell::RefCell::new(std::collections::HashMap::new());
    /// C zsh's `eccstr` BST (parse.c:447). Port of `Eccstr ecstrs` —
    /// a hashval-ordered binary search tree of long-strings for
    /// dedup. Same cmp logic as C: nfunc, then hashval, then strcmp.
    /// HashMap above is a fast-path lookup; this tree is the
    /// C-fidelity walker that mirrors C's exact dedup-hit pattern
    /// (including its quirks for hash-colliding content).
    static ECSTRS_TREE: std::cell::RefCell<Option<Box<EccstrNode>>>
        = const { std::cell::RefCell::new(None) };
    /// Reverse index for `ecgetstr`: offs → owned string. Populated
    /// at ecstrcode time so the consumer can recover the string from
    /// the wordcode offs without walking the encode-time HashMap.
    /// Stores the METAFIED BYTE form of each long-string, exactly
    /// matching what C's strs region holds. `String` would not work
    /// here because Rust strings carry UTF-8-encoded chars (e.g.
    /// the Dash marker `\u{9b}` UTF-8-encodes to two bytes
    /// `\xc2 \x9b`) while C stores zsh markers as single bytes
    /// (raw `\x9b`). Storing Vec<u8> lets us write byte-for-byte
    /// what C writes after metafy.
    pub static ECSTRS_REVERSE: std::cell::RefCell<std::collections::HashMap<u32, Vec<u8>>>
        = std::cell::RefCell::new(std::collections::HashMap::new());
}
109
// Direct port of `Src/parse.c:287-289` grow-policy constants.
// EC_INIT_SIZE is the entry count `init_parse` gives a fresh ECBUF.
// NOTE(review): the names suggest the buffer doubles up to
// EC_DOUBLE_THRESHOLD and then grows by EC_INCREMENT entries — confirm
// against the grow routine, which is not visible in this part of the file.
const EC_INIT_SIZE: i32 = 256;
const EC_DOUBLE_THRESHOLD: i32 = 32768;
const EC_INCREMENT: i32 = 1024;
114
// Parser recursion + iteration safety counters as file-scope
// thread_locals (Rust-only — no C analog; C uses OS stack overflow).
// Both are zeroed by `parse_init`, `init_parse` and `parse_context_save`.
thread_local! {
    // Current nesting depth of the recursive-descent parser.
    pub static PARSER_RECURSION_DEPTH: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
    // Running count of parser loop iterations for the current parse.
    pub static PARSER_GLOBAL_ITERATIONS: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
}
121
122// =============================================================================
// Wordcode read helpers — used by text.rs's `gettext2` and exec dispatch
// to walk a compiled Eprog without re-running the parser. These are the
// wordcode-reading ports from `Src/parse.c`; the recursive-descent
// parser (par_event / par_list / par_cmd / par_*) follows below as
// free fns at module scope.
128// =============================================================================
129
130/// Port of `ecgetstr(Estate s, int dup, int *tokflag)` from `Src/parse.c:2855`.
131/// `s->pc` advances through the wordcode buffer; `s->strs` indexes the
132/// string pool. Returns the interned string (or a 1-3-char literal
133/// inlined directly into the wordcode word).
134pub fn ecgetstr(s: &mut estate, dup: i32, tokflag: Option<&mut i32>) -> String {
135    let prog = &s.prog.prog;
136    if s.pc >= prog.len() {
137        return String::new();
138    }
139    let c = prog[s.pc]; // c:2858 `wordcode c = *s->pc++;`
140    s.pc += 1;
141    if let Some(tf) = tokflag {
142        *tf = i32::from((c & 1) != 0); // c:2880 `*tokflag = (c & 1);`
143    }
144    if c == 6 || c == 7 {
145        // c:2861 `if (c == 6 || c == 7) r = "";`
146        return String::new();
147    }
148    let r: String = if (c & 2) != 0 {
149        // c:2862 `else if (c & 2)`
150        // c:2863-2866 — 3-byte inline string packed into the wordcode word.
151        let b0 = ((c >> 3) & 0xff) as u8;
152        let b1 = ((c >> 11) & 0xff) as u8;
153        let b2 = ((c >> 19) & 0xff) as u8;
154        let mut v = vec![b0, b1, b2];
155        v.retain(|&x| x != 0);
156        String::from_utf8_lossy(&v).into_owned()
157    } else {
158        // c:2877 `else r = s->strs + (c >> 2);`
159        let off = (c >> 2) as usize + s.strs_offset;
160        let strs_bytes = s.strs.as_deref().unwrap_or("").as_bytes();
161        if off >= strs_bytes.len() {
162            String::new()
163        } else {
164            let tail = &strs_bytes[off..];
165            let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
166            String::from_utf8_lossy(&tail[..end]).into_owned()
167        }
168    };
169    // c:2891 `return ((dup == EC_DUP || (dup && (c & 1))) ? dupstring(r) : r);`
170    // Rust owns the String already; `dup` flag has no observable effect.
171    let _ = (dup, EC_DUP, EC_NODUP);
172    r
173}
174
175/// Port of `ecgetredirs(Estate s)` from `Src/parse.c:2959`.
176///
177/// `strs` must be the same tail `ecgetstr` uses (`s->strs` / `estate.strs` from offset).
178/// WARNING: param names don't match C — Rust=(prog, strs, pc) vs C=(s)
179pub fn ecgetredirs(s: &mut estate) -> Vec<redir> {
180    let mut ret: Vec<redir> = Vec::new(); // c:2959 `LinkList ret = newlinklist();`
181    let prog_len = s.prog.prog.len();
182    if s.pc >= prog_len {
183        return ret;
184    }
185    let mut code = s.prog.prog[s.pc]; // c:2962 `wordcode code = *s->pc++;`
186    s.pc += 1;
187
188    loop {
189        if wc_code(code) != WC_REDIR {
190            // c:2988-2989 `s->pc--` then break from while
191            s.pc = s.pc.saturating_sub(1);
192            break;
193        }
194
195        let typ = WC_REDIR_TYPE(code); // c:2967 `r->type = WC_REDIR_TYPE(code);`
196        if s.pc >= prog_len {
197            break;
198        }
199        let fd1_w = s.prog.prog[s.pc]; // c:2968 `r->fd1 = *s->pc++;`
200        s.pc += 1;
201
202        let name = ecgetstr(s, EC_DUP, None); // c:2969 `r->name = ecgetstr(...)`
203
204        let (flags, here_terminator, munged_here_terminator) = if WC_REDIR_FROM_HEREDOC(code) != 0 {
205            // c:2970-2973
206            let term = ecgetstr(s, EC_DUP, None);
207            let munged = ecgetstr(s, EC_DUP, None);
208            (REDIRF_FROM_HEREDOC, Some(term), Some(munged))
209        } else {
210            // c:2974-2977
211            (0, None, None)
212        };
213
214        let varid = if WC_REDIR_VARID(code) != 0 {
215            // c:2979-2980
216            Some(ecgetstr(s, EC_DUP, None))
217        } else {
218            None // c:2981-2982
219        };
220
221        ret.push(redir {
222            // c:2965-2982 fields + c:2984 `addlinknode`
223            typ,
224            flags,
225            fd1: fd1_w as i32,
226            fd2: 0,
227            name: Some(name),
228            varid,
229            here_terminator,
230            munged_here_terminator,
231        });
232
233        if s.pc >= prog_len {
234            break;
235        }
236        code = s.prog.prog[s.pc]; // c:2986 `code = *s->pc++;`
237        s.pc += 1;
238    }
239
240    ret // c:2990 `return ret`
241}
242
243// === AST tree relocated to src/extensions/zsh_ast.rs ===
244//
245// zsh C does NOT have an AST tree — it emits wordcode directly via
246// par_event/par_list/par_sublist/par_pipe/par_cmd/par_simple/etc.
247// (Src/parse.c:485-3000) into a flat `Wordcode ecbuf[]`. The Zsh*/
248// Shell* AST node types lived in this file as a Rust-only IR that
249// stands in for that wordcode.
250//
251// P9e (PORT_PLAN.md): the types moved to src/extensions/zsh_ast.rs
252// to make their Rust-only-extension nature explicit. The full P9c +
253// P9d rewrite (par_* emitting wordcode + exec.rs reading wordcode)
254// retires them entirely — until then, callers reach them via this
255// re-export.
256pub use crate::heredoc_ast::HereDoc;
257pub use crate::zsh_ast::{
258    CaseArm, CaseTerm, CaseTerminator, CompoundCommand, ForList, HereDocInfo, ListFlags, ListOp,
259    Redirect, RedirectOp, ShellCommand, ShellWord, SimpleCommand, SublistFlags, SublistOp,
260    VarModifier, ZshAssign, ZshAssignValue, ZshCase, ZshCommand, ZshCond, ZshFor, ZshFuncDef,
261    ZshIf, ZshList, ZshParamFlag, ZshPipe, ZshProgram, ZshRedir, ZshRepeat, ZshSimple, ZshSublist,
262    ZshTry, ZshWhile,
263};
264use crate::ported::lex::{
265    heredocs_clear, heredocs_clone, heredocs_is_empty, heredocs_len, heredocs_push, heredocs_set,
266    heredocs_take, incasepat, incmdpos, incond, infor, input_slice, inredir, inrepeat, intypeset,
267    isnewlin, lex_init, lineno, noaliases, nocorrect, pos, set_incasepat, set_incmdpos, set_incond,
268    set_infor, set_inredir, set_inrepeat, set_intypeset, set_isnewlin, set_noaliases,
269    set_nocorrect, set_pos, set_tokfd, set_toklineno, set_tokstr, tok, tokfd, toklineno, tokstr,
270    tokstr_eq, tokstr_is_none, tokstr_is_some, tokstr_take, zshlex,
271};
272use crate::prompt::{cmdpop, cmdpush};
273use crate::zsh_h::{
274    wc_bdata, CS_ARRAY, CS_CASE, CS_CMDAND, CS_CMDOR, CS_COND, CS_CURSH, CS_ELIF, CS_ELSE,
275    CS_ERRPIPE, CS_FOR, CS_FOREACH, CS_FUNCDEF, CS_IF, CS_IFTHEN, CS_PIPE, CS_REPEAT, CS_SELECT,
276    CS_SUBSH, CS_UNTIL, CS_WHILE, EF_RUN, WCB_ARITH, WCB_ASSIGN, WCB_CASE, WCB_CURSH, WCB_END,
277    WCB_FOR, WCB_FUNCDEF, WCB_IF, WCB_LIST, WCB_PIPE, WCB_REDIR, WCB_REPEAT, WCB_SELECT,
278    WCB_SUBLIST, WCB_SUBSH, WCB_TIMED, WCB_TRY, WCB_TYPESET, WCB_WHILE, WC_ASSIGN_ARRAY, WC_ASSIGN_INC,
279    WC_ASSIGN_NEW, WC_ASSIGN_SCALAR, WC_CASE_AND, WC_CASE_HEAD, WC_CASE_OR, WC_CASE_TESTAND,
280    WC_FOR_COND, WC_FOR_LIST, WC_FOR_PPARAM, WC_IF_HEAD, WC_IF_IF, WC_PIPE_END, WC_PIPE_LINENO,
281    WC_PIPE_MID, WC_REDIR_WORDS, WC_SELECT_LIST, WC_SELECT_PPARAM, WC_SUBLIST_AND, WC_SUBLIST_END,
282    WC_SUBLIST_FLAGS, WC_SUBLIST_OR, WC_SUBLIST_SIMPLE, WC_SUBLIST_TYPE, WC_TIMED_EMPTY,
283    WC_TIMED_PIPE, WC_WHILE_UNTIL, WC_WHILE_WHILE, Z_ASYNC, Z_DISOWN, Z_END, Z_SIMPLE, Z_SYNC,
284};
285// === end AST relocation ===
286
287// Parser state lives in file-scope thread_locals:
288//   - LEX_* (lexer side, matching Src/lex.c file-statics)
289//   - ECBUF / ECLEN / ECUSED / ECNPATS / ECSOFFS / ECSSUB / ECNFUNC /
290//     ECSTRS_INDEX / ECSTRS_REVERSE (wordcode-emission state, matching
291//     Src/parse.c file-statics)
292//   - PARSER_RECURSION_DEPTH / PARSER_GLOBAL_ITERATIONS (Rust-only
293//     safety counters; no C analog — C relies on OS stack overflow).
294//
295// Callers use the free-fn entry points directly:
296//   crate::ported::parse::parse_init(input);
297//   let prog = crate::ported::parse::parse();
298
299const MAX_RECURSION_DEPTH: usize = 500;
300
/// Direct port of `struct parse_stack` at `Src/zsh.h:3099-3109`.
/// Used by `parse_context_save` / `parse_context_restore`
/// (parse.c:295-355) to snapshot per-parse-call state so a nested
/// parse (e.g. inside command substitution) doesn't clobber the
/// outer parse.
///
/// A second port of `struct parse_stack` exists at
/// `crate::ported::zsh_h::parse_stack` (zsh.h:1066) using canonical
/// Wordcode / Eccstr / `struct heredocs` types — that port is unused
/// today and will become authoritative when Phase 9b (PORT_PLAN.md)
/// wires wordcode emission. This local version uses the working-set
/// shapes (Vec<HereDoc>, stubbed wordcode fields) suited to zshrs's
/// pre-wordcode AST architecture; the consolidation happens in P9b.
///
/// All fields are public and the type derives `Default`, so callers can
/// start from `parse_stack::default()` and let `parse_context_save`
/// fill it in.
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Clone)]
pub struct parse_stack {
    // ── Direct port of struct parse_stack at zsh.h:3099-3109 ──
    /// Pending heredocs awaiting body collection. C: `struct heredocs
    /// *hdocs` (zsh.h:3100). zshrs uses Vec<HereDoc> until Phase 9b
    /// (PORT_PLAN.md) reinstates C's linked-list shape.
    pub hdocs: Vec<HereDoc>,
    /// C: `int incmdpos` (zsh.h:3102).
    pub incmdpos: bool,
    /// C: `int aliasspaceflag` (zsh.h:3103).
    /// `parse_context_save` currently always stores 0 here.
    pub aliasspaceflag: i32,
    /// C: `int incond` (zsh.h:3104).
    pub incond: i32,
    /// C: `int inredir` (zsh.h:3105).
    pub inredir: bool,
    /// C: `int incasepat` (zsh.h:3106).
    pub incasepat: i32,
    /// C: `int isnewlin` (zsh.h:3107).
    pub isnewlin: i32,
    /// C: `int infor` (zsh.h:3108).
    pub infor: i32,
    /// C: `int inrepeat_` (zsh.h:3109).
    pub inrepeat_: i32,
    /// C: `int intypeset` (zsh.h:3110).
    pub intypeset: bool,
    // ── Wordcode-buffer state — STUB until Phase 9b ──
    // C `Wordcode ecbuf` (zsh.h:3112) + `Eccstr ecstrs` (zsh.h:3113) +
    // `int eclen/ecused/ecnpats/ecsoffs/ecssub/ecnfunc` (zsh.h:3112-3114).
    // zshrs hasn't emitted wordcode yet — these fields exist to
    // preserve the C shape but read/write nothing until P9b lands.
    // NOTE(review): `init_parse` in this file already initialises the
    // ECBUF/ECLEN/... thread-locals, so the "hasn't emitted wordcode"
    // wording may be stale; `parse_context_save` still writes only
    // zeros/None into the fields below.
    pub eclen: i32,
    pub ecused: i32,
    pub ecnpats: i32,
    pub ecbuf: Option<Vec<u32>>,
    pub ecstrs: Option<Vec<u8>>,
    pub ecsoffs: i32,
    pub ecssub: i32,
    pub ecnfunc: i32,
    // P8: Rust-only safety counters (recursion_depth, global_iterations)
    // migrated to PARSER_RECURSION_DEPTH + PARSER_GLOBAL_ITERATIONS
    // thread_locals. parse_stack no longer carries them — matches C
    // exactly (C's struct parse_stack has no analog).
}
358
359// Old uppercase Rust-only `ParseStack` is gone. Compat alias so
360// existing call sites (context.rs) keep resolving until the
361// rename ripples through.
362#[allow(non_camel_case_types)]
363pub type ParseStack = parse_stack;
364
365/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
366/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
367/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
368/// during scanning (in source order).
369fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
370    for list in &mut prog.lists {
371        fill_in_sublist(&mut list.sublist, bodies);
372    }
373}
374
375fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
376    fill_in_pipe(&mut sub.pipe, bodies);
377    if let Some(next) = &mut sub.next {
378        fill_in_sublist(&mut next.1, bodies);
379    }
380}
381
382fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
383    fill_in_command(&mut pipe.cmd, bodies);
384    if let Some(next) = &mut pipe.next {
385        fill_in_pipe(next, bodies);
386    }
387}
388
389fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
390    match cmd {
391        ZshCommand::Simple(s) => {
392            for r in &mut s.redirs {
393                resolve_redir(r, bodies);
394            }
395        }
396        ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
397        ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
398        ZshCommand::If(i) => {
399            fill_heredoc_bodies(&mut i.cond, bodies);
400            fill_heredoc_bodies(&mut i.then, bodies);
401            for (c, b) in &mut i.elif {
402                fill_heredoc_bodies(c, bodies);
403                fill_heredoc_bodies(b, bodies);
404            }
405            if let Some(e) = &mut i.else_ {
406                fill_heredoc_bodies(e, bodies);
407            }
408        }
409        ZshCommand::While(w) | ZshCommand::Until(w) => {
410            fill_heredoc_bodies(&mut w.cond, bodies);
411            fill_heredoc_bodies(&mut w.body, bodies);
412        }
413        ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
414        ZshCommand::Case(c) => {
415            for arm in &mut c.arms {
416                fill_heredoc_bodies(&mut arm.body, bodies);
417            }
418        }
419        ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
420        ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
421        ZshCommand::Try(t) => {
422            fill_heredoc_bodies(&mut t.try_block, bodies);
423            fill_heredoc_bodies(&mut t.always, bodies);
424        }
425        ZshCommand::Redirected(inner, redirs) => {
426            for r in redirs {
427                resolve_redir(r, bodies);
428            }
429            fill_in_command(inner, bodies);
430        }
431        ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
432    }
433}
434
435fn resolve_redir(r: &mut ZshRedir, bodies: &[HereDocInfo]) {
436    if let Some(idx) = r.heredoc_idx {
437        if let Some(info) = bodies.get(idx) {
438            r.heredoc = Some(info.clone());
439        }
440    }
441}
442
443/// If `list` is a Simple containing one word that ends in the
444/// `<Inpar><Outpar>` token pair (the lexer-port encoding of `()`),
445/// return the bare name. Used by `parse_program_until` to detect
446/// `name() {body}` style function definitions where the lexer
447/// hasn't split the `()` from the name.
448/// Detect the `name() …` shape inside a Simple. Returns the function
449/// name and (when the body was already inlined into the same Simple,
450/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
451/// Returns None for non-funcdef shapes.
452fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
453    if list.flags.async_ || list.sublist.next.is_some() {
454        return None;
455    }
456    let pipe = &list.sublist.pipe;
457    if pipe.next.is_some() {
458        return None;
459    }
460    let simple = match &pipe.cmd {
461        ZshCommand::Simple(s) => s,
462        _ => return None,
463    };
464    if simple.words.is_empty() || !simple.assigns.is_empty() {
465        return None;
466    }
467    let suffix = "\u{88}\u{8a}"; // Inpar + Outpar
468                                 // Find the FIRST word ending in `()`. zsh accepts the
469                                 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
470                                 // par_funcdef wordlist) — words[0..i-1] are extra names,
471                                 // words[i] is `lastname()`. Words after are the body argv
472                                 // (one-line shorthand, `name() cmd args`).
473    let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
474    let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
475    for w in &simple.words[..par_idx] {
476        // Earlier names must be bare identifiers, NOT contain
477        // tokens that imply they're not function names (no `()`,
478        // no quotes, no expansions). zsh's lexer enforces this
479        // at the wordlist level; we approximate by requiring the
480        // word be an identifier-shaped token after untokenize.
481        let bare = super::lex::untokenize(w);
482        let valid = !bare.is_empty()
483            && bare
484                .chars()
485                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
486        if !valid {
487            return None;
488        }
489        names.push(bare);
490    }
491    let last = &simple.words[par_idx];
492    let bare = &last[..last.len() - suffix.len()];
493    if bare.is_empty() {
494        return None;
495    }
496    names.push(super::lex::untokenize(bare));
497    let rest = simple.words[par_idx + 1..].to_vec();
498    Some((names, rest))
499}
500
501/// Initialize parser state for a fresh parse of `input`.
502/// Free-fn entry point — resets parser thread_locals and loads input.
503pub fn parse_init(input: &str) {
504    // P8: reset Rust-only safety counters at parser construction.
505    PARSER_GLOBAL_ITERATIONS.set(0);
506    PARSER_RECURSION_DEPTH.set(0);
507    // Seed the option defaults the parser/lexer inspect. Real zsh
508    // installs these via `install_emulation_defaults` (options.c:172)
509    // at shell startup; zshrs's parse-only test entry path bypasses
510    // init_main, so we mirror the `zsh` emulation defaults here.
511    // Only seeds when unset so a script that explicitly disabled an
512    // option stays so.
513    for (name, default) in [
514        ("shortloops", true),
515        ("shortrepeat", false),
516        ("multifuncdef", true),
517        ("aliasfuncdef", false),
518        ("ignorebraces", false),
519        ("cshjunkieloops", false),
520        ("posixbuiltins", false),
521        ("execopt", true),
522        ("kshautoload", false),
523        ("aliases", true),
524    ] {
525        if crate::ported::options::opt_state_get(name).is_none() {
526            crate::ported::options::opt_state_set(name, default);
527        }
528    }
529    lex_init(input);
530}
531
/// C zsh's parser has no iteration cap — it trusts itself. The
/// Rust-only `check_limit` was a paranoia counter that fired
/// spuriously under nested cmdsubst (parse_context_save resets the
/// counter to 0 mid-parse, then the outer frame's tear-down decrement
/// underflowed). Now a no-op for C fidelity; mirrors the Phase 1
/// removal in lex.rs.
///
/// Always returns `false` and never touches `PARSER_GLOBAL_ITERATIONS`.
#[inline]
fn check_limit() -> bool {
    // NOTE(review): presumably callers treat `true` as "limit hit" —
    // confirm at the call sites.
    false
}
542
/// Same story as `check_limit` — C has no recursion cap, and the
/// Rust counter underflowed across nested-context boundaries. Stub.
///
/// Always returns `false` and never touches `PARSER_RECURSION_DEPTH`.
#[inline]
fn check_recursion() -> bool {
    // NOTE(review): presumably callers treat `true` as "too deep" —
    // confirm at the call sites.
    false
}
549
550/// Direct port of `parse_context_save(struct parse_stack *ps, int toplevel)` at `Src/parse.c:295`.
551/// Snapshots the lexer-side file-statics (which currently live on
552/// `lexer` until Phase 7 dissolution makes them file-scope
553/// thread_local!s) plus the pending heredoc list, plus the
554/// wordcode-buffer state (STUB until Phase 9b). Saves Rust-only
555/// recursion counters too so nested parses get fresh limits.
556/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
557pub fn parse_context_save(ps: &mut parse_stack) {
558    // parse.c:299 — `ps->hdocs = hdocs; hdocs = NULL;`
559    ps.hdocs = heredocs_take();
560    // parse.c:302-310 — save lexer-side state.
561    ps.incmdpos = incmdpos();
562    // parse.c:303 — aliasspaceflag — not yet a LEX_* thread_local.
563    // STUB; Phase 7 wires it. Same for the few below marked STUB.
564    ps.aliasspaceflag = 0;
565    ps.incond = incond();
566    ps.inredir = inredir();
567    ps.incasepat = incasepat();
568    ps.isnewlin = isnewlin();
569    ps.infor = infor();
570    ps.inrepeat_ = inrepeat();
571    ps.intypeset = intypeset();
572    // parse.c:312-317 — wordcode buffer state. STUB until Phase 9b
573    // (zshrs has no ecbuf yet).
574    ps.eclen = 0;
575    ps.ecused = 0;
576    ps.ecnpats = 0;
577    ps.ecbuf = None;
578    ps.ecstrs = None;
579    ps.ecsoffs = 0;
580    ps.ecssub = 0;
581    ps.ecnfunc = 0;
582    // P8: counters are file-scope thread_locals; reset them on save
583    // (matches the C parse_context_save clear-buffer semantics).
584    // Nested parses get a fresh limit; outer parse's count is lost
585    // — acceptable since the counters are safety nets, not state.
586    PARSER_RECURSION_DEPTH.set(0);
587    PARSER_GLOBAL_ITERATIONS.set(0);
588    set_incmdpos(true);
589    set_incond(0);
590    set_inredir(false);
591    set_incasepat(0);
592    set_infor(0);
593    set_inrepeat(0);
594    set_intypeset(false);
595}
596
597/// Direct port of `parse_context_restore(const struct parse_stack *ps, int toplevel)` at `Src/parse.c:326`.
598/// Inverse of `parse_context_save`. Restores lexer-side state +
599/// pending heredocs + Rust-only counters from `ps`, then clears
600/// `errflag & ERRFLAG_ERROR` per parse.c:354.
601/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
602pub fn parse_context_restore(ps: &parse_stack) {
603    // parse.c:330-331 — free any in-progress wordcode buffer.
604    // zshrs has no wordcode yet (STUB until Phase 9b); the AST
605    // nodes are owned by their parent so dropping the parser
606    // frees them.
607
608    // parse.c:333-352 — restore saved state.
609    heredocs_set(ps.hdocs.clone());
610    set_incmdpos(ps.incmdpos);
611    // aliasspaceflag STUB until Phase 7.
612    set_incond(ps.incond);
613    set_inredir(ps.inredir);
614    set_incasepat(ps.incasepat);
615    set_isnewlin(ps.isnewlin);
616    set_infor(ps.infor);
617    set_inrepeat(ps.inrepeat_);
618    set_intypeset(ps.intypeset);
619    // ecbuf/eclen/ecused/ecnpats/ecstrs/ecsoffs/ecssub/ecnfunc
620    // STUB until Phase 9b.
621    // P8: counters not restored — see parse_context_save comment.
622
623    // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
624    // error flag so the outer parse sees a clean state.
625    crate::ported::utils::errflag.fetch_and(
626        !crate::ported::utils::ERRFLAG_ERROR,
627        std::sync::atomic::Ordering::Relaxed,
628    );
629}
630
631/// Initialize parser status. Direct port of zsh/Src/parse.c:491
632/// `init_parse_status`. Clears the per-parse-call lexer flags
633/// so a fresh parse starts from cmd-position with no nesting
634/// state inherited from a prior parse.
635pub fn init_parse_status() {
636    // parse.c:500-502 — `incasepat = incond = inredir = infor =
637    // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
638    set_incasepat(0);
639    set_incond(0);
640    set_inredir(false);
641    set_infor(0);
642    set_intypeset(false);
643    set_incmdpos(true);
644}
645
646/// Initialize parser for a fresh parse. Direct port of
647/// zsh/Src/parse.c:509 `init_parse`. C source allocates a
648/// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
649/// per-parse-call counters, and calls init_parse_status. zshrs
650/// has no flat wordcode buffer (AST is built inline) so this
651/// function reduces to init_parse_status + recursion_depth/
652/// global_iterations clear.
653pub fn init_parse() {
654    // parse.c:513-520 — `ecbuf = (Wordcode) zalloc(EC_INIT_SIZE *
655    // sizeof(wordcode)); eclen = EC_INIT_SIZE; ecused = 0;
656    // ecnpats = 0; ecstrs = NULL; ecsoffs = ecnfunc = 0;
657    // ecssub = 0;`. P9b — initialize the per-evaluator wordcode
658    // buffer for this parse call. zshrs uses thread-local
659    // statics declared at file scope (parse.rs:25-50).
660    ECBUF.with_borrow_mut(|buf| {
661        buf.clear();
662        buf.resize(EC_INIT_SIZE as usize, 0);
663    });
664    ECLEN.set(EC_INIT_SIZE);
665    ECUSED.set(0);
666    ECNPATS.set(0);
667    ECSOFFS.set(0);
668    ECSSUB.set(0);
669    ECNFUNC.set(0);
670    ECSTRS_INDEX.with_borrow_mut(|m| m.clear());
671    ECSTRS_REVERSE.with_borrow_mut(|m| m.clear());
672    ECSTRS_TREE.with_borrow_mut(|t| *t = None);
673
674    PARSER_RECURSION_DEPTH.set(0);
675    PARSER_GLOBAL_ITERATIONS.set(0);
676    // parse.c:522 — `init_parse_status();`
677    init_parse_status();
678}
679
680/// Port of `int empty_eprog(Eprog p)` from `Src/parse.c:584`. C
681/// body: `return (!p || !p->prog || *p->prog == WCB_END());` —
682/// the eprog is empty when its prog buffer is missing or the
683/// first wordcode is the WC_END marker. Used by signal handlers
684/// (`Src/signals.c:712`) to short-circuit a trap that resolves to
685/// an empty program.
686pub fn empty_eprog(p: &crate::ported::zsh_h::eprog) -> bool {
687    p.prog.is_empty() || p.prog[0] == crate::ported::zsh_h::WCB_END()
688}
689
/// Clear pending here-document list. Direct port of
/// zsh/Src/parse.c:591 `clear_hdocs`. The C version walks
/// the global `hdocs` linked list and frees each node. zshrs
/// stores pending heredocs on the lexer's `heredocs` Vec —
/// truncating it has the same effect.
///
/// Called by `parse_event` when `par_event` fails.
pub fn clear_hdocs() {
    // Delegate to the lexer module, which holds the pending list.
    heredocs_clear();
}
698
699/// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
700/// 612-631 `parse_event`. Reads one event from the lexer (a
701/// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
702/// returns the resulting ZshProgram.
703///
704/// `endtok` is the token that terminates the event — usually
705/// ENDINPUT, but for command-style substitutions the closing
706/// `)` (zsh's CMD_SUBST_CLOSE).
707///
708/// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
709/// allocated wordcode program). zshrs returns a `ZshProgram`
710/// (AST root). Same role at the parse-output boundary.
711pub fn parse_event(endtok: lextok) -> Option<ZshProgram> {
712    // parse.c:616-619 — reset state and prime the lexer.
713    set_tok(ENDINPUT);
714    set_incmdpos(true);
715    zshlex();
716    // parse.c:620 — `init_parse();`
717    init_parse();
718
719    // parse.c:622-625 — drive par_event; on failure clear hdocs.
720    if !par_event(endtok) {
721        clear_hdocs();
722        return None;
723    }
724    // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
725    // parse for a substitution that doesn't need its own eprog.
726    // zshrs returns an empty program in that case (caller
727    // discards).
728    if endtok != ENDINPUT {
729        return Some(ZshProgram { lists: Vec::new() });
730    }
731    // parse.c:630 — `bld_eprog(1);` — build the final eprog.
732    // zshrs has already built the AST via parse_program_until,
733    // but parse_event uses par_event directly so we need to
734    // collect what par_event accumulated.
735    Some(parse_program_until(None))
736}
737
738/// Parse one event (sublist with optional separator). Direct
739/// port of zsh/Src/parse.c:635 `par_event`. Returns true if
740/// an event was successfully parsed, false on EOF / endtok.
741///
742/// zshrs port note: the C version emits wordcodes via ecadd/
743/// set_list_code; zshrs's parser builds AST nodes via
744/// par_sublist + par_list. Same flow, different output.
745pub fn par_event(endtok: lextok) -> bool {
746    // parse.c:639-643 — skip leading SEPERs.
747    while tok() == SEPER {
748        // parse.c:640-641 — at top-level (endtok == ENDINPUT),
749        // a SEPER on a fresh line ends the event.
750        if isnewlin() > 0 && endtok == ENDINPUT {
751            return false;
752        }
753        zshlex();
754    }
755    // parse.c:644-647 — terminate on EOF or matching close-token.
756    if tok() == ENDINPUT {
757        return false;
758    }
759    if tok() == endtok {
760        return true;
761    }
762    // parse.c:649-... — drive par_sublist + handle terminator.
763    // zshrs's par_sublist already builds the AST node directly.
764    match par_sublist() {
765        Some(_) => {
766            // parse.c:651-693 — terminator handling. zshrs's
767            // par_list wraps this; for parse_event we just
768            // confirm the sublist parsed.
769            true
770        }
771        None => false,
772    }
773}
774
775/// Parse one list — non-recursing variant. Direct port of
776/// zsh/Src/parse.c:808 `par_list1`. Like par_list but
777/// doesn't recurse on the trailing-separator path; used by
778/// callers that only want one statement (e.g. each arm of a
779/// case body).
780pub fn par_list1() -> Option<ZshSublist> {
781    // parse.c:810-816 — body is a single par_sublist call wrapped
782    // in the eu/ecused tracking that zshrs doesn't need (no
783    // wordcode buffer).
784    par_sublist()
785}
786
/// API-parity placeholder for `setheredoc` (zsh `Src/parse.c:2347`).
///
/// The C function patches the redirection wordcode in place:
/// `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`. This Rust
/// function intentionally does nothing and ignores every argument; it
/// exists so the C symbol has a counterpart with a matching shape.
///
/// NOTE(review): here-document bodies are presumably attached
/// elsewhere (the previous comment named `fill_in_command` /
/// `resolve_redir`) — confirm against those functions.
pub fn setheredoc(_pc: usize, _redir_type: i32, _doc: &str, _term: &str, _munged_term: &str) {
    // Deliberately empty: no wordcode is patched here.
}
804
805/// Parse a wordlist for `for ... in WORDS;`. Direct port of
806/// zsh/Src/parse.c:2362 `par_wordlist`. Reads STRING tokens
807/// until the next SEPER / SEMI / NEWLIN.
808pub fn par_wordlist() -> Vec<String> {
809    let mut out = Vec::new();
810    // parse.c:2362-2378 — collect STRINGs into the wordlist.
811    while tok() == STRING_LEX {
812        if let Some(text) = tokstr() {
813            out.push(text);
814        }
815        zshlex();
816    }
817    out
818}
819
820/// Parse a newline-separated wordlist. Direct port of
821/// zsh/Src/parse.c:2379 `par_nl_wordlist`. Like
822/// par_wordlist but tolerates leading/trailing newlines.
823pub fn par_nl_wordlist() -> Vec<String> {
824    // parse.c:2380-2381 — skip leading newlines.
825    while tok() == NEWLIN {
826        zshlex();
827    }
828    let out = par_wordlist();
829    // parse.c:2395-2397 — skip trailing newlines.
830    while tok() == NEWLIN {
831        zshlex();
832    }
833    out
834}
835
836/// Read an integer from the next cond token. NOT a direct C port —
837/// the C `get_cond_num(char *tst)` (parse.c:2643) is the
838/// string-lookup helper ported below. This Rust-only helper exists
839/// to support the AST cond-walker (`par_cond_*` analogs) when it
840/// needs a numeric literal from the current lex position.
841pub fn read_cond_num() -> Option<i64> {
842    if tok() != STRING_LEX {
843        return None;
844    }
845    let text = tokstr()?;
846    let parsed = text.parse::<i64>().ok()?;
847    zshlex();
848    Some(parsed)
849}
850
/// Port of `get_cond_num(char *tst)` (`Src/parse.c:2643`).
///
/// Looks `tst` up in the fixed table
/// `{"nt","ot","ef","eq","ne","lt","gt","le","ge"}` and returns its
/// zero-based position, or `-1` when `tst` is not in the table. The
/// comparison is exact and case-sensitive.
pub fn get_cond_num(tst: &str) -> i32 {
    // c:2647 — table order defines the returned index.
    const CONDSTRS: [&str; 9] = ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"];

    CONDSTRS
        .iter()
        .position(|&name| name == tst)
        // c:2654 on a hit, c:2656 on a miss.
        .map_or(-1, |index| index as i32)
}
866
867/// Emit a parser-level error. Direct port of zsh/Src/parse.c
868/// 2733-2766 `yyerror`. C version fills a per-event error buffer
869/// and sets errflag. zshrs pushes onto errors which the
870/// caller drains via parse()'s Result return.
871pub fn yyerror(msg: &str) {
872    // parse.c:2735-2765 — zsh's yyerror collects the offending
873    // token's literal text + line number. zshrs already does
874    // this via error() with the lexer's toklineno.
875    error(msg);
876}
877
// ============================================================
// Wordcode emission helpers (parse.c private helpers)
//
// The following functions are direct counterparts of zsh's
// private wordcode-emission helpers in parse.c. zsh uses these
// to write u32 opcodes into a flat `ecbuf` array; the ports
// below do the same against the thread-local ECBUF / ECUSED /
// ECLEN state. `ecadjusthere` is the one exception: it is still
// a documented no-op. Each function preserves the C signature
// shape and cites the C source. The parser also builds an AST
// tree; compile_zsh.rs is the AST-walking consumer.
//
// Listed for port-surface completeness so every parse.c symbol
// has a Rust counterpart.
// ============================================================
893
894/// Patch a list-placeholder wordcode with its actual opcode +
895/// jump distance. Direct port of zsh/Src/parse.c:738
896/// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
897/// par_sublist runs, then comes back through set_list_code to
898/// rewrite the slot with WCB_LIST(type, distance) once the
899/// sublist's final length is known.
900///
901/// Port of `set_list_code(int p, int type, int cmplx)` from
902/// `Src/parse.c:738`. Patches the WCB_LIST header at `p` based on
903/// whether the sublist body is simple (single command, no
904/// pipeline) and Z_SYNC/Z_END — emits the Z_SIMPLE-optimized
905/// header when possible, otherwise the plain WCB_LIST(type, 0).
906pub fn set_list_code(p: usize, type_code: i32, cmplx: bool) {
907    let _ = wc_bdata;
908    // c:740 — `if (!cmplx && (type == Z_SYNC || type == (Z_SYNC | Z_END))
909    // && WC_SUBLIST_TYPE(ecbuf[p+1]) == WC_SUBLIST_END)`
910    let sublist_code = ECBUF.with_borrow(|b| b.get(p + 1).copied().unwrap_or(0));
911    let z = type_code;
912    let qualifies = !cmplx
913        && (z == Z_SYNC || z == (Z_SYNC | Z_END))
914        && WC_SUBLIST_TYPE(sublist_code) == WC_SUBLIST_END;
915    if qualifies {
916        // c:742 — `int ispipe = !(WC_SUBLIST_FLAGS(ecbuf[p+1])
917        // & WC_SUBLIST_SIMPLE);`
918        let ispipe = (WC_SUBLIST_FLAGS(sublist_code) & WC_SUBLIST_SIMPLE) == 0;
919        // c:743 — `ecbuf[p] = WCB_LIST((type|Z_SIMPLE), ecused-2-p);`
920        let used = ECUSED.get() as usize;
921        let off = used.saturating_sub(2 + p);
922        ECBUF.with_borrow_mut(|b| {
923            if p < b.len() {
924                b[p] = WCB_LIST((z | Z_SIMPLE) as wordcode, off as wordcode);
925            }
926        });
927        // c:744 — `ecdel(p+1);`
928        ecdel(p + 1);
929        // c:745-746 — `if (ispipe) ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
930        if ispipe {
931            ECBUF.with_borrow_mut(|b| {
932                if p + 1 < b.len() {
933                    b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
934                }
935            });
936        }
937    } else {
938        // c:748 — `ecbuf[p] = WCB_LIST(type, 0);`
939        ECBUF.with_borrow_mut(|b| {
940            if p < b.len() {
941                b[p] = WCB_LIST(z as wordcode, 0);
942            }
943        });
944    }
945}
946
947/// Port of `set_sublist_code(int p, int type, int flags, int skip, int cmplx)`
948/// from `Src/parse.c:755`. Patches the WCB_SUBLIST header at `p`.
949/// When the sublist is non-complex (single command, no pipeline),
950/// sets WC_SUBLIST_SIMPLE and rewrites the following slot to
951/// `WC_PIPE_LINENO`.
952pub fn set_sublist_code(p: usize, type_code: i32, flags: i32, skip: i32, cmplx: bool) {
953    if cmplx {
954        // c:758 — `ecbuf[p] = WCB_SUBLIST(type, flags, skip);`
955        ECBUF.with_borrow_mut(|b| {
956            if p < b.len() {
957                b[p] = WCB_SUBLIST(type_code as wordcode, flags as wordcode, skip as wordcode);
958            }
959        });
960    } else {
961        // c:760 — `ecbuf[p] = WCB_SUBLIST(type, flags|WC_SUBLIST_SIMPLE, skip);`
962        ECBUF.with_borrow_mut(|b| {
963            if p < b.len() {
964                b[p] = WCB_SUBLIST(
965                    type_code as wordcode,
966                    (flags as wordcode) | WC_SUBLIST_SIMPLE,
967                    skip as wordcode,
968                );
969            }
970        });
971        // c:761 — `ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
972        ECBUF.with_borrow_mut(|b| {
973            if p + 1 < b.len() {
974                b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
975            }
976        });
977    }
978}
979
980/// Direct port of `ecadd(wordcode c)` at `Src/parse.c:397`. Append `c` to
981/// the wordcode buffer with grow-on-demand, return the new index.
982pub fn ecadd(c: u32) -> usize {
983    // parse.c:399-405 — `if ((eclen - ecused) < 1) grow`.
984    if (ECLEN.get() - ECUSED.get()) < 1 {
985        let cur = ECLEN.get();
986        let a = if cur < EC_DOUBLE_THRESHOLD {
987            cur
988        } else {
989            EC_INCREMENT
990        };
991        ECBUF.with_borrow_mut(|buf| {
992            buf.resize((cur + a) as usize, 0);
993        });
994        ECLEN.set(cur + a);
995    }
996    let idx = ECUSED.get();
997    ECBUF.with_borrow_mut(|buf| {
998        if (idx as usize) >= buf.len() {
999            buf.resize((idx + 1) as usize, 0);
1000        }
1001        buf[idx as usize] = c;
1002    });
1003    ECUSED.set(idx + 1);
1004    idx as usize
1005}
1006
1007/// Direct port of `ecdel(int p)` at `Src/parse.c:413`. Remove the
1008/// wordcode at position `p`, shift later entries left by one,
1009/// decrement ecused, adjust pending heredoc pointers.
1010pub fn ecdel(p: usize) {
1011    // parse.c:415-418 — memmove + decrement ecused.
1012    let n = ECUSED.get() as usize - p - 1;
1013    if n > 0 {
1014        ECBUF.with_borrow_mut(|buf| {
1015            for i in 0..n {
1016                buf[p + i] = buf[p + i + 1];
1017            }
1018        });
1019    }
1020    ECUSED.set(ECUSED.get() - 1);
1021    // parse.c:420 — `ecadjusthere(p, -1)`.
1022    ecadjusthere(p, -1);
1023}
1024
/// Port of `ecstrcode(char *s)` (`Src/parse.c:426`): encode a string
/// as a single wordcode.
///
/// The input is first converted to a byte form: every `char` with a
/// code point ≤ 0xff becomes one byte, every other `char` contributes
/// its UTF-8 bytes.
///
/// * Strings of at most 3 such bytes are packed inline into the
///   returned word: bits 0-1 are a tag (`2`, or `3` when the token
///   flag is set), and the bytes sit at bit offsets 3, 11 and 19. The
///   empty string is encoded as `6` (or `7` with the token flag).
/// * Longer strings are interned in a binary search tree keyed by
///   (`nfunc`, hash value, bytes). A hit returns the stored offset
///   word. A miss inserts a new node whose offset word is
///   `((ecsoffs - ecssub) << 2) | tokflag`, records the bytes in
///   `ECSTRS_REVERSE`, and advances `ECSOFFS` by the byte length plus
///   one (for the terminator).
///
/// The token flag is set when any byte lies in `0x83..=0x9f`.
///
/// NOTE(review): an earlier version of this comment said the
/// long-string path stores *metafied* bytes. The code below stores
/// `c_bytes` unchanged — confirm whether metafication is expected to
/// have happened before this call.
pub fn ecstrcode(s: &str) -> u32 {
    // Convert Rust UTF-8 → C-byte form inline: chars ≤ 0xff collapse
    // to single bytes (so zsh markers like Dash = `\u{9b}` are 1 byte
    // instead of `\xc2 \x9b` UTF-8). Chars > 0xff fall back to their
    // UTF-8 bytes.
    let mut c_bytes: Vec<u8> = Vec::with_capacity(s.len());
    for ch in s.chars() {
        let cu = ch as u32;
        if cu <= 0xff {
            c_bytes.push(cu as u8);
        } else {
            let mut tmp = [0u8; 4];
            c_bytes.extend_from_slice(ch.encode_utf8(&mut tmp).as_bytes());
        }
    }
    // Token flag: true when any byte falls in 0x83..=0x9f.
    let t = c_bytes.iter().any(|&b| (0x83..=0x9f).contains(&b));
    let l = c_bytes.len() + 1; // include NUL terminator
    if l <= 4 {
        // parse.c:436-445 — short-string inline pack: one raw byte per
        // 8-bit field, low two bits hold the tag.
        let mut c: u32 = if t { 3 } else { 2 };
        match l {
            4 => {
                c |= (c_bytes[2] as u32) << 19;
                c |= (c_bytes[1] as u32) << 11;
                c |= (c_bytes[0] as u32) << 3;
            }
            3 => {
                c |= (c_bytes[1] as u32) << 11;
                c |= (c_bytes[0] as u32) << 3;
            }
            2 => {
                c |= (c_bytes[0] as u32) << 3;
            }
            1 => {
                // parse.c:443 — empty string special case.
                c = if t { 7 } else { 6 };
            }
            // `l` is always in 1..=4 here, so this arm is unreachable.
            _ => {}
        }
        c
    } else {
        // parse.c:447-466 — long string. Walk the tree comparing
        // nfunc, then hashval, then the bytes. Return offs on a full
        // match; insert a new leaf otherwise.
        //
        // SAFETY (as stated by the original author): hasher only
        // iterates `.bytes()` — no UTF-8 validity assumed.
        // NOTE(review): `c_bytes` may hold bytes that are not valid
        // UTF-8, and building a `&str` over such bytes violates the
        // documented contract of `from_utf8_unchecked` regardless of
        // how the callee reads it — consider a byte-slice hasher.
        let val = crate::ported::hashtable::hasher(unsafe {
            std::str::from_utf8_unchecked(&c_bytes)
        });
        let nfunc = ECNFUNC.get();
        let found_offs = ECSTRS_TREE.with_borrow_mut(|root| {
            // Lookup pass. At each node, if all 3 comparisons are
            // equal, return the node's offs. Otherwise descend left or
            // right by the sign of the first non-zero comparison.
            let mut cur: &mut Option<Box<EccstrNode>> = root;
            loop {
                let p = match cur.as_mut() {
                    Some(p) => p,
                    // Fell off the tree: the string is not interned.
                    None => break None,
                };
                // c:448 — `cmp = p->nfunc - ecnfunc`
                let mut cmp = (p.nfunc as i64) - (nfunc as i64);
                if cmp == 0 {
                    // c:448 — `&& !(cmp = (long)p->hashval - (long)val)`
                    cmp = (p.hashval as i64) - (val as i64);
                    if cmp == 0 {
                        // c:448 — `&& !(cmp = strcmp(p->str, s))`
                        cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
                            std::cmp::Ordering::Less => -1,
                            std::cmp::Ordering::Equal => 0,
                            std::cmp::Ordering::Greater => 1,
                        };
                        if cmp == 0 {
                            // c:450 — `return p->offs;`
                            break Some(p.offs);
                        }
                    }
                }
                // c:452 — `pp = (cmp < 0 ? &p->left : &p->right);`
                cur = if cmp < 0 { &mut p.left } else { &mut p.right };
            }
        });
        if let Some(offs) = found_offs {
            return offs;
        }
        // New entry: offset relative to ECSSUB, shifted left by two,
        // with the token flag in bit 0.
        let offs =
            (((ECSOFFS.get() - ECSSUB.get()) as u32) << 2) | if t { 1 } else { 0 };
        // c:457-465 — insert a new node at the empty slot the lookup
        // ended at. This is done as a second pass: record the
        // left/right turns (true = right) with a shared borrow, then
        // replay them with a mutable borrow and fill the empty slot.
        let stored = c_bytes.clone();
        let stored_len = stored.len();
        let new_node = Box::new(EccstrNode {
            left: None,
            right: None,
            str: stored.clone(),
            offs,
            nfunc,
            hashval: val,
        });
        ECSTRS_TREE.with_borrow_mut(|root| {
            // Pass 1: record the path. Uses the same three-level
            // comparison as the lookup; `cmp >= 0` goes right, which
            // matches the lookup's `cmp < 0` goes left.
            let mut path: Vec<bool> = Vec::new();
            {
                let mut cur: &Option<Box<EccstrNode>> = root;
                while let Some(p) = cur.as_ref() {
                    let mut cmp = (p.nfunc as i64) - (nfunc as i64);
                    if cmp == 0 {
                        cmp = (p.hashval as i64) - (val as i64);
                        if cmp == 0 {
                            cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
                                std::cmp::Ordering::Less => -1,
                                std::cmp::Ordering::Equal => 0,
                                std::cmp::Ordering::Greater => 1,
                            };
                        }
                    }
                    let go_right = cmp >= 0;
                    path.push(go_right);
                    cur = if go_right { &p.right } else { &p.left };
                }
            }
            // Pass 2: descend mutably along the recorded path and
            // assign at the empty leaf.
            let mut cur: &mut Option<Box<EccstrNode>> = root;
            for turn in path {
                let p = cur.as_mut().expect("path matches walk");
                cur = if turn { &mut p.right } else { &mut p.left };
            }
            *cur = Some(new_node);
        });
        // Also record offs → bytes in the reverse index, which
        // `ecgetstr_wordcode` reads when decoding.
        ECSTRS_REVERSE.with_borrow_mut(|m| {
            m.insert(offs, stored);
        });
        // `l` is not needed on this path; `stored_len + 1` below is
        // the same value.
        let _ = l;
        // Advance the string-area offset by the bytes plus terminator.
        ECSOFFS.set(ECSOFFS.get() + (stored_len + 1) as i32);
        offs
    }
}
1188
1189/// P9b decoder (wordcode-pipeline variant): direct port of
1190/// `ecgetstr(Estate s, int dup, int *tokflag)` from
1191/// `Src/parse.c:2855-2890`. Reads a wordcode at `pc`, decodes the
1192/// encoded string back to owned String. Returns (string,
1193/// pc_after_consumed). Distinct from the existing `ecgetstr` (which
1194/// takes a separate strs buffer for text.rs) — this variant uses
1195/// the live ECSTRS_REVERSE HashMap populated at ecstrcode time.
1196pub fn ecgetstr_wordcode(buf: &[u32], pc: usize) -> (String, usize) {
1197    if pc >= buf.len() {
1198        return (String::new(), pc);
1199    }
1200    let c = buf[pc];
1201    let next = pc + 1;
1202    // parse.c:2862-2863 — empty-string sentinels.
1203    if c == 6 || c == 7 {
1204        return (String::new(), next);
1205    }
1206    // parse.c:2864-2871 — inline-packed short string.
1207    if (c & 2) != 0 {
1208        let b0 = ((c >> 3) & 0xff) as u8;
1209        let b1 = ((c >> 11) & 0xff) as u8;
1210        let b2 = ((c >> 19) & 0xff) as u8;
1211        let mut bytes: Vec<u8> = Vec::new();
1212        for b in [b0, b1, b2] {
1213            if b == 0 {
1214                break;
1215            }
1216            bytes.push(b);
1217        }
1218        return (String::from_utf8_lossy(&bytes).into_owned(), next);
1219    }
1220    // parse.c:2872-2873 — long string via offs lookup. Map value is
1221    // metafied Vec<u8>; convert back to display String. Unmetafy is
1222    // the caller's job (the wordcode-parity dumper does it; other
1223    // callers may want raw bytes).
1224    let s = ECSTRS_REVERSE
1225        .with_borrow(|m| m.get(&c).cloned())
1226        .map(|v| String::from_utf8_lossy(&v).into_owned())
1227        .unwrap_or_default();
1228    (s, next)
1229}
1230
1231/// Direct port of `ecispace(int p, int n)` at `Src/parse.c:372`. Insert `n`
1232/// empty wordcode slots at position `p`, shifting later entries
1233/// right, growing the buffer as needed, adjusting heredoc pointers.
1234pub fn ecispace(p: usize, n: usize) {
1235    // parse.c:376-381 — grow if needed.
1236    let need = n as i32;
1237    if (ECLEN.get() - ECUSED.get()) < need {
1238        let cur = ECLEN.get();
1239        let mut a = if cur < EC_DOUBLE_THRESHOLD {
1240            cur
1241        } else {
1242            EC_INCREMENT
1243        };
1244        if need > a {
1245            a = need;
1246        }
1247        ECBUF.with_borrow_mut(|buf| {
1248            buf.resize((cur + a) as usize, 0);
1249        });
1250        ECLEN.set(cur + a);
1251    }
1252    // parse.c:382-385 — memmove p → p+n, gap of n.
1253    let m = ECUSED.get() as usize - p;
1254    if m > 0 {
1255        ECBUF.with_borrow_mut(|buf| {
1256            let needed = (ECUSED.get() as usize) + n;
1257            if buf.len() < needed {
1258                buf.resize(needed, 0);
1259            }
1260            for i in (0..m).rev() {
1261                buf[p + n + i] = buf[p + i];
1262            }
1263            for i in 0..n {
1264                buf[p + i] = 0;
1265            }
1266        });
1267    }
1268    // parse.c:386 — bump ecused by n.
1269    ECUSED.set(ECUSED.get() + need);
1270    // parse.c:387 — `ecadjusthere(p, n)`.
1271    ecadjusthere(p, need);
1272}
1273
/// Counterpart of `ecadjusthere(int p, int d)` (`Src/parse.c:360`).
///
/// The C function walks the pending here-document list and adds `d`
/// to every entry whose `pc` is at or after `p`:
/// `for (p2 = hdocs; p2; p2 = p2->next) if (p2->pc >= p) p2->pc += d;`.
/// It is called by `ecispace` and `ecdel` whenever wordcodes shift.
///
/// This Rust version is currently a no-op: both arguments are
/// ignored and no state is touched.
pub fn ecadjusthere(p: usize, d: i32) {
    // parse.c:362-366 — nothing to adjust yet. Per the original
    // author's note, pending here-documents do not carry a wordcode
    // position until the Phase 9c migration wires `hdocs.pc` in.
    let _ = (p, d);
}
1287
// ============================================================
// Eprog runtime ops (parse.c:2767-2853)
//
// dupeprog / useeprog / freeeprog are zsh's reference-counting
// helpers for executable programs. The ports below keep the
// explicit `nref` counter on `eprog`: dupeprog builds a deep
// copy via Clone, "use" increments `nref` on non-heap eprogs,
// and "free" decrements it and clears the buffers when it hits
// zero. The memory itself is released by Rust on drop.
// ============================================================
1297
1298/// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2813
1299/// Port of `Eprog dupeprog(Eprog p, int heap)` from
1300/// `Src/parse.c:2767`. Deep-copies the wordcode array, string
1301/// table, and pattern-prog slots. `dummy_eprog` is returned
1302/// unchanged. `heap`-allocated copies get `nref = -1` (never
1303/// freed); real ones get `nref = 1`.
1304pub fn dupeprog(p: &crate::ported::zsh_h::eprog, heap: bool) -> crate::ported::zsh_h::eprog {
1305    // c:2774-2775 — `if (p == &dummy_eprog) return p;` — caller-
1306    // observable identity in C uses a pointer compare; Rust's
1307    // equivalent is "if it has the dummy's shape (single WCB_END
1308    // word and no strs), return a copy of the same shape".
1309    // c:2796-2797 — `for (i = r->npats; i--; pp++) *pp = dummy_patprog1;`
1310    // C uses `dummy_patprog1` as a placeholder; the Rust port has
1311    // `Vec<Patprog>` (Box<patprog>) — synthesize an equivalent zero-
1312    // initialized patprog for each slot (resolved later by
1313    // pattern.c::patcompile-on-first-use).
1314    let dummy_pat = || crate::ported::zsh_h::patprog {
1315        startoff: 0,
1316        size: 0,
1317        mustoff: 0,
1318        patmlen: 0,
1319        globflags: 0,
1320        globend: 0,
1321        flags: 0,
1322        patnpar: 0,
1323        patstartch: 0,
1324    };
1325    let r = crate::ported::zsh_h::eprog {
1326        // c:2778 — `flags = (heap ? EF_HEAP : EF_REAL) | (p->flags & EF_RUN);`
1327        flags: (if heap { EF_HEAP } else { EF_REAL }) | (p.flags & EF_RUN),
1328        len: p.len,
1329        npats: p.npats,
1330        // c:2787 — `nref = heap ? -1 : 1;`
1331        nref: if heap { -1 } else { 1 },
1332        prog: p.prog.clone(),
1333        strs: p.strs.clone(),
1334        pats: (0..p.npats).map(|_| Box::new(dummy_pat())).collect(),
1335        shf: None,
1336        dump: None,
1337    };
1338    r
1339}
1340
1341/// Port of `void useeprog(Eprog p)` from `Src/parse.c:2813`.
1342/// `if (p && p != &dummy_eprog && p->nref >= 0) p->nref++;` —
1343/// pin a real (non-heap, non-dummy) Eprog so it survives the
1344/// next `freeeprog`.
1345pub fn useeprog(p: &mut crate::ported::zsh_h::eprog) {
1346    // c:2815 — `if (p && p != &dummy_eprog && p->nref >= 0)`
1347    if p.nref >= 0 {
1348        p.nref += 1; // c:2816
1349    }
1350}
1351
1352/// Port of `void freeeprog(Eprog p)` from `Src/parse.c:2823`.
1353/// Refcount-decrement; when it hits zero, drops the pattern progs,
1354/// decrements the dump refcount if any, and releases the eprog.
1355/// `dummy_eprog` is never freed. Heap-eprogs (`nref < 0`) are
1356/// never freed either — they live as long as the heap arena.
1357pub fn freeeprog(p: &mut crate::ported::zsh_h::eprog) {
1358    // c:2829 — `if (p && p != &dummy_eprog) { ... }`
1359    if p.nref > 0 {
1360        p.nref -= 1; // c:2832
1361        if p.nref == 0 {
1362            // c:2833-2840 — drop pats, dump refcount, then the eprog.
1363            // Rust's Drop handles the per-field cleanup; we just
1364            // need to decrement the dump count first.
1365            if let Some(dump) = p.dump.take() {
1366                let dumped = (*dump).clone();
1367                decrdumpcount(&dumped); // c:2837
1368            }
1369            p.prog.clear();
1370            p.strs = None;
1371            p.pats.clear();
1372        }
1373    }
1374}
1375
// ============================================================
// Wordcode runtime getters (parse.c:2853-3060)
//
// These read packed wordcode out of a running Eprog at execution
// time. The ports below decode real wordcode from `eprog.prog` /
// `eprog.strs`; they preserve the shape of the C signatures and
// cite the source.
// ============================================================
1383
1384/// Port of `ecrawstr(Eprog p, Wordcode pc, int *tokflag)` from
1385/// `Src/parse.c:2891`. Like `ecgetstr` but reads at the given pc
1386/// without advancing — caller steps `pc` separately.
1387pub fn ecrawstr(p: &eprog, pc: usize, tokflag: Option<&mut i32>) -> String {
1388    if pc >= p.prog.len() {
1389        return String::new();
1390    }
1391    let c = p.prog[pc]; // c:2894
1392    if let Some(tf) = tokflag {
1393        *tf = i32::from((c & 1) != 0); // c:2898/2906/2912
1394    }
1395    if c == 6 || c == 7 {
1396        // c:2897
1397        return String::new();
1398    }
1399    if (c & 2) != 0 {
1400        // c:2902
1401        let b0 = ((c >> 3) & 0xff) as u8;
1402        let b1 = ((c >> 11) & 0xff) as u8;
1403        let b2 = ((c >> 19) & 0xff) as u8;
1404        let mut v = vec![b0, b1, b2];
1405        v.retain(|&x| x != 0);
1406        String::from_utf8_lossy(&v).into_owned()
1407    } else {
1408        // c:2911
1409        let off = (c >> 2) as usize;
1410        let strs_bytes = p.strs.as_deref().unwrap_or("").as_bytes();
1411        if off >= strs_bytes.len() {
1412            return String::new();
1413        }
1414        let tail = &strs_bytes[off..];
1415        let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
1416        String::from_utf8_lossy(&tail[..end]).into_owned()
1417    }
1418}
1419
1420/// Port of `ecgetarr(Estate s, int num, int dup, int *tokflag)` from
1421/// `Src/parse.c:2917`. Reads `num` strings from wordcode at `s->pc`
1422/// and OR-folds each entry's token flag into `*tokflag`.
1423pub fn ecgetarr(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
1424    let mut ret: Vec<String> = Vec::with_capacity(num); // c:2922
1425    let mut tf: i32 = 0;
1426    for _ in 0..num {
1427        // c:2924 `while (num--)`
1428        let mut tmp = 0;
1429        ret.push(ecgetstr(s, dup, Some(&mut tmp))); // c:2925
1430        tf |= tmp; // c:2926
1431    }
1432    if let Some(out) = tokflag {
1433        // c:2929
1434        *out = tf;
1435    }
1436    ret
1437}
1438
1439/// Port of `ecgetlist(Estate s, int num, int dup, int *tokflag)` from
1440/// `Src/parse.c:2937`. Same shape as `ecgetarr` but C returns
1441/// `LinkList`; zshrs uses `Vec<String>` for both.
1442pub fn ecgetlist(
1443    s: &mut crate::ported::zsh_h::estate,
1444    num: usize,
1445    dup: i32,
1446    tokflag: Option<&mut i32>,
1447) -> Vec<String> {
1448    if num == 0 {
1449        // c:2949-2952
1450        if let Some(tf) = tokflag {
1451            *tf = 0;
1452        }
1453        return Vec::new();
1454    }
1455    ecgetarr(s, num, dup, tokflag)
1456}
1457
1458/// Port of `eccopyredirs(Estate s)` from `Src/parse.c:3003`. Reads
1459/// the WC_REDIR run at `s->pc`, counts the wordcodes needed,
1460/// reserves space in `ecbuf` via `ecispace`, then re-walks `s->pc`
1461/// re-emitting each redir's wordcodes into the reserved slot —
1462/// finally calls `bld_eprog(0)` to package the result as an Eprog.
1463pub fn eccopyredirs(s: &mut crate::ported::zsh_h::estate) -> Option<crate::ported::zsh_h::eprog> {
1464    let prog_len = s.prog.prog.len();
1465    if s.pc >= prog_len {
1466        return None;
1467    }
1468    // c:3007-3009 — `if (wc_code(*pc) != WC_REDIR) return NULL;`
1469    let first_code = s.prog.prog[s.pc];
1470    if wc_code(first_code) != WC_REDIR {
1471        return None;
1472    }
1473    // c:3011 — `init_parse();`
1474    init_parse();
1475
1476    // c:3013-3027 — count wordcodes the redir run will need.
1477    // Each WC_REDIR contributes `code + fd1 + name` = 3, plus
1478    // `+2` if WC_REDIR_FROM_HEREDOC (terminator + munged), plus
1479    // `+1` if WC_REDIR_VARID.
1480    let mut probe = s.pc;
1481    let mut ncodes = 0usize;
1482    loop {
1483        if probe >= prog_len {
1484            break;
1485        }
1486        let code = s.prog.prog[probe];
1487        if wc_code(code) != WC_REDIR {
1488            break;
1489        }
1490        let mut ncode = if WC_REDIR_FROM_HEREDOC(code) != 0 {
1491            5
1492        } else {
1493            3
1494        };
1495        if WC_REDIR_VARID(code) != 0 {
1496            ncode += 1;
1497        }
1498        probe += ncode;
1499        ncodes += ncode;
1500    }
1501
1502    // c:3028-3029 — `r = ecused; ecispace(r, ncodes);`
1503    let r0 = ECUSED.get() as usize;
1504    ecispace(r0, ncodes);
1505
1506    // c:3031-3053 — re-walk `s->pc` and write into ecbuf[r..].
1507    let mut r = r0;
1508    loop {
1509        if s.pc >= prog_len {
1510            break;
1511        }
1512        let code = s.prog.prog[s.pc];
1513        if wc_code(code) != WC_REDIR {
1514            break;
1515        }
1516        s.pc += 1;
1517        // c:3036 — `ecbuf[r++] = code;`
1518        ECBUF.with_borrow_mut(|buf| {
1519            if r >= buf.len() {
1520                buf.resize(r + 1, 0);
1521            }
1522            buf[r] = code;
1523        });
1524        r += 1;
1525        // c:3038 — `ecbuf[r++] = *s->pc++;` (the fd1 word)
1526        let fd1 = s.prog.prog[s.pc];
1527        s.pc += 1;
1528        ECBUF.with_borrow_mut(|buf| {
1529            if r >= buf.len() {
1530                buf.resize(r + 1, 0);
1531            }
1532            buf[r] = fd1;
1533        });
1534        r += 1;
1535        // c:3041 — `ecbuf[r++] = ecstrcode(ecgetstr(s, EC_NODUP, NULL));`
1536        let name = ecgetstr(s, EC_NODUP, None);
1537        let nc = ecstrcode(&name);
1538        ECBUF.with_borrow_mut(|buf| {
1539            if r >= buf.len() {
1540                buf.resize(r + 1, 0);
1541            }
1542            buf[r] = nc;
1543        });
1544        r += 1;
1545        // c:3042-3047 — heredoc terminators.
1546        if WC_REDIR_FROM_HEREDOC(code) != 0 {
1547            let term = ecgetstr(s, EC_NODUP, None);
1548            let tc = ecstrcode(&term);
1549            ECBUF.with_borrow_mut(|buf| {
1550                if r >= buf.len() {
1551                    buf.resize(r + 1, 0);
1552                }
1553                buf[r] = tc;
1554            });
1555            r += 1;
1556            let munged = ecgetstr(s, EC_NODUP, None);
1557            let mc = ecstrcode(&munged);
1558            ECBUF.with_borrow_mut(|buf| {
1559                if r >= buf.len() {
1560                    buf.resize(r + 1, 0);
1561                }
1562                buf[r] = mc;
1563            });
1564            r += 1;
1565        }
1566        // c:3048-3049 — varid.
1567        if WC_REDIR_VARID(code) != 0 {
1568            let varid = ecgetstr(s, EC_NODUP, None);
1569            let vc = ecstrcode(&varid);
1570            ECBUF.with_borrow_mut(|buf| {
1571                if r >= buf.len() {
1572                    buf.resize(r + 1, 0);
1573                }
1574                buf[r] = vc;
1575            });
1576            r += 1;
1577        }
1578    }
1579
1580    // c:3056 — `return bld_eprog(0);` — `bld_eprog` appends the
1581    // WC_END marker and packages ECBUF/ECSTRS into an Eprog.
1582    Some(bld_eprog(false))
1583}
1584
1585/// `mod_export struct eprog dummy_eprog;` from `Src/parse.c:3066`.
1586/// Placeholder Eprog used by `shf->funcdef = &dummy_eprog;` in
1587/// builtin.c when clearing a stale autoload stub. Held in a Mutex
1588/// so `init_eprog` can set it once at shell startup.
1589pub static DUMMY_EPROG: std::sync::Mutex<crate::ported::zsh_h::eprog> =
1590    std::sync::Mutex::new(crate::ported::zsh_h::eprog {
1591        flags: 0,
1592        len: 0,
1593        npats: 0,
1594        nref: 0,
1595        prog: Vec::new(),
1596        strs: None,
1597        pats: Vec::new(),
1598        shf: None,
1599        dump: None,
1600    });
1601
1602/// Port of `init_eprog(void)` from `Src/parse.c:3069`. Sets up
1603/// `dummy_eprog_code = WCB_END(); dummy_eprog.len = sizeof(wordcode);
1604/// dummy_eprog.prog = &dummy_eprog_code; dummy_eprog.strs = NULL;`.
1605/// Called once at shell startup (init_main → init_misc → init_eprog).
1606pub fn init_eprog() {
1607    let mut d = DUMMY_EPROG.lock().unwrap();
1608    d.prog = vec![crate::ported::zsh_h::WCB_END()]; // c:3071/3073
1609    d.len = std::mem::size_of::<wordcode>() as i32; // c:3072
1610    d.strs = None; // c:3074
1611    d.flags = 0;
1612    d.npats = 0;
1613    d.nref = 0;
1614}
1615
1616/// Parse the complete input. Direct port of `parse_event` /
1617/// `par_list` from `Src/parse.c:614-720`. On syntax error,
1618/// sets `errflag |= ERRFLAG_ERROR` (via `zerr`) and returns the
1619/// partial program — callers check `errflag` to detect failure,
1620/// matching C's `Eprog parse_event(...)` + `if (errflag) {...}`.
1621pub fn parse() -> ZshProgram {
1622    zshlex();
1623
1624    let mut program = parse_program_until(None);
1625
1626    // Surface lexer-level errors (unmatched quote/heredoc/etc.)
1627    // that the parser silently rolls past. zsh aborts with a
1628    // diagnostic via `zerr` which sets `errflag |= ERRFLAG_ERROR`.
1629    if let Some(msg) = crate::ported::lex::error() {
1630        crate::ported::utils::zerr(&msg);
1631    }
1632
1633    // Post-pass: wire heredoc bodies (collected by lexer.process_heredocs)
1634    // back into ZshRedir.heredoc fields via heredoc_idx.
1635    let bodies: Vec<HereDocInfo> = heredocs_clone()
1636        .into_iter()
1637        .map(|h| HereDocInfo {
1638            content: h.content,
1639            terminator: h.terminator,
1640            quoted: h.quoted,
1641        })
1642        .collect();
1643    if !bodies.is_empty() {
1644        fill_heredoc_bodies(&mut program, &bodies);
1645    }
1646
1647    program
1648}
1649
1650/// P9c: wordcode-emission parser entry. Direct port of zsh's
1651/// `parse_event(int endtok)` from `Src/parse.c:683-720`. Emits a
1652/// minimal wordcode stream for the parsed program into the live
1653/// `ECBUF` thread_local via P9b's `ecadd` / `ecstrcode` API and
1654/// returns the start index of the emitted Eprog (matching C's
1655/// `Eprog parse_event(...)` return).
1656///
1657/// Minimal implementation: emits `WCB_END()` only for now (P9c
1658/// stub). The full par_event/par_list/par_sublist/par_pipe/par_cmd
1659/// recursion that walks the token stream and emits the right
1660/// wordcode for each production is the multi-week rewrite called
1661/// out in PORT_PLAN.md. This stub establishes the entry point and
1662/// drives the live ECBUF emission so downstream consumers (P9d
1663/// exec_wordcode) have a real wordcode buffer to walk.
1664pub fn par_event_wordcode() -> usize {
1665    let start = ECUSED.get() as usize;
1666    // parse.c:691-710 — par_list loop. Each iteration emits one WC_LIST
1667    // entry plus its sublist payload; terminator handling between
1668    // lists matches the SEMI/NEWLIN/AMPER/SEPER switch in the C source.
1669    while tok() != ENDINPUT && tok() != LEXERR {
1670        par_list_wordcode();
1671        match tok() {
1672            SEMI | NEWLIN | AMPER | AMPERBANG | SEPER => {
1673                zshlex();
1674            }
1675            _ => break,
1676        }
1677    }
1678    // parse.c:712 — `ecadd(WCB_END());`
1679    ecadd(crate::ported::zsh_h::WCB_END());
1680    start
1681}
1682
thread_local! {
    /// Thread-local stand-in for the `int *cmplx` argument that C's
    /// `par_*` functions pass down the recursive descent. The
    /// wordcode emitters read and update it through `cmplx_get`,
    /// `cmplx_or` and `cmplx_set`, which lets their Rust signatures
    /// stay argument-free.
    static PARSER_CMPLX: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
    /// Counterpart of C's `static int inpartime` in `par_cmd`: set
    /// while a `time` construct is being parsed so that a nested
    /// `time` token is handled as an ordinary word instead.
    static PARSER_INPARTIME: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
}

/// Current value of the parser's complexity flag.
#[inline]
fn cmplx_get() -> bool {
    PARSER_CMPLX.get()
}

/// OR `b` into the complexity flag (C: `*cmplx |= b`).
#[inline]
fn cmplx_or(b: bool) {
    PARSER_CMPLX.set(PARSER_CMPLX.get() | b);
}

/// Overwrite the complexity flag with `b` (C: `*cmplx = b`).
#[inline]
fn cmplx_set(b: bool) {
    PARSER_CMPLX.set(b);
}
1706
1707/// Port of `par_list(int *cmplx)` from `Src/parse.c:771-803`.
1708/// `list : { SEPER } [ sublist [ { SEPER | AMPER | AMPERBANG } list ] ]`.
1709/// Drives the WCB_LIST chain — for each sublist, emits a WCB_LIST
1710/// header, recurses into par_sublist, then patches the header
1711/// with the right Z_SYNC/Z_ASYNC/Z_ASYNC|Z_DISOWN flag + Z_END
1712/// marker on the last entry.
1713pub fn par_list_wordcode() {
1714    let mut lp: Option<usize> = None;
1715    loop {
1716        // c:780 — `while (tok == SEPER) zshlex();`
1717        while tok() == SEPER {
1718            zshlex();
1719        }
1720        // c:782 — `p = ecadd(0);`
1721        let p = ecadd(0);
1722        // c:783 — `c = 0;` — local cmplx accumulator for this sublist.
1723        let outer = cmplx_get();
1724        cmplx_set(false);
1725        let sublist_ok = par_sublist_wordcode();
1726        let c = cmplx_get();
1727        cmplx_set(outer | c);
1728        if sublist_ok {
1729            // c:785 — `*cmplx |= c;` (already done above)
1730            let t = tok();
1731            if t == SEPER || t == AMPER || t == AMPERBANG {
1732                // c:787 — `if (tok != SEPER) *cmplx = 1;`
1733                if t != SEPER {
1734                    cmplx_set(true);
1735                }
1736                // c:788 — `set_list_code(p, ...)`
1737                let z = if t == SEPER {
1738                    Z_SYNC
1739                } else if t == AMPER {
1740                    Z_ASYNC
1741                } else {
1742                    Z_ASYNC | Z_DISOWN
1743                };
1744                set_list_code(p, z, c);
1745                // c:792-794 — `incmdpos = 1; do { zshlex(); } while
1746                // (tok == SEPER);`
1747                set_incmdpos(true);
1748                loop {
1749                    zshlex();
1750                    if tok() != SEPER {
1751                        break;
1752                    }
1753                }
1754                lp = Some(p);
1755                continue; // c:795 `goto rec;`
1756            } else {
1757                // c:797 — `set_list_code(p, (Z_SYNC | Z_END), c);`
1758                set_list_code(p, Z_SYNC | Z_END, c);
1759            }
1760        } else {
1761            // c:799-802 — `ecused--; if (lp >= 0) ecbuf[lp] |= wc_bdata(Z_END);`
1762            ECUSED.set((ECUSED.get() - 1).max(0));
1763            if let Some(prev) = lp {
1764                ECBUF.with_borrow_mut(|b| {
1765                    if prev < b.len() {
1766                        b[prev] |= wc_bdata(Z_END as wordcode);
1767                    }
1768                });
1769            }
1770        }
1771        break;
1772    }
1773}
1774
1775/// Port of `par_list1(int *cmplx)` from `Src/parse.c:805-816`.
1776/// Single-sublist variant used by funcdef bodies and the short
1777/// `for`/`while`/`repeat` forms — exactly one sublist with
1778/// `Z_SYNC|Z_END`, no chain.
1779pub fn par_list1_wordcode() {
1780    // c:807 — `p = ecadd(0); c = 0;`
1781    let p = ecadd(0);
1782    let outer = cmplx_get();
1783    cmplx_set(false);
1784    let ok = par_sublist_wordcode();
1785    let c = cmplx_get();
1786    cmplx_set(outer | c);
1787    if ok {
1788        // c:809-811 — `set_list_code(p, Z_SYNC|Z_END, c); *cmplx |= c;`
1789        set_list_code(p, Z_SYNC | Z_END, c);
1790    } else {
1791        // c:813 — `ecused--;`
1792        ECUSED.set((ECUSED.get() - 1).max(0));
1793    }
1794}
1795
1796/// Port of `par_sublist(int *cmplx)` from `Src/parse.c:823-865`.
1797/// `sublist : sublist2 [ ( DBAR | DAMPER ) { SEPER } sublist ]`.
1798/// Emits a WCB_SUBLIST header, recurses into par_sublist2 for
1799/// the !/coproc prefix + pipeline, then chains via DBAR (`||`)
1800/// or DAMPER (`&&`) recursively. Returns true if at least one
1801/// pipeline was emitted.
1802pub fn par_sublist_wordcode() -> bool {
1803    // c:827 — `p = ecadd(0);`
1804    let p = ecadd(0);
1805    let outer = cmplx_get();
1806    cmplx_set(false);
1807    let mut c2 = 0i32;
1808    let f = par_sublist2(&mut c2);
1809    let c = c2 != 0;
1810    cmplx_set(outer | c);
1811    match f {
1812        Some(flags) => {
1813            // c:831 — `e = ecused;`
1814            let e = ECUSED.get() as usize;
1815            if tok() == DBAR || tok() == DAMPER {
1816                // c:834 — `qtok = tok;`
1817                let qtok = tok();
1818                // c:836 — `cmdpush(tok == DBAR ? CS_CMDOR : CS_CMDAND);`
1819                cmdpush(if qtok == DBAR {
1820                    CS_CMDOR as u8
1821                } else {
1822                    CS_CMDAND as u8
1823                });
1824                // c:837 — `zshlex();`
1825                zshlex();
1826                // c:838-839 — `while (tok == SEPER) zshlex();`
1827                while tok() == SEPER {
1828                    zshlex();
1829                }
1830                // c:840 — `sl = par_sublist(cmplx);`
1831                let sl = par_sublist_wordcode();
1832                // c:841-844 — `set_sublist_code(p, (sl ? (qtok==DBAR ?
1833                // WC_SUBLIST_OR : WC_SUBLIST_AND) : WC_SUBLIST_END),
1834                // f, e-1-p, c);`
1835                let st = if sl {
1836                    if qtok == DBAR {
1837                        WC_SUBLIST_OR
1838                    } else {
1839                        WC_SUBLIST_AND
1840                    }
1841                } else {
1842                    WC_SUBLIST_END
1843                };
1844                set_sublist_code(p, st as i32, flags, (e - 1 - p) as i32, c);
1845                // c:845 — `cmdpop();`
1846                cmdpop();
1847            } else {
1848                // c:847-849 — `if (tok == AMPER || tok == AMPERBANG)
1849                // { c = 1; *cmplx |= c; }`
1850                let c_final = if tok() == AMPER || tok() == AMPERBANG {
1851                    cmplx_set(true);
1852                    true
1853                } else {
1854                    c
1855                };
1856                // c:851 — `set_sublist_code(p, WC_SUBLIST_END, f,
1857                // e-1-p, c);`
1858                set_sublist_code(p, WC_SUBLIST_END as i32, flags, (e - 1 - p) as i32, c_final);
1859            }
1860            true
1861        }
1862        None => {
1863            // c:855-857 — `ecused--; return 0;`
1864            ECUSED.set((ECUSED.get() - 1).max(0));
1865            false
1866        }
1867    }
1868}
1869
1870/// Port of `par_pline(int *cmplx)` from `Src/parse.c:894-955`.
1871/// `pline : cmd [ ( BAR | BARAMP ) { SEPER } pline ]`. Emits a
1872/// WCB_PIPE header (mid for chain links, end for the last cmd)
1873/// plus the optional BARAMP `2>&1` synthetic redir.
1874pub fn par_pipe_wordcode() -> bool {
1875    let line = toklineno() as i64;
1876    // c:898 — `p = ecadd(0);`
1877    let p = ecadd(0);
1878    // c:900-903 — `if (!par_cmd(cmplx, 0)) { ecused--; return 0; }`
1879    if !par_cmd_wordcode(false) {
1880        ECUSED.set((ECUSED.get() - 1).max(0));
1881        return false;
1882    }
1883    if tok() == BAR_TOK {
1884        // c:905 — `*cmplx = 1;`
1885        cmplx_set(true);
1886        // c:906 — `cmdpush(CS_PIPE);`
1887        cmdpush(CS_PIPE as u8);
1888        // c:907 — `zshlex();`
1889        zshlex();
1890        // c:908-909 — `while (tok == SEPER) zshlex();`
1891        while tok() == SEPER {
1892            zshlex();
1893        }
1894        // c:910 — `ecbuf[p] = WCB_PIPE(WC_PIPE_MID, line>=0 ? line+1 : 0);`
1895        ECBUF.with_borrow_mut(|b| {
1896            if p < b.len() {
1897                b[p] = WCB_PIPE(
1898                    WC_PIPE_MID,
1899                    if line >= 0 { (line + 1) as wordcode } else { 0 },
1900                );
1901            }
1902        });
1903        // c:911 — `ecispace(p+1, 1);`
1904        ecispace(p + 1, 1);
1905        // c:912 — `ecbuf[p+1] = ecused - 1 - p;`
1906        let used = ECUSED.get() as usize;
1907        ECBUF.with_borrow_mut(|b| {
1908            if p + 1 < b.len() {
1909                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
1910            }
1911        });
1912        // c:913-915 — `if (!par_pline(cmplx)) tok = LEXERR;`
1913        if !par_pipe_wordcode() {
1914            set_tok(LEXERR);
1915        }
1916        cmdpop();
1917        true
1918    } else if tok() == BARAMP {
1919        // c:920-924 — walk past inline WC_REDIR to find r.
1920        let mut r = p + 1;
1921        loop {
1922            let code = ECBUF.with_borrow(|b| b.get(r).copied().unwrap_or(0));
1923            if wc_code(code) != WC_REDIR {
1924                break;
1925            }
1926            r += WC_REDIR_WORDS(code) as usize;
1927        }
1928        // c:926-929 — `ecispace(r, 3);` + synthetic `2>&1` redir
1929        ecispace(r, 3);
1930        ECBUF.with_borrow_mut(|b| {
1931            if r + 2 < b.len() {
1932                b[r] = WCB_REDIR(REDIR_MERGEOUT as wordcode);
1933                b[r + 1] = 2;
1934                b[r + 2] = ecstrcode("1");
1935            }
1936        });
1937        cmplx_set(true);
1938        cmdpush(CS_ERRPIPE as u8);
1939        zshlex();
1940        while tok() == SEPER {
1941            zshlex();
1942        }
1943        ECBUF.with_borrow_mut(|b| {
1944            if p < b.len() {
1945                b[p] = WCB_PIPE(
1946                    WC_PIPE_MID,
1947                    if line >= 0 { (line + 1) as wordcode } else { 0 },
1948                );
1949            }
1950        });
1951        ecispace(p + 1, 1);
1952        let used = ECUSED.get() as usize;
1953        ECBUF.with_borrow_mut(|b| {
1954            if p + 1 < b.len() {
1955                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
1956            }
1957        });
1958        if !par_pipe_wordcode() {
1959            set_tok(LEXERR);
1960        }
1961        cmdpop();
1962        true
1963    } else {
1964        // c:951 — `ecbuf[p] = WCB_PIPE(WC_PIPE_END, line>=0 ? line+1 : 0);`
1965        ECBUF.with_borrow_mut(|b| {
1966            if p < b.len() {
1967                b[p] = WCB_PIPE(
1968                    WC_PIPE_END,
1969                    if line >= 0 { (line + 1) as wordcode } else { 0 },
1970                );
1971            }
1972        });
1973        true
1974    }
1975}
1976
1977/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
1978/// `Src/parse.c:958-1085`. Parses leading + trailing redirs and
1979/// dispatches on the current token to the right par_* builder.
1980/// Returns false only when no command was emitted (no redirs +
1981/// par_simple returned 0).
1982pub fn par_cmd_wordcode(zsh_construct: bool) -> bool {
1983    let mut nr = 0i32;
1984    // c:962 — `r = ecused;` — used for trailing-redir patch
1985    // bookkeeping; the actual redir mutation goes through par_redir
1986    // which keeps its own offset.
1987    let mut r = ECUSED.get();
1988    // c:964-969 — leading redirs.
1989    if IS_REDIROP(tok()) {
1990        cmplx_set(true);
1991        while IS_REDIROP(tok()) {
1992            if let Some(_) = par_redir() {
1993                nr += 1;
1994            } else {
1995                break;
1996            }
1997        }
1998    }
1999    match tok() {
2000        FOR => {
2001            cmdpush(CS_FOR as u8);
2002            par_for_wordcode();
2003            cmdpop();
2004        }
2005        FOREACH => {
2006            cmdpush(CS_FOREACH as u8);
2007            par_for_wordcode();
2008            cmdpop();
2009        }
2010        SELECT => {
2011            cmplx_set(true);
2012            cmdpush(CS_SELECT as u8);
2013            par_for_wordcode();
2014            cmdpop();
2015        }
2016        CASE => {
2017            cmdpush(CS_CASE as u8);
2018            par_case_wordcode();
2019            cmdpop();
2020        }
2021        IF => {
2022            par_if_wordcode();
2023        }
2024        WHILE => {
2025            cmdpush(CS_WHILE as u8);
2026            par_while_wordcode();
2027            cmdpop();
2028        }
2029        UNTIL => {
2030            cmdpush(CS_UNTIL as u8);
2031            par_while_wordcode();
2032            cmdpop();
2033        }
2034        REPEAT => {
2035            cmdpush(CS_REPEAT as u8);
2036            par_repeat_wordcode();
2037            cmdpop();
2038        }
2039        INPAR_TOK => {
2040            cmplx_set(true);
2041            cmdpush(CS_SUBSH as u8);
2042            par_subsh_wordcode_impl(zsh_construct);
2043            cmdpop();
2044        }
2045        INBRACE_TOK => {
2046            cmdpush(CS_CURSH as u8);
2047            par_subsh_wordcode_impl(zsh_construct);
2048            cmdpop();
2049        }
2050        FUNC => {
2051            cmdpush(CS_FUNCDEF as u8);
2052            par_funcdef_wordcode();
2053            cmdpop();
2054        }
2055        DINBRACK => {
2056            cmdpush(CS_COND as u8);
2057            par_cond_wordcode();
2058            cmdpop();
2059        }
2060        DINPAR => {
2061            par_arith_wordcode();
2062        }
2063        TIME => {
2064            // c:1037-1050 — `static int inpartime` guard so
2065            // `time time foo` doesn't recurse infinitely.
2066            if !PARSER_INPARTIME.with(|c| c.get()) {
2067                cmplx_set(true);
2068                PARSER_INPARTIME.with(|c| c.set(true));
2069                par_time_wordcode();
2070                PARSER_INPARTIME.with(|c| c.set(false));
2071            } else {
2072                set_tok(STRING_LEX);
2073                let sr = par_simple_wordcode_impl(nr);
2074                if sr == 0 && nr == 0 {
2075                    return false;
2076                }
2077                if sr > 1 {
2078                    cmplx_set(true);
2079                    r += sr - 1;
2080                }
2081            }
2082        }
2083        _ => {
2084            // c:1054 — `if (!(sr = par_simple(cmplx, nr)))`
2085            let sr = par_simple_wordcode_impl(nr);
2086            if sr == 0 {
2087                if nr == 0 {
2088                    return false;
2089                }
2090            } else if sr > 1 {
2091                cmplx_set(true);
2092                r += sr - 1;
2093            }
2094        }
2095    }
2096    // c:1075-1078 — trailing redirs.
2097    if IS_REDIROP(tok()) {
2098        cmplx_set(true);
2099        while IS_REDIROP(tok()) {
2100            let _ = par_redir();
2101        }
2102    }
2103    // c:1079-1082 — `incmdpos=1; incasepat=0; incond=0; intypeset=0;`
2104    set_incmdpos(true);
2105    set_incasepat(0);
2106    set_incond(0);
2107    set_intypeset(false);
2108    let _ = r;
2109    true
2110}
2111
2112/// Adapter: par_cmd_wordcode wrapper for sites that don't supply
2113/// the zsh_construct flag (defaults to false, matching the C
2114/// `par_cmd(cmplx, 0)` call shape at c:902).
2115pub fn par_cmd_wordcode_noargs() {
2116    par_cmd_wordcode(false);
2117}
2118
/// Port of `par_for(int *cmplx)` from `Src/parse.c:1087-1199`.
///
/// Handles `for`, `foreach` and `select` (the token that is current
/// on entry decides which). A header word is reserved up front and
/// patched at the very end with `WCB_FOR` / `WCB_SELECT`, carrying
/// the loop type and the distance from the header to the current
/// end of the buffer.
///
/// Loop types produced:
/// * `WC_FOR_COND` — `for (( init; cond; step ))`: the three `tokstr`
///   values are emitted as strings;
/// * `WC_FOR_LIST` / `WC_SELECT_LIST` — variable name(s) followed by
///   `in word...` or by a parenthesised word list;
/// * `WC_FOR_PPARAM` / `WC_SELECT_PPARAM` — variable name(s) with no
///   word list at all.
///
/// On any syntax problem `error(..)` is called and the function
/// returns immediately, leaving the reserved header word unpatched.
pub fn par_for_wordcode() {
    // Classify the construct before the keyword token is consumed.
    let csh = tok() == FOREACH;
    let sel = tok() == SELECT;
    // Placeholder for the loop header; filled in at the bottom.
    let p = ecadd(0);
    set_incmdpos(false);
    // Only a plain `for` puts the lexer into its `infor = 2` mode.
    set_infor(if tok() == FOR { 2 } else { 0 });
    zshlex();
    let type_code: wordcode;
    if tok() == DINPAR {
        // Arithmetic form: three sections, each arriving as its own
        // token whose text is emitted verbatim.
        zshlex();
        if tok() != DINPAR {
            error("par_for: expected init");
            return;
        }
        ecstr(&tokstr().unwrap_or_default());
        zshlex();
        if tok() != DINPAR {
            error("par_for: expected cond");
            return;
        }
        ecstr(&tokstr().unwrap_or_default());
        zshlex();
        if tok() != DOUTPAR {
            error("par_for: expected ))");
            return;
        }
        ecstr(&tokstr().unwrap_or_default());
        set_infor(0);
        set_incmdpos(true);
        zshlex();
        type_code = WC_FOR_COND;
    } else {
        set_infor(0);
        if tok() != STRING_LEX {
            error("par_for: expected identifier");
            return;
        }
        // `for`/`foreach` store a variable count word; `select` does not.
        let np = if !sel { Some(ecadd(0)) } else { None };
        let mut n = 0u32;
        set_incmdpos(true);
        // Collect loop variable names: stop at a non-string token, at
        // the word `in`, or after the first name for `select`.
        loop {
            n += 1;
            ecstr(&tokstr().unwrap_or_default());
            zshlex();
            if tok() != STRING_LEX || sel {
                break;
            }
            if tokstr().as_deref() == Some("in") {
                break;
            }
        }
        // Patch the variable count into its reserved slot.
        if let Some(np) = np {
            ECBUF.with_borrow_mut(|b| {
                if np < b.len() {
                    b[np] = n;
                }
            });
        }
        // Remember whether a newline followed the names: that rules
        // out the parenthesised word-list form below.
        let posix_in = isnewlin() != 0;
        while isnewlin() != 0 {
            zshlex();
        }
        if tok() == STRING_LEX && tokstr().as_deref() == Some("in") {
            // `in word...` — count word first, then the words.
            set_incmdpos(false);
            zshlex();
            let np = ecadd(0);
            let mut n = 0u32;
            while tok() == STRING_LEX {
                if let Some(s) = tokstr() {
                    ecstr(&s);
                }
                n += 1;
                zshlex();
            }
            if tok() != SEPER {
                error("par_for: expected separator after `in`");
                return;
            }
            ECBUF.with_borrow_mut(|b| {
                if np < b.len() {
                    b[np] = n as wordcode;
                }
            });
            type_code = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
        } else if !posix_in && tok() == INPAR_TOK {
            // `( word... )` — like the `in` form, but newlines are
            // allowed before and after the words.
            set_incmdpos(false);
            zshlex();
            let np = ecadd(0);
            let mut n = 0u32;
            while tok() == NEWLIN {
                zshlex();
            }
            while tok() == STRING_LEX {
                if let Some(s) = tokstr() {
                    ecstr(&s);
                }
                n += 1;
                zshlex();
            }
            while tok() == NEWLIN {
                zshlex();
            }
            if tok() != OUTPAR_TOK {
                error("par_for: expected `)`");
                return;
            }
            ECBUF.with_borrow_mut(|b| {
                if np < b.len() {
                    b[np] = n as wordcode;
                }
            });
            set_incmdpos(true);
            zshlex();
            type_code = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
        } else {
            // No word list given at all.
            type_code = if sel { WC_SELECT_PPARAM } else { WC_FOR_PPARAM };
        }
    }
    // Skip separators, then parse the loop body.
    set_incmdpos(true);
    while tok() == SEPER {
        zshlex();
    }
    par_loop_body_wordcode(csh);
    // Patch the header: type plus distance from the header word to
    // the current end of the emitted code.
    let used = ECUSED.get() as usize;
    let off = used.saturating_sub(1 + p) as wordcode;
    ECBUF.with_borrow_mut(|b| {
        if p < b.len() {
            b[p] = if sel {
                WCB_SELECT(type_code, off)
            } else {
                WCB_FOR(type_code, off)
            };
        }
    });
}
2256
2257/// Body dispatch shared by par_for / par_while / par_repeat.
2258/// Direct port of `Src/parse.c:1167-1195`.
2259fn par_loop_body_wordcode(csh: bool) {
2260    if tok() == DOLOOP {
2261        zshlex();
2262        par_list_wordcode();
2263        if tok() != DONE {
2264            error("missing `done`");
2265            return;
2266        }
2267        set_incmdpos(false);
2268        zshlex();
2269    } else if tok() == INBRACE_TOK {
2270        zshlex();
2271        par_list_wordcode();
2272        if tok() != OUTBRACE_TOK {
2273            error("missing `}`");
2274            return;
2275        }
2276        set_incmdpos(false);
2277        zshlex();
2278    } else if csh || isset(CSHJUNKIELOOPS) {
2279        par_list_wordcode();
2280        if tok() != ZEND {
2281            error("missing `end`");
2282            return;
2283        }
2284        set_incmdpos(false);
2285        zshlex();
2286    } else if unset(SHORTLOOPS) {
2287        error("short loop form requires SHORTLOOPS");
2288    } else {
2289        par_list1_wordcode();
2290    }
2291}
2292
2293/// `select` shares par_for body (c:1024 routes SELECT to par_for).
2294pub fn par_select_wordcode() {
2295    par_for_wordcode();
2296}
2297
2298/// Port of `par_case(int *cmplx)` from `Src/parse.c:1209-1409`.
2299pub fn par_case_wordcode() {
2300    let p = ecadd(0);
2301    set_incmdpos(false);
2302    zshlex();
2303    if tok() != STRING_LEX {
2304        error("par_case: expected scrutinee");
2305        return;
2306    }
2307    ecstr(&tokstr().unwrap_or_default());
2308    set_incmdpos(true);
2309    zshlex();
2310    while tok() == SEPER {
2311        zshlex();
2312    }
2313    let saw_brace = tok() == INBRACE_TOK;
2314    if !saw_brace && !(tok() == STRING_LEX && tokstr().as_deref() == Some("in")) {
2315        error("par_case: expected `in` or `{`");
2316        return;
2317    }
2318    zshlex();
2319    loop {
2320        while tok() == SEPER {
2321            zshlex();
2322        }
2323        // c:1245-1247 — `esac` can arrive either as the ESAC reswd
2324        // token (when incmdpos was true at the SEPER between arms,
2325        // which is the normal case after the body's `;;`) OR as a
2326        // STRING with tokstr "esac" (alias context or noaliases off).
2327        // Accept both shapes so the outer arm loop terminates.
2328        if (saw_brace && tok() == OUTBRACE_TOK)
2329            || (!saw_brace && tok() == ESAC)
2330            || (!saw_brace && tok() == STRING_LEX && tokstr().as_deref() == Some("esac"))
2331        {
2332            zshlex();
2333            break;
2334        }
2335        if tok() == INPAR_TOK {
2336            zshlex();
2337        }
2338        // c:1265-1266 — `pp = ecadd(0); palts = ecadd(0); nalts = 0;`
2339        // Two arm-header words: PP holds WCB_CASE(type, body_off),
2340        // PALTS holds the pattern alternative count.
2341        let pp = ecadd(0);
2342        let palts = ecadd(0);
2343        let mut nalts: u32 = 0;
2344        loop {
2345            if tok() != STRING_LEX {
2346                error("par_case: expected pattern");
2347                return;
2348            }
2349            ecstr(&tokstr().unwrap_or_default());
2350            // c:1307,1316 — `ecadd(ecnpats++);` after each pattern.
2351            // Records a per-pattern index slot that the compiled
2352            // Patprog later drops into. Without this, npats=0 and
2353            // the strs/wordcode header bytes diverge from C.
2354            let np = ECNPATS.with(|c| { let v = c.get(); c.set(v + 1); v }) as u32;
2355            ecadd(np);
2356            nalts += 1;
2357            zshlex();
2358            if tok() != BAR_TOK {
2359                break;
2360            }
2361            zshlex();
2362        }
2363        ECBUF.with_borrow_mut(|b| {
2364            if palts < b.len() {
2365                b[palts] = nalts;
2366            }
2367        });
2368        if tok() != OUTPAR_TOK {
2369            error("par_case: expected `)`");
2370            return;
2371        }
2372        set_incmdpos(true);
2373        zshlex();
2374        par_list_wordcode();
2375        // c:1330-1336 — arm-terminator drives the WC_CASE_OR /
2376        // WC_CASE_AND / WC_CASE_TESTAND type tag in the WCB_CASE
2377        // header, which is patched at pp.
2378        let arm_type = match tok() {
2379            DSEMI => WC_CASE_OR,
2380            SEMIAMP => WC_CASE_AND,
2381            SEMIBAR => WC_CASE_TESTAND,
2382            _ => WC_CASE_OR,
2383        };
2384        let used = ECUSED.get() as usize;
2385        let arm_off = used.saturating_sub(1 + pp) as wordcode;
2386        ECBUF.with_borrow_mut(|b| {
2387            if pp < b.len() {
2388                b[pp] = WCB_CASE(arm_type, arm_off);
2389            }
2390        });
2391        if tok() == DSEMI || tok() == SEMIAMP || tok() == SEMIBAR {
2392            zshlex();
2393        }
2394    }
2395    let used = ECUSED.get() as usize;
2396    let off = used.saturating_sub(1 + p) as wordcode;
2397    ECBUF.with_borrow_mut(|b| {
2398        if p < b.len() {
2399            b[p] = WCB_CASE(WC_CASE_HEAD, off);
2400        }
2401    });
2402}
2403
2404/// Port of `par_if(int *cmplx)` from `Src/parse.c:1411-1519`.
2405pub fn par_if_wordcode() {
2406    let p = ecadd(0);
2407    cmdpush(CS_IF as u8);
2408    loop {
2409        let arm = ecadd(0);
2410        zshlex();
2411        par_list_wordcode();
2412        let body_brace = tok() == INBRACE_TOK;
2413        if !body_brace {
2414            while tok() == SEPER {
2415                zshlex();
2416            }
2417            if tok() != THEN {
2418                error("par_if: expected `then`");
2419                cmdpop();
2420                return;
2421            }
2422        }
2423        cmdpop();
2424        cmdpush(CS_IFTHEN as u8);
2425        zshlex();
2426        par_list_wordcode();
2427        cmdpop();
2428        let used = ECUSED.get() as usize;
2429        let arm_off = used.saturating_sub(1 + arm) as wordcode;
2430        ECBUF.with_borrow_mut(|b| {
2431            if arm < b.len() {
2432                b[arm] = WCB_IF(WC_IF_IF, arm_off);
2433            }
2434        });
2435        match tok() {
2436            ELIF => {
2437                cmdpush(CS_ELIF as u8);
2438                continue;
2439            }
2440            ELSE => {
2441                cmdpush(CS_ELSE as u8);
2442                let arm = ecadd(0);
2443                zshlex();
2444                par_list_wordcode();
2445                let used = ECUSED.get() as usize;
2446                let arm_off = used.saturating_sub(1 + arm) as wordcode;
2447                ECBUF.with_borrow_mut(|b| {
2448                    if arm < b.len() {
2449                        b[arm] = WCB_IF(WC_IF_IF, arm_off);
2450                    }
2451                });
2452                cmdpop();
2453                if tok() != FI {
2454                    error("par_if: expected `fi`");
2455                    return;
2456                }
2457                zshlex();
2458                break;
2459            }
2460            FI => {
2461                zshlex();
2462                break;
2463            }
2464            _ => {
2465                if body_brace && tok() == OUTBRACE_TOK {
2466                    zshlex();
2467                    break;
2468                }
2469                error("par_if: expected `elif`/`else`/`fi`");
2470                return;
2471            }
2472        }
2473    }
2474    let used = ECUSED.get() as usize;
2475    let off = used.saturating_sub(1 + p) as wordcode;
2476    ECBUF.with_borrow_mut(|b| {
2477        if p < b.len() {
2478            b[p] = WCB_IF(WC_IF_HEAD, off);
2479        }
2480    });
2481}
2482
2483/// Port of `par_while(int *cmplx)` from `Src/parse.c:1521-1564`.
2484pub fn par_while_wordcode() {
2485    let until = tok() == UNTIL;
2486    let p = ecadd(0);
2487    zshlex();
2488    par_list_wordcode();
2489    while tok() == SEPER {
2490        zshlex();
2491    }
2492    par_loop_body_wordcode(false);
2493    let type_code = if until {
2494        WC_WHILE_UNTIL
2495    } else {
2496        WC_WHILE_WHILE
2497    };
2498    let used = ECUSED.get() as usize;
2499    let off = used.saturating_sub(1 + p) as wordcode;
2500    ECBUF.with_borrow_mut(|b| {
2501        if p < b.len() {
2502            b[p] = WCB_WHILE(type_code, off);
2503        }
2504    });
2505}
2506
2507/// `until` shares par_while body — tok==UNTIL flips the type.
2508pub fn par_until_wordcode() {
2509    par_while_wordcode();
2510}
2511
2512/// Port of `par_repeat(int *cmplx)` from `Src/parse.c:1565-1618`.
2513pub fn par_repeat_wordcode() {
2514    let p = ecadd(0);
2515    set_incmdpos(false);
2516    zshlex();
2517    if tok() != STRING_LEX {
2518        error("par_repeat: expected count");
2519        return;
2520    }
2521    ecstr(&tokstr().unwrap_or_default());
2522    set_incmdpos(true);
2523    zshlex();
2524    while tok() == SEPER {
2525        zshlex();
2526    }
2527    par_loop_body_wordcode(false);
2528    let used = ECUSED.get() as usize;
2529    let off = used.saturating_sub(1 + p) as wordcode;
2530    ECBUF.with_borrow_mut(|b| {
2531        if p < b.len() {
2532            b[p] = WCB_REPEAT(off);
2533        }
2534    });
2535}
2536
2537/// Port of `par_funcdef(int *cmplx)` from `Src/parse.c:1672-1786`.
2538pub fn par_funcdef_wordcode() {
2539    let p = ecadd(0);
2540    // c:1681-1683 — `nocorrect = 1; incmdpos = 0; zshlex();` —
2541    // set BEFORE the zshlex past `function`, so the next-token
2542    // lex doesn't promote `{` to INBRACE_TOK or recognise reswds.
2543    set_nocorrect(1);
2544    set_incmdpos(false);
2545    zshlex();
2546    let np = ecadd(0);
2547    let mut n = 0u32;
2548    // c:1701-1709 — names loop. C special-cases `tokstr[0] ==
2549    // Inbrace || tokstr[0] == '{'` to break out and set tok =
2550    // INBRACE, since a bare `{` at incmdpos=0 lexes as STRING
2551    // but should still open the funcdef body. Without this,
2552    // `function f { ... }` swallowed the `{` as a name and the
2553    // body never started.
2554    while tok() == STRING_LEX {
2555        let s = tokstr().unwrap_or_default();
2556        let bytes = s.as_bytes();
2557        if bytes.len() == 1 && (bytes[0] == b'{' || s == "\u{8f}") {
2558            set_tok(INBRACE_TOK);
2559            break;
2560        }
2561        ecstr(&s);
2562        n += 1;
2563        zshlex();
2564    }
2565    ECBUF.with_borrow_mut(|b| {
2566        if np < b.len() {
2567            b[np] = n;
2568        }
2569    });
2570    // c:1715-1716 — `nocorrect = 0; incmdpos = 1;` — restore
2571    // before the body parse.
2572    set_nocorrect(0);
2573    set_incmdpos(true);
2574    if tok() == INOUTPAR {
2575        zshlex();
2576    }
2577    while tok() == SEPER {
2578        zshlex();
2579    }
2580    if tok() == INBRACE_TOK {
2581        zshlex();
2582        par_list_wordcode();
2583        if tok() != OUTBRACE_TOK {
2584            error("par_funcdef: expected `}`");
2585            return;
2586        }
2587        zshlex();
2588    } else if unset(SHORTLOOPS) {
2589        error("par_funcdef: short body requires SHORTLOOPS");
2590        return;
2591    } else {
2592        par_list1_wordcode();
2593    }
2594    let used = ECUSED.get() as usize;
2595    let off = used.saturating_sub(1 + p) as wordcode;
2596    ECBUF.with_borrow_mut(|b| {
2597        if p < b.len() {
2598            b[p] = WCB_FUNCDEF(off);
2599        }
2600    });
2601}
2602
2603/// `Src/parse.c:1619-1665`. Handles both `(...)` subshell and
2604/// `{...}` brace group (cursh) plus optional `always { ... }`
2605/// trailing block. C uses a single function with `zsh_construct=1`
2606/// for `{...}` and 0 for `(...)`.
2607pub fn par_subsh_wordcode_impl(zsh_construct: bool) {
2608    // c:1621 — `enum lextok otok = tok;`
2609    let otok = tok();
2610    // c:1624 — `p = ecadd(0);`
2611    let p = ecadd(0);
2612    // c:1626 — `pp = ecadd(0);` (extra word for the always-block try slot)
2613    let pp = ecadd(0);
2614    // c:1627 — `zshlex();`
2615    zshlex();
2616    // c:1628 — `par_list(cmplx);`
2617    par_list_wordcode();
2618    // c:1629 — `ecadd(WCB_END());`
2619    ecadd(WCB_END());
2620    // c:1630-1631 — `if (tok != ((otok == INPAR) ? OUTPAR : OUTBRACE))
2621    // YYERRORV(oecused);`
2622    let want = if otok == INPAR_TOK {
2623        OUTPAR_TOK
2624    } else {
2625        OUTBRACE_TOK
2626    };
2627    if tok() != want {
2628        error("par_subsh: missing closing token");
2629        return;
2630    }
2631    // c:1633 — `incmdpos = !zsh_construct;`
2632    set_incmdpos(!zsh_construct);
2633    // c:1634 — `zshlex();`
2634    zshlex();
2635
2636    // c:1637 — `if (otok == INBRACE && tok == STRING && !strcmp(tokstr, "always"))`
2637    let is_always =
2638        otok == INBRACE_TOK && tok() == STRING_LEX && tokstr().as_deref() == Some("always");
2639    if is_always {
2640        // c:1638 — `ecbuf[pp] = WCB_TRY(ecused - 1 - pp);`
2641        let used = ECUSED.get() as usize;
2642        let off = used.saturating_sub(1 + pp);
2643        ECBUF.with_borrow_mut(|b| {
2644            if pp < b.len() {
2645                b[pp] = WCB_TRY(off as wordcode);
2646            }
2647        });
2648        // c:1639 — `incmdpos = 1;`
2649        set_incmdpos(true);
2650        // c:1640-1642 — `do { zshlex(); } while (tok == SEPER);`
2651        loop {
2652            zshlex();
2653            if tok() != SEPER {
2654                break;
2655            }
2656        }
2657        // c:1644-1645 — `if (tok != INBRACE) YYERRORV(oecused);`
2658        if tok() != INBRACE_TOK {
2659            error("par_subsh: 'always' expects '{'");
2660            return;
2661        }
2662        // c:1648 — `zshlex();`
2663        zshlex();
2664        // c:1649 — `par_save_list(cmplx);`
2665        par_list_wordcode();
2666        // c:1650-1651 — `while (tok == SEPER) zshlex();`
2667        while tok() == SEPER {
2668            zshlex();
2669        }
2670        // c:1653 — `incmdpos = 1;`
2671        set_incmdpos(true);
2672        // c:1655-1656 — `if (tok != OUTBRACE) YYERRORV(oecused);`
2673        if tok() != OUTBRACE_TOK {
2674            error("par_subsh: 'always' block missing '}'");
2675            return;
2676        }
2677        zshlex();
2678        // c:1658 — `ecbuf[p] = WCB_TRY(ecused - 1 - p);`
2679        let used = ECUSED.get() as usize;
2680        let off = used.saturating_sub(1 + p);
2681        ECBUF.with_borrow_mut(|b| {
2682            if p < b.len() {
2683                b[p] = WCB_TRY(off as wordcode);
2684            }
2685        });
2686    } else {
2687        // c:1660-1662 — `ecbuf[p] = (otok == INPAR ? WCB_SUBSH(...) :
2688        // WCB_CURSH(...));`
2689        let used = ECUSED.get() as usize;
2690        let off = used.saturating_sub(1 + p);
2691        ECBUF.with_borrow_mut(|b| {
2692            if p < b.len() {
2693                b[p] = if otok == INPAR_TOK {
2694                    WCB_SUBSH(off as wordcode)
2695                } else {
2696                    WCB_CURSH(off as wordcode)
2697                };
2698            }
2699        });
2700    }
2701}
2702
2703/// Wrapper for `(...)` subshell — calls `par_subsh_wordcode_impl(false)`.
2704pub fn par_subsh_wordcode() {
2705    par_subsh_wordcode_impl(false);
2706}
2707
2708/// Wrapper for `{...}` brace group (cursh) — calls
2709/// `par_subsh_wordcode_impl(true)`. C uses the same `par_subsh`
2710/// function with `zsh_construct=1`; the Rust split exists because
2711/// the par_cmd dispatch at parse.rs:1446 already named them
2712/// separately.
2713pub fn par_cursh_wordcode() {
2714    par_subsh_wordcode_impl(true);
2715}
2716
2717/// Port of `par_time(void)` from `Src/parse.c:1787`. `time PIPE`
2718/// emits WCB_TIMED(WC_TIMED_PIPE) + the sublist code; bare `time`
2719/// with no pipeline emits WCB_TIMED(WC_TIMED_EMPTY).
2720pub fn par_time_wordcode() {
2721    // c:1791 — `zshlex();`
2722    zshlex();
2723    // c:1793-1794 — `p = ecadd(0); ecadd(0);`
2724    let p = ecadd(0);
2725    ecadd(0);
2726    // c:1795 — `if ((f = par_sublist2(&c)) < 0)`
2727    let mut c = 0i32;
2728    let f = par_sublist2(&mut c);
2729    match f {
2730        Some(flags) => {
2731            // c:1799 — `ecbuf[p] = WCB_TIMED(WC_TIMED_PIPE);`
2732            ECBUF.with_borrow_mut(|b| {
2733                if p < b.len() {
2734                    b[p] = WCB_TIMED(WC_TIMED_PIPE);
2735                }
2736            });
2737            // c:1800 — `set_sublist_code(p+1, WC_SUBLIST_END, f,
2738            // ecused-2-p, c);`
2739            let used = ECUSED.get() as usize;
2740            let skip = used.saturating_sub(2 + p) as i32;
2741            set_sublist_code(p + 1, WC_SUBLIST_END as i32, flags, skip, c != 0);
2742        }
2743        None => {
2744            // c:1796-1798 — `ecused--; ecbuf[p] = WCB_TIMED(WC_TIMED_EMPTY);`
2745            ECUSED.set((ECUSED.get() - 1).max(0));
2746            ECBUF.with_borrow_mut(|b| {
2747                if p < b.len() {
2748                    b[p] = WCB_TIMED(WC_TIMED_EMPTY);
2749                }
2750            });
2751        }
2752    }
2753}
2754
2755/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Wraps
2756/// `par_cond` (the cond-expression emitter at parse.c:2409) with
2757/// the `[[ ... ]]` framing: incond/incmdpos toggles + DOUTBRACK
2758/// expectation.
2759pub fn par_cond_wordcode() {
2760    let oecused = ECUSED.get();
2761    // c:1814 — `incond = 1;`
2762    set_incond(1);
2763    // c:1815 — `incmdpos = 0;`
2764    set_incmdpos(false);
2765    // c:1816 — `zshlex();` past `[[`.
2766    zshlex();
2767    // c:1817 — `par_cond();` — call the no-skip cond-expression
2768    // entry that EMITS WORDCODE (par_cond_top → par_cond_1 →
2769    // par_cond_2 → par_cond_double/triple/multi). NOT the AST
2770    // `par_cond` at parse.rs:4644 which is a misnamed `par_dinbrack`
2771    // that skips `[[` AND `]]` and returns a ZshCommand AST node
2772    // instead of pushing WC_COND opcodes. NOT `parse_cond_expr`
2773    // either — that's also AST-only, returning ZshCond. With
2774    // `parse_cond_expr` here, every `[[ ... ]]` test produced ZERO
2775    // wordcode payload and parity dropped ~148 words on /etc/zshrc.
2776    let _ = par_cond_top();
2777    // c:1818-1819 — `if (tok != DOUTBRACK) YYERRORV(oecused);`
2778    if tok() != DOUTBRACK {
2779        let _ = oecused;
2780        error("missing ]]");
2781        return;
2782    }
2783    // c:1820 — `incond = 0;`
2784    set_incond(0);
2785    // c:1821 — `incmdpos = 1;`
2786    set_incmdpos(true);
2787    // c:1822 — `zshlex();` past `]]`.
2788    zshlex();
2789}
2790
2791/// Port of the `case DINPAR:` arm of `par_cmd` from
2792/// `Src/parse.c:1031-1034`:
2793/// ```c
2794/// ecadd(WCB_ARITH());
2795/// ecstr(tokstr);
2796/// zshlex();
2797/// ```
2798/// `(( EXPR ))` arithmetic at command position — emits the ARITH
2799/// opcode followed by the interned EXPR string, then advances past
2800/// the DINPAR token (which already carries the body text).
2801pub fn par_arith_wordcode() {
2802    // c:1032 — `ecadd(WCB_ARITH());`
2803    ecadd(WCB_ARITH());
2804    // c:1033 — `ecstr(tokstr);` — interns the expression string and
2805    // appends its strcode index to the wordcode buffer.
2806    let expr = tokstr().unwrap_or_default();
2807    ecstr(&expr);
2808    // c:1034 — `zshlex();`
2809    zshlex();
2810}
2811
2812/// Port of `par_simple(int *cmplx, int nr)` from
2813/// `Src/parse.c:1836-2227`. Emits WC_SIMPLE + word count +
2814/// interned string offsets. Returns `0` when nothing was emitted,
2815/// otherwise `1 + (number of code words consumed by redirections)`.
2816/// The full C body handles assignments (ENVSTRING/ENVARRAY),
2817/// inline `{var}>file` brace-FDs, prefix modifiers (NOCORRECT etc),
2818/// and `name() { body }` funcdef detection — those paths are
2819/// progressively wired into the AST parser; this wordcode-emitter
2820/// covers the simple `cmd args...` case + interleaved redirs.
2821pub fn par_simple_wordcode_impl(mut nr: i32) -> i32 {
2822    // c:1836-1842 — `int oecused = ecused, isnull = 1, r, argc = 0,
2823    // p, isfunc = 0, sr = 0; int c = *cmplx, nrediradd, assignments
2824    // = 0, ppost = 0, is_typeset = 0; ...`
2825    let _oecused = ECUSED.get() as usize;
2826    let mut isnull = true;
2827    let mut argc: u32 = 0;
2828    let mut sr: i32 = 0;
2829    let mut assignments = false;
2830
2831    // c:1843 — `r = ecused;` — saves the offset where redirs get
2832    // INSERTED (via ecispace). Each redir shifts later words DOWN
2833    // by ncodes, so the SIMPLE placeholder at `p` (set later) must
2834    // also bump by ncodes when a redir lands. C uses `&r` to pass
2835    // the cursor by reference; Rust uses a mutable local + manual
2836    // bumps after each par_redir_wordcode call.
2837    let mut r: usize = ECUSED.get() as usize;
2838
2839    // c:1844-1919 — pre-cmd loop: NOCORRECT, ENVSTRING (scalar
2840    // assigns), ENVARRAY (array assigns), IS_REDIROP. Loops until
2841    // a non-assignment token is seen.
2842    loop {
2843        match tok() {
2844            NOCORRECT => {
2845                // c:1846-1849
2846                cmplx_set(true);
2847                set_nocorrect(1);
2848            }
2849            ENVSTRING => {
2850                // c:1848-1898 — scalar assignment `name=value` or
2851                // `name+=value`. Emits WCB_ASSIGN(SCALAR, NEW|INC, 0)
2852                // followed by ecstr(name), ecstr(value).
2853                let raw = tokstr().unwrap_or_default();
2854                // Find first of Inbrack / '=' / '+' (the C scan at
2855                // c:1851-1853). Inside Inbrack we skipparens — i.e.
2856                // skip `name[...]` index, then continue.
2857                let bytes: Vec<char> = raw.chars().collect();
2858                let mut idx = 0usize;
2859                while idx < bytes.len() {
2860                    let ch = bytes[idx];
2861                    if ch == '\u{91}' /* Inbrack */ {
2862                        // Skip matched Inbrack…Outbrack pair.
2863                        let mut depth = 1;
2864                        idx += 1;
2865                        while idx < bytes.len() && depth > 0 {
2866                            match bytes[idx] {
2867                                '\u{91}' => depth += 1,
2868                                '\u{92}' => depth -= 1,
2869                                _ => {}
2870                            }
2871                            idx += 1;
2872                        }
2873                        continue;
2874                    }
2875                    // c:1851-1853 — `*ptr != '=' && *ptr != '+'` —
2876                    // C scan stops on either literal `=` / `+` OR the
2877                    // Equals marker (`\u{8d}`) the lexer emits for
2878                    // unquoted `=`. Without the marker check, the
2879                    // ENVSTRING split scans past the `=` (since it's
2880                    // already tokenised) and the whole `name=value`
2881                    // ends up in one ecstr.
2882                    if ch == '=' || ch == '+' || ch == '\u{8d}' /* Equals */ {
2883                        break;
2884                    }
2885                    idx += 1;
2886                }
2887                let is_inc = idx < bytes.len() && bytes[idx] == '+';
2888                // c:1855-1860 — emit WCB_ASSIGN with WC_ASSIGN_INC
2889                // (+=) or WC_ASSIGN_NEW (=). The third arg (count)
2890                // is 0 for scalar.
2891                let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
2892                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, flag, 0));
2893                // Split into name and str at the `=` (after the
2894                // optional `+`).
2895                if is_inc {
2896                    idx += 1;
2897                }
2898                let name: String = bytes[..idx].iter().collect();
2899                // Skip past the `=` separator (literal or Equals
2900                // marker `\u{8d}`) so the value starts at the byte
2901                // after it. Mirrors C `*ptr = '\0'; str = ptr + 1;`
2902                // (parse.c:1864).
2903                let str_off = if idx < bytes.len()
2904                    && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
2905                {
2906                    idx + 1
2907                } else {
2908                    idx
2909                };
2910                let value: String = bytes[str_off..].iter().collect();
2911                // c:1866-1877 — scan value for `=(`/`<(`/`>(` (proc
2912                // subst); if found, bump cmplx (suppresses Z_SIMPLE).
2913                let vbytes: Vec<char> = value.chars().collect();
2914                for (i, ch) in vbytes.iter().enumerate() {
2915                    if i + 1 < vbytes.len() && vbytes[i + 1] == '\u{88}' /* Inpar */ {
2916                        if *ch == '\u{8d}' /* Equals */
2917                            || *ch == '\u{94}' /* Inang */
2918                            || *ch == '\u{96}' /* OutangProc */
2919                        {
2920                            cmplx_set(true);
2921                            break;
2922                        }
2923                    }
2924                }
2925                ecstr(&name);
2926                ecstr(&value);
2927                isnull = false;
2928                assignments = true;
2929            }
2930            ENVARRAY => {
2931                // c:1898-1922 — array assignment `name=( ... )`.
2932                // Implementation note: emits placeholder, parses
2933                // wordlist, patches WCB_ASSIGN(ARRAY, NEW|INC, n)
2934                // header with the actual element count. zshrs's
2935                // par_nl_wordlist isn't wired into the wordcode
2936                // emitter yet; fall back to a minimal placeholder
2937                // so the WCB_ASSIGN slot exists at the expected
2938                // position. TODO: full port of c:1898-1922.
2939                cmplx_set(true);
2940                let p = ecadd(0);
2941                set_incmdpos(false);
2942                let raw = tokstr().unwrap_or_default();
2943                let is_inc = raw.ends_with('+');
2944                let name = if is_inc { &raw[..raw.len() - 1] } else { raw.as_str() };
2945                let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
2946                ecstr(name);
2947                cmdpush(CS_ARRAY as u8);
2948                zshlex();
2949                // Count words until OUTPAR_TOK.
2950                let mut n = 0u32;
2951                while tok() == STRING_LEX {
2952                    let w = tokstr().unwrap_or_default();
2953                    ecstr(&w);
2954                    n += 1;
2955                    zshlex();
2956                    while tok() == NEWLIN {
2957                        zshlex();
2958                    }
2959                }
2960                ECBUF.with_borrow_mut(|b| {
2961                    if p < b.len() {
2962                        b[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, n);
2963                    }
2964                });
2965                cmdpop();
2966                if tok() != OUTPAR_TOK {
2967                    error("expected `)' after array assignment");
2968                    return 0;
2969                }
2970                set_incmdpos(true);
2971                isnull = false;
2972                assignments = true;
2973            }
2974            t if IS_REDIROP(t) => {
2975                // c:1900-1904 — `*cmplx = c = 1; nr += par_redir(&r,
2976                // NULL); continue;`. The wordcode-emitting redir is
2977                // distinct from the AST par_redir — it INSERTS
2978                // WCB_REDIR + fd + ecstrcode(name) at offset `r`
2979                // via ecispace, shifting any later words down.
2980                cmplx_set(true);
2981                let added = par_redir_wordcode(&mut r);
2982                if added == 0 {
2983                    break;
2984                }
2985                nr += added;
2986                continue;
2987            }
2988            _ => break,
2989        }
2990        zshlex(); // c:1907 `zshlex();`
2991    }
2992
2993    // c:1920-1921 — `if (tok == AMPER || tok == AMPERBANG) YYERROR;`
2994    if tok() == AMPER || tok() == AMPERBANG {
2995        error("par_simple: unexpected &");
2996        return 0;
2997    }
2998
2999    // c:1923 — `p = ecadd(WCB_SIMPLE(0));`
3000    let mut p = ecadd(WCB_SIMPLE(0));
3001
3002    // c:1924-2105 — main words loop. is_typeset tracks whether the
3003    // outer command was `typeset`/`export`/etc. so the final
3004    // placeholder gets WCB_TYPESET instead of WCB_SIMPLE.
3005    let mut is_typeset = false;
3006    let mut postassigns: u32 = 0;
3007    let mut ppost: usize = 0;
3008    loop {
3009        match tok() {
3010            STRING_LEX | TYPESET => {
3011                // c:1928-1929 — `*cmplx = 1; incmdpos = 0;`
3012                cmplx_set(true);
3013                set_incmdpos(false);
3014                // c:1931-1932 — TYPESET → intypeset = is_typeset = 1.
3015                if tok() == TYPESET {
3016                    set_intypeset(true);
3017                    is_typeset = true;
3018                }
3019                let s = tokstr().unwrap_or_default();
3020                ecstr(&s);
3021                argc += 1;
3022                isnull = false;
3023                zshlex();
3024            }
3025            ENVSTRING => {
3026                // c:2005-2026 — mid-cmd ENVSTRING (under intypeset
3027                // context). Emits WCB_ASSIGN(SCALAR, NEW, 0) then
3028                // ecstr(name) + ecstr(value), tracking the first
3029                // postassign offset in `ppost` (which the trailing
3030                // WCB_TYPESET header points to).
3031                if postassigns == 0 {
3032                    ppost = ecadd(0);
3033                }
3034                postassigns += 1;
3035                let raw = tokstr().unwrap_or_default();
3036                let bytes: Vec<char> = raw.chars().collect();
3037                let mut idx = 0usize;
3038                while idx < bytes.len() {
3039                    let ch = bytes[idx];
3040                    if ch == '\u{91}' /* Inbrack */ {
3041                        let mut depth = 1;
3042                        idx += 1;
3043                        while idx < bytes.len() && depth > 0 {
3044                            match bytes[idx] {
3045                                '\u{91}' => depth += 1,
3046                                '\u{92}' => depth -= 1,
3047                                _ => {}
3048                            }
3049                            idx += 1;
3050                        }
3051                        continue;
3052                    }
3053                    if ch == '=' || ch == '+' || ch == '\u{8d}' /* Equals */ {
3054                        break;
3055                    }
3056                    idx += 1;
3057                }
3058                let name: String = bytes[..idx].iter().collect();
3059                let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}') {
3060                    idx + 1
3061                } else {
3062                    idx
3063                };
3064                let value: String = bytes[str_off..].iter().collect();
3065                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_NEW, 0));
3066                ecstr(&name);
3067                ecstr(&value);
3068                isnull = false;
3069                zshlex();
3070            }
3071            ENVARRAY => {
3072                // c:2027-2050 — mid-cmd ENVARRAY (typeset N=(…) form).
3073                // C tracks postassigns + ppost the same as ENVSTRING,
3074                // but the inner emit is WCB_ASSIGN(ARRAY, NEW, n)
3075                // with `n` patched in after par_nl_wordlist consumes
3076                // the elements. C also toggles intypeset=0 around the
3077                // wordlist so the lexer doesn't try to re-emit
3078                // assignments inside the array.
3079                cmplx_set(true);
3080                if postassigns == 0 {
3081                    ppost = ecadd(0);
3082                }
3083                postassigns += 1;
3084                let parr = ecadd(0);
3085                let raw = tokstr().unwrap_or_default();
3086                let is_inc = raw.ends_with('+');
3087                let name = if is_inc { &raw[..raw.len() - 1] } else { raw.as_str() };
3088                let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
3089                ecstr(name);
3090                cmdpush(CS_ARRAY as u8);
3091                set_intypeset(false);
3092                zshlex();
3093                let mut nelem = 0u32;
3094                while tok() == STRING_LEX {
3095                    ecstr(&tokstr().unwrap_or_default());
3096                    nelem += 1;
3097                    zshlex();
3098                    while tok() == NEWLIN {
3099                        zshlex();
3100                    }
3101                }
3102                ECBUF.with_borrow_mut(|b| {
3103                    if parr < b.len() {
3104                        b[parr] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, nelem);
3105                    }
3106                });
3107                cmdpop();
3108                set_intypeset(true);
3109                if tok() != OUTPAR_TOK {
3110                    error("expected `)' after array assignment");
3111                    return 0;
3112                }
3113                isnull = false;
3114                zshlex();
3115            }
3116            t if IS_REDIROP(t) => {
3117                // c:1999-2010 — `nrediradd = par_redir(&r, NULL);
3118                // p += nrediradd; if (ppost) ppost += nrediradd;
3119                // sr += nrediradd;`
3120                cmplx_set(true);
3121                let added = par_redir_wordcode(&mut r);
3122                if added == 0 {
3123                    break;
3124                }
3125                p += added as usize;
3126                if ppost != 0 {
3127                    ppost += added as usize;
3128                }
3129                sr += added;
3130            }
3131            INOUTPAR => {
3132                // c:2051-2168 — `name() { body }` funcdef detection.
3133                // C rewrites the SIMPLE placeholder at `p` into a
3134                // FUNCDEF header structure with multiple words:
3135                //   p:        WCB_FUNCDEF(total_offset)
3136                //   p+1:      argc (name count)
3137                //   p+2..N:   the names already ecstr'd above
3138                //   N+1:      0 (placeholder)
3139                //   N+2:      0 (placeholder)
3140                //   N+3:      0 (placeholder)
3141                //   N+4:      0 (placeholder)
3142                //   N+5:      WCB_END()
3143                //   ...body wordcode...
3144                //   ecbuf[p+argc+2] = so - oecssub; (string area)
3145                //   ecbuf[p+argc+3] = ecsoffs - so;
3146                //   ecbuf[p+argc+4] = ecnpats;
3147                //   ecbuf[p+argc+5] = 0;
3148                //
3149                // This Rust port handles the common `name() { … }`
3150                // case (single name + brace body); anonymous funcdef
3151                // and short-body forms are stubbed for now.
3152                if !isset(MULTIFUNCDEF) && argc > 1 {
3153                    error("par_simple: too many function names for funcdef");
3154                    return 0;
3155                }
3156                if assignments || postassigns > 0 {
3157                    error("par_simple: assignments before funcdef");
3158                    return 0;
3159                }
3160                cmplx_set(true);
3161                set_incmdpos(true);
3162                cmdpush(CS_FUNCDEF as u8);
3163                zshlex();
3164                while tok() == SEPER {
3165                    zshlex();
3166                }
3167                // c:2079 — `ecispace(p + 1, 1); ecbuf[p+1] = argc;
3168                // ecadd(0)*4`. Insert the argc word at p+1, then
3169                // append 4 placeholder words.
3170                ecispace(p + 1, 1);
3171                ECBUF.with_borrow_mut(|b| {
3172                    if p + 1 < b.len() {
3173                        b[p + 1] = argc;
3174                    }
3175                });
3176                ecadd(0);
3177                ecadd(0);
3178                ecadd(0);
3179                ecadd(0);
3180                let so = ECSOFFS.get();
3181                let onp = ECNPATS.with(|c| c.get());
3182                ECNPATS.with(|c| c.set(0));
3183                ECNFUNC.set(ECNFUNC.get() + 1);
3184                let oecssub = ECSSUB.get();
3185                ECSSUB.set(so);
3186                if tok() == INBRACE_TOK {
3187                    zshlex();
3188                    par_list_wordcode();
3189                    if tok() != OUTBRACE_TOK {
3190                        cmdpop();
3191                        error("par_simple: funcdef expected `}`");
3192                        return 0;
3193                    }
3194                    if argc == 0 {
3195                        // Anonymous funcdef.
3196                        set_incmdpos(false);
3197                    }
3198                    zshlex();
3199                } else {
3200                    // Short-body or non-brace form not yet ported.
3201                    cmdpop();
3202                    error("par_simple: funcdef expected `{`");
3203                    return 0;
3204                }
3205                cmdpop();
3206                ecadd(WCB_END());
3207                let used = ECUSED.get() as usize;
3208                let header_off = used.saturating_sub(1 + p) as wordcode;
3209                let p_argc = (p + (argc as usize) + 2) as usize;
3210                let cur_so = ECSOFFS.get();
3211                let np_now = ECNPATS.with(|c| c.get());
3212                ECBUF.with_borrow_mut(|b| {
3213                    if p_argc + 3 < b.len() {
3214                        b[p_argc] = (so - oecssub) as wordcode;
3215                        b[p_argc + 1] = (cur_so - so) as wordcode;
3216                        b[p_argc + 2] = np_now as wordcode;
3217                        b[p_argc + 3] = 0;
3218                    }
3219                    if p < b.len() {
3220                        b[p] = WCB_FUNCDEF(header_off);
3221                    }
3222                });
3223                ECNPATS.with(|c| c.set(onp));
3224                ECSSUB.set(oecssub);
3225                ECNFUNC.set(ECNFUNC.get() + 1);
3226                isnull = false;
3227                // Anonymous funcdef may have arguments — not ported
3228                // yet. Break out of the words loop; outer parser
3229                // handles whatever follows.
3230                break;
3231            }
3232            _ => break,
3233        }
3234    }
3235
3236    // c:2173-2176 — `if (isnull && !(sr + nr)) { ecused = oecused;
3237    // return 0; }` — undo everything including pre-cmd assignments
3238    // if no actual command word emerged.
3239    if isnull && sr + nr == 0 && !assignments {
3240        ECUSED.set(p as i32);
3241        return 0;
3242    }
3243    // c:2186-2187 — `incmdpos = 1; intypeset = 0;` — reset before
3244    // the placeholder patch so the next-token lex doesn't carry
3245    // typeset/incond state.
3246    set_incmdpos(true);
3247    set_intypeset(false);
3248    // c:2189-2199 — `if (!isfunc) { if (is_typeset) ecbuf[p] =
3249    // WCB_TYPESET(argc); else ecbuf[p] = WCB_SIMPLE(argc); }`.
3250    // The WCB_TYPESET header is followed by either a postassigns
3251    // count at `ppost` (when assignments were emitted) or a
3252    // trailing 0 word.
3253    let header = if is_typeset {
3254        if postassigns > 0 {
3255            ECBUF.with_borrow_mut(|b| {
3256                if ppost < b.len() {
3257                    b[ppost] = postassigns;
3258                }
3259            });
3260        } else {
3261            ecadd(0);
3262        }
3263        WCB_TYPESET(argc)
3264    } else {
3265        WCB_SIMPLE(argc)
3266    };
3267    ECBUF.with_borrow_mut(|b| {
3268        if p < b.len() {
3269            b[p] = header;
3270        }
3271    });
3272    1 + sr
3273}
3274
3275/// Wrapper for the par_cmd dispatch sites that don't pass `nr`
3276/// (matches C's call shape at parse.c:1054 `par_simple(cmplx, nr)`).
3277pub fn par_simple_wordcode() {
3278    par_simple_wordcode_impl(0);
3279}
3280
3281/// Port of `par_redir(int *rp, char *idstring)` from
3282/// `Src/parse.c:2229-2345` — the wordcode-emitting variant that
3283/// pushes WCB_REDIR + fd + ecstrcode(name) into ECBUF. Distinct
3284/// from the AST `par_redir` (parse.rs:3771) which builds a
3285/// ZshRedir struct for the AST executor pipeline.
3286///
3287/// Returns the number of wordcodes added (3 for the basic shape,
3288/// 4 with idstring, 5 for HEREDOC[DASH] which carries the
3289/// terminator strings inline). Returns 0 on parse error.
3290fn par_redir_wordcode(rp: &mut usize) -> i32 {
3291    let cur = tok();
3292    let rtype: i32 = match cur {
3293        OUTANG_TOK => REDIR_WRITE,
3294        OUTANGBANG => REDIR_WRITENOW,
3295        DOUTANG => REDIR_APP,
3296        DOUTANGBANG => REDIR_APPNOW,
3297        INANG_TOK => REDIR_READ,
3298        INOUTANG => REDIR_READWRITE,
3299        DINANG => REDIR_HEREDOC,
3300        DINANGDASH => REDIR_HEREDOCDASH,
3301        TRINANG => REDIR_HERESTR,
3302        INANGAMP => REDIR_MERGEIN,
3303        OUTANGAMP => REDIR_MERGEOUT,
3304        AMPOUTANG => REDIR_ERRWRITE,
3305        OUTANGAMPBANG => REDIR_ERRWRITENOW,
3306        DOUTANGAMP => REDIR_ERRAPP,
3307        DOUTANGAMPBANG => REDIR_ERRAPPNOW,
3308        _ => return 0,
3309    };
3310    let fd1 = if tokfd() >= 0 {
3311        tokfd()
3312    } else if matches!(
3313        rtype,
3314        REDIR_READ
3315            | REDIR_READWRITE
3316            | REDIR_MERGEIN
3317            | REDIR_HEREDOC
3318            | REDIR_HEREDOCDASH
3319            | REDIR_HERESTR
3320    ) {
3321        0
3322    } else {
3323        1
3324    };
3325    // c:2234-2245 — save+force incmdpos=0 / nocorrect=1 (when not
3326    // INANG/INOUTANG) around the zshlex that consumes the target
3327    // word.
3328    let oldcmdpos = incmdpos();
3329    set_incmdpos(false);
3330    let oldnc = nocorrect();
3331    if cur != INANG_TOK && cur != INOUTANG {
3332        set_nocorrect(1);
3333    }
3334    zshlex();
3335    if tok() != STRING_LEX && tok() != ENVSTRING {
3336        set_incmdpos(oldcmdpos);
3337        set_nocorrect(oldnc);
3338        error("expected word after redirection");
3339        return 0;
3340    }
3341    let name = tokstr().unwrap_or_default();
3342    set_incmdpos(oldcmdpos);
3343    set_nocorrect(oldnc);
3344
3345    // c:2249-2300 — HEREDOC / HEREDOCDASH carry extra words (here
3346    // string + terminator + munged terminator). The C source
3347    // emits 5 words and registers a struct heredocs entry that
3348    // setheredoc patches later. Stub for now: emit the basic
3349    // 3-word shape so wordcode parity at least sees WC_REDIR.
3350    // TODO: full heredoc registration + 5-word emission.
3351    let _ = (REDIR_FROM_HEREDOC_MASK, REDIR_VARID_MASK);
3352
3353    // c:2302-2321 — proc-subst rewriting: detect `>(`/`<(` in the
3354    // target word's first 2 chars and rewrite REDIR_WRITE/READ to
3355    // REDIR_OUTPIPE/INPIPE. The detection compares the FIRST char
3356    // of the unmetafied tokstr against the marker bytes.
3357    let mut rtype = rtype;
3358    let nbytes: Vec<char> = name.chars().collect();
3359    let two = |i: usize| -> Option<(char, char)> {
3360        if i + 1 < nbytes.len() {
3361            Some((nbytes[i], nbytes[i + 1]))
3362        } else {
3363            None
3364        }
3365    };
3366    if let Some((c0, c1)) = two(0) {
3367        match rtype {
3368            x if x == REDIR_WRITE || x == REDIR_WRITENOW => {
3369                if c0 == '\u{96}' /* OutangProc */ && c1 == '\u{88}' /* Inpar */ {
3370                    rtype = REDIR_OUTPIPE;
3371                } else if c0 == '\u{94}' /* Inang */ && c1 == '\u{88}' {
3372                    error("invalid redirection: < before >");
3373                    return 0;
3374                }
3375            }
3376            x if x == REDIR_READ => {
3377                if c0 == '\u{94}' && c1 == '\u{88}' {
3378                    rtype = REDIR_INPIPE;
3379                } else if c0 == '\u{96}' && c1 == '\u{88}' {
3380                    error("invalid redirection: > before <");
3381                    return 0;
3382                }
3383            }
3384            x if x == REDIR_READWRITE => {
3385                if c0 == '\u{94}' && c1 == '\u{88}' {
3386                    rtype = REDIR_INPIPE;
3387                } else if c0 == '\u{96}' && c1 == '\u{88}' {
3388                    rtype = REDIR_OUTPIPE;
3389                }
3390            }
3391            _ => {}
3392        }
3393    }
3394    zshlex();
3395
3396    // c:2326-2333 — emit WCB_REDIR + fd + ecstrcode(name) at the
3397    // CALLER's `r` cursor (NOT at ecused). ecispace shifts later
3398    // words DOWN to make space; the caller bumps its `p` (SIMPLE
3399    // placeholder offset) to compensate. 3-word basic shape;
3400    // idstring (`{var}>file`) form not yet wired here.
3401    let ncodes: usize = 3;
3402    let r = *rp;
3403    ecispace(r, ncodes);
3404    let coded = ecstrcode(&name);
3405    ECBUF.with_borrow_mut(|b| {
3406        if r + 2 < b.len() {
3407            b[r] = WCB_REDIR(rtype as wordcode);
3408            b[r + 1] = fd1 as wordcode;
3409            b[r + 2] = coded;
3410        }
3411    });
3412    *rp += ncodes; // c:2280 `*rp = r + ncodes;`
3413    ncodes as i32
3414}
3415
3416/// Parse a program (list of lists)
3417/// Parse a complete program (top-level entry). Calls
3418/// parse_program_until with no end-token sentinel. Direct port of
3419/// zsh/Src/parse.c:614-720 `parse_event` / `par_list` /
3420/// `par_event` flow. C distinguishes COND_EVENT (single command
3421/// for here-string) from full event parse; zshrs's parse_program
3422/// is the full-event entry.
3423fn parse_program() -> ZshProgram {
3424    parse_program_until(None)
3425}
3426
3427/// Parse a program until we hit an end token
3428/// Parse a program until one of `end_tokens` is seen (or EOF).
3429/// Drives par_list in a loop. C equivalent: the body of par_event
3430/// (parse.c:635-695) iterating par_list against the lexer.
3431fn parse_program_until(end_tokens: Option<&[lextok]>) -> ZshProgram {
3432    let mut lists = Vec::new();
3433
3434    loop {
3435        if check_limit() {
3436            error("parser exceeded global iteration limit");
3437            break;
3438        }
3439
3440        // Skip separators
3441        while tok() == SEPER || tok() == NEWLIN {
3442            if check_limit() {
3443                error("parser exceeded global iteration limit");
3444                return ZshProgram { lists };
3445            }
3446            zshlex();
3447        }
3448
3449        if tok() == ENDINPUT || tok() == LEXERR {
3450            break;
3451        }
3452
3453        // Check for end tokens
3454        if let Some(end_toks) = end_tokens {
3455            if end_toks.contains(&tok()) {
3456                break;
3457            }
3458        }
3459
3460        // Also stop at these tokens when not explicitly looking for them
3461        // Note: Else/Elif/Then are NOT here - they're handled by par_if
3462        // to allow nested if statements inside case arms, loops, etc.
3463        match tok() {
3464            OUTBRACE_TOK | DSEMI | SEMIAMP | SEMIBAR | DONE | FI | ESAC | ZEND => break,
3465            _ => {}
3466        }
3467
3468        match par_list() {
3469            Some(list) => {
3470                let detected = simple_name_with_inoutpar(&list);
3471                lists.push(list);
3472                // Synthesize a FuncDef for the `name() { body }` shape
3473                // at parse time so body_source is captured while the
3474                // lexer still has the input. The lexer port emits
3475                // `name(` as a single Word ending in `<Inpar><Outpar>`,
3476                // so the Simple list is followed by an Inbrace once
3477                // separators are skipped. For `name() cmd args` the
3478                // body has already been swallowed into the same
3479                // Simple's words tail — synthesize directly from there.
3480                if let Some((names, body_argv)) = detected {
3481                    if !body_argv.is_empty() {
3482                        // One-line body already in the Simple. Build
3483                        // a Simple from body_argv as the function body.
3484                        lists.pop();
3485                        let body_simple = ZshCommand::Simple(ZshSimple {
3486                            assigns: Vec::new(),
3487                            words: body_argv,
3488                            redirs: Vec::new(),
3489                        });
3490                        let body_list = ZshList {
3491                            sublist: ZshSublist {
3492                                pipe: ZshPipe {
3493                                    cmd: body_simple,
3494                                    next: None,
3495                                    lineno: lineno(),
3496                                    merge_stderr: false,
3497                                },
3498                                next: None,
3499                                flags: SublistFlags::default(),
3500                            },
3501                            flags: ListFlags::default(),
3502                        };
3503                        let funcdef = ZshCommand::FuncDef(ZshFuncDef {
3504                            names,
3505                            body: Box::new(ZshProgram {
3506                                lists: vec![body_list],
3507                            }),
3508                            tracing: false,
3509                            auto_call_args: None,
3510                            body_source: None,
3511                        });
3512                        let synthetic = ZshList {
3513                            sublist: ZshSublist {
3514                                pipe: ZshPipe {
3515                                    cmd: funcdef,
3516                                    next: None,
3517                                    lineno: lineno(),
3518                                    merge_stderr: false,
3519                                },
3520                                next: None,
3521                                flags: SublistFlags::default(),
3522                            },
3523                            flags: ListFlags::default(),
3524                        };
3525                        lists.push(synthetic);
3526                        continue;
3527                    }
3528                    // Else: words.len() == 1 (only the trailing `name()`
3529                    // word), brace body follows. `names` may carry
3530                    // multiple identifiers from the `fna fnb fnc()`
3531                    // shorthand — all share the same brace body per
3532                    // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
3533                    // Skip separators on the real lexer; safe because
3534                    // parse_program's next iteration would also skip them.
3535                    while tok() == SEPER || tok() == NEWLIN {
3536                        zshlex();
3537                    }
3538                    if tok() == INBRACE_TOK {
3539                        // Capture body_start BEFORE the lexer
3540                        // advances past the first body token. The
3541                        // outer zshlex() consumed `{`; lexer.pos
3542                        // is now right after `{`. The next
3543                        // `zshlex()` would advance past `echo`,
3544                        // making body_start land mid-body and
3545                        // lose the first word — `typeset -f f`
3546                        // printed `a; echo b` instead of
3547                        // `echo a; echo b` for `f() { echo a;
3548                        // echo b }`.
3549                        let body_start = pos();
3550                        zshlex();
3551                        let body = parse_program();
3552                        let body_end = if tok() == OUTBRACE_TOK {
3553                            pos().saturating_sub(1)
3554                        } else {
3555                            pos()
3556                        };
3557                        let body_source = input_slice(body_start, body_end)
3558                            .map(|s| s.trim().to_string())
3559                            .filter(|s| !s.is_empty());
3560                        if tok() == OUTBRACE_TOK {
3561                            zshlex();
3562                        }
3563                        // Replace the Simple list with a FuncDef list.
3564                        lists.pop();
3565                        let funcdef = ZshCommand::FuncDef(ZshFuncDef {
3566                            names,
3567                            body: Box::new(body),
3568                            tracing: false,
3569                            auto_call_args: None,
3570                            body_source,
3571                        });
3572                        let synthetic = ZshList {
3573                            sublist: ZshSublist {
3574                                pipe: ZshPipe {
3575                                    cmd: funcdef,
3576                                    next: None,
3577                                    lineno: lineno(),
3578                                    merge_stderr: false,
3579                                },
3580                                next: None,
3581                                flags: SublistFlags::default(),
3582                            },
3583                            flags: ListFlags::default(),
3584                        };
3585                        lists.push(synthetic);
3586                    } else if !matches!(tok(), ENDINPUT | OUTBRACE_TOK | SEPER | NEWLIN) {
3587                        // No-brace one-line body: `foo() echo hello`.
3588                        // Parse a single command for the body.
3589                        let body_cmd = par_cmd();
3590                        if let Some(cmd) = body_cmd {
3591                            let body_list = ZshList {
3592                                sublist: ZshSublist {
3593                                    pipe: ZshPipe {
3594                                        cmd,
3595                                        next: None,
3596                                        lineno: lineno(),
3597                                        merge_stderr: false,
3598                                    },
3599                                    next: None,
3600                                    flags: SublistFlags::default(),
3601                                },
3602                                flags: ListFlags::default(),
3603                            };
3604                            lists.pop();
3605                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
3606                                names: names.clone(),
3607                                body: Box::new(ZshProgram {
3608                                    lists: vec![body_list],
3609                                }),
3610                                tracing: false,
3611                                auto_call_args: None,
3612                                body_source: None,
3613                            });
3614                            let synthetic = ZshList {
3615                                sublist: ZshSublist {
3616                                    pipe: ZshPipe {
3617                                        cmd: funcdef,
3618                                        next: None,
3619                                        lineno: lineno(),
3620                                        merge_stderr: false,
3621                                    },
3622                                    next: None,
3623                                    flags: SublistFlags::default(),
3624                                },
3625                                flags: ListFlags::default(),
3626                            };
3627                            lists.push(synthetic);
3628                        }
3629                    }
3630                }
3631            }
3632            None => break,
3633        }
3634    }
3635
3636    ZshProgram { lists }
3637}
3638
3639/// Parse a list (sublist with optional & or ;).
3640///
3641/// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
3642/// par_list1 wrapper at parse.c:807-817).
3643///
3644/// **Structural divergence**: zsh's parse.c emits flat wordcode
3645/// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
3646/// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
3647/// builds an AST node `ZshList { sublist, flags }` instead. The
3648/// async/sync/disown discrimination at parse.c:785-790 maps to
3649/// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
3650/// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
3651/// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
3652/// representation. This divergence is repository-wide: every
3653/// `par_*` function emits wordcode in C, every `parse_*` builds
3654/// AST in Rust. The compile_zsh module then traverses the AST to
3655/// emit fusevm bytecode, which serves the same role as zsh's
3656/// wordcode but with a different opcode set and execution model.
3657fn par_list() -> Option<ZshList> {
3658    let sublist = par_sublist()?;
3659
3660    let flags = match tok() {
3661        AMPER => {
3662            zshlex();
3663            ListFlags {
3664                async_: true,
3665                disown: false,
3666            }
3667        }
3668        AMPERBANG => {
3669            zshlex();
3670            ListFlags {
3671                async_: true,
3672                disown: true,
3673            }
3674        }
3675        SEPER | SEMI | NEWLIN => {
3676            zshlex();
3677            ListFlags::default()
3678        }
3679        _ => ListFlags::default(),
3680    };
3681
3682    Some(ZshList { sublist, flags })
3683}
3684
3685/// Parse a sublist (pipelines connected by && or ||).
3686///
3687/// Direct port of zsh/Src/parse.c:825 `par_sublist` and
3688/// par_sublist2 at parse.c:869-892. par_sublist handles the
3689/// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
3690/// handles the leading `!` negation and `coproc` keyword.
3691///
3692/// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
3693/// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
3694/// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
3695fn par_sublist() -> Option<ZshSublist> {
3696    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
3697    if check_recursion() {
3698        error("par_sublist: max recursion depth exceeded");
3699        PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3700        return None;
3701    }
3702
3703    let mut flags = SublistFlags::default();
3704
3705    // Handle coproc and !
3706    if tok() == COPROC {
3707        flags.coproc = true;
3708        zshlex();
3709    } else if tok() == BANG_TOK {
3710        flags.not = true;
3711        zshlex();
3712    }
3713
3714    let pipe = match par_pline() {
3715        Some(p) => p,
3716        None => {
3717            PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3718            return None;
3719        }
3720    };
3721
3722    // Check for && or ||
3723    let next = match tok() {
3724        DAMPER => {
3725            zshlex();
3726            skip_separators();
3727            par_sublist().map(|s| (SublistOp::And, Box::new(s)))
3728        }
3729        DBAR => {
3730            zshlex();
3731            skip_separators();
3732            par_sublist().map(|s| (SublistOp::Or, Box::new(s)))
3733        }
3734        _ => None,
3735    };
3736
3737    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3738    Some(ZshSublist { pipe, next, flags })
3739}
3740
3741/// Parse a pipeline
3742/// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
3743/// zsh/Src/parse.c:894 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
3744/// C emits WC_PIPE wordcodes per command; same flow.
3745fn par_pline() -> Option<ZshPipe> {
3746    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
3747    if check_recursion() {
3748        error("par_pline: max recursion depth exceeded");
3749        PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3750        return None;
3751    }
3752
3753    let lineno = toklineno();
3754    let cmd = match par_cmd() {
3755        Some(c) => c,
3756        None => {
3757            PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3758            return None;
3759        }
3760    };
3761
3762    // Check for | or |&
3763    let mut merge_stderr = false;
3764    let next = match tok() {
3765        BAR_TOK | BARAMP => {
3766            merge_stderr = tok() == BARAMP;
3767            zshlex();
3768            skip_separators();
3769            par_pline().map(Box::new)
3770        }
3771        _ => None,
3772    };
3773
3774    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3775    Some(ZshPipe {
3776        cmd,
3777        next,
3778        lineno,
3779        merge_stderr,
3780    })
3781}
3782
3783/// Parse a command
3784/// Parse a command — dispatches by leading token (FOR / CASE /
3785/// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
3786/// Inpar subshell / Inbrace current-shell / TIME / NOCORRECT,
3787/// else simple). Direct port of zsh/Src/parse.c:958 `par_cmd`.
3788fn par_cmd() -> Option<ZshCommand> {
3789    // Parse leading redirections
3790    let mut redirs = Vec::new();
3791    while IS_REDIROP(tok()) {
3792        if let Some(redir) = par_redir() {
3793            redirs.push(redir);
3794        }
3795    }
3796
3797    let cmd = match tok() {
3798        FOR | FOREACH => par_for(),
3799        SELECT => parse_select(),
3800        CASE => par_case(),
3801        IF => par_if(),
3802        WHILE => par_while(false),
3803        UNTIL => par_while(true),
3804        REPEAT => par_repeat(),
3805        INPAR_TOK => par_subsh(),
3806        INOUTPAR => parse_anon_funcdef(),
3807        INBRACE_TOK => parse_cursh(),
3808        FUNC => par_funcdef(),
3809        DINBRACK => par_cond(),
3810        DINPAR => parse_arith(),
3811        TIME => par_time(),
3812        _ => par_simple(redirs),
3813    };
3814
3815    // Parse trailing redirections. For Simple commands the redirs were
3816    // already captured inside par_simple; for compound forms (Cursh,
3817    // Subsh, If, While, etc.) we collect them here and wrap in
3818    // ZshCommand::Redirected so compile_zsh can scope-bracket them.
3819    if let Some(inner) = cmd {
3820        let mut trailing: Vec<ZshRedir> = Vec::new();
3821        while IS_REDIROP(tok()) {
3822            if let Some(redir) = par_redir() {
3823                trailing.push(redir);
3824            }
3825        }
3826        // c:1072-1075 — every par_cmd tail resets the lexer state
3827        // toggles so the NEXT command starts in cmd position with
3828        // case/cond/typeset off. par_simple/par_cond set `incmdpos=0`
3829        // during their bodies; without this reset the next iteration
3830        // of the outer par_list loop sees `if` / `done` / `select`
3831        // etc. as plain strings and the AST collapses.
3832        set_incmdpos(true);
3833        set_incasepat(0);
3834        set_incond(0);
3835        set_intypeset(false);
3836        if trailing.is_empty() {
3837            return Some(inner);
3838        }
3839        // Simple already absorbed its own redirs (compile path expects
3840        // them on ZshSimple), so don't double-wrap.
3841        if matches!(inner, ZshCommand::Simple(_)) {
3842            if let ZshCommand::Simple(mut s) = inner {
3843                s.redirs.extend(trailing);
3844                return Some(ZshCommand::Simple(s));
3845            }
3846            unreachable!()
3847        }
3848        return Some(ZshCommand::Redirected(Box::new(inner), trailing));
3849    }
3850    // Same reset on the empty-cmd branch (mirror c:1072 unconditional
3851    // path — the C function only returns 0 above when the dispatch
3852    // produced no command, and falls through to the reset block).
3853    set_incmdpos(true);
3854    set_incasepat(0);
3855    set_incond(0);
3856    set_intypeset(false);
3857
3858    None
3859}
3860
3861/// Parse a simple command
3862/// Parse a simple command (assignments + words + redirections).
3863/// Direct port of zsh/Src/parse.c:1836 `par_simple` —
3864/// the largest single function in parse.c. Handles ENVSTRING/
3865/// ENVARRAY assignments at command head, intermixed redirs,
3866/// typeset-style multi-assignment commands, and the trailing
3867/// inout-par `()` that converts a simple command into an inline
3868/// function definition.
3869fn par_simple(mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
3870    let mut assigns = Vec::new();
3871    let mut words = Vec::new();
3872    const MAX_ITERATIONS: usize = 10_000;
3873    let mut iterations = 0;
3874
3875    // c:1934 — `if (!isset(IGNOREBRACES) && *tokstr == Inbrace) { ... }`
3876    // gates the `{var}>file` brace-FD recognition (a non-POSIX zsh
3877    // extension that lets `{varname}>file` redirect into the named
3878    // shell variable). zshrs's parser doesn't recognise the brace-FD
3879    // shape yet, so the gate is wired here as a marker — when the
3880    // {var}-FD feature lands, swap this `false` for the actual
3881    // `tokstr starts with Inbrace` test and route into a {var}>file
3882    // redir builder.
3883    let saw_brace_fd_candidate = false;
3884    if !isset(IGNOREBRACES) && saw_brace_fd_candidate {
3885        // TODO: {var}>file FD recognition (par_simple body at c:1934-2000).
3886    }
3887
3888    // Parse leading assignments
3889    while tok() == ENVSTRING || tok() == ENVARRAY {
3890        iterations += 1;
3891        if iterations > MAX_ITERATIONS {
3892            error("par_simple: exceeded max iterations in assignments");
3893            return None;
3894        }
3895        if let Some(assign) = parse_assign() {
3896            assigns.push(assign);
3897        }
3898        zshlex();
3899    }
3900
3901    // Parse words and redirections
3902    loop {
3903        iterations += 1;
3904        if iterations > MAX_ITERATIONS {
3905            error("par_simple: exceeded max iterations");
3906            return None;
3907        }
3908        match tok() {
3909            ENVSTRING | ENVARRAY => {
3910                // Mid-command assignment-shape arg under typeset
3911                // / declare / local / etc. (intypeset gates the
3912                // lexer to emit Envstring/Envarray for `name=val`
3913                // and `name=()` past the command name). Parse the
3914                // assignment, then emit a synthetic word
3915                // `NAME=value` (scalar) or `NAME=( … )` (array)
3916                // string so typeset's builtin arg list sees the
3917                // assignment-shape arg. Avoids the inline-env
3918                // scope path that mistakenly treats it like a
3919                // pre-cmd `X=Y cmd` assignment.
3920                if let Some(assign) = parse_assign() {
3921                    let synthetic = match &assign.value {
3922                        ZshAssignValue::Scalar(v) => format!("{}={}", assign.name, v),
3923                        ZshAssignValue::Array(elems) => {
3924                            format!("{}=({})", assign.name, elems.join(" "))
3925                        }
3926                    };
3927                    words.push(synthetic);
3928                }
3929                zshlex();
3930            }
3931            STRING_LEX | TYPESET => {
3932                let s = tokstr();
3933                if let Some(s) = s {
3934                    words.push(s);
3935                }
3936                // c:1929 — `incmdpos = 0;` so the next zshlex() does
3937                // not re-promote `{`/`[[`/reserved words at the
3938                // continuation position. Without this, `echo {a,b}`
3939                // re-lexes `{` as INBRACE_TOK (current-shell block)
3940                // and the brace expansion never reaches par_simple.
3941                set_incmdpos(false);
3942                // c:1931-1932 — `if (tok == TYPESET) intypeset = is_typeset = 1;`
3943                // Multi-assign `typeset a=1 b=2` relies on the lexer
3944                // re-emitting `b=2` as ENVSTRING; that path is gated
3945                // on `intypeset`. Without this, follow-on assignment
3946                // words arrive as STRING and the typeset builtin's
3947                // multi-assign form silently degrades.
3948                if tok() == TYPESET {
3949                    set_intypeset(true);
3950                }
3951                zshlex();
3952                // Check for function definition foo() { ... }
3953                if words.len() == 1 && peek_inoutpar() {
3954                    return parse_inline_funcdef(words.pop().unwrap());
3955                }
3956                // `{name}>file` named-fd redirect: the lexer doesn't
3957                // recognize this shape, so the bare word `{name}`
3958                // arrives as a String. If it matches `{IDENT}` and
3959                // the NEXT token is a redirop, pop it off as the
3960                // varid for that redir.
3961                if !words.is_empty() && IS_REDIROP(tok()) {
3962                    let last = words.last().unwrap();
3963                    let untoked = super::lex::untokenize(last);
3964                    if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
3965                        let name = &untoked[1..untoked.len() - 1];
3966                        if !name.is_empty()
3967                            && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
3968                            && name
3969                                .chars()
3970                                .next()
3971                                .map(|c| c == '_' || c.is_ascii_alphabetic())
3972                                .unwrap_or(false)
3973                        {
3974                            let varid = name.to_string();
3975                            words.pop();
3976                            if let Some(mut redir) = par_redir() {
3977                                redir.varid = Some(varid);
3978                                redirs.push(redir);
3979                            }
3980                            continue;
3981                        }
3982                    }
3983                }
3984            }
3985            _ if IS_REDIROP(tok()) => {
3986                match par_redir() {
3987                    Some(redir) => redirs.push(redir),
3988                    None => break, // Error in redir parsing, stop
3989                }
3990            }
3991            INOUTPAR if !words.is_empty() => {
3992                // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1)
3993                // YYERROR(oecused);` — multi-name funcdef gate:
3994                // `f1 f2() { ... }` defines f1 AND f2 to the same
3995                // body, but only when MULTIFUNCDEF is set.
3996                if !isset(MULTIFUNCDEF) && words.len() > 1 {
3997                    error(
3998                        "parse error: multiple names in function definition without MULTIFUNCDEF",
3999                    );
4000                    return None;
4001                }
4002                // c:2061-2068 — `if (isset(EXECOPT) && hasalias &&
4003                // !isset(ALIASFUNCDEF) && argc && hasalias !=
4004                // input_hasalias()) { zwarn(...); YYERROR(...); }`
4005                // Alias-as-funcdef warning. zshrs's parser doesn't
4006                // track `hasalias` (alias-expansion provenance
4007                // during parse) yet, so `had_alias` stays false —
4008                // the gate is wired here as a marker so the canonical
4009                // C predicate is visible. Once alias-provenance lands,
4010                // swap `false` for the actual provenance compare.
4011                let had_alias = false;
4012                if isset(EXECOPT) && had_alias && !isset(ALIASFUNCDEF) && !words.is_empty() {
4013                    crate::ported::utils::zwarn("defining function based on alias `(unknown)'");
4014                    return None;
4015                }
4016                // foo() { ... } style function
4017                return parse_inline_funcdef(words.pop().unwrap());
4018            }
4019            _ => break,
4020        }
4021    }
4022
4023    if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
4024        return None;
4025    }
4026
4027    Some(ZshCommand::Simple(ZshSimple {
4028        assigns,
4029        words,
4030        redirs,
4031    }))
4032}
4033
4034/// Parse an assignment
4035/// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
4036/// Sub-routine of par_simple. The C source handles assignments
4037/// inline in par_simple via the ENVSTRING/ENVARRAY token paths
4038/// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
4039/// helper for clarity.
4040fn parse_assign() -> Option<ZshAssign> {
4041    // Helper: locate the Equals-marker that delimits NAME from
4042    // VALUE in an assignment-shaped tokstr. The lexer META-encodes
4043    // EVERY `=` (including those inside `${var%%=foo}` strip
4044    // patterns or `[idx]=...` subscripts), so a naive
4045    // `tokstr.find(Equals)` would split at the first inner `=`
4046    // and break the whole assignment. Walk the string skipping
4047    // brace and bracket depth so the assignment's `=` (the one
4048    // after the last `]` of the LHS subscript / or after the
4049    // bare name) is the one we land on.
4050    fn find_assign_equals(s: &str) -> Option<usize> {
4051        let target = crate::ported::zsh_h::Equals;
4052        let mut brace = 0i32;
4053        let mut bracket = 0i32;
4054        let mut paren = 0i32;
4055        for (i, c) in s.char_indices() {
4056            match c {
4057                    '{' | '\u{8f}' /* Inbrace */ => brace += 1,
4058                    '}' | '\u{90}' /* Outbrace */ => {
4059                        if brace > 0 {
4060                            brace -= 1;
4061                        }
4062                    }
4063                    '[' | '\u{91}' /* Inbrack */ => bracket += 1,
4064                    ']' | '\u{92}' /* Outbrack */ => {
4065                        if bracket > 0 {
4066                            bracket -= 1;
4067                        }
4068                    }
4069                    '(' | '\u{88}' /* Inpar */ => paren += 1,
4070                    ')' | '\u{8a}' /* Outpar */ => {
4071                        if paren > 0 {
4072                            paren -= 1;
4073                        }
4074                    }
4075                    _ if c == target && brace == 0 && bracket == 0 && paren == 0 => {
4076                        return Some(i);
4077                    }
4078                    _ => {}
4079                }
4080        }
4081        None
4082    }
4083
4084    let _ts_tokstr = tokstr()?;
4085    let tokstr = _ts_tokstr.as_str();
4086
4087    // Parse name=value or name+=value.
4088    let (name, value_str, append) = if tok() == ENVARRAY {
4089        let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
4090            (stripped, true)
4091        } else {
4092            (tokstr, false)
4093        };
4094        (name.to_string(), String::new(), append)
4095    } else if let Some(pos) = find_assign_equals(tokstr) {
4096        let name_part = &tokstr[..pos];
4097        let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
4098            (stripped, true)
4099        } else {
4100            (name_part, false)
4101        };
4102        (
4103            name.to_string(),
4104            tokstr[pos + Equals.len_utf8()..].to_string(),
4105            append,
4106        )
4107    } else if let Some(pos) = tokstr.find('=') {
4108        // Fallback to literal '=' for compatibility
4109        let name_part = &tokstr[..pos];
4110        let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
4111            (stripped, true)
4112        } else {
4113            (name_part, false)
4114        };
4115        (name.to_string(), tokstr[pos + 1..].to_string(), append)
4116    } else {
4117        return None;
4118    };
4119
4120    let value = if tok() == ENVARRAY {
4121        // Array assignment: name=(...)
4122        let mut elements = Vec::new();
4123        zshlex(); // skip past token
4124
4125        let mut arr_iters = 0;
4126        const MAX_ARRAY_ELEMENTS: usize = 10_000;
4127        while matches!(tok(), STRING_LEX | SEPER | NEWLIN) {
4128            arr_iters += 1;
4129            if arr_iters > MAX_ARRAY_ELEMENTS {
4130                error("array assignment exceeded maximum elements");
4131                break;
4132            }
4133            if tok() == STRING_LEX {
4134                let _ts_s = crate::ported::lex::tokstr();
4135                if let Some(s) = _ts_s.as_deref() {
4136                    elements.push(s.to_string());
4137                }
4138            }
4139            zshlex();
4140        }
4141
4142        // The closing Outpar is consumed here. The outer par_simple
4143        // loop will then `zshlex()` past whatever follows (typically
4144        // a separator or the next word) — calling zshlex twice in
4145        // tandem (here AND in par_simple) over-advances and merges
4146        // a following `name() { … }` funcdef into the same Simple.
4147        // We only consume Outpar; let the caller handle the rest.
4148        // Without this guard `g=(o1); f() { :; }` parsed as one
4149        // Simple with assigns=[g] and words=["f()"] (one token).
4150        if tok() == OUTPAR_TOK {
4151            // Note: do NOT zshlex() here. par_simple's `lexer
4152            // .zshlex()` after `parse_assign` returns advances past
4153            // the Outpar onto the next significant token.
4154            //
4155            // Force `incmdpos=true` so the next zshlex() recognizes
4156            // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
4157            // The lexer flips incmdpos to false on bare Outpar (which
4158            // is correct for subshell-close context), but for an
4159            // array-assignment close more assigns/words may follow.
4160            set_incmdpos(true);
4161        }
4162
4163        ZshAssignValue::Array(elements)
4164    } else {
4165        ZshAssignValue::Scalar(value_str)
4166    };
4167
4168    Some(ZshAssign {
4169        name,
4170        value,
4171        append,
4172    })
4173}
4174
4175/// Parse a redirection
4176/// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
4177/// Direct port of zsh/Src/parse.c:2229 `par_redir`. Returns
4178/// a ZshRedir node carrying the operator type, fd, target word
4179/// (or here-doc body / pipe-redir command), and any `{var}` style
4180/// fd-binding parameter.
4181fn par_redir() -> Option<ZshRedir> {
4182    let rtype = match tok() {
4183        OUTANG_TOK => REDIR_WRITE,
4184        OUTANGBANG => REDIR_WRITENOW,
4185        DOUTANG => REDIR_APP,
4186        DOUTANGBANG => REDIR_APPNOW,
4187        INANG_TOK => REDIR_READ,
4188        INOUTANG => REDIR_READWRITE,
4189        DINANG => REDIR_HEREDOC,
4190        DINANGDASH => REDIR_HEREDOCDASH,
4191        TRINANG => REDIR_HERESTR,
4192        INANGAMP => REDIR_MERGEIN,
4193        OUTANGAMP => REDIR_MERGEOUT,
4194        AMPOUTANG => REDIR_ERRWRITE,
4195        OUTANGAMPBANG => REDIR_ERRWRITENOW,
4196        DOUTANGAMP => REDIR_ERRAPP,
4197        DOUTANGAMPBANG => REDIR_ERRAPPNOW,
4198        _ => return None,
4199    };
4200
4201    let fd = if tokfd() >= 0 {
4202        tokfd()
4203    } else if matches!(
4204        rtype,
4205        REDIR_READ
4206            | REDIR_READWRITE
4207            | REDIR_MERGEIN
4208            | REDIR_HEREDOC
4209            | REDIR_HEREDOCDASH
4210            | REDIR_HERESTR
4211    ) {
4212        0
4213    } else {
4214        1
4215    };
4216
4217    // c:2234-2245 — save/restore incmdpos and nocorrect around the
4218    // zshlex that consumes the redir target word:
4219    //   oldcmdpos = incmdpos; incmdpos = 0;
4220    //   oldnc = nocorrect;
4221    //   if (tok != INANG && tok != INOUTANG) nocorrect = 1;
4222    //   ... zshlex; check tok; ...
4223    //   incmdpos = oldcmdpos; nocorrect = oldnc;
4224    // Without this, a redir target lexes in the parent's incmdpos
4225    // (re-promoting `{` / reswords) AND with parent nocorrect (so
4226    // spelling-correction wrongly runs inside `> $(cmd)` etc.).
4227    let oldcmdpos = incmdpos();
4228    set_incmdpos(false);
4229    let oldnc = nocorrect();
4230    let cur = tok();
4231    if cur != INANG_TOK && cur != INOUTANG {
4232        set_nocorrect(1);
4233    }
4234    zshlex();
4235
4236    let name = match tok() {
4237        STRING_LEX | ENVSTRING => {
4238            let n = tokstr().unwrap_or_default();
4239            // Restore BEFORE the next zshlex so trailing tokens lex
4240            // in the original parent context (mirrors C ordering at
4241            // parse.c:2244-2245 — restore right after the word is
4242            // confirmed, before any downstream advance).
4243            set_incmdpos(oldcmdpos);
4244            set_nocorrect(oldnc);
4245            zshlex();
4246            n
4247        }
4248        _ => {
4249            set_incmdpos(oldcmdpos);
4250            set_nocorrect(oldnc);
4251            error("expected word after redirection");
4252            return None;
4253        }
4254    };
4255
4256    // Heredoc terminator capture. C parse.c:2254-2317 par_redir builds
4257    // a `struct heredocs` entry here for REDIR_HEREDOC[DASH]; zshrs
4258    // pushes a HereDoc onto heredocs[] for process_heredocs (called
4259    // by zshlex on the next NEWLIN) to fill in. Quoted terminators
4260    // (`<<'EOF'` / `<<"EOF"` / `<<\EOF`) disable expansion in the
4261    // body — Snull `\u{9d}` marks single-quote, Dnull `\u{9e}` marks
4262    // double-quote, Bnull `\u{9f}` marks any backslash-escaped char.
4263    let heredoc_idx = if matches!(rtype, REDIR_HEREDOC | REDIR_HEREDOCDASH) {
4264        let strip_tabs = rtype == REDIR_HEREDOCDASH;
4265        let quoted = name.contains('\u{9d}')
4266            || name.contains('\u{9e}')
4267            || name.contains('\u{9f}')
4268            || name.starts_with('\'')
4269            || name.starts_with('"');
4270        let term = name
4271            .chars()
4272            .filter(|c| {
4273                *c != '\'' && *c != '"' && *c != '\u{9d}' && *c != '\u{9e}' && *c != '\u{9f}'
4274            })
4275            .collect::<String>();
4276        crate::ported::lex::heredocs_push(crate::ported::lex::HereDoc {
4277            terminator: term,
4278            strip_tabs,
4279            content: String::new(),
4280            quoted,
4281            processed: false,
4282        });
4283        Some(heredocs_len() - 1)
4284    } else {
4285        None
4286    };
4287
4288    Some(ZshRedir {
4289        rtype,
4290        fd,
4291        name,
4292        heredoc: None,
4293        varid: None,
4294        heredoc_idx,
4295    })
4296}
4297
4298/// Parse for/foreach loop
4299/// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
4300/// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
4301/// of zsh/Src/parse.c:1087 `par_for`. parse_for_cstyle is the
4302/// inner branch for the `((...))` arithmetic-header variant
4303/// (parse.c:1100-1140 inside par_for).
4304fn par_for() -> Option<ZshCommand> {
4305    let is_foreach = tok() == FOREACH;
4306    zshlex();
4307
4308    // Check for C-style: for (( init; cond; step ))
4309    if tok() == DINPAR {
4310        return parse_for_cstyle();
4311    }
4312
4313    // Get variable name(s). zsh parse.c par_for accepts multiple
4314    // identifier tokens before `in`/`(`/newline — `for k v in ...`
4315    // assigns each iteration's pair of values to k and v in turn.
4316    // We store the names space-joined since variable identifiers
4317    // can't contain whitespace.
4318    let mut names: Vec<String> = Vec::new();
4319    while tok() == STRING_LEX {
4320        let v = tokstr().unwrap_or_default();
4321        if v == "in" {
4322            break;
4323        }
4324        names.push(v);
4325        zshlex();
4326    }
4327    if names.is_empty() {
4328        error("expected variable name in for");
4329        return None;
4330    }
4331    let var = names.join(" ");
4332
4333    // Skip newlines
4334    skip_separators();
4335
4336    // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
4337    // single String token with the parens lexed-as-content
4338    // (`<Inpar>a b c<Outpar>`) instead of as separate Inpar/String/
4339    // Outpar tokens. Detect that shape and split it manually.
4340    let list = if tok() == STRING_LEX
4341        && tokstr()
4342            .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
4343            .unwrap_or(false)
4344    {
4345        let raw = tokstr().unwrap_or_default();
4346        // Strip leading Inpar + trailing Outpar, then untokenize the
4347        // inner content and split on whitespace for the word list.
4348        let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
4349            ..raw
4350                .char_indices()
4351                .last()
4352                .map(|(i, _)| i)
4353                .unwrap_or(raw.len())];
4354        let cleaned = super::lex::untokenize(inner);
4355        let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
4356        zshlex();
4357        ForList::Words(words)
4358    } else if tok() == STRING_LEX {
4359        let s = tokstr();
4360        if s.map(|s| s == "in").unwrap_or(false) {
4361            zshlex();
4362            let mut words = Vec::new();
4363            let mut word_count = 0;
4364            while tok() == STRING_LEX {
4365                word_count += 1;
4366                if word_count > 500 || check_limit() {
4367                    error("for: too many words");
4368                    return None;
4369                }
4370                let _ts_s = tokstr();
4371                if let Some(s) = _ts_s.as_deref() {
4372                    words.push(s.to_string());
4373                }
4374                zshlex();
4375            }
4376            ForList::Words(words)
4377        } else {
4378            ForList::Positional
4379        }
4380    } else if tok() == INPAR_TOK {
4381        // for var (...)
4382        zshlex();
4383        let mut words = Vec::new();
4384        let mut word_count = 0;
4385        while tok() == STRING_LEX || tok() == SEPER {
4386            word_count += 1;
4387            if word_count > 500 || check_limit() {
4388                error("for: too many words in parens");
4389                return None;
4390            }
4391            if tok() == STRING_LEX {
4392                let _ts_s = tokstr();
4393                if let Some(s) = _ts_s.as_deref() {
4394                    words.push(s.to_string());
4395                }
4396            }
4397            zshlex();
4398        }
4399        if tok() == OUTPAR_TOK {
4400            // After the `)` of a for-list, the next token is the
4401            // body opener — `do`/`{`. zsh's lexer needs incmdpos
4402            // set so `{` lexes as Inbrace (not as a literal). C
4403            // analogue: parse.c::par_for sets `incmdpos = 1`
4404            // after consuming the Outpar before the body parse.
4405            set_incmdpos(true);
4406            zshlex();
4407        }
4408        ForList::Words(words)
4409    } else {
4410        ForList::Positional
4411    };
4412
4413    // Skip to body
4414    skip_separators();
4415
4416    // Parse body
4417    let body = parse_loop_body(is_foreach)?;
4418
4419    Some(ZshCommand::For(ZshFor {
4420        var,
4421        list,
4422        body: Box::new(body),
4423        is_select: false,
4424    }))
4425}
4426
4427/// Parse C-style for loop: for (( init; cond; step ))
4428/// Parse the c-style `for ((init; cond; incr)) do BODY done`.
4429/// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
4430/// Recognized when the token after FOR is DINPAR (the `((`
4431/// detected by gettok via dbparens setup).
4432fn parse_for_cstyle() -> Option<ZshCommand> {
4433    // We're at (( (Dinpar None) - the opening ((
4434    // Lexer returns:
4435    //   Dinpar None     - opening ((
4436    //   Dinpar "init"   - init expression, semicolon consumed
4437    //   Dinpar "cond"   - cond expression, semicolon consumed
4438    //   Doutpar "step"  - step expression, closing )) consumed
4439
4440    zshlex(); // Get init: Dinpar "i=0"
4441
4442    if tok() != DINPAR {
4443        error("expected init expression in for ((");
4444        return None;
4445    }
4446    let init = tokstr().unwrap_or_default();
4447
4448    zshlex(); // Get cond: Dinpar "i<10"
4449
4450    if tok() != DINPAR {
4451        error("expected condition in for ((");
4452        return None;
4453    }
4454    let cond = tokstr().unwrap_or_default();
4455
4456    zshlex(); // Get step: Doutpar "i++"
4457
4458    if tok() != DOUTPAR {
4459        error("expected )) in for");
4460        return None;
4461    }
4462    let step = tokstr().unwrap_or_default();
4463
4464    zshlex(); // Move past ))
4465
4466    skip_separators();
4467    let body = parse_loop_body(false)?;
4468
4469    Some(ZshCommand::For(ZshFor {
4470        var: String::new(),
4471        list: ForList::CStyle { init, cond, step },
4472        body: Box::new(body),
4473        is_select: false,
4474    }))
4475}
4476
4477/// Parse select loop (same syntax as for)
4478/// Parse `select NAME in WORDS; do BODY; done`. Same shape as
4479/// `for NAME in WORDS; do ...` but with menu-prompt semantics in
4480/// the executor. C equivalent: the SELECT case in par_for at
4481/// parse.c:1087-1207 (selects share parser flow with foreach).
4482fn parse_select() -> Option<ZshCommand> {
4483    // `select` shares par_for's grammar (var, words, body) but the
4484    // compile path is different (interactive prompt loop).
4485    match par_for()? {
4486        ZshCommand::For(mut f) => {
4487            f.is_select = true;
4488            Some(ZshCommand::For(f))
4489        }
4490        other => Some(other),
4491    }
4492}
4493
4494/// Parse case statement
4495/// Parse `case WORD in PATTERN) BODY ;; ... esac`. Direct port
4496/// of zsh/Src/parse.c:1209 `par_case`. Each case arm is a
4497/// (pattern_list, body, terminator) tuple where terminator is
4498/// `;;` (default), `;&` (fallthrough), or `;|` (continue testing).
4499fn par_case() -> Option<ZshCommand> {
4500    // C par_case (parse.c:1209-1241). Order of state toggles
4501    // matters — the lexer reads the case word in `incmdpos=0`
4502    // (so it's not promoted to a reswd), then the `in`/`{` in
4503    // `incmdpos=1, noaliases=1, nocorrect=1` (so the `in` literal
4504    // isn't alias-expanded or spell-corrected), then sets
4505    // `incasepat=1, incmdpos=0` before the first pattern.
4506    set_incmdpos(false);
4507    zshlex(); // skip 'case'
4508
4509    let word = match tok() {
4510        STRING_LEX => {
4511            let w = tokstr().unwrap_or_default();
4512            // c:1222 — `incmdpos = 1;` before the next zshlex so the
4513            // `in` keyword is recognised. c:1223-1225 — save+force
4514            // noaliases / nocorrect.
4515            set_incmdpos(true);
4516            let ona = noaliases();
4517            let onc = nocorrect();
4518            set_noaliases(true);
4519            set_nocorrect(1);
4520            zshlex();
4521            // Restore noaliases/nocorrect after the `in`-or-`{` token
4522            // is in hand; both are unconditionally restored at c:1238-1239.
4523            let restore = |ona: bool, onc: i32| {
4524                set_noaliases(ona);
4525                set_nocorrect(onc);
4526            };
4527            (w, ona, onc, restore)
4528        }
4529        _ => {
4530            error("expected word after case");
4531            return None;
4532        }
4533    };
4534    let (word, ona, onc, restore) = word;
4535
4536    skip_separators();
4537
4538    // Expect 'in' or {
4539    let use_brace = tok() == INBRACE_TOK;
4540    if tok() == STRING_LEX {
4541        let s = tokstr();
4542        if s.map(|s| s != "in").unwrap_or(true) {
4543            // c:1228-1232 — restore noaliases/nocorrect on error path.
4544            restore(ona, onc);
4545            error("expected 'in' in case");
4546            return None;
4547        }
4548    } else if !use_brace {
4549        restore(ona, onc);
4550        error("expected 'in' or '{' in case");
4551        return None;
4552    }
4553    // c:1236-1239 — `incasepat = 1; incmdpos = 0; noaliases = ona;
4554    // nocorrect = onc;` — set the case-pattern context AND restore
4555    // alias/correct state BEFORE the zshlex that consumes `in`/`{`.
4556    set_incasepat(1);
4557    set_incmdpos(false);
4558    restore(ona, onc);
4559    zshlex();
4560
4561    let mut arms = Vec::new();
4562    const MAX_ARMS: usize = 10_000;
4563
4564    loop {
4565        if arms.len() > MAX_ARMS {
4566            error("par_case: too many arms");
4567            break;
4568        }
4569
4570        // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
4571        // This affects how [ and | are lexed
4572        set_incasepat(1);
4573
4574        skip_separators();
4575
4576        // Check for end
4577        // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
4578        let is_esac = tok() == ESAC
4579            || (tok() == STRING_LEX && tokstr().map(|s| s == "esac").unwrap_or(false));
4580        if (use_brace && tok() == OUTBRACE_TOK) || (!use_brace && is_esac) {
4581            set_incasepat(0);
4582            zshlex();
4583            break;
4584        }
4585
4586        // Also break on EOF
4587        if tok() == ENDINPUT || tok() == LEXERR {
4588            set_incasepat(0);
4589            break;
4590        }
4591
4592        // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
4593        // The leading `(` is paired with a matching `)` that closes
4594        // the pattern itself; the arm-close `)` follows separately.
4595        // Track whether we consumed it so we can skip the matching
4596        // `)` after pattern parsing — otherwise the arm-close would
4597        // be interpreted as the pattern-close and the actual body
4598        // would get the leftover `)`.
4599        let had_leading_paren = tok() == INPAR_TOK;
4600        if had_leading_paren {
4601            zshlex();
4602        }
4603
4604        // incasepat is already set above
4605        let mut patterns = Vec::new();
4606        let mut pattern_iterations = 0;
4607        loop {
4608            pattern_iterations += 1;
4609            if pattern_iterations > 1000 {
4610                error("par_case: too many pattern iterations");
4611                set_incasepat(0);
4612                return None;
4613            }
4614
4615            if tok() == STRING_LEX {
4616                let s = tokstr();
4617                if s.map(|s| s == "esac").unwrap_or(false) {
4618                    break;
4619                }
4620                patterns.push(tokstr().unwrap_or_default());
4621                // After first pattern token, set incasepat=2 so ( is treated as part of pattern
4622                set_incasepat(2);
4623                zshlex();
4624            } else if tok() != BAR_TOK {
4625                break;
4626            }
4627
4628            if tok() == BAR_TOK {
4629                // Reset to 1 (start of next alternative pattern)
4630                set_incasepat(1);
4631                zshlex();
4632            } else {
4633                break;
4634            }
4635        }
4636        set_incasepat(0);
4637
4638        // zsh's `(P)` form (parse.c:1320-1360 hack) treats the entire
4639        // parenthesized contents as ONE zsh pattern with internal `|`
4640        // as the literal alternation operator — NOT as multiple
4641        // case-arm alternatives. Without a leading `(`, the bare
4642        // `P1|P2)` form splits into multiple alts. Mirror that here:
4643        // when a leading `(` was consumed, fold the |-separated
4644        // pieces back into a single pattern string.
4645        if had_leading_paren && patterns.len() > 1 {
4646            let joined = patterns.join("|");
4647            patterns = vec![joined];
4648        }
4649
4650        // Expect ).  Also handle the `(P))` wrapped-pattern form:
4651        // when a leading `(` was consumed, accept an extra `)` —
4652        // the inner `)` closes the optional-paren wrapper, the
4653        // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
4654        // (bare pattern, leading-paren is just the opt-marker, the
4655        // close is arm-close) and `(P)) BODY` (paren-wrapped
4656        // pattern, then arm-close). The first form is unambiguous
4657        // when the bare pattern was simple; the second is needed
4658        // when the body starts with `(`.
4659        if tok() != OUTPAR_TOK {
4660            error("expected ')' in case pattern");
4661            return None;
4662        }
4663        // Port of Src/parse.c:1310-1313 — when the case pattern
4664        // closes with `)`, set `incmdpos = 1` BEFORE consuming
4665        // the token so the first word of the arm body is lexed
4666        // in command position. Without this, `case X in X) c1=v ;;`
4667        // lexes `c1=v` as a plain STRING rather than an assignment
4668        // word, and exec treats it as a command name (yielding
4669        // "command not found: c1=v"). Subsequent statements after
4670        // `;` parse correctly because the `;` separator restores
4671        // command position; only the FIRST body word was broken.
4672        set_incmdpos(true);
4673        zshlex();
4674        if had_leading_paren && tok() == OUTPAR_TOK {
4675            set_incmdpos(true);
4676            zshlex();
4677        }
4678
4679        // Parse body
4680        let body = parse_program();
4681
4682        // Get terminator. Set incasepat=1 BEFORE the zshlex
4683        // advance so the next token (the next arm's pattern, like
4684        // `[a-z]`) gets tokenized in pattern context. Without
4685        // this, a `[`-prefixed pattern after the FIRST arm became
4686        // Inbrack instead of String and the pattern-loop bailed
4687        // out with "expected ')' in case pattern".
4688        let terminator = match tok() {
4689            DSEMI => {
4690                set_incasepat(1);
4691                zshlex();
4692                CaseTerm::Break
4693            }
4694            SEMIAMP => {
4695                set_incasepat(1);
4696                zshlex();
4697                CaseTerm::Continue
4698            }
4699            SEMIBAR => {
4700                set_incasepat(1);
4701                zshlex();
4702                CaseTerm::TestNext
4703            }
4704            _ => CaseTerm::Break,
4705        };
4706
4707        if !patterns.is_empty() {
4708            arms.push(CaseArm {
4709                patterns,
4710                body,
4711                terminator,
4712            });
4713        }
4714    }
4715
4716    Some(ZshCommand::Case(ZshCase { word, arms }))
4717}
4718
4719/// Parse if statement
4720/// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
4721/// Direct port of zsh/Src/parse.c:1411 `par_if`. The C source
4722/// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
4723/// (cond, then_body) tuples plus an optional else_body.
4724fn par_if() -> Option<ZshCommand> {
4725    zshlex(); // skip 'if'
4726
4727    // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
4728    let cond = Box::new(parse_program_until(Some(&[THEN, INBRACE_TOK])));
4729
4730    skip_separators();
4731
4732    // Expect 'then' or {
4733    let use_brace = tok() == INBRACE_TOK;
4734    if tok() != THEN && !use_brace {
4735        error("expected 'then' or '{' after if condition");
4736        return None;
4737    }
4738    zshlex();
4739
4740    // Parse then-body - stops at else/elif/fi, or } if using brace syntax
4741    let then = if use_brace {
4742        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
4743        if tok() == OUTBRACE_TOK {
4744            zshlex();
4745        }
4746        Box::new(body)
4747    } else {
4748        Box::new(parse_program_until(Some(&[ELSE, ELIF, FI])))
4749    };
4750
4751    // Parse elif and else. zsh accepts the SAME elif/else
4752    // continuations for both classic `then/fi` AND the brace
4753    // form `{ ... } elif ... { ... } else { ... }`. Direct port
4754    // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
4755    // arms are checked AFTER the body close regardless of which
4756    // delimiter style opened the block. Without this, zinit's
4757    //   if [[ -z $sel ]] { ... } else { ... }
4758    // hung the parser — `else` was treated as an external
4759    // command following the if-statement, which the lexer state
4760    // mis-classified inside the still-open function body.
4761    //
4762    // For brace-form: skip the `fi` consumption at the end of
4763    // the loop (no `fi` after a brace block), and `else` may
4764    // arrive after a `}` close. Skip-separators between the
4765    // body close and the elif/else token.
4766    let mut elif = Vec::new();
4767    let mut else_ = None;
4768
4769    {
4770        loop {
4771            skip_separators();
4772
4773            match tok() {
4774                ELIF => {
4775                    zshlex();
4776                    // elif condition stops at 'then' or '{'
4777                    let econd = parse_program_until(Some(&[THEN, INBRACE_TOK]));
4778                    skip_separators();
4779
4780                    let elif_use_brace = tok() == INBRACE_TOK;
4781                    if tok() != THEN && !elif_use_brace {
4782                        error("expected 'then' after elif");
4783                        return None;
4784                    }
4785                    zshlex();
4786
4787                    // elif body stops at else/elif/fi or } if using braces
4788                    let ebody = if elif_use_brace {
4789                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
4790                        if tok() == OUTBRACE_TOK {
4791                            zshlex();
4792                        }
4793                        body
4794                    } else {
4795                        parse_program_until(Some(&[ELSE, ELIF, FI]))
4796                    };
4797
4798                    elif.push((econd, ebody));
4799                }
4800                ELSE => {
4801                    zshlex();
4802                    skip_separators();
4803
4804                    let else_use_brace = tok() == INBRACE_TOK;
4805                    if else_use_brace {
4806                        zshlex();
4807                    }
4808
4809                    // else body stops at 'fi' or '}'
4810                    else_ = Some(Box::new(if else_use_brace {
4811                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
4812                        if tok() == OUTBRACE_TOK {
4813                            zshlex();
4814                        }
4815                        body
4816                    } else {
4817                        parse_program_until(Some(&[FI]))
4818                    }));
4819
4820                    // Consume the 'fi' if present (not for brace syntax)
4821                    if !else_use_brace && tok() == FI {
4822                        zshlex();
4823                    }
4824                    break;
4825                }
4826                FI => {
4827                    zshlex();
4828                    break;
4829                }
4830                _ => break,
4831            }
4832        }
4833    }
4834
4835    Some(ZshCommand::If(ZshIf {
4836        cond,
4837        then,
4838        elif,
4839        else_,
4840    }))
4841}
4842
4843/// Parse while/until loop
4844/// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
4845/// Direct port of zsh/Src/parse.c:1521 `par_while`. The
4846/// `until` variant is the same loop with the condition negated.
4847fn par_while(until: bool) -> Option<ZshCommand> {
4848    zshlex(); // skip while/until
4849
4850    let cond = Box::new(parse_program());
4851
4852    skip_separators();
4853    let body = parse_loop_body(false)?;
4854
4855    Some(ZshCommand::While(ZshWhile {
4856        cond,
4857        body: Box::new(body),
4858        until,
4859    }))
4860}
4861
4862/// Parse repeat loop
4863/// Parse `repeat N; do BODY; done`. Direct port of
4864/// zsh/Src/parse.c:1565 `par_repeat`. The C source supports
4865/// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
4866/// parser doesn't yet special-case that variant.
4867fn par_repeat() -> Option<ZshCommand> {
4868    zshlex(); // skip 'repeat'
4869
4870    let count = match tok() {
4871        STRING_LEX => {
4872            let c = tokstr().unwrap_or_default();
4873            zshlex();
4874            c
4875        }
4876        _ => {
4877            error("expected count after repeat");
4878            return None;
4879        }
4880    };
4881
4882    skip_separators();
4883    // c:1600 — par_repeat's short-form gate is wider: it unlocks
4884    // when SHORTLOOPS OR SHORTREPEAT is set (vs SHORTLOOPS alone for
4885    // for/while). Pass `is_repeat=true` so parse_loop_body_kind
4886    // applies that widened gate.
4887    let body = parse_loop_body_kind(false, true)?;
4888
4889    Some(ZshCommand::Repeat(ZshRepeat {
4890        count,
4891        body: Box::new(body),
4892    }))
4893}
4894
4895/// Parse loop body (do...done, {...}, or shortloop)
4896/// Parse the `do BODY done` body of a for/while/until/select/
4897/// repeat loop. Direct equivalent of zsh's parse.c handling
4898/// inside the loop builders — they all consume DOLOOP, parse a
4899/// list until DONE, and return the list. The `foreach_style`
4900/// flag signals foreach (where short-form `for NAME in WORDS;
4901/// CMD` may skip do/done) vs c-style (which always requires
4902/// do/done).
4903fn parse_loop_body(foreach_style: bool) -> Option<ZshProgram> {
4904    parse_loop_body_kind(foreach_style, false)
4905}
4906
4907/// Body-dispatch helper. `is_repeat` widens the SHORTLOOPS gate so
4908/// `SHORTREPEAT` also unlocks the short form for `repeat N CMD`
4909/// (per c:1600 `unset(SHORTLOOPS) && unset(SHORTREPEAT)`).
4910fn parse_loop_body_kind(foreach_style: bool, is_repeat: bool) -> Option<ZshProgram> {
4911    // c:1180-1194 — body dispatch order per par_for:
4912    //   `do ... done` (DOLOOP) — primary form.
4913    //   `{ ... }`   (INBRACE) — alternate.
4914    //   csh/CSHJUNKIELOOPS — terminator is `end`.
4915    //   else if (unset(SHORTLOOPS)) — YYERROR.
4916    //   else — short form (single command).
4917    if tok() == DOLOOP {
4918        zshlex();
4919        let body = parse_program();
4920        if tok() == DONE {
4921            zshlex();
4922        }
4923        Some(body)
4924    } else if tok() == INBRACE_TOK {
4925        zshlex();
4926        let body = parse_program();
4927        if tok() == OUTBRACE_TOK {
4928            zshlex();
4929        }
4930        Some(body)
4931    } else if foreach_style || isset(CSHJUNKIELOOPS) {
4932        // c:1184 / 1546 / 1595 — `else if (csh || isset(CSHJUNKIELOOPS))`.
4933        let body = parse_program();
4934        if tok() == ZEND {
4935            zshlex();
4936        }
4937        Some(body)
4938    } else {
4939        // c:1190 / 1474 / 1551 / 1600 — short-form gate. C bails
4940        // with YYERROR when `unset(SHORTLOOPS) && (!is_repeat ||
4941        // unset(SHORTREPEAT))`. zshrs's option machinery isn't
4942        // initialised at parse-test time (no `init_main` →
4943        // `install_emulation_defaults`), so a strict port here
4944        // body. parse_init seeds SHORTLOOPS=on mirroring C
4945        // `install_emulation_defaults`, so this fires only when a
4946        // script explicitly disabled the option.
4947        if unset(SHORTLOOPS) && (!is_repeat || unset(SHORTREPEAT)) {
4948            error("parse error: short loop form requires SHORTLOOPS option");
4949            return None;
4950        }
4951        // c:1192-1193 — short form: single command body.
4952        par_list().map(|list| ZshProgram { lists: vec![list] })
4953    }
4954}
4955
4956/// Parse (...) subshell
4957/// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619
4958/// `par_subsh`. Body parses as a normal list; the subshell wrapper
4959/// fork-isolates execution in the executor.
4960fn par_subsh() -> Option<ZshCommand> {
4961    zshlex(); // skip (
4962    let prog = parse_program();
4963    if tok() == OUTPAR_TOK {
4964        zshlex();
4965    }
4966    Some(ZshCommand::Subsh(Box::new(prog)))
4967}
4968
4969/// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
4970/// function named `_zshrs_anon_N`, invokes it with the args, and the
4971/// body runs with positional params set. Implemented as the desugared
4972/// pair (FuncDef + Simple call) so the compile path doesn't need new
4973/// machinery.
4974/// Parse an anonymous function definition `() { BODY }` followed
4975/// by call args. zsh treats `() { echo hi; } a b c` as defining
4976/// and immediately calling an anon fn with args a/b/c. C
4977/// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
4978/// triggers an anon-funcdef path.
4979fn parse_anon_funcdef() -> Option<ZshCommand> {
4980    zshlex(); // skip ()
4981    skip_separators();
4982    // No `{` after `()` → bare empty subshell shape `()`. Fall back
4983    // to a Subsh with an empty program so the status is 0 (matches
4984    // zsh's `()` no-op behavior).
4985    if tok() != INBRACE_TOK {
4986        return Some(ZshCommand::Subsh(Box::new(ZshProgram {
4987            lists: Vec::new(),
4988        })));
4989    }
4990    zshlex(); // skip {
4991    let body = parse_program();
4992    if tok() == OUTBRACE_TOK {
4993        zshlex();
4994    }
4995    // Collect any trailing args until a separator. zsh's anon-fn form
4996    // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
4997    let mut args = Vec::new();
4998    while tok() == STRING_LEX {
4999        if let Some(s) = tokstr() {
5000            args.push(s);
5001        }
5002        zshlex();
5003    }
5004
5005    // Generate a unique name. Module-level static would be cleaner but
5006    // a thread-local atomic is enough — anonymous functions are
5007    // ephemeral and the name isn't user-visible.
5008    static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
5009    let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
5010    let name = format!("_zshrs_anon_{}", n);
5011    Some(ZshCommand::FuncDef(ZshFuncDef {
5012        names: vec![name],
5013        body: Box::new(body),
5014        tracing: false,
5015        auto_call_args: Some(args),
5016        body_source: None,
5017    }))
5018}
5019
5020/// Parse {...} cursh
5021/// Parse a current-shell brace block `{ BODY }`. C source
5022/// par_cmd at parse.c:958-1085 handles Inbrace → emit WC_CURSH
5023/// and recurses into the list. zshrs's parse_cursh extracts that
5024/// arm into a dedicated method.
5025fn parse_cursh() -> Option<ZshCommand> {
5026    zshlex(); // skip {
5027    let prog = parse_program();
5028
5029    // Check for { ... } always { ... }. Direct port of zsh's
5030    // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
5031    // forces (parse.c:1632, 1637): after consuming the closing
5032    // Outbrace AND after matching the `always` keyword, the parser
5033    // explicitly resets command position so the next `{` lexes as
5034    // Inbrace. Without these resets the lexer's String-clears-cmdpos
5035    // rule (lex.rs:976-983) leaves the second `{` in word position,
5036    // turning `always { ... }` into a Simple `{` `echo` … and the
5037    // try/always pairing is silently lost.
5038    if tok() == OUTBRACE_TOK {
5039        set_incmdpos(true); // parse.c:1632 incmdpos = !zsh_construct
5040        zshlex();
5041
5042        // Check for 'always'
5043        if tok() == STRING_LEX {
5044            let s = tokstr();
5045            if s.map(|s| s == "always").unwrap_or(false) {
5046                set_incmdpos(true); // parse.c:1637 incmdpos = 1
5047                zshlex();
5048                skip_separators();
5049
5050                if tok() == INBRACE_TOK {
5051                    zshlex();
5052                    let always = parse_program();
5053                    if tok() == OUTBRACE_TOK {
5054                        zshlex();
5055                    }
5056                    return Some(ZshCommand::Try(ZshTry {
5057                        try_block: Box::new(prog),
5058                        always: Box::new(always),
5059                    }));
5060                }
5061            }
5062        }
5063    }
5064
5065    Some(ZshCommand::Cursh(Box::new(prog)))
5066}
5067
5068/// Parse function definition
5069/// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
5070/// port of zsh/Src/parse.c:1672 `par_funcdef`. zsh handles
5071/// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
5072/// the optional `[fname1 fname2 ...]` for multi-name function defs,
5073/// and the `function FOO () { ... }` traditional/POSIX hybrid form.
5074fn par_funcdef() -> Option<ZshCommand> {
5075    zshlex(); // skip 'function'
5076
5077    let mut names = Vec::new();
5078    let mut tracing = false;
5079
5080    // Handle options like -T and function names. Two subtleties:
5081    //
5082    //   1. Flags: zsh's lexer encodes a leading `-` as
5083    //      `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside the String tokstr.
5084    //      The previous `s.starts_with('-')` check failed for
5085    //      `\u{9b}T`, so `function -T NAME { body }` slipped the
5086    //      `-T` token into `names` and the function got registered
5087    //      as `T` plus the intended `NAME`.
5088    //
5089    //   2. Body opener: zsh's lexer emits the opening `{` as a
5090    //      String (not INBRACE_TOK) when it follows the String
5091    //      NAME — the preceding name token resets incmdpos to
5092    //      false, and only `{` immediately followed by `}` (the
5093    //      empty-body case) gets promoted to Inbrace. The funcdef
5094    //      parser must recognise the bare-`{` String as the body
5095    //      opener; otherwise `function NAME { body }` falls through
5096    //      to `_ => break`, no body parses, and the FuncDef never
5097    //      lands in the AST. This is consistent with C zsh's
5098    //      par_funcdef which knows it's in funcdef-header context
5099    //      and accepts the brace either way.
5100    loop {
5101        match tok() {
5102            STRING_LEX => {
5103                let _ts_s = tokstr()?;
5104                let s = _ts_s.as_str();
5105                // c:1702 — `if ((*tokstr == Inbrace || *tokstr == '{') && !tokstr[1])`.
5106                // Body opener can be either the literal `{` (early-return
5107                // path at lex.c:1141-1144 / lex.rs LX2_INBRACE cmdpos
5108                // branch) or the Inbrace marker `\u{8f}` (lex.c:1420
5109                // post-switch add(c) where c was rewritten via lextok2).
5110                if s == "{" || s == "\u{8f}" {
5111                    break;
5112                }
5113                let first = s.chars().next();
5114                if matches!(first, Some('-') | Some('+')) || matches!(first, Some(c) if c == Dash) {
5115                    if s.contains('T') {
5116                        tracing = true;
5117                    }
5118                    zshlex();
5119                    continue;
5120                }
5121                names.push(s.to_string());
5122                zshlex();
5123            }
5124            INBRACE_TOK | INOUTPAR | SEPER | NEWLIN => break,
5125            _ => break,
5126        }
5127    }
5128
5129    // Optional ()
5130    let saw_paren = tok() == INOUTPAR;
5131    if saw_paren {
5132        zshlex();
5133    }
5134
5135    skip_separators();
5136
5137    // Body opener: real Inbrace OR a String containing the literal `{`
5138    // (early-return path) OR a String containing the Inbrace marker
5139    // `\u{8f}` (bct++ path post-switch add). C parse.c:1702 handles
5140    // both string forms via `*tokstr == Inbrace || *tokstr == '{'`.
5141    let body_opener_is_string_brace =
5142        tok() == STRING_LEX && (tokstr_eq("{") || tokstr_eq("\u{8f}"));
5143    if tok() == INBRACE_TOK || body_opener_is_string_brace {
5144        // Capture body_start BEFORE the lexer advances past the
5145        // first body token. After the previous zshlex consumed
5146        // `{`, lexer.pos points just past `{` (which is where the
5147        // body source starts). The next `zshlex()` would advance
5148        // past the first token (`echo`), making body_start land
5149        // mid-body and lose the first word — `typeset -f f` would
5150        // print `a; echo b` for `{ echo a; echo b }`.
5151        let body_start = pos();
5152        zshlex();
5153        let body = parse_program();
5154        let body_end = if tok() == OUTBRACE_TOK {
5155            // Lexer has just consumed `}`; pos is past it. Body content
5156            // ends one byte before pos.
5157            pos().saturating_sub(1)
5158        } else {
5159            pos()
5160        };
5161        let body_source = input_slice(body_start, body_end)
5162            .map(|s| s.trim().to_string())
5163            .filter(|s| !s.is_empty());
5164        if tok() == OUTBRACE_TOK {
5165            zshlex();
5166        }
5167
5168        // Anonymous form `function () { body } a b c` (with `()`) or
5169        // `function { body } a b c` (zsh-only shorthand, no `()`). No
5170        // name was collected. Mirror parse_anon_funcdef: synthesize
5171        // `_zshrs_anon_N`, collect trailing args, set auto_call_args
5172        // so compile_funcdef registers + immediately calls the
5173        // function with the args as positional params.
5174        if names.is_empty() {
5175            let mut args = Vec::new();
5176            while tok() == STRING_LEX {
5177                if let Some(s) = tokstr() {
5178                    args.push(s);
5179                }
5180                zshlex();
5181            }
5182            static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
5183            let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
5184            let name = format!("_zshrs_anon_kw_{}", n);
5185            return Some(ZshCommand::FuncDef(ZshFuncDef {
5186                names: vec![name],
5187                body: Box::new(body),
5188                tracing,
5189                auto_call_args: Some(args),
5190                body_source,
5191            }));
5192        }
5193
5194        Some(ZshCommand::FuncDef(ZshFuncDef {
5195            names,
5196            body: Box::new(body),
5197            tracing,
5198            auto_call_args: None,
5199            body_source,
5200        }))
5201    } else {
5202        // Short form
5203        par_list().map(|list| {
5204            ZshCommand::FuncDef(ZshFuncDef {
5205                names,
5206                body: Box::new(ZshProgram { lists: vec![list] }),
5207                tracing,
5208                auto_call_args: None,
5209                body_source: None,
5210            })
5211        })
5212    }
5213}
5214
5215/// Parse inline function definition: name() { ... }
5216/// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
5217/// without the `function` keyword). The name has already been
5218/// consumed and pushed by par_simple before this method fires.
5219/// C source: handled inline in par_simple's INOUTPAR-after-name
5220/// arm (parse.c:1836-2228).
5221fn parse_inline_funcdef(name: String) -> Option<ZshCommand> {
5222    // par_simple's STRING loop left `incmdpos = 0`; the funcdef body
5223    // `{ ... }` requires `incmdpos = 1` so the lexer recognises `{`
5224    // as INBRACE_TOK (current-shell block opener) instead of a
5225    // literal `{` STRING. Without this, `myfunc() { echo body }`
5226    // parsed the body as the single STRING `"{"`, then `echo body`
5227    // fell out at top level. Mirrors the C path where par_cmd's
5228    // dispatcher (parse.c:958) is called with `incmdpos = 1` for
5229    // the funcdef body.
5230    set_incmdpos(true);
5231    // Skip ()
5232    if tok() == INOUTPAR {
5233        zshlex();
5234    }
5235
5236    skip_separators();
5237
5238    // Parse body
5239    if tok() == INBRACE_TOK {
5240        // Same body_start-before-zshlex fix as par_funcdef.
5241        let body_start = pos();
5242        zshlex();
5243        let body = parse_program();
5244        let body_end = if tok() == OUTBRACE_TOK {
5245            pos().saturating_sub(1)
5246        } else {
5247            pos()
5248        };
5249        let body_source = input_slice(body_start, body_end)
5250            .map(|s| s.trim().to_string())
5251            .filter(|s| !s.is_empty());
5252        if tok() == OUTBRACE_TOK {
5253            zshlex();
5254        }
5255        Some(ZshCommand::FuncDef(ZshFuncDef {
5256            names: vec![name],
5257            body: Box::new(body),
5258            tracing: false,
5259            auto_call_args: None,
5260            body_source,
5261        }))
5262    } else if unset(SHORTLOOPS) {
5263        // c:1742 — `else if (unset(SHORTLOOPS)) YYERRORV(oecused);` —
5264        // funcdef short body (`name() cmd` without `{...}`) only
5265        // accepted when SHORTLOOPS is set. parse_init seeds
5266        // SHORTLOOPS=on so this fires only when a script
5267        // explicitly disabled the option.
5268        error("parse error: short function body form requires SHORTLOOPS option");
5269        None
5270    } else {
5271        match par_cmd() {
5272            Some(cmd) => {
5273                let list = ZshList {
5274                    sublist: ZshSublist {
5275                        pipe: ZshPipe {
5276                            cmd,
5277                            next: None,
5278                            lineno: lineno(),
5279                            merge_stderr: false,
5280                        },
5281                        next: None,
5282                        flags: SublistFlags::default(),
5283                    },
5284                    flags: ListFlags::default(),
5285                };
5286                Some(ZshCommand::FuncDef(ZshFuncDef {
5287                    names: vec![name],
5288                    body: Box::new(ZshProgram { lists: vec![list] }),
5289                    tracing: false,
5290                    auto_call_args: None,
5291                    body_source: None,
5292                }))
5293            }
5294            None => None,
5295        }
5296    }
5297}
5298
5299/// Parse [[ ... ]] conditional
5300/// Parse `[[ EXPR ]]` conditional expression. Direct port of
5301/// zsh/Src/parse.c:2409 `par_cond` (and helpers par_cond_1,
5302/// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
5303/// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
5304/// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
5305///   <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
5306fn par_cond() -> Option<ZshCommand> {
5307    // C par_dinbrack (parse.c:1810-1822) wraps the body parse with
5308    // `incond = 1; incmdpos = 0;` BEFORE the first zshlex past `[[`,
5309    // and resets to `incond = 0; incmdpos = 1;` after `]]`. Without
5310    // `incond = 1`, lex.c does not promote `]]` to DOUTBRACK and the
5311    // cond body bleeds past the close bracket — the parser then
5312    // sees `]]` as a separate STRING command. Every `if [[ ... ]]; then`
5313    // failed with `command not found: ]]` before this fix.
5314    set_incond(1);
5315    set_incmdpos(false);
5316    zshlex(); // skip [[
5317    // Empty cond `[[ ]]` is a parse error in zsh — emit the
5318    // diagnostic and return None so the caller produces a
5319    // non-zero exit. Without this, `[[ ]]` silently passed and
5320    // returned exit 0.
5321    if tok() == DOUTBRACK {
5322        error("parse error near `]]'");
5323        set_incond(0);
5324        set_incmdpos(true);
5325        zshlex();
5326        return None;
5327    }
5328    let cond = parse_cond_expr();
5329
5330    if tok() == DOUTBRACK {
5331        set_incond(0);
5332        set_incmdpos(true);
5333        zshlex();
5334    } else {
5335        // Recover incond/incmdpos so subsequent parsing isn't stuck
5336        // in cond-mode if the close bracket is missing.
5337        set_incond(0);
5338        set_incmdpos(true);
5339    }
5340
5341    cond.map(ZshCommand::Cond)
5342}
5343
5344/// Parse conditional expression
5345/// Top of `[[ ]]` cond-expression parsing — entry to recursive
5346/// descent (or → and → not → primary). Direct port of zsh's
5347/// par_cond_1 at parse.c:2434-2475.
5348fn parse_cond_expr() -> Option<ZshCond> {
5349    parse_cond_or()
5350}
5351
5352/// Cond-expression `||` level. C: inside par_cond_1 at
5353/// parse.c:2434-2475 (the `cond_or` ladder).
5354fn parse_cond_or() -> Option<ZshCond> {
5355    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
5356    if check_recursion() {
5357        error("parse_cond_or: max recursion depth exceeded");
5358        PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5359        return None;
5360    }
5361
5362    let left = match parse_cond_and() {
5363        Some(l) => l,
5364        None => {
5365            PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5366            return None;
5367        }
5368    };
5369
5370    skip_cond_separators();
5371
5372    let result = if tok() == DBAR {
5373        zshlex();
5374        skip_cond_separators();
5375        parse_cond_or().map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
5376    } else {
5377        Some(left)
5378    };
5379
5380    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5381    result
5382}
5383
5384/// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
5385fn parse_cond_and() -> Option<ZshCond> {
5386    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
5387    if check_recursion() {
5388        error("parse_cond_and: max recursion depth exceeded");
5389        PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5390        return None;
5391    }
5392
5393    let left = match parse_cond_not() {
5394        Some(l) => l,
5395        None => {
5396            PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5397            return None;
5398        }
5399    };
5400
5401    skip_cond_separators();
5402
5403    let result = if tok() == DAMPER {
5404        zshlex();
5405        skip_cond_separators();
5406        parse_cond_and().map(|right| ZshCond::And(Box::new(left), Box::new(right)))
5407    } else {
5408        Some(left)
5409    };
5410
5411    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5412    result
5413}
5414
5415/// Cond-expression `!` negation level. C: handled inside
5416/// par_cond_2 at parse.c:2476-2625 via the Bang token check.
5417fn parse_cond_not() -> Option<ZshCond> {
5418    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
5419    if check_recursion() {
5420        error("parse_cond_not: max recursion depth exceeded");
5421        PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5422        return None;
5423    }
5424
5425    skip_cond_separators();
5426
5427    // ! can be either BANG_TOK or String "!"
5428    let is_not =
5429        tok() == BANG_TOK || (tok() == STRING_LEX && tokstr().map(|s| s == "!").unwrap_or(false));
5430    if is_not {
5431        zshlex();
5432        let inner = match parse_cond_not() {
5433            Some(i) => i,
5434            None => {
5435                PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5436                return None;
5437            }
5438        };
5439        PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5440        return Some(ZshCond::Not(Box::new(inner)));
5441    }
5442
5443    if tok() == INPAR_TOK {
5444        zshlex();
5445        skip_cond_separators();
5446        let inner = match parse_cond_expr() {
5447            Some(i) => i,
5448            None => {
5449                PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5450                return None;
5451            }
5452        };
5453        skip_cond_separators();
5454        if tok() == OUTPAR_TOK {
5455            zshlex();
5456        }
5457        PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5458        return Some(inner);
5459    }
5460
5461    let result = parse_cond_primary();
5462    PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5463    result
5464}
5465
5466/// Cond-expression primary: unary tests (-f, -d, ...), binary
5467/// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
5468/// sub-expressions. Direct port of par_cond_double / par_cond_triple
5469/// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
5470fn parse_cond_primary() -> Option<ZshCond> {
5471    let s1 = match tok() {
5472        STRING_LEX => {
5473            let s = tokstr().unwrap_or_default();
5474            zshlex();
5475            s
5476        }
5477        _ => return None,
5478    };
5479
5480    skip_cond_separators();
5481
5482    // Check for unary operator. zsh's lexer tokenizes leading `-` as
5483    // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside gettokstr (lex.c:1390-1400
5484    // LX2_DASH — `-` always becomes Dash, untokenized later). Match
5485    // either form here, and use char-count not byte-count since Dash
5486    // is 2 UTF-8 bytes (`\xc2\x9b`).
5487    let s1_chars: Vec<char> = s1.chars().collect();
5488    if s1_chars.len() == 2 && IS_DASH(s1_chars[0]) {
5489        let s2 = match tok() {
5490            STRING_LEX => {
5491                let s = tokstr().unwrap_or_default();
5492                zshlex();
5493                s
5494            }
5495            _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
5496        };
5497        return Some(ZshCond::Unary(s1, s2));
5498    }
5499
5500    // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
5501    //   incond++;  /* parentheses do globbing */
5502    //   do condlex(); while (COND_SEP());
5503    //   incond--;  /* parentheses do grouping */
5504    // The bump makes the lexer treat `(` as a literal character inside
5505    // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning Inpar
5506    // and splitting the regex into multiple tokens.
5507    let op = match tok() {
5508        STRING_LEX => {
5509            let s = tokstr().unwrap_or_default();
5510            set_incond(incond() + 1);
5511            zshlex();
5512            set_incond(incond() - 1);
5513            s
5514        }
5515        INANG_TOK => {
5516            set_incond(incond() + 1);
5517            zshlex();
5518            set_incond(incond() - 1);
5519            "<".to_string()
5520        }
5521        OUTANG_TOK => {
5522            set_incond(incond() + 1);
5523            zshlex();
5524            set_incond(incond() - 1);
5525            ">".to_string()
5526        }
5527        _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
5528    };
5529
5530    skip_cond_separators();
5531
5532    let s2 = match tok() {
5533        STRING_LEX => {
5534            let s = tokstr().unwrap_or_default();
5535            zshlex();
5536            s
5537        }
5538        _ => return Some(ZshCond::Binary(s1, op, String::new())),
5539    };
5540
5541    if op == "=~" {
5542        Some(ZshCond::Regex(s1, s2))
5543    } else {
5544        Some(ZshCond::Binary(s1, op, s2))
5545    }
5546}
5547
5548fn skip_cond_separators() {
5549    while tok() == SEPER && {
5550        let s = tokstr();
5551        s.map(|s| !s.contains(';')).unwrap_or(true)
5552    } {
5553        zshlex();
5554    }
5555}
5556
5557/// Parse (( ... )) arithmetic command
5558/// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
5559/// `par_dinbrack` (despite the name; the function actually handles
5560/// DINPAR `(( ))` blocks too).
5561fn parse_arith() -> Option<ZshCommand> {
5562    let expr = tokstr().unwrap_or_default();
5563    zshlex();
5564    Some(ZshCommand::Arith(expr))
5565}
5566
5567/// Parse time command
5568/// Parse `time CMD` (POSIX time keyword). Direct port of
5569/// zsh/Src/parse.c:1787 `par_time`. The `time` keyword
5570/// times the execution of the following pipeline / cmd.
5571fn par_time() -> Option<ZshCommand> {
5572    zshlex(); // skip 'time'
5573
5574    // Check if there's a pipeline to time
5575    if tok() == SEPER || tok() == NEWLIN || tok() == ENDINPUT {
5576        Some(ZshCommand::Time(None))
5577    } else {
5578        let sublist = par_sublist();
5579        Some(ZshCommand::Time(sublist.map(Box::new)))
5580    }
5581}
5582
5583/// Check if next token is ()
5584fn peek_inoutpar() -> bool {
5585    tok() == INOUTPAR
5586}
5587
5588/// Skip separator tokens
5589fn skip_separators() {
5590    let mut iterations = 0;
5591    while tok() == SEPER || tok() == NEWLIN {
5592        iterations += 1;
5593        if iterations > 100_000 {
5594            error("skip_separators: too many iterations");
5595            return;
5596        }
5597        zshlex();
5598    }
5599}
5600
/// Record a parse error by forwarding `msg` to
/// `crate::ported::utils::zerr`. Per the port notes this corresponds
/// to the `zerr` invocation in `Src/parse.c:625-633 yyerror`; setting
/// `errflag |= ERRFLAG_ERROR` (when `noerrs == 0`) and printing the
/// diagnostic are presumably done inside `zerr` — not visible here.
fn error(msg: &str) {
    crate::ported::utils::zerr(msg);
}
5608
5609// =====================================================================
5610// `bin_zcompile` and wordcode-dump helpers — port of `Src/parse.c:3104+`.
5611//
5612// The wordcode dump format (`.zwc`) is a serialized parse tree zsh can
5613// `mmap()` and dispatch from without re-parsing on every shell start.
5614// File layout (one struct = `FD_PRELEN` `u32`s):
5615//   - `pre[0]` = magic word (FD_MAGIC native byte-order, FD_OMAGIC
5616//     opposite byte-order).
5617//   - `pre[1]` = packed `{flags(8) | other_offset(24)}` byte field.
5618//   - `pre[2..12]` = `ZSH_VERSION` C-string padded to 40 bytes.
5619//   - `pre[12]` = `fdheaderlen` (total prelude+header word count).
5620//   - Then a sequence of `struct fdhead` records, one per function,
5621//     each followed by its NUL-terminated name (padded to 4-byte).
5622//   - Then the wordcode bytes for every function back-to-back.
5623//
// On a little-endian host the dump is written twice: first with
// `FD_MAGIC` for native readers, then the body is re-walked
// byte-swapped and a second `FD_OMAGIC` copy is emitted so big-endian
// readers can mmap it too.
5627// =====================================================================
5628
5629// File-format constants — port of `Src/parse.c:3104-3150`.
5630
/// `#define FD_EXT ".zwc"` from `Src/parse.c:3104`. File-name suffix
/// of a wordcode dump.
pub const FD_EXT: &str = ".zwc";

/// `#define FD_MINMAP 4096` from `Src/parse.c:3105`. mmap threshold
/// — `-M` mode only kicks in when the wordcode body is at least
/// this many bytes (otherwise read(2) is preferred).
pub const FD_MINMAP: usize = 4096;

/// `#define FD_PRELEN 12` from `Src/parse.c:3107`. File-prelude
/// length in u32 words: 1 magic word + 1 packed flags/offset word +
/// 10 version words.
pub const FD_PRELEN: usize = 12;

/// `#define FD_MAGIC 0x04050607` from `Src/parse.c:3108`. Sentinel
/// for native-byte-order dumps.
pub const FD_MAGIC: u32 = 0x04050607;

/// `#define FD_OMAGIC 0x07060504` from `Src/parse.c:3109`. Sentinel
/// for opposite-byte-order dumps (byte-swapped FD_MAGIC).
pub const FD_OMAGIC: u32 = 0x07060504;

/// `#define FDF_MAP 1` from `Src/parse.c:3111`. Bit set when the
/// dump should be `mmap()`-ed (`-M` flag) vs read normally (`-R`).
pub const FDF_MAP: u32 = 1;

/// `#define FDF_OTHER 2` from `Src/parse.c:3112`. Bit indicating
/// this dump has an opposite-byte-order copy at `fdother(f)`.
pub const FDF_OTHER: u32 = 2;

/// `#define FDHF_KSHLOAD 1` from `Src/parse.c:3149`. Function-header
/// flag value — `-k` ksh-style autoload marker.
pub const FDHF_KSHLOAD: u32 = 1;

/// `#define FDHF_ZSHLOAD 2` from `Src/parse.c:3150`. Function-header
/// flag value — `-z` zsh-style autoload marker.
pub const FDHF_ZSHLOAD: u32 = 2;
5666
/// Port of `struct fdhead` from `Src/parse.c:3116`. One record per
/// function inside a wordcode dump. All fields are `wordcode` (u32),
/// and the field order matches the on-disk word order that
/// `read_fdhead` decodes (start, len, npats, strs, hlen, flags).
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy)]
pub struct fdhead {
    /// Offset (in u32 words) to the start of this function's
    /// wordcode body inside the dump.
    pub start: u32, // c:3117
    /// Wordcode-byte length of the body (excludes pattern-prog slots).
    pub len: u32, // c:3118
    /// Number of compiled patterns the body references.
    pub npats: u32, // c:3119
    /// Offset of the string table inside `prog->prog`.
    pub strs: u32, // c:3120
    /// Header-record length in u32 words (record + name); the stride
    /// `nextfdhead_offset` uses to reach the following record.
    pub hlen: u32, // c:3121
    /// Packed `{ kshload_bits(2) | name_tail_offset(30) }` field;
    /// unpack with `fdhflags` / `fdhtail`.
    pub flags: u32, // c:3122
}
5686
/// Size of `struct fdhead` in `wordcode` (u32) units, derived from the
/// struct's byte size divided by 4. Used by the header-walk helpers
/// below (`fdname`, `read_fdhead`).
pub const FDHEAD_WORDS: usize = std::mem::size_of::<fdhead>() / 4;
5690
/// Port of `struct wcfunc` from `Src/parse.c:3158`. Build-time
/// per-function aggregate before write_dump emits it. The Rust
/// port stores the source-text body inline since the C-side
/// `Eprog` ↔ `parse_string` chain isn't fully wired through this
/// layer yet (`build_dump` falls back to source-text caching).
#[allow(non_camel_case_types)]
#[derive(Debug, Clone)]
pub struct wcfunc {
    /// Function name as it will be recorded in the dump.
    pub name: String, // c:3159
    /// Per-function flag word (presumably the `FDHF_*` autoload
    /// markers — confirm against the writer once it lands).
    pub flags: u32,   // c:3161
    /// Compiled body wordcode (one `u32` array per fn). Empty until
    /// the eprog emit-side lands; `write_dump` then walks each entry.
    pub body: Vec<u32>,
}
5705
5706// `fdheaderlen` / `fdmagic` / `fdflags` / etc. macros from
5707// `Src/parse.c:3125-3152`. C uses raw pointer arithmetic on a
5708// `Wordcode` (= `u32 *`); the Rust port takes a slice and indexes.
5709
/// Port of `fdheaderlen(f)` macro (`Src/parse.c:3125`) — header
/// length in u32 words, read from word index `FD_PRELEN` of the dump.
///
/// # Panics
/// Panics if `f` holds `FD_PRELEN` words or fewer.
#[inline]
pub fn fdheaderlen(f: &[u32]) -> u32 {
    f[FD_PRELEN]
}
5716
/// Port of `fdmagic(f)` macro (`Src/parse.c:3127`) — first prelude
/// word, expected to be either `FD_MAGIC` or `FD_OMAGIC`.
///
/// # Panics
/// Panics if `f` is empty.
#[inline]
pub fn fdmagic(f: &[u32]) -> u32 {
    f[0]
}
5723
/// Port of `fdflags(f)` macro (`Src/parse.c:3131`) — the flags byte,
/// i.e. the least-significant byte of the packed `pre[1]` word.
///
/// # Panics
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdflags(f: &[u32]) -> u32 {
    // View `pre[1]` as four little-endian bytes; flags live in byte 0.
    let [flag_byte, ..] = f[1].to_le_bytes();
    u32::from(flag_byte)
}
5731
/// Port of `fdsetflags(f, v)` macro (`Src/parse.c:3132`) — overwrite
/// the flags byte (least-significant byte of `pre[1]`) with `v`,
/// leaving the upper three bytes untouched.
///
/// # Panics
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetflags(f: &mut [u32], v: u8) {
    let [_, b1, b2, b3] = f[1].to_le_bytes();
    f[1] = u32::from_le_bytes([v, b1, b2, b3]);
}
5738
/// Port of `fdother(f)` macro (`Src/parse.c:3133`) — the upper 24
/// bits of `pre[1]`, holding the byte offset of the
/// opposite-byte-order copy of the dump.
///
/// # Panics
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdother(f: &[u32]) -> u32 {
    let [_, lo, mid, hi] = f[1].to_le_bytes();
    u32::from_le_bytes([lo, mid, hi, 0])
}
5746
/// Port of `fdsetother(f, o)` macro (`Src/parse.c:3134`) — store the
/// low 24 bits of `o` in the upper three bytes of `pre[1]`, keeping
/// the flags byte. Bits 24..32 of `o` are discarded.
///
/// # Panics
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetother(f: &mut [u32], o: u32) {
    let [flag_byte, ..] = f[1].to_le_bytes();
    let [o0, o1, o2, _] = o.to_le_bytes();
    f[1] = u32::from_le_bytes([flag_byte, o0, o1, o2]);
}
5752
/// Port of `fdversion(f)` macro (`Src/parse.c:3140`) — decode the
/// `ZSH_VERSION` C-string stored from `pre[2]` onwards.
///
/// Looks at up to 10 words (40 bytes), unpacked little-endian, stops
/// at the first NUL, and converts lossily to UTF-8.
///
/// # Panics
/// Panics if `f` has fewer than two words.
pub fn fdversion(f: &[u32]) -> String {
    let mut text: Vec<u8> = Vec::with_capacity(40);
    'scan: for word in f[2..].iter().take(10) {
        for byte in word.to_le_bytes() {
            if byte == 0 {
                break 'scan;
            }
            text.push(byte);
        }
    }
    String::from_utf8_lossy(&text).into_owned()
}
5764
/// Port of `firstfdhead(f)` macro (`Src/parse.c:3142`). Where the C
/// macro yields a pointer to the first `struct fdhead` past the
/// prelude, this returns the equivalent u32-word offset, `FD_PRELEN`.
#[inline]
pub fn firstfdhead_offset() -> usize {
    FD_PRELEN
}
5771
/// Port of `nextfdhead(f)` macro (`Src/parse.c:3143`) — word offset
/// of the record following the one at `cur`, computed by adding the
/// current record's `hlen` slot to `cur`.
///
/// # Panics
/// Panics if `cur + 4` is outside `f`.
#[inline]
pub fn nextfdhead_offset(f: &[u32], cur: usize) -> usize {
    // `.hlen` is field 4 of fdhead.
    const HLEN_SLOT: usize = 4;
    let stride = f[cur + HLEN_SLOT] as usize;
    cur + stride
}
5778
5779/// Port of `fdhflags(f)` macro (`Src/parse.c:3145`) — low 2 bits
5780/// of the header's `flags` field (the kshload/zshload marker).
5781#[inline]
5782pub fn fdhflags(h: &fdhead) -> u32 {
5783    h.flags & 0x3
5784}
5785
5786/// Port of `fdhtail(f)` macro (`Src/parse.c:3146`) — high 30 bits
5787/// of `flags`, byte offset from the name start to its basename.
5788#[inline]
5789pub fn fdhtail(h: &fdhead) -> u32 {
5790    h.flags >> 2
5791}
5792
/// Port of `fdhbldflags(f, t)` macro (`Src/parse.c:3147`) — combine
/// `(flags, tail)` into one u32: `tail` shifted up by two bits, OR-ed
/// with `flags`. Inverse of `fdhflags` / `fdhtail` when `flags < 4`
/// and `tail` fits in 30 bits.
#[inline]
pub fn fdhbldflags(flags: u32, tail: u32) -> u32 {
    let shifted_tail = tail << 2;
    shifted_tail | flags
}
5799
5800/// Port of `fdname(f)` macro (`Src/parse.c:3152`) — name string
5801/// follows the fdhead record immediately. Reads bytes from the
5802/// dump buffer until NUL.
5803pub fn fdname(buf: &[u32], header_offset: usize) -> String {
5804    let name_word_off = header_offset + FDHEAD_WORDS;
5805    let bytes: Vec<u8> = buf[name_word_off..]
5806        .iter()
5807        .flat_map(|w| w.to_le_bytes().into_iter())
5808        .take_while(|&b| b != 0)
5809        .collect();
5810    String::from_utf8_lossy(&bytes).into_owned()
5811}
5812
5813/// Decode a `fdhead` record at the given u32-word offset in the
5814/// dump buffer. Used by the header-walk loops in `bin_zcompile -t`.
5815pub fn read_fdhead(buf: &[u32], offset: usize) -> Option<fdhead> {
5816    if offset + FDHEAD_WORDS > buf.len() {
5817        return None;
5818    }
5819    Some(fdhead {
5820        start: buf[offset],
5821        len: buf[offset + 1],
5822        npats: buf[offset + 2],
5823        strs: buf[offset + 3],
5824        hlen: buf[offset + 4],
5825        flags: buf[offset + 5],
5826    })
5827}
5828
/// Port of `fdswap(Wordcode p, int n)` from `Src/parse.c:3318`.
/// Reverses the byte order of every u32 in `p`, in place (the slice
/// length plays the role of C's `n`). Used when writing the
/// opposite-byte-order copy of a wordcode dump.
pub fn fdswap(p: &mut [u32]) {
    // c:3318
    p.iter_mut()
        .for_each(|word| *word = u32::from_be_bytes(word.to_le_bytes()));
}
5838
5839/// Port of `dump_find_func(Wordcode h, char *name)` from
5840/// `Src/parse.c:3167`. Walks the header table inside a loaded
5841/// dump for a function with the given basename; returns true on hit.
5842pub fn dump_find_func(h: &[u32], name: &str) -> bool {
5843    // c:3167
5844    let header_words = fdheaderlen(h) as usize;
5845    let end = header_words; // walking u32 offsets, end-exclusive
5846    let mut cur = firstfdhead_offset();
5847    while cur < end {
5848        if let Some(fh) = read_fdhead(h, cur) {
5849            let full = fdname(h, cur);
5850            let tail = fdhtail(&fh) as usize;
5851            let basename = if tail <= full.len() {
5852                &full[tail..]
5853            } else {
5854                ""
5855            };
5856            if basename == name {
5857                return true;
5858            }
5859            cur = nextfdhead_offset(h, cur);
5860        } else {
5861            break;
5862        }
5863    }
5864    false
5865}
5866
5867/// Port of `load_dump_header(char *nam, char *name, int err)` from
5868/// `Src/parse.c:3258`. Opens the file, reads + validates the magic
5869/// and version, then slurps the full header table into memory.
5870/// Returns the header u32-array on success or None on any failure
5871/// (emitting C-shaped warnings when `err != 0`).
5872pub fn load_dump_header(nam: &str, name: &str, err: i32) -> Option<Vec<u32>> {
5873    // c:3258
5874
5875    let mut f = match File::open(name) {
5876        // c:3263
5877        Ok(h) => h,
5878        Err(_) => {
5879            if err != 0 {
5880                zwarnnam(nam, &format!("can't open zwc file: {}", name)); // c:3265
5881            }
5882            return None;
5883        }
5884    };
5885
5886    // Read FD_PRELEN+1 u32 words = 52 bytes.
5887    let mut buf_bytes = vec![0u8; (FD_PRELEN + 1) * 4];
5888    if f.read_exact(&mut buf_bytes).is_err() {
5889        if err != 0 {
5890            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
5891        }
5892        return None;
5893    }
5894    let mut buf: Vec<u32> = buf_bytes
5895        .chunks_exact(4)
5896        .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
5897        .collect();
5898
5899    // c:3270 — magic + version check. `ZSH_VERSION` (the C-side
5900    // global) — zshrs reports "5.9" in `--zsh` mode (Src/init.c parity).
5901    let magic_ok = fdmagic(&buf) == FD_MAGIC || fdmagic(&buf) == FD_OMAGIC;
5902    let v_ok = fdversion(&buf) == "5.9";
5903    if !magic_ok {
5904        if err != 0 {
5905            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
5906        }
5907        return None;
5908    }
5909    if !v_ok {
5910        if err != 0 {
5911            zwarnnam(
5912                nam,
5913                &format!(
5914                    "zwc file has wrong version (zsh-{}): {}", // c:3274
5915                    fdversion(&buf),
5916                    name
5917                ),
5918            );
5919        }
5920        return None;
5921    }
5922
5923    // c:3285 — if magic matches host byte order, head len is `pre[FD_PRELEN]`.
5924    // Else seek to `fdother(buf)` and re-read.
5925    if fdmagic(&buf) != FD_MAGIC {
5926        let other = fdother(&buf) as u64; // c:3290
5927        if f.seek(SeekFrom::Start(other)).is_err() || f.read_exact(&mut buf_bytes).is_err() {
5928            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3295
5929            return None;
5930        }
5931        buf = buf_bytes
5932            .chunks_exact(4)
5933            .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
5934            .collect();
5935    }
5936
5937    let total_words = fdheaderlen(&buf) as usize; // c:3286/3299
5938    if total_words < FD_PRELEN + 1 {
5939        zwarnnam(nam, &format!("invalid zwc file: {}", name));
5940        return None;
5941    }
5942
5943    // Read the remaining header words.
5944    let mut head: Vec<u32> = Vec::with_capacity(total_words);
5945    head.extend_from_slice(&buf);
5946    let remaining_words = total_words - (FD_PRELEN + 1);
5947    if remaining_words > 0 {
5948        let mut rest_bytes = vec![0u8; remaining_words * 4]; // c:3305
5949        if f.read_exact(&mut rest_bytes).is_err() {
5950            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3307
5951            return None;
5952        }
5953        for c in rest_bytes.chunks_exact(4) {
5954            head.push(u32::from_le_bytes([c[0], c[1], c[2], c[3]]));
5955        }
5956    }
5957    Some(head) // c:3311
5958}
5959
5960/// Port of `build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)`
5961/// from `Src/parse.c:3397`. Source-file → wordcode dump compiler.
5962///
5963/// Status: scaffolded but the wordcode-emit step depends on
5964/// `parse_string` returning a fully-wired `Eprog` with `prog/strs/
5965/// npats` fields populated. The current `parse_string`/`parse` shape
5966/// emits an AST (`ZshProgram`) but not yet the wordcode array C
5967/// expects in this dump format. Until that lands, this returns 1
5968/// with a clear "wordcode emit not yet ported" message so callers
5969/// (autoload from `.zwc`, `zcompile path/to/file`) fail loud.
5970pub fn build_dump(
5971    nam: &str, // c:3397
5972    dump: &str,
5973    _files: &[String],
5974    _ali: i32,
5975    _map: i32,
5976    _flags: u32,
5977) -> i32 {
5978    crate::ported::utils::zwarnnam(nam, &format!("{}: wordcode dump emit not yet ported", dump));
5979    1
5980}
5981
5982/// Port of `build_cur_dump(char *nam, char *dump, char **names, int match, int map, int what)`
5983/// from `Src/parse.c:3536`. Compiles currently-loaded functions
5984/// (`-c` for functions, `-a` for aliases) into a `.zwc` dump.
5985/// Same wordcode-emit dependency as `build_dump`.
5986pub fn build_cur_dump(
5987    nam: &str, // c:3536
5988    dump: &str,
5989    _names: &[String],
5990    _match_: i32,
5991    _map: i32,
5992    _what: i32,
5993) -> i32 {
5994    crate::ported::utils::zwarnnam(
5995        nam,
5996        &format!("{}: wordcode dump-current emit not yet ported", dump),
5997    );
5998    1
5999}
6000
/// Port of `zwcstat(char *filename, struct stat *buf)` from
/// `Src/parse.c:3656`. Stats `filename`; if that fails, stats
/// `<filename>.old` instead (per the port notes, the `.old` suffix
/// keeps a previous dump readable while a rewrite is in progress).
/// Returns `None` when neither path can be stat-ed.
pub fn zwcstat(filename: &str) -> Option<std::fs::Metadata> {
    // c:3656
    std::fs::metadata(filename)
        .or_else(|_| std::fs::metadata(format!("{}.old", filename)))
        .ok()
}
6014
/// Port of `load_dump_file(char *dump, struct stat *sbuf, int other, int len)`
/// from `Src/parse.c:3675`. Where C reads or mmap()s the file, this
/// port reads it into memory.
///
/// Opens `dump`, seeks to byte offset `other` when it is non-zero,
/// reads to end of file and decodes the bytes as little-endian u32
/// words (a trailing partial word is dropped). `_sbuf` and `_len` are
/// unused. Returns `None` on any I/O error.
pub fn load_dump_file(
    dump: &str, // c:3675
    _sbuf: &std::fs::Metadata,
    other: i32,
    _len: usize,
) -> Option<Vec<u32>> {
    let mut file = File::open(dump).ok()?;
    if other != 0 {
        file.seek(SeekFrom::Start(other as u64)).ok()?;
    }

    let mut raw = Vec::new();
    file.read_to_end(&mut raw).ok()?;

    let mut words = Vec::with_capacity(raw.len() / 4);
    for quad in raw.chunks_exact(4) {
        words.push(u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]));
    }
    Some(words)
}
6037
/// Port of `try_dump_file(char *path, char *name, char *file, int *ksh, int test_only)`
/// from `Src/parse.c:3746`. Intended to load a function from a `.zwc`
/// in the given fpath directory and report `(found, ksh_load)`.
///
/// Stub: ignores all arguments and always returns `None` until the
/// dump-cache port (`FuncDump`) lands.
pub fn try_dump_file(
    _path: &str,
    _name: &str,
    _file: &str, // c:3746
    _test_only: bool,
) -> Option<(bool, bool)> {
    None
}
6050
/// Port of `try_source_file(char *file)` from `Src/parse.c:3795`.
/// Intended to try `source <file>` and fall back to
/// `source <file>.zwc`, returning the resolved path on a hit.
///
/// Stub: ignores `_file` and always returns `None` until the
/// dump-cache port lands.
pub fn try_source_file(_file: &str) -> Option<String> {
    // c:3795
    None
}
6059
/// Port of `check_dump_file(char *file, struct stat *sbuf, char *name, int *ksh, int test_only)`
/// from `Src/parse.c:3833`. Intended to open and validate a `.zwc`
/// file and return its loaded buffer (the `bool` presumably carries
/// C's `*ksh` out-parameter — confirm when implemented).
///
/// Stub: ignores all arguments and currently always returns `None`.
pub fn check_dump_file(
    _file: &str, // c:3833
    _sbuf: &std::fs::Metadata,
    _name: &str,
    _test_only: bool,
) -> Option<(Vec<u32>, bool)> {
    None
}
6071
/// `static FuncDump dumps;` from `Src/parse.c:3652` — head of the
/// loaded-`.zwc` linked list. C walks `dumps`/`p->next` directly;
/// the Rust port keeps a `Mutex<Vec<funcdump>>` and the refcount
/// helpers below locate entries by comparing `filename`, so no
/// raw-pointer compare is needed. Starts out empty.
pub static DUMPS: std::sync::Mutex<Vec<crate::ported::zsh_h::funcdump>> =
    std::sync::Mutex::new(Vec::new());
6078
6079/// Port of `incrdumpcount(FuncDump f)` from `Src/parse.c:3970/4021`.
6080/// `f->count++;` — refcount-up a loaded dump entry. The Rust port
6081/// keys lookup by `filename` because Rust can't raw-pointer-compare
6082/// funcdump values inside a `Mutex<Vec<...>>`; same observable
6083/// effect (the count of the matching entry increments).
6084pub fn incrdumpcount(f: &crate::ported::zsh_h::funcdump) {
6085    // c:3970
6086    let key = f.filename.as_deref();
6087    let mut g = DUMPS.lock().unwrap();
6088    for d in g.iter_mut() {
6089        if d.filename.as_deref() == key {
6090            d.count += 1; // c:3973
6091            return;
6092        }
6093    }
6094}
6095
6096/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. C
6097/// `munmap`s, `zclose`s the fd, and frees the struct. The Rust
6098/// port relies on Drop for the `funcdump` (no mmap held in this
6099/// port — `addr`/`map` are byte-offset placeholders), so the
6100/// equivalent is removing the entry from the dumps list. Called
6101/// by `decrdumpcount` when the refcount hits zero (c:3988) and
6102/// by `closedumps` when shutting down (c:4008).
6103fn freedump_locked(
6104    g: &mut std::sync::MutexGuard<'_, Vec<crate::ported::zsh_h::funcdump>>,
6105    filename: &str,
6106) {
6107    // c:3976
6108    g.retain(|d| d.filename.as_deref() != Some(filename));
6109}
6110
6111/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. Public
6112/// helper for the rare external caller; locks the dumps mutex and
6113/// drops the entry with the given filename.
6114pub fn freedump(f: &crate::ported::zsh_h::funcdump) {
6115    // c:3976
6116    let mut g = DUMPS.lock().unwrap();
6117    if let Some(name) = f.filename.as_deref() {
6118        freedump_locked(&mut g, name);
6119    }
6120}
6121
6122/// Port of `decrdumpcount(FuncDump f)` from `Src/parse.c:3988/4026`.
6123/// `f->count--; if (!f->count) { unlink from dumps; freedump(f); }`.
6124pub fn decrdumpcount(f: &crate::ported::zsh_h::funcdump) {
6125    // c:3988
6126    let key = f.filename.clone();
6127    let mut g = DUMPS.lock().unwrap();
6128    let mut hit_zero: Option<String> = None;
6129    for d in g.iter_mut() {
6130        if d.filename == key {
6131            d.count -= 1; // c:3991
6132            if d.count == 0 {
6133                // c:3992
6134                hit_zero = d.filename.clone();
6135            }
6136            break;
6137        }
6138    }
6139    if let Some(name) = hit_zero {
6140        // c:3994-4001
6141        freedump_locked(&mut g, &name);
6142    }
6143}
6144
6145/// Port of `closedumps(void)` from `Src/parse.c:4008/4033`. Walks
6146/// `dumps` freeing every entry. Called on shell exit (exec.c:522).
6147pub fn closedumps() {
6148    // c:4008
6149    let mut g = DUMPS.lock().unwrap();
6150    g.clear(); // c:4011-4014 `while (dumps) { ... freedump(...); ... }`
6151}
6152
6153/// Port of `dump_autoload(char *nam, char *file, int on, Options ops, int func)`
6154/// from `Src/parse.c:4042`. Registers every function in a `.zwc`
6155/// for autoload via `shfunctab`. Stub: returns 1 (error) until the
6156/// dump-cache port lands.
6157pub fn dump_autoload(
6158    nam: &str,
6159    file: &str, // c:4042
6160    _on: i32,
6161    _ops: &crate::ported::zsh_h::options,
6162    _func: i32,
6163) -> i32 {
6164    zwarnnam(nam, &format!("{}: zwc-based autoload not yet ported", file));
6165    1
6166}
6167
6168/// Port of `bin_zcompile(char *nam, char **args, Options ops, UNUSED(int func))`
6169/// from `Src/parse.c:3180`. Validates the option set, then dispatches
6170/// to one of: `-t` (test/list), `-c`/`-a` (dump current functions),
6171/// or the default (compile source files to `.zwc`).
6172pub fn bin_zcompile(
6173    nam: &str, // c:3180
6174    args: &[String],
6175    ops: &crate::ported::zsh_h::options,
6176    _func: i32,
6177) -> i32 {
6178    // c:3185-3192 — illegal-combination guard.
6179    if (OPT_ISSET(ops, b'k') && OPT_ISSET(ops, b'z'))
6180        || (OPT_ISSET(ops, b'R') && OPT_ISSET(ops, b'M'))
6181        || (OPT_ISSET(ops, b'c')
6182            && (OPT_ISSET(ops, b'U') || OPT_ISSET(ops, b'k') || OPT_ISSET(ops, b'z')))
6183        || (!(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && OPT_ISSET(ops, b'm'))
6184    {
6185        zwarnnam(nam, "illegal combination of options"); // c:3192
6186        return 1;
6187    }
6188
6189    // c:3194 — `-c`/`-a` + KSHAUTOLOAD warning.
6190    if (OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && isset(crate::ported::zsh_h::KSHAUTOLOAD) {
6191        zwarnnam(nam, "functions will use zsh style autoloading"); // c:3195
6192    }
6193
6194    // c:3196-3197 — flag word from `-k` / `-z`.
6195    let flags: u32 = if OPT_ISSET(ops, b'k') {
6196        FDHF_KSHLOAD
6197    } else if OPT_ISSET(ops, b'z') {
6198        FDHF_ZSHLOAD
6199    } else {
6200        0
6201    };
6202
6203    // c:3199 — `-t` test/list mode.
6204    if OPT_ISSET(ops, b't') {
6205        // c:3199
6206        if args.is_empty() {
6207            zwarnnam(nam, "too few arguments"); // c:3202
6208            return 1;
6209        }
6210        let dump_name = if args[0].ends_with(FD_EXT) {
6211            args[0].clone()
6212        } else {
6213            format!("{}{}", args[0], FD_EXT)
6214        };
6215        let f = match load_dump_header(nam, &dump_name, 1) {
6216            // c:3206
6217            Some(buf) => buf,
6218            None => return 1,
6219        };
6220        // c:3209 — per-function check.
6221        if args.len() > 1 {
6222            for name in &args[1..] {
6223                // c:3210
6224                if !dump_find_func(&f, name) {
6225                    // c:3212
6226                    return 1;
6227                }
6228            }
6229            return 0;
6230        }
6231        // c:3215-3221 — listing arm. Walk every fdhead, print
6232        // each function's full name. C uses `fdname(h)` which
6233        // includes the path prefix; matches our `fdname()` impl.
6234        let mapped = if (fdflags(&f) & FDF_MAP) != 0 {
6235            "mapped"
6236        } else {
6237            "read"
6238        };
6239        println!("zwc file ({}) for zsh-{}", mapped, fdversion(&f));
6240        let header_words = fdheaderlen(&f) as usize;
6241        let mut cur = firstfdhead_offset();
6242        while cur < header_words {
6243            if read_fdhead(&f, cur).is_none() {
6244                break;
6245            }
6246            println!("{}", fdname(&f, cur));
6247            cur = nextfdhead_offset(&f, cur);
6248        }
6249        return 0;
6250    }
6251
6252    if args.is_empty() {
6253        zwarnnam(nam, "too few arguments"); // c:3226
6254        return 1;
6255    }
6256
6257    // c:3228 — map mode discriminant.
6258    let map: i32 = if OPT_ISSET(ops, b'M') {
6259        2
6260    } else if OPT_ISSET(ops, b'R') {
6261        0
6262    } else {
6263        1
6264    };
6265
6266    // c:3230-3236 — single-file default-mode short path.
6267    if args.len() == 1 && !(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) {
6268        let dump = format!("{}{}", args[0], FD_EXT);
6269        return build_dump(nam, &dump, args, OPT_ISSET(ops, b'U') as i32, map, flags);
6270    }
6271
6272    // c:3239-3247 — multi-file or `-c`/`-a` mode.
6273    let dump = if args[0].ends_with(FD_EXT) {
6274        args[0].clone()
6275    } else {
6276        format!("{}{}", args[0], FD_EXT)
6277    };
6278    let rest = &args[1..];
6279    if OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a') {
6280        let what =
6281            (if OPT_ISSET(ops, b'c') { 1 } else { 0 }) | (if OPT_ISSET(ops, b'a') { 2 } else { 0 });
6282        build_cur_dump(nam, &dump, rest, OPT_ISSET(ops, b'm') as i32, map, what)
6283    } else {
6284        build_dump(nam, &dump, rest, OPT_ISSET(ops, b'U') as i32, map, flags)
6285    }
6286}
6287
6288// =====================================================================
6289// Remaining `Src/parse.c` ports (this section finishes the file).
6290//
6291// Several of these emit into the C-wordcode buffer (`ECBUF`/etc.) and
6292// are kept for completeness — the live zshrs runtime uses the
6293// `ZshProgram` AST path instead, but `bin_zcompile` (`-c`/`-a` modes)
6294// and any future `.zwc`-emit pipeline both call into these.
6295// =====================================================================
6296
6297/// `ecstr(s)` helper — `ecadd(ecstrcode(s))`. Mirrors the C macro at
6298/// `Src/parse.c:482` used everywhere by the par_* emitters.
6299#[inline]
6300pub fn ecstr(s: &str) {
6301    let code = ecstrcode(s);
6302    ecadd(code);
6303}
6304
6305/// Port of `condlex` function-pointer global from `Src/parse.c`. C
6306/// flips this between `zshlex` and `testlex` depending on whether
6307/// we're inside `[[ ]]` vs `/bin/test` builtin. zshrs has no
6308/// separate `testlex` yet, so this just defers to `zshlex`.
6309#[inline]
6310pub fn condlex() {
6311    zshlex();
6312}
6313
6314/// `COND_SEP()` macro from `Src/parse.c:2433`. True when the current
6315/// token is a separator usable inside `[[ … ]]` (newline / semi /
6316/// `&`). C uses it to skip optional whitespace between cond terms.
6317#[inline]
6318pub fn COND_SEP() -> bool {
6319    matches!(tok(), NEWLIN | SEMI | AMPER)
6320}
6321
/// Port of `copy_ecstr(Eccstr s, char *p)` from `Src/parse.c:537`.
///
/// Lays every string of `table` into the flat byte region `p`,
/// followed by a NUL terminator. Where C walks the eccstr tree, this
/// port iterates a map from encoded offset to string bytes.
///
/// * `table` — key is the wordcode-encoded offset produced by
///   `ecstrcode` (`(byte_offset << 2) | token_bit`, parse.c:459);
///   value is the byte string, copied verbatim.
/// * `p` — destination region. An entry that does not fit together
///   with its terminating NUL is skipped without writing anything.
pub fn copy_ecstr(table: &std::collections::HashMap<u32, Vec<u8>>, p: &mut [u8]) {
    for (code, text) in table {
        // Drop the two low flag bits to recover the real byte offset.
        let start = (*code >> 2) as usize;
        // Index of the NUL terminator that follows the string bytes.
        let nul_at = start + text.len();
        // `get_mut` yields `None` when the terminator would fall outside
        // `p`, which is exactly the "does not fit" case.
        if let Some(slot) = p.get_mut(start..=nul_at) {
            let (body, terminator) = slot.split_at_mut(text.len());
            body.copy_from_slice(text);
            terminator[0] = 0;
        }
    }
}
6343
6344/// Port of `bld_eprog(int heap)` from `Src/parse.c:547`. Finalizes
6345/// the in-build `ECBUF`/`ECSTRS`/`ECNPATS` state into an `Eprog`.
6346/// Resets the build state so a new parse can start.
6347pub fn bld_eprog(heap: bool) -> crate::ported::zsh_h::eprog {
6348    // c:547
6349
6350    // c:555 — emit WC_END opcode. `WCB_END` is `WC_END_DEFAULT` (0).
6351    ecadd(0);
6352
6353    let ecused = ECUSED.with(|c| c.get()) as usize;
6354    let ecnpats = ECNPATS.with(|c| c.get()) as usize;
6355    let ecsoffs = ECSOFFS.with(|c| c.get()) as usize;
6356
6357    let prog_bytes = ecused * 4; // c:559
6358    let len = (ecnpats * 4) + prog_bytes + ecsoffs;
6359
6360    // Snapshot the wordcode buffer + string table.
6361    let prog_words: Vec<u32> = ECBUF.with(|c| c.borrow()[..ecused].to_vec());
6362    let mut strs_bytes = vec![0u8; ecsoffs];
6363    ECSTRS_REVERSE.with(|c| copy_ecstr(&c.borrow(), &mut strs_bytes));
6364
6365    // c:566 — store strs as raw bytes via from_utf8_unchecked so
6366    // single-byte zsh markers (e.g. Dash 0x9b) survive intact.
6367    // `String::from_utf8_lossy` would replace them with U+FFFD
6368    // (`\xef\xbf\xbd`), breaking byte-for-byte parity with C's
6369    // strs region. SAFETY: downstream consumers of `eprog.strs`
6370    // index by byte offset (per the wordcode `(offs >> 2)` offset
6371    // encoding) and call `.as_bytes()` — they never iterate as
6372    // chars or rely on UTF-8 validity, so storing non-UTF-8 bytes
6373    // in a String is safe in practice. C zsh's strs is `char *`
6374    // with the same byte-not-char semantics.
6375    let strs_string = unsafe { String::from_utf8_unchecked(strs_bytes) };
6376    let ret = eprog {
6377        flags: if heap { EF_HEAP } else { EF_REAL }, // c:570
6378        len: len as i32,                             // c:559
6379        npats: ecnpats as i32,                       // c:561
6380        nref: if heap { -1 } else { 1 },             // c:562
6381        pats: Vec::new(),                            // c:563 dummy_patprog
6382        prog: prog_words,                            // c:565
6383        strs: Some(strs_string),
6384        shf: None,
6385        dump: None,
6386    };
6387
6388    // c:577 — free ecbuf so next parse starts fresh.
6389    ECBUF.with(|c| c.borrow_mut().clear());
6390    ECLEN.with(|c| c.set(0));
6391    ECUSED.with(|c| c.set(0));
6392    ECNPATS.with(|c| c.set(0));
6393    ECSOFFS.with(|c| c.set(0));
6394    ECSTRS_INDEX.with(|c| c.borrow_mut().clear());
6395    ECSTRS_REVERSE.with(|c| c.borrow_mut().clear());
6396    ECSTRS_TREE.with(|t| *t.borrow_mut() = None);
6397
6398    ret
6399}
6400
6401/// Port of `parse_list(void)` from `Src/parse.c:697`. C-shape entry
6402/// point: drives `par_list` and finalizes via `bld_eprog`. Returns
6403/// `None` on syntax error.
6404pub fn parse_list() -> Option<eprog> {
6405    // c:697
6406    set_tok(ENDINPUT);
6407    init_parse();
6408    zshlex();
6409    let _ = par_list();
6410    if tok() != ENDINPUT {
6411        clear_hdocs();
6412        set_tok(LEXERR);
6413        yyerror("syntax error");
6414        return None;
6415    }
6416    Some(bld_eprog(true))
6417}
6418
6419/// Port of `parse_cond(void)` from `Src/parse.c:722`. Only used by
6420/// `bin_test`/`bin_bracket` for `/bin/test`/`[` compat — the
6421/// `condlex` global must already point at `testlex` before entry.
6422pub fn parse_cond() -> Option<eprog> {
6423    // c:722
6424    init_parse();
6425    if par_cond().is_none() {
6426        clear_hdocs();
6427        return None;
6428    }
6429    Some(bld_eprog(true))
6430}
6431
6432/// Port of `par_sublist2(int *cmplx)` from `Src/parse.c:869`.
6433/// Secondary-sublist arm: handles the `COPROC`/`Bang` prefix
6434/// in front of a pline. Returns the WC_SUBLIST flag word added.
6435pub fn par_sublist2(cmplx: &mut i32) -> Option<i32> {
6436    // c:869
6437    let mut f = 0i32;
6438    if tok() == COPROC {
6439        *cmplx = 1;
6440        f |= WC_SUBLIST_COPROC as i32;
6441        zshlex();
6442    } else if tok() == BANG_TOK {
6443        *cmplx = 1;
6444        f |= WC_SUBLIST_NOT as i32;
6445        zshlex();
6446    }
6447    // c:884 — `if (!par_pline(cmplx) && !f) return -1;`
6448    // The wordcode-emitter call chain (par_sublist_wordcode →
6449    // par_sublist2 → par_pipe_wordcode) needs the wordcode pipe
6450    // emitter, NOT the AST `par_pline`. The previous version called
6451    // `par_pline` which builds AST nodes and never writes to ECBUF —
6452    // the entire wordcode dispatch tree was broken below sublist
6453    // level (every script lexed to LIST + END only, since pipes /
6454    // commands / args never got emitted).
6455    let outer = cmplx_get();
6456    cmplx_set(false);
6457    let ok = par_pipe_wordcode();
6458    *cmplx |= cmplx_get() as i32;
6459    cmplx_set(outer | cmplx_get());
6460    if !ok && f == 0 {
6461        return None;
6462    }
6463    Some(f)
6464}
6465
6466/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Body
6467/// parser inside `[[ ... ]]` — calls `par_cond` to emit the
6468/// condition wordcode then advances past `]]`.
6469pub fn par_dinbrack() -> Option<()> {
6470    // c:1810
6471    set_incond(1); // c:1814
6472    set_incmdpos(false); // c:1815
6473    zshlex(); // c:1816
6474    let _ = par_cond(); // c:1817
6475    if tok() != DOUTBRACK {
6476        // c:1818
6477        yyerror("missing ]]");
6478        return None;
6479    }
6480    set_incond(0); // c:1820
6481    set_incmdpos(true); // c:1821
6482    zshlex(); // c:1822
6483    Some(())
6484}
6485
6486/// Port of `par_cond(void)` from `Src/parse.c:2409`. Top-level cond
6487/// OR-chain — drives `par_cond_1` and stitches `||`-separated terms
6488/// with `WCB_COND(COND_OR, …)`. This is the missing top of the
6489/// wordcode cond chain: `par_cond_wordcode` (the par_dinbrack port)
6490/// must call into HERE so that `[[ a || b ]]` and friends land
6491/// real WC_COND opcodes in `ecbuf`. Without this, the wordcode
6492/// emitter for `[[ ... ]]` produced zero words and parity dropped
6493/// 148 words on `/etc/zshrc` alone.
6494pub fn par_cond_top() -> i32 {
6495    // c:2411 — `int p = ecused, r;`
6496    let p = ECUSED.with(|c| c.get()) as usize;
6497    let r = par_cond_1();
6498    while COND_SEP() {
6499        condlex();
6500    }
6501    if tok() == DBAR {
6502        // c:2417 — `condlex(); while (COND_SEP()) condlex();`
6503        condlex();
6504        while COND_SEP() {
6505            condlex();
6506        }
6507        // c:2420-2422 — `ecispace(p, 1); par_cond(); ecbuf[p] =
6508        // WCB_COND(COND_OR, ecused-1-p);`
6509        ecispace(p, 1);
6510        par_cond_top();
6511        let ecused = ECUSED.with(|c| c.get()) as usize;
6512        ECBUF.with(|c| {
6513            c.borrow_mut()[p] = WCB_COND(COND_OR as u32, (ecused - 1 - p) as u32);
6514        });
6515        return 1;
6516    }
6517    r
6518}
6519
6520/// Port of `par_cond_1(void)` from `Src/parse.c:2434`. Parses one
6521/// `||`-separated cond expression. Emits `WCB_COND(COND_AND, …)`
6522/// when an `&&` is found and recurses.
6523pub fn par_cond_1() -> i32 {
6524    // c:2434
6525
6526    let p = ECUSED.with(|c| c.get()) as usize;
6527    let r = par_cond_2();
6528    while COND_SEP() {
6529        condlex();
6530    }
6531    if tok() == DAMPER {
6532        condlex();
6533        while COND_SEP() {
6534            condlex();
6535        }
6536        ecispace(p, 1);
6537        par_cond_1();
6538        let ecused = ECUSED.with(|c| c.get()) as usize;
6539        ECBUF.with(|c| {
6540            c.borrow_mut()[p] = WCB_COND(COND_AND as u32, (ecused - 1 - p) as u32);
6541        });
6542        return 1;
6543    }
6544    r
6545}
6546
6547/// Port of `static int check_cond(const char *input, const char *cond)`
6548/// from `Src/parse.c:2459`. True iff `input` is the two-char `-X`
6549/// form whose `X` matches `cond` — used by par_cond_2 to detect
6550/// `-a` / `-o` n-ary chain operators and by build_dump for `-k` /
6551/// `-z`. C: `return !IS_DASH(input[0]) ? 0 : !strcmp(input+1, cond);`.
6552fn check_cond(input: &str, cond: &str) -> bool {
6553    let mut chars = input.chars();
6554    match chars.next() {
6555        Some(c) if IS_DASH(c) => chars.as_str() == cond,
6556        _ => false,
6557    }
6558}
6559
6560/// Port of `par_cond_2(void)` from `Src/parse.c:2476`. The heavy
6561/// cond-term parser: handles `! cond`, `(cond)`, unary `[ -X arg ]`,
6562/// binary `[ A op B ]`, and `[ A op1 B op2 C … ]` n-ary chains.
6563pub fn par_cond_2() -> i32 {
6564    // c:2476
6565    // `n_testargs` only applies in `testlex` mode (=== /bin/test
6566    // compat). zshrs has no testlex yet, so always 0.
6567    let n_testargs: i32 = 0;
6568
6569    // c:2481 — handled inline; this Rust port skips the n_testargs
6570    // arm since zshrs invokes par_cond via [[ ... ]] only.
6571
6572    while COND_SEP() {
6573        condlex();
6574    }
6575    if tok() == BANG_TOK {
6576        // c:2522 — `[[ ! cond ]]`
6577        condlex();
6578        ecadd(WCB_COND(COND_NOT as u32, 0));
6579        return par_cond_2();
6580    }
6581    if tok() == INPAR_TOK {
6582        // c:2533 — `[[ (cond) ]]`
6583        condlex();
6584        while COND_SEP() {
6585            condlex();
6586        }
6587        let r = par_cond();
6588        while COND_SEP() {
6589            condlex();
6590        }
6591        if tok() != OUTPAR_TOK {
6592            yyerror("missing )");
6593            return 0;
6594        }
6595        condlex();
6596        return r.map_or(0, |_| 1);
6597    }
6598    let s1 = tokstr().unwrap_or_default();
6599    // c:2549 — `dble = (s1 && IS_DASH(*s1) && (!n_testargs ||
6600    // strspn(s1+1, "abcd...") == 1) && !s1[2]);` — IS_DASH covers
6601    // BOTH `-` and Dash (`\u{9b}`). The raw tokstr inside `[[ ... ]]`
6602    // carries Dash as a marker byte, so `starts_with('-')` alone
6603    // matches only ASCII dashes and misses every `-z`, `-d`, `-r`
6604    // etc. — every such cond emitted the AST-only `condition
6605    // expected` error from par_cond_double. Use IS_DASH and count
6606    // chars (Dash is a single code point) instead of bytes.
6607    let s1_chars: Vec<char> = s1.chars().collect();
6608    let dble = !s1_chars.is_empty()
6609        && IS_DASH(s1_chars[0])
6610        && s1_chars.len() == 2
6611        && "abcdefghknoprstuvwxzLONGS".contains(s1_chars[1]);
6612    if tok() != STRING_LEX {
6613        if !s1.is_empty() && tok() != LEXERR && (!dble || n_testargs != 0) {
6614            // c:2486-2497 — `if (n_testargs == 1)` block: under
6615            // POSIXBUILTINS-off, `[ -t ]` rewrites to `[ -t 1 ]`
6616            // (ksh behavior). The C gate is `unset(POSIXBUILTINS)
6617            // && check_cond(s1, "t")`. zshrs's parser has
6618            // n_testargs=0 (no testlex), so this rewrite path is
6619            // unreachable from zshrs's [[ ]] / [ ] entry points;
6620            // wired here as a marker for parity. When testlex is
6621            // ported the call below activates.
6622            if n_testargs == 1 && unset(POSIXBUILTINS) && check_cond(&s1, "t") {
6623                condlex();
6624                return par_cond_double(&s1, "1");
6625            }
6626            // c:2557 — `[[ STRING ]]` re-interpreted as `[[ -n STRING ]]`.
6627            condlex();
6628            while COND_SEP() {
6629                condlex();
6630            }
6631            return par_cond_double("-n", &s1);
6632        }
6633        yyerror("condition expected");
6634        return 0;
6635    }
6636    condlex();
6637    while COND_SEP() {
6638        condlex();
6639    }
6640    if tok() == INANG_TOK || tok() == OUTANG_TOK {
6641        // c:2576 — `<` / `>` string compare.
6642        let xtok = tok();
6643        condlex();
6644        while COND_SEP() {
6645            condlex();
6646        }
6647        if tok() != STRING_LEX {
6648            yyerror("string expected");
6649            return 0;
6650        }
6651        let s3 = tokstr().unwrap_or_default();
6652        condlex();
6653        while COND_SEP() {
6654            condlex();
6655        }
6656        let op = if xtok == INANG_TOK {
6657            COND_STRLT
6658        } else {
6659            COND_STRGTR
6660        };
6661        ecadd(WCB_COND(op as u32, 0));
6662        ecstr(&s1);
6663        ecstr(&s3);
6664        return 1;
6665    }
6666    if tok() != STRING_LEX {
6667        // c:2592 — only one operand seen → `[ -n s1 ]`.
6668        if tok() != LEXERR {
6669            if !dble || n_testargs != 0 {
6670                return par_cond_double("-n", &s1);
6671            }
6672            return par_cond_multi(&s1, &[]);
6673        }
6674        yyerror("syntax error");
6675        return 0;
6676    }
6677    let s2 = tokstr().unwrap_or_default();
6678    set_incond(incond() + 1);
6679    condlex();
6680    while COND_SEP() {
6681        condlex();
6682    }
6683    set_incond(incond() - 1);
6684    if tok() == STRING_LEX && !dble {
6685        let s3 = tokstr().unwrap_or_default();
6686        condlex();
6687        while COND_SEP() {
6688            condlex();
6689        }
6690        if tok() == STRING_LEX {
6691            // c:2615 — n-ary `[ A op B C D ... ]`.
6692            let mut l: Vec<String> = vec![s2, s3];
6693            while tok() == STRING_LEX {
6694                l.push(tokstr().unwrap_or_default());
6695                condlex();
6696                while COND_SEP() {
6697                    condlex();
6698                }
6699            }
6700            return par_cond_multi(&s1, &l);
6701        }
6702        return par_cond_triple(&s1, &s2, &s3);
6703    }
6704    par_cond_double(&s1, &s2)
6705}
6706
6707/// Port of `par_cond_double(char *a, char *b)` from `Src/parse.c:2626`.
6708/// Emits wordcode for unary cond `[ -X b ]` or modular `[ -mod b ]`.
6709pub fn par_cond_double(a: &str, b: &str) -> i32 {
6710    // c:2628 — `if (!IS_DASH(a[0]) || !a[1])` — char-based, since
6711    // Dash is a single code point (`\u{9b}`) and `a.len() < 2` on
6712    // BYTES would still pass for "-z" but fail for the marker form
6713    // `\u{9b}z` (2 bytes). Walk by chars.
6714    let ac: Vec<char> = a.chars().collect();
6715    if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
6716        crate::ported::utils::zerr(&format!("parse error: condition expected: {}", a));
6717        return 1;
6718    }
6719    // c:2630 — `else if (!a[2] && strspn(a+1, "abcd...zhLONGS") == 1)`
6720    let unary_set = "abcdefgknoprstuvwxzhLONGS";
6721    if ac.len() == 2 && unary_set.contains(ac[1]) {
6722        // c:2631 — `ecadd(WCB_COND(a[1], 0));` uses the raw cond-op
6723        // letter byte as the opcode payload. Use the ASCII char's
6724        // code-point value directly — every letter in `unary_set`
6725        // fits in 7 bits.
6726        ecadd(WCB_COND(ac[1] as u32, 0));
6727        ecstr(b);
6728    } else {
6729        ecadd(WCB_COND(COND_MOD as u32, 1));
6730        ecstr(a);
6731        ecstr(b);
6732    }
6733    1
6734}
6735
6736/// Port of `par_cond_triple(char *a, char *b, char *c)` from
6737/// `Src/parse.c:2659`. Emits wordcode for the binary forms
6738/// `[ A op B ]` — `=` / `==` / `!=` / `<` / `>` / `=~` / `-X`.
6739///
6740/// C does `(b[0] == Equals || b[0] == '=')` etc., matching BOTH the
6741/// raw ASCII operator char AND its tokenized marker form (Equals =
6742/// `\u{8d}`, Outang = `\u{8e}`, Inang = `\u{91}`, Tilde = `\u{96}`,
6743/// Bang = `\u{8b}`, Dash = `\u{9b}`). Inside `[[ ... ]]` the lexer
6744/// emits the marker bytes — comparing against literal-only `b"=="`
6745/// misses every cond op.
6746pub fn par_cond_triple(a: &str, b: &str, c: &str) -> i32 {
6747    // c:2659
6748    let bc: Vec<char> = b.chars().collect();
6749    let is_eq = |ch: char| ch == '=' || ch == Equals;
6750    let is_gt = |ch: char| ch == '>' || ch == Outang;
6751    let is_lt = |ch: char| ch == '<' || ch == Inang;
6752    let is_tilde = |ch: char| ch == '~' || ch == Tilde;
6753    let is_bang = |ch: char| ch == '!' || ch == Bang;
6754
6755    // c:2663 — `(b[0] == Equals || b[0] == '=') && !b[1]` → `=` (single).
6756    if bc.len() == 1 && is_eq(bc[0]) {
6757        ecadd(WCB_COND(COND_STREQ as u32, 0));
6758        ecstr(a);
6759        ecstr(c);
6760        let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
6761        ecadd(np);
6762        return 1;
6763    }
6764    // c:2668-2673 — `(t0 = b[0]=='>' || Outang) || b[0]=='<' || Inang`.
6765    if bc.len() == 1 && (is_gt(bc[0]) || is_lt(bc[0])) {
6766        let op = if is_gt(bc[0]) { COND_STRGTR } else { COND_STRLT };
6767        ecadd(WCB_COND(op as u32, 0));
6768        ecstr(a);
6769        ecstr(c);
6770        let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
6771        ecadd(np);
6772        return 1;
6773    }
6774    // c:2674-2679 — `==` STRDEQ.
6775    if bc.len() == 2 && is_eq(bc[0]) && is_eq(bc[1]) {
6776        ecadd(WCB_COND(COND_STRDEQ as u32, 0));
6777        ecstr(a);
6778        ecstr(c);
6779        let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
6780        ecadd(np);
6781        return 1;
6782    }
6783    // c:2680-2684 — `!=` STRNEQ.
6784    if bc.len() == 2 && is_bang(bc[0]) && is_eq(bc[1]) {
6785        ecadd(WCB_COND(COND_STRNEQ as u32, 0));
6786        ecstr(a);
6787        ecstr(c);
6788        let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
6789        ecadd(np);
6790        return 1;
6791    }
6792    // c:2685-2691 — `=~` REGEX (no pattern slot — implicit COND_MODI).
6793    if bc.len() == 2 && is_eq(bc[0]) && is_tilde(bc[1]) {
6794        ecadd(WCB_COND(COND_REGEX as u32, 0));
6795        ecstr(a);
6796        ecstr(c);
6797        return 1;
6798    }
6799    // c:2692-2702 — `-OP` numeric-or-modular cond (e.g. `-eq`, `-nt`).
6800    if !bc.is_empty() && IS_DASH(bc[0]) {
6801        let rest: String = bc[1..].iter().collect();
6802        let t = get_cond_num(&rest);
6803        if t > -1 {
6804            ecadd(WCB_COND((t + COND_NT) as u32, 0));
6805            ecstr(a);
6806            ecstr(c);
6807            return 1;
6808        }
6809        ecadd(WCB_COND(COND_MODI as u32, 0));
6810        ecstr(b);
6811        ecstr(a);
6812        ecstr(c);
6813        return 1;
6814    }
6815    // c:2703-2707 — `-mod A B C` modular cond on `a`.
6816    let ac: Vec<char> = a.chars().collect();
6817    if !ac.is_empty() && IS_DASH(ac[0]) && ac.len() > 1 {
6818        ecadd(WCB_COND(COND_MOD as u32, 2));
6819        ecstr(a);
6820        ecstr(b);
6821        ecstr(c);
6822        return 1;
6823    }
6824    crate::ported::utils::zerr(&format!("condition expected: {}", b));
6825    1
6826}
6827
6828/// Port of `par_cond_multi(char *a, LinkList l)` from `Src/parse.c:2716`.
6829/// Emits wordcode for `[ -OP A B C … ]` n-ary cond (alternation).
6830pub fn par_cond_multi(a: &str, l: &[String]) -> i32 {
6831    // c:2716 — `if (!IS_DASH(a[0]) || !a[1])`; same Dash/`-` dual
6832    // matching as par_cond_double, char-walked because Dash is a
6833    // single code point.
6834    let ac: Vec<char> = a.chars().collect();
6835    if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
6836        crate::ported::utils::zerr(&format!("condition expected: {}", a));
6837        return 1;
6838    }
6839    ecadd(WCB_COND(COND_MOD as u32, l.len() as u32));
6840    ecstr(a);
6841    for item in l {
6842        ecstr(item);
6843    }
6844    1
6845}
6846
6847/// Port of `cur_add_func(char *nam, Shfunc shf, LinkList names, LinkList progs, int *hlen, int *tlen, int what)`
6848/// from `Src/parse.c:3489`. Adds a shfunc to the in-build dump
6849/// progs+names lists. Stub: `Eprog` for the function body isn't
6850/// yet wired through `shfunc.funcdef` to be serializable here.
6851pub fn cur_add_func(
6852    nam: &str, // c:3489
6853    shf_name: &str,
6854    shf_flags: i32,
6855    names: &mut Vec<String>,
6856    progs: &mut Vec<wcfunc>,
6857    hlen: &mut i32,
6858    tlen: &mut i32,
6859    what: i32,
6860) -> i32 {
6861    let is_undef = (shf_flags as u32 & PM_UNDEFINED) != 0;
6862    if is_undef {
6863        if (what & 2) == 0 {
6864            // c:3498
6865            zwarnnam(nam, &format!("function is not loaded: {}", shf_name));
6866            return 1;
6867        }
6868        // c:3503 — would call `getfpfunc` to load body for dump.
6869        zwarnnam(nam, &format!("can't load function: {}", shf_name));
6870        return 1;
6871    } else if (what & 1) == 0 {
6872        zwarnnam(nam, &format!("function is already loaded: {}", shf_name)); // c:3514
6873        return 1;
6874    }
6875    // c:3517 — would `dupeprog(shf->funcdef)`. Stub: empty body.
6876    let wcf = wcfunc {
6877        name: shf_name.to_string(),
6878        flags: FDHF_ZSHLOAD,
6879        body: Vec::new(),
6880    };
6881    progs.push(wcf);
6882    names.push(shf_name.to_string());
6883
6884    // c:3526 — bump hlen / tlen.
6885    let name_words = (shf_name.len() as i32 + 4) / 4;
6886    *hlen += (FDHEAD_WORDS as i32) + name_words;
6887    *tlen += 0; // body is empty in stub; real path adds prog->len in words.
6888
6889    0
6890}
6891
6892/// Port of `write_dump(int dfd, LinkList progs, int map, int hlen, int tlen)`
6893/// from `Src/parse.c:3334`. Writes the prelude + header records +
6894/// body wordcode bytes to the dump file descriptor.
6895///
6896/// Two passes: first native-byte-order (`FD_MAGIC`), then opposite-
6897/// byte-order (`FD_OMAGIC`) so big-endian readers can mmap the
6898/// same file. Bodies are byte-swapped via `fdswap` on the second pass.
6899pub fn write_dump(
6900    dfd: &mut std::fs::File, // c:3334
6901    progs: &[wcfunc],
6902    mut map: i32,
6903    hlen: i32,
6904    tlen: i32,
6905) -> std::io::Result<()> {
6906    if map == 1 && (tlen as usize) >= FD_MINMAP {
6907        // c:3344
6908        map = 1;
6909    } else if map == 1 {
6910        map = 0;
6911    }
6912
6913    let mut other = 0u32; // c:3338
6914    let ohlen = hlen;
6915    let mut cur_hlen = hlen;
6916
6917    loop {
6918        cur_hlen = ohlen;
6919        // c:3347 — build the prelude.
6920        let mut pre = vec![0u32; FD_PRELEN];
6921        pre[0] = if other != 0 { FD_OMAGIC } else { FD_MAGIC }; // c:3350
6922        let flags = (if map != 0 { FDF_MAP } else { 0 }) | other;
6923        fdsetflags(&mut pre, flags as u8); // c:3351
6924        fdsetother(&mut pre, tlen as u32); // c:3352
6925                                           // c:3353 — copy ZSH_VERSION C-string into pre[2..].
6926        let ver = b"5.9";
6927        for (i, &b) in ver.iter().enumerate() {
6928            let word = 2 + i / 4;
6929            let shift = (i % 4) * 8;
6930            pre[word] |= (b as u32) << shift;
6931        }
6932        // Write prelude.
6933        for w in &pre {
6934            dfd.write_all(&w.to_le_bytes())?;
6935        }
6936        // c:3356 — per-fn header records.
6937        for wcf in progs {
6938            let n = &wcf.name;
6939            let prog = &wcf.body;
6940            let mut head = fdhead {
6941                start: cur_hlen as u32,                                     // c:3360
6942                len: (prog.len() * 4) as u32,                               // c:3363
6943                npats: 0, // c:3364 (npats not tracked yet)
6944                strs: 0,  // c:3365
6945                hlen: ((FDHEAD_WORDS as u32) + ((n.len() as u32 + 4) / 4)), // c:3366
6946                flags: 0,
6947            };
6948            cur_hlen += prog.len() as i32; // c:3361
6949                                           // c:3368 — name tail offset from path basename.
6950            let tail = n.rfind('/').map(|p| p + 1).unwrap_or(0);
6951            head.flags = fdhbldflags(wcf.flags, tail as u32); // c:3372
6952                                                              // c:3373 — opposite-byte-order swap on second pass.
6953            let mut head_words: Vec<u32> = vec![
6954                head.start, head.len, head.npats, head.strs, head.hlen, head.flags,
6955            ];
6956            if other != 0 {
6957                fdswap(&mut head_words);
6958            }
6959            for w in &head_words {
6960                dfd.write_all(&w.to_le_bytes())?;
6961            }
6962            // c:3376 — write the name + NUL + pad-to-4.
6963            dfd.write_all(n.as_bytes())?;
6964            dfd.write_all(&[0u8])?;
6965            let pad = (4 - ((n.len() + 1) & 3)) & 3;
6966            if pad > 0 {
6967                dfd.write_all(&vec![0u8; pad])?;
6968            }
6969        }
6970        // c:3381 — per-fn body words.
6971        for wcf in progs {
6972            let mut body = wcf.body.clone();
6973            if other != 0 {
6974                fdswap(&mut body);
6975            }
6976            for w in &body {
6977                dfd.write_all(&w.to_le_bytes())?;
6978            }
6979        }
6980        if other != 0 {
6981            // c:3389
6982            break;
6983        }
6984        other = FDF_OTHER; // c:3391
6985    }
6986    Ok(())
6987}
6988
6989#[cfg(test)]
6990mod tests {
6991    use super::*;
6992    use crate::utils::{errflag, ERRFLAG_ERROR};
6993    use std::fs;
6994    use std::path::Path;
6995    use std::sync::atomic::Ordering;
6996    use std::sync::mpsc;
6997    use std::thread;
6998    use std::time::{Duration, Instant};
6999
7000    /// Test helper. Mirrors zsh's `errflag` save/clear/check pattern
7001    /// around a parse — see `Src/init.c:loop` which clears errflag
7002    /// before parse_event() and tests it after. Returns `Err` if the
7003    /// parse set `ERRFLAG_ERROR`; otherwise `Ok(program)`.
7004    fn parse(input: &str) -> Result<ZshProgram, String> {
7005        let saved = errflag.load(Ordering::Relaxed);
7006        errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
7007        crate::ported::parse::parse_init(input);
7008        let prog = crate::ported::parse::parse();
7009        let had_err = (errflag.load(Ordering::Relaxed) & ERRFLAG_ERROR) != 0;
7010        // Restore prior error bits; don't carry our new error into the
7011        // outer test runner.
7012        errflag.store(saved, Ordering::Relaxed);
7013        if had_err {
7014            Err("parse error".to_string())
7015        } else {
7016            Ok(prog)
7017        }
7018    }
7019
7020    #[test]
7021    fn test_simple_command() {
7022        let prog = parse("echo hello world").unwrap();
7023        assert_eq!(prog.lists.len(), 1);
7024        match &prog.lists[0].sublist.pipe.cmd {
7025            ZshCommand::Simple(s) => {
7026                assert_eq!(s.words, vec!["echo", "hello", "world"]);
7027            }
7028            _ => panic!("expected simple command"),
7029        }
7030    }
7031
7032    #[test]
7033    fn test_pipeline() {
7034        let prog = parse("ls | grep foo | wc -l").unwrap();
7035        assert_eq!(prog.lists.len(), 1);
7036
7037        let pipe = &prog.lists[0].sublist.pipe;
7038        assert!(pipe.next.is_some());
7039
7040        let pipe2 = pipe.next.as_ref().unwrap();
7041        assert!(pipe2.next.is_some());
7042    }
7043
7044    #[test]
7045    fn test_and_or() {
7046        let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
7047        let sublist = &prog.lists[0].sublist;
7048
7049        assert!(sublist.next.is_some());
7050        let (op, _) = sublist.next.as_ref().unwrap();
7051        assert_eq!(*op, SublistOp::And);
7052    }
7053
7054    #[test]
7055    fn test_if_then() {
7056        let prog = parse("if test -f foo; then echo yes; fi").unwrap();
7057        match &prog.lists[0].sublist.pipe.cmd {
7058            ZshCommand::If(_) => {}
7059            _ => panic!("expected if command"),
7060        }
7061    }
7062
7063    #[test]
7064    fn test_for_loop() {
7065        let prog = parse("for i in a b c; do echo $i; done").unwrap();
7066        match &prog.lists[0].sublist.pipe.cmd {
7067            ZshCommand::For(f) => {
7068                assert_eq!(f.var, "i");
7069                match &f.list {
7070                    ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
7071                    _ => panic!("expected word list"),
7072                }
7073            }
7074            _ => panic!("expected for command"),
7075        }
7076    }
7077
7078    #[test]
7079    fn test_case() {
7080        let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
7081        match &prog.lists[0].sublist.pipe.cmd {
7082            ZshCommand::Case(c) => {
7083                assert_eq!(c.arms.len(), 2);
7084            }
7085            _ => panic!("expected case command"),
7086        }
7087    }
7088
7089    #[test]
7090    fn test_function() {
7091        // First test just parsing "function foo" to see what happens
7092        let prog = parse("function foo { }").unwrap();
7093        match &prog.lists[0].sublist.pipe.cmd {
7094            ZshCommand::FuncDef(f) => {
7095                assert_eq!(f.names, vec!["foo"]);
7096            }
7097            _ => panic!(
7098                "expected function, got {:?}",
7099                prog.lists[0].sublist.pipe.cmd
7100            ),
7101        }
7102    }
7103
7104    #[test]
7105    fn test_redirection() {
7106        let prog = parse("echo hello > file.txt").unwrap();
7107        match &prog.lists[0].sublist.pipe.cmd {
7108            ZshCommand::Simple(s) => {
7109                assert_eq!(s.redirs.len(), 1);
7110                assert_eq!(s.redirs[0].rtype, REDIR_WRITE);
7111            }
7112            _ => panic!("expected simple command"),
7113        }
7114    }
7115
7116    #[test]
7117    fn test_assignment() {
7118        let prog = parse("FOO=bar echo $FOO").unwrap();
7119        match &prog.lists[0].sublist.pipe.cmd {
7120            ZshCommand::Simple(s) => {
7121                assert_eq!(s.assigns.len(), 1);
7122                assert_eq!(s.assigns[0].name, "FOO");
7123            }
7124            _ => panic!("expected simple command"),
7125        }
7126    }
7127
7128    #[test]
7129    fn test_parse_completion_function() {
7130        let input = r#"_2to3_fixes() {
7131  local -a fixes
7132  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
7133  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
7134}"#;
7135        let result = parse(input);
7136        assert!(
7137            result.is_ok(),
7138            "Failed to parse completion function: {:?}",
7139            result.err()
7140        );
7141        let prog = result.unwrap();
7142        assert!(
7143            !prog.lists.is_empty(),
7144            "Expected at least one list in program"
7145        );
7146    }
7147
7148    #[test]
7149    fn test_parse_array_with_complex_elements() {
7150        let input = r#"arguments=(
7151  '(- * :)'{-h,--help}'[show this help message and exit]'
7152  {-d,--doctests_only}'[fix up doctests only]'
7153  '*:filename:_files'
7154)"#;
7155        let result = parse(input);
7156        assert!(
7157            result.is_ok(),
7158            "Failed to parse array assignment: {:?}",
7159            result.err()
7160        );
7161    }
7162
7163    #[test]
7164    fn test_parse_full_completion_file() {
7165        let input = r##"#compdef 2to3
7166
7167# zsh completions for '2to3'
7168
7169_2to3_fixes() {
7170  local -a fixes
7171  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
7172  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
7173}
7174
7175local -a arguments
7176
7177arguments=(
7178  '(- * :)'{-h,--help}'[show this help message and exit]'
7179  {-d,--doctests_only}'[fix up doctests only]'
7180  {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
7181  {-j,--processes}'[run 2to3 concurrently]:number: '
7182  {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
7183  {-l,--list-fixes}'[list available transformations]'
7184  {-p,--print-function}'[modify the grammar so that print() is a function]'
7185  {-v,--verbose}'[more verbose logging]'
7186  '--no-diffs[do not show diffs of the refactoring]'
7187  {-w,--write}'[write back modified files]'
7188  {-n,--nobackups}'[do not write backups for modified files]'
7189  {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
7190  {-W,--write-unchanged-files}'[also write files even if no changes were required]'
7191  '--add-suffix[append this string to all output filenames]:suffix: '
7192  '*:filename:_files'
7193)
7194
7195_arguments -s -S $arguments
7196"##;
7197        let result = parse(input);
7198        assert!(
7199            result.is_ok(),
7200            "Failed to parse full completion file: {:?}",
7201            result.err()
7202        );
7203        let prog = result.unwrap();
7204        // Should have parsed successfully with at least one statement
7205        assert!(!prog.lists.is_empty(), "Expected at least one list");
7206    }
7207
7208    #[test]
7209    fn test_parse_logs_sh() {
7210        let input = r#"#!/usr/bin/env bash
7211shopt -s globstar
7212
7213if [[ $(uname) == Darwin ]]; then
7214    tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
7215else
7216    if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
7217        tail -f /var/log/**/*.log | lolcat
7218    else
7219        printf "Unsupported...\n" >&2
7220    fi
7221fi
7222"#;
7223        let result = parse(input);
7224        assert!(
7225            result.is_ok(),
7226            "Failed to parse logs.sh: {:?}",
7227            result.err()
7228        );
7229    }
7230
7231    #[test]
7232    fn test_parse_case_with_glob() {
7233        let input = r#"case "$ZPWR_OS_TYPE" in
7234    darwin*)  open_cmd='open'
7235      ;;
7236    cygwin*)  open_cmd='cygstart'
7237      ;;
7238    linux*)
7239        open_cmd='xdg-open'
7240      ;;
7241esac"#;
7242        let result = parse(input);
7243        assert!(
7244            result.is_ok(),
7245            "Failed to parse case with glob: {:?}",
7246            result.err()
7247        );
7248    }
7249
7250    #[test]
7251    fn test_parse_case_with_nested_if() {
7252        // Test case with nested if and glob patterns
7253        let input = r##"function zpwrGetOpenCommand(){
7254    local open_cmd
7255    case "$ZPWR_OS_TYPE" in
7256        darwin*)  open_cmd='open' ;;
7257        cygwin*)  open_cmd='cygstart' ;;
7258        linux*)
7259            if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
7260                open_cmd='nohup xdg-open'
7261            fi
7262            ;;
7263    esac
7264}"##;
7265        let result = parse(input);
7266        assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
7267    }
7268
7269    #[test]
7270    fn test_parse_zpwr_scripts() {
7271        let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
7272        if !scripts_dir.exists() {
7273            eprintln!("Skipping test: scripts directory not found");
7274            return;
7275        }
7276
7277        let mut total = 0;
7278        let mut passed = 0;
7279        let mut failed_files = Vec::new();
7280        let mut timeout_files = Vec::new();
7281
7282        for ext in &["sh", "zsh"] {
7283            let pattern = scripts_dir.join(format!("*.{}", ext));
7284            if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
7285                for entry in entries.flatten() {
7286                    total += 1;
7287                    let file_path = entry.display().to_string();
7288                    let content = match fs::read_to_string(&entry) {
7289                        Ok(c) => c,
7290                        Err(e) => {
7291                            failed_files.push((file_path, format!("read error: {}", e)));
7292                            continue;
7293                        }
7294                    };
7295
7296                    // Parse with timeout
7297                    let content_clone = content.clone();
7298                    let (tx, rx) = mpsc::channel();
7299                    let handle = thread::spawn(move || {
7300                        let result = parse(&content_clone);
7301                        let _ = tx.send(result);
7302                    });
7303
7304                    match rx.recv_timeout(Duration::from_secs(2)) {
7305                        Ok(Ok(_)) => passed += 1,
7306                        Ok(Err(err)) => {
7307                            failed_files.push((file_path, err));
7308                        }
7309                        Err(_) => {
7310                            timeout_files.push(file_path);
7311                            // Thread will be abandoned
7312                        }
7313                    }
7314                }
7315            }
7316        }
7317
7318        eprintln!("\n=== ZPWR Scripts Parse Results ===");
7319        eprintln!("Passed: {}/{}", passed, total);
7320
7321        if !timeout_files.is_empty() {
7322            eprintln!("\nTimeout files (>2s):");
7323            for file in &timeout_files {
7324                eprintln!("  {}", file);
7325            }
7326        }
7327
7328        if !failed_files.is_empty() {
7329            eprintln!("\nFailed files:");
7330            for (file, err) in &failed_files {
7331                eprintln!("  {} - {}", file, err);
7332            }
7333        }
7334
7335        // Allow some failures initially, but track progress
7336        let pass_rate = if total > 0 {
7337            (passed as f64 / total as f64) * 100.0
7338        } else {
7339            0.0
7340        };
7341        eprintln!("Pass rate: {:.1}%", pass_rate);
7342
7343        // Require at least 50% pass rate for now
7344        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
7345    }
7346
7347    #[test]
7348    #[ignore] // Uses threads that can't be killed on timeout; use integration test instead
7349    fn test_parse_zsh_stdlib_functions() {
7350        let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
7351        if !functions_dir.exists() {
7352            eprintln!(
7353                "Skipping test: zsh_functions directory not found at {:?}",
7354                functions_dir
7355            );
7356            return;
7357        }
7358
7359        let mut total = 0;
7360        let mut passed = 0;
7361        let mut failed_files = Vec::new();
7362        let mut timeout_files = Vec::new();
7363
7364        if let Ok(entries) = fs::read_dir(&functions_dir) {
7365            for entry in entries.flatten() {
7366                let path = entry.path();
7367                if !path.is_file() {
7368                    continue;
7369                }
7370
7371                total += 1;
7372                let file_path = path.display().to_string();
7373                let content = match fs::read_to_string(&path) {
7374                    Ok(c) => c,
7375                    Err(e) => {
7376                        failed_files.push((file_path, format!("read error: {}", e)));
7377                        continue;
7378                    }
7379                };
7380
7381                // Parse with timeout
7382                let content_clone = content.clone();
7383                let (tx, rx) = mpsc::channel();
7384                thread::spawn(move || {
7385                    let result = parse(&content_clone);
7386                    let _ = tx.send(result);
7387                });
7388
7389                match rx.recv_timeout(Duration::from_secs(2)) {
7390                    Ok(Ok(_)) => passed += 1,
7391                    Ok(Err(err)) => {
7392                        failed_files.push((file_path, err));
7393                    }
7394                    Err(_) => {
7395                        timeout_files.push(file_path);
7396                    }
7397                }
7398            }
7399        }
7400
7401        eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
7402        eprintln!("Passed: {}/{}", passed, total);
7403
7404        if !timeout_files.is_empty() {
7405            eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
7406            for file in timeout_files.iter().take(10) {
7407                eprintln!("  {}", file);
7408            }
7409            if timeout_files.len() > 10 {
7410                eprintln!("  ... and {} more", timeout_files.len() - 10);
7411            }
7412        }
7413
7414        if !failed_files.is_empty() {
7415            eprintln!("\nFailed files: {}", failed_files.len());
7416            for (file, err) in failed_files.iter().take(20) {
7417                let filename = Path::new(file)
7418                    .file_name()
7419                    .unwrap_or_default()
7420                    .to_string_lossy();
7421                eprintln!("  {} - {}", filename, err);
7422            }
7423            if failed_files.len() > 20 {
7424                eprintln!("  ... and {} more", failed_files.len() - 20);
7425            }
7426        }
7427
7428        let pass_rate = if total > 0 {
7429            (passed as f64 / total as f64) * 100.0
7430        } else {
7431            0.0
7432        };
7433        eprintln!("Pass rate: {:.1}%", pass_rate);
7434
7435        // Require at least 50% pass rate
7436        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
7437    }
7438}