zsh/ported/
parse.rs

1//! Zsh parser — direct port from zsh/Src/parse.c.
2//!
3//! Pulls tokens via the lex.rs free fns (zshlex/tok/tokstr) and
4//! builds an AST tree (relocated to src/extensions/zsh_ast.rs as a
5//! Rust-only IR) plus emits wordcode into ECBUF via the P9b/P9c
6//! pipeline. Follows the zsh grammar closely; productions match
7//! `par_*` in Src/parse.c.
8
9use super::lex::{
10    lextok, set_tok, AMPER, AMPERBANG, AMPOUTANG, BANG_TOK, BARAMP, BAR_TOK, CASE, COPROC, DAMPER,
11    DBAR, DINANG, DINANGDASH, DINBRACK, DINPAR, DOLOOP, DONE, DOUTANG, DOUTANGAMP, DOUTANGAMPBANG,
12    DOUTANGBANG, DOUTBRACK, DOUTPAR, DSEMI, ELIF, ELSE, ENDINPUT, ENVARRAY, ENVSTRING, ESAC, FI,
13    FOR, FOREACH, FUNC, IF, INANGAMP, INANG_TOK, INBRACE_TOK, INOUTANG, INOUTPAR, INPAR_TOK,
14    IS_REDIROP, LEXERR, NEWLIN, NOCORRECT, NULLTOK, OUTANGAMP, OUTANGAMPBANG, OUTANGBANG,
15    OUTANG_TOK, OUTBRACE_TOK, OUTPAR_TOK, REPEAT, SELECT, SEMI, SEMIAMP, SEMIBAR, SEPER,
16    STRING_LEX, THEN, TIME, TRINANG, TYPESET, UNTIL, WHILE, ZEND,
17};
18use super::zsh_h::{
19    eprog, estate, isset, redir, unset, wc_code, wordcode, Bang, Dash, Equals, Inang, Inpar,
20    Outang, Outpar, Stringg, Tilde, ALIASFUNCDEF, COND_AND, COND_MOD, COND_MODI, COND_NOT, COND_NT,
21    COND_OR, COND_REGEX, COND_STRDEQ, COND_STREQ, COND_STRGTR, COND_STRLT, COND_STRNEQ,
22    CSHJUNKIELOOPS,
23    EC_DUP, EC_NODUP, EF_HEAP, EF_REAL, EXECOPT, IGNOREBRACES, IS_DASH, MULTIFUNCDEF, OPT_ISSET,
24    PM_UNDEFINED, POSIXBUILTINS, REDIRF_FROM_HEREDOC, REDIR_APP, REDIR_APPNOW,
25    REDIR_FROM_HEREDOC_MASK, REDIR_VARID_MASK, REDIR_ERRAPP,
26    REDIR_ERRAPPNOW, REDIR_ERRWRITE, REDIR_ERRWRITENOW, REDIR_HEREDOC, REDIR_HEREDOCDASH,
27    REDIR_HERESTR, REDIR_INPIPE, REDIR_MERGEIN, REDIR_MERGEOUT, REDIR_OUTPIPE, REDIR_READ,
28    REDIR_READWRITE, REDIR_WRITE, REDIR_WRITENOW, SHORTLOOPS, SHORTREPEAT, WCB_COND, WCB_SIMPLE,
29    WC_REDIR, WC_REDIR_FROM_HEREDOC, WC_REDIR_TYPE, WC_REDIR_VARID, WC_SUBLIST_COPROC,
30    WC_SUBLIST_NOT,
31};
32use crate::ported::utils::{zerr, zwarnnam};
33use serde::{Deserialize, Serialize};
34use std::fs::File;
35use std::io::{Read, Seek, SeekFrom, Write};
36use std::sync::atomic::{AtomicUsize, Ordering};
37
// Direct port of `Src/parse.c:287-289` grow-policy constants.
/// Initial wordcode capacity: `init_parse` resizes `ECBUF` to this many
/// zeroed slots and sets `ECLEN` to the same value.
const EC_INIT_SIZE: i32 = 256;
40
// Pending-here-document list — direct port of `Src/parse.c:84
// struct heredocs *hdocs;`. Per-parser file-static (bucket-1 in
// PORT_PLAN.md): each worker thread parsing a separate program needs
// its own pending-heredoc list. Saved/restored across nested parses
// by `parse_context_save`/`parse_context_restore` (parse.c:299/337).
thread_local! {
    /// Port of file-static `struct heredocs *hdocs;` from `Src/parse.c:84`.
    ///
    /// Starts out as `None`. `parse_context_save` moves the current value
    /// into the parse stack (leaving `None` behind) and
    /// `parse_context_restore` puts a clone of the saved value back.
    pub static HDOCS: std::cell::RefCell<Option<Box<crate::ported::zsh_h::heredocs>>>
        = const { std::cell::RefCell::new(None) };
}
51
// Wordcode-buffer thread-locals — direct port of `Src/parse.c:269-285`
// file-statics. Per-evaluator (bucket-1 in PORT_PLAN.md): each worker
// thread parsing a separate program needs its own wordcode buffer.
//
// ECBUF: the wordcode array being built. C `Wordcode ecbuf`
// (parse.c:275).
// ECLEN: allocated entries in ECBUF (parse.c:269).
// ECUSED: entries actually used so far (parse.c:271).
// ECNPATS: count of patterns referenced by ECBUF (parse.c:273).
// ECSOFFS / ECSSUB: byte offsets into the string region
// (parse.c:279). ECSSUB subtracts substring overlap.
// ECNFUNC: count of functions defined so far (parse.c:285).
// ECSTRS_INDEX: map keyed by (nfunc, str) → offs, kept alongside the
// C-style tree below. NOTE(review): in the code visible around these
// declarations the map is only ever cleared (`init_parse`,
// `bld_eprog`); the long-string dedup in `ecstrcode` goes through
// ECSTRS_TREE. Confirm whether anything still populates or reads it.
thread_local! {
    /// Wordcode array under construction (C `ecbuf`). Written by
    /// `ecadd`/`ecispace`/`ecdel`, snapshotted and cleared by `bld_eprog`.
    pub static ECBUF: std::cell::RefCell<Vec<u32>> = std::cell::RefCell::new(Vec::new());
    /// Slot count the port tracks as "allocated" for `ECBUF` (C `eclen`).
    static ECLEN: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    /// Number of slots of `ECBUF` that hold emitted wordcode (C `ecused`).
    static ECUSED: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    /// Pattern count (C `ecnpats`); feeds `eprog.npats` and `eprog.len`
    /// in `bld_eprog`.
    static ECNPATS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    /// Next free byte offset in the string region (C `ecsoffs`); advanced
    /// by `ecstrcode` for every newly stored long string.
    static ECSOFFS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    /// Offset subtracted from `ECSOFFS` when `ecstrcode` encodes a string
    /// offset (C `ecssub`).
    static ECSSUB: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    /// Function counter (C `ecnfunc`); part of the dedup key in `ecstrcode`.
    static ECNFUNC: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
    /// (nfunc, string) → encoded offset map; see the NOTE(review) above.
    static ECSTRS_INDEX: std::cell::RefCell<std::collections::HashMap<(i32, String), u32>>
        = std::cell::RefCell::new(std::collections::HashMap::new());
    /// C zsh's `eccstr` BST (parse.c:447). Port of `Eccstr ecstrs` —
    /// a hashval-ordered binary search tree of long-strings for
    /// dedup. Same cmp logic as C: nfunc, then hashval, then strcmp.
    /// This tree is the structure `ecstrcode` walks, so it mirrors
    /// C's exact dedup-hit pattern (including its quirks for
    /// hash-colliding content).
    static ECSTRS_TREE: std::cell::RefCell<Option<Box<EccstrNode>>>
        = const { std::cell::RefCell::new(None) };
    /// Reverse index for `ecgetstr`: offs → owned string. Populated
    /// at ecstrcode time so the consumer can recover the string from
    /// the wordcode offs without walking the encode-time HashMap.
    /// Stores the METAFIED BYTE form of each long-string, exactly
    /// matching what C's strs region holds. `String` would not work
    /// here because Rust strings carry UTF-8-encoded chars (e.g.
    /// the Dash marker `\u{9b}` UTF-8-encodes to two bytes
    /// `\xc2 \x9b`) while C stores zsh markers as single bytes
    /// (raw `\x9b`). Storing Vec<u8> lets us write byte-for-byte
    /// what C writes after metafy.
    pub static ECSTRS_REVERSE: std::cell::RefCell<std::collections::HashMap<u32, Vec<u8>>>
        = std::cell::RefCell::new(std::collections::HashMap::new());
}
/// While `ECLEN` is below this value, `ecadd`/`ecispace` grow the buffer
/// by its current length (i.e. double it).
const EC_DOUBLE_THRESHOLD: i32 = 32768;
/// Fixed growth step used by `ecadd`/`ecispace` once `ECLEN` has reached
/// `EC_DOUBLE_THRESHOLD`.
const EC_INCREMENT: i32 = 1024;
102
103/// Direct port of `parse_context_save(struct parse_stack *ps, int toplevel)` at `Src/parse.c:295`.
104/// Snapshots the lexer-side file-statics (which currently live on
105/// `lexer` until Phase 7 dissolution makes them file-scope
106/// thread_local!s) plus the pending heredoc list, plus the
107/// wordcode-buffer state (STUB until Phase 9b). Saves Rust-only
108/// recursion counters too so nested parses get fresh limits.
109/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
110pub fn parse_context_save(ps: &mut parse_stack) {
111    // parse.c:299 — `ps->hdocs = hdocs; hdocs = NULL;` — save the
112    // canonical C linked-list and clear it for the nested parse.
113    ps.hdocs = HDOCS.with_borrow_mut(|h| h.take());
114    // zshrs-only: save the parallel AST-glue Vec the same way.
115    // LEX_HEREDOCS carries terminator/strip_tabs/quoted metadata
116    // that has no C analog (C stores it implicitly via tokstr).
117    ps.lex_heredocs = crate::ported::lex::LEX_HEREDOCS.with_borrow_mut(|v| std::mem::take(v));
118    // parse.c:302-310 — save lexer-side state.
119    ps.incmdpos = incmdpos();
120    // parse.c:303 — aliasspaceflag — not yet a LEX_* thread_local.
121    // STUB; Phase 7 wires it. Same for the few below marked STUB.
122    ps.aliasspaceflag = 0;
123    ps.incond = incond();
124    ps.inredir = inredir();
125    ps.incasepat = incasepat();
126    ps.isnewlin = isnewlin();
127    ps.infor = infor();
128    ps.inrepeat_ = inrepeat();
129    ps.intypeset = intypeset();
130    // parse.c:312-317 — wordcode buffer state. STUB until Phase 9b
131    // (zshrs has no ecbuf yet).
132    ps.eclen = 0;
133    ps.ecused = 0;
134    ps.ecnpats = 0;
135    ps.ecbuf = None;
136    ps.ecstrs = None;
137    ps.ecsoffs = 0;
138    ps.ecssub = 0;
139    ps.ecnfunc = 0;
140    set_incmdpos(true);
141    set_incond(0);
142    set_inredir(false);
143    set_incasepat(0);
144    set_infor(0);
145    set_inrepeat(0);
146    set_intypeset(false);
147}
148
149/// Direct port of `parse_context_restore(const struct parse_stack *ps, int toplevel)` at `Src/parse.c:326`.
150/// Inverse of `parse_context_save`. Restores lexer-side state +
151/// pending heredocs + Rust-only counters from `ps`, then clears
152/// `errflag & ERRFLAG_ERROR` per parse.c:354.
153/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
154pub fn parse_context_restore(ps: &parse_stack) {
155    // parse.c:330-331 — free any in-progress wordcode buffer.
156    // zshrs has no wordcode yet (STUB until Phase 9b); the AST
157    // nodes are owned by their parent so dropping the parser
158    // frees them.
159
160    // parse.c:333-352 — restore saved state.
161    // parse.c:337 — `hdocs = ps->hdocs;`
162    HDOCS.with_borrow_mut(|h| *h = ps.hdocs.clone());
163    // zshrs-only: restore the parallel AST-glue Vec.
164    crate::ported::lex::LEX_HEREDOCS.with_borrow_mut(|v| *v = ps.lex_heredocs.clone());
165    set_incmdpos(ps.incmdpos);
166    // aliasspaceflag STUB until Phase 7.
167    set_incond(ps.incond);
168    set_inredir(ps.inredir);
169    set_incasepat(ps.incasepat);
170    set_isnewlin(ps.isnewlin);
171    set_infor(ps.infor);
172    set_inrepeat(ps.inrepeat_);
173    set_intypeset(ps.intypeset);
174    // ecbuf/eclen/ecused/ecnpats/ecstrs/ecsoffs/ecssub/ecnfunc
175    // STUB until Phase 9b.
176
177    // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
178    // error flag so the outer parse sees a clean state.
179    crate::ported::utils::errflag.fetch_and(
180        !crate::ported::utils::ERRFLAG_ERROR,
181        std::sync::atomic::Ordering::Relaxed,
182    );
183}
184
/// Port of `ecadjusthere(int p, int d)` (`Src/parse.c:360`).
///
/// In C this walks the pending here-document list and adds `d` to the
/// `pc` of every entry whose `pc` is at or after `p`
/// (`for (p2 = hdocs; p2; p2 = p2->next) if (p2->pc >= p) p2->pc += d;`).
/// `ecispace` and `ecdel` call it whenever wordcodes shift.
///
/// This port currently does nothing: both arguments are accepted and
/// ignored.
///
/// NOTE(review): the file declares a C-style `HDOCS` list; if its entries
/// carry a wordcode `pc`, this function should start adjusting them —
/// confirm against the `heredocs` definition.
pub fn ecadjusthere(p: usize, d: i32) {
    // Intentionally a no-op; the binding only marks the parameters as used.
    let _ = (p, d);
}
198
199// === AST tree relocated to src/extensions/zsh_ast.rs ===
200//
201// zsh C does NOT have an AST tree — it emits wordcode directly via
202// par_event/par_list/par_sublist/par_pipe/par_cmd/par_simple/etc.
203// (Src/parse.c:485-3000) into a flat `Wordcode ecbuf[]`. The Zsh*/
204// Shell* AST node types lived in this file as a Rust-only IR that
205// stands in for that wordcode.
206//
207// P9e (PORT_PLAN.md): the types moved to src/extensions/zsh_ast.rs
208// to make their Rust-only-extension nature explicit. The full P9c +
209// P9d rewrite (par_* emitting wordcode + exec.rs reading wordcode)
210// retires them entirely — until then, callers reach them via this
211// re-export.
212pub use crate::heredoc_ast::HereDoc;
213pub use crate::zsh_ast::{
214    CaseArm, CaseTerm, CaseTerminator, CompoundCommand, ForList, HereDocInfo, ListFlags, ListOp,
215    Redirect, RedirectOp, ShellCommand, ShellWord, SimpleCommand, SublistFlags, SublistOp,
216    VarModifier, ZshAssign, ZshAssignValue, ZshCase, ZshCommand, ZshCond, ZshFor, ZshFuncDef,
217    ZshIf, ZshList, ZshParamFlag, ZshPipe, ZshProgram, ZshRedir, ZshRepeat, ZshSimple, ZshSublist,
218    ZshTry, ZshWhile,
219};
220use crate::ported::lex::{
221    incasepat, incmdpos, incond, infor, input_slice, inredir, inrepeat, intypeset,
222    isnewlin, lex_init, lineno, noaliases, nocorrect, pos, set_incasepat, set_incmdpos, set_incond, set_lineno,
223    set_infor, set_inredir, set_inrepeat, set_intypeset, set_isnewlin, set_noaliases,
224    set_nocorrect, set_pos, set_tokfd, set_toklineno, set_tokstr, tok, tokfd, toklineno, tokstr, zshlex,
225};
226use crate::prompt::{cmdpop, cmdpush};
227use crate::zsh_h::{
228    wc_bdata, CS_ALWAYS, CS_ARRAY, CS_CASE, CS_CMDAND, CS_CMDOR, CS_COND, CS_CURSH, CS_ELIF, CS_ELSE,
229    CS_ERRPIPE, CS_FOR, CS_FOREACH, CS_FUNCDEF, CS_IF, CS_IFTHEN, CS_PIPE, CS_REPEAT, CS_SELECT,
230    CS_SUBSH, CS_UNTIL, CS_WHILE, EF_RUN, WCB_ARITH, WCB_ASSIGN, WCB_CASE, WCB_CURSH, WCB_END,
231    WCB_FOR, WCB_FUNCDEF, WCB_IF, WCB_LIST, WCB_PIPE, WCB_REDIR, WCB_REPEAT, WCB_SELECT,
232    WCB_SUBLIST, WCB_SUBSH, WCB_TIMED, WCB_TRY, WCB_TYPESET, WCB_WHILE, WC_ASSIGN_ARRAY, WC_ASSIGN_INC,
233    WC_ASSIGN_NEW, WC_ASSIGN_SCALAR, WC_CASE_AND, WC_CASE_HEAD, WC_CASE_OR, WC_CASE_TESTAND,
234    WC_FOR_COND, WC_FOR_LIST, WC_FOR_PPARAM, WC_IF_ELIF, WC_IF_ELSE, WC_IF_HEAD, WC_IF_IF,
235    WC_PIPE_END, WC_PIPE_LINENO,
236    WC_PIPE_MID, WC_REDIR_WORDS, WC_SELECT_LIST, WC_SELECT_PPARAM, WC_SUBLIST_AND, WC_SUBLIST_END,
237    WC_SUBLIST_FLAGS, WC_SUBLIST_OR, WC_SUBLIST_SIMPLE, WC_SUBLIST_TYPE, WC_TIMED_EMPTY,
238    WC_TIMED_PIPE, WC_WHILE_UNTIL, WC_WHILE_WHILE, Z_ASYNC, Z_DISOWN, Z_END, Z_SIMPLE, Z_SYNC,
239};
240
241/// Direct port of `ecispace(int p, int n)` at `Src/parse.c:372`. Insert `n`
242/// empty wordcode slots at position `p`, shifting later entries
243/// right, growing the buffer as needed, adjusting heredoc pointers.
244pub fn ecispace(p: usize, n: usize) {
245    // parse.c:376-381 — grow if needed.
246    let need = n as i32;
247    if (ECLEN.get() - ECUSED.get()) < need {
248        let cur = ECLEN.get();
249        let mut a = if cur < EC_DOUBLE_THRESHOLD {
250            cur
251        } else {
252            EC_INCREMENT
253        };
254        if need > a {
255            a = need;
256        }
257        ECBUF.with_borrow_mut(|buf| {
258            buf.resize((cur + a) as usize, 0);
259        });
260        ECLEN.set(cur + a);
261    }
262    // parse.c:382-385 — memmove p → p+n, gap of n.
263    let m = ECUSED.get() as usize - p;
264    if m > 0 {
265        ECBUF.with_borrow_mut(|buf| {
266            let needed = (ECUSED.get() as usize) + n;
267            if buf.len() < needed {
268                buf.resize(needed, 0);
269            }
270            for i in (0..m).rev() {
271                buf[p + n + i] = buf[p + i];
272            }
273            for i in 0..n {
274                buf[p + i] = 0;
275            }
276        });
277    }
278    // parse.c:386 — bump ecused by n.
279    ECUSED.set(ECUSED.get() + need);
280    // parse.c:387 — `ecadjusthere(p, n)`.
281    ecadjusthere(p, need);
282}
283
284/// Direct port of `ecadd(wordcode c)` at `Src/parse.c:397`. Append `c` to
285/// the wordcode buffer with grow-on-demand, return the new index.
286pub fn ecadd(c: u32) -> usize {
287    // parse.c:399-405 — `if ((eclen - ecused) < 1) grow`.
288    if (ECLEN.get() - ECUSED.get()) < 1 {
289        let cur = ECLEN.get();
290        let a = if cur < EC_DOUBLE_THRESHOLD {
291            cur
292        } else {
293            EC_INCREMENT
294        };
295        ECBUF.with_borrow_mut(|buf| {
296            buf.resize((cur + a) as usize, 0);
297        });
298        ECLEN.set(cur + a);
299    }
300    let idx = ECUSED.get();
301    ECBUF.with_borrow_mut(|buf| {
302        if (idx as usize) >= buf.len() {
303            buf.resize((idx + 1) as usize, 0);
304        }
305        buf[idx as usize] = c;
306    });
307    ECUSED.set(idx + 1);
308    idx as usize
309}
310
311/// Direct port of `ecdel(int p)` at `Src/parse.c:413`. Remove the
312/// wordcode at position `p`, shift later entries left by one,
313/// decrement ecused, adjust pending heredoc pointers.
314pub fn ecdel(p: usize) {
315    // parse.c:415-418 — memmove + decrement ecused.
316    let n = ECUSED.get() as usize - p - 1;
317    if n > 0 {
318        ECBUF.with_borrow_mut(|buf| {
319            for i in 0..n {
320                buf[p + i] = buf[p + i + 1];
321            }
322        });
323    }
324    ECUSED.set(ECUSED.get() - 1);
325    // parse.c:420 — `ecadjusthere(p, -1)`.
326    ecadjusthere(p, -1);
327}
328
329/// Direct port of `ecstrcode(char *s)` at `Src/parse.c:426`. Encode a
330/// string into a single wordcode (short strings ≤4 bytes packed
331/// inline; longer strings get an offset into the deduped registry).
332///
333/// The long-string path stores the METAFIED bytes (matches what C's
334/// strs region contains): collapse Rust UTF-8 chars in 0x80..=0xff
335/// to single bytes, then apply zsh metafy (high bytes ≥ 0x83 →
336/// `Meta=0x83 + byte^0x20`). Length tracking (ECSOFFS) uses the
337/// metafied byte count — same as C `strlen(s) + 1` where C's `s`
338/// is already metafied at this point.
339pub fn ecstrcode(s: &str) -> u32 {
340    // Convert Rust char-form → C-byte form. zsh's metafy() at
341    // Src/utils.c only converts bytes flagged IMETA: 0x00, 0x83
342    // (Meta itself), and 0x84..=0xa2 (Pound..Marker, the lex
343    // markers). Other bytes 0x01..=0x82 and 0xa3..=0xff pass
344    // through unchanged. See utils.c:4195-4204 typtab init.
345    //
346    // Rust receives chars. Classify each:
347    //   - codepoint in [0x83..=0xa2] → marker char (emitted by lex
348    //     post-metafy in C); 1 byte unchanged
349    //   - codepoint < 0x80 → ASCII, 1 byte unchanged
350    //   - codepoint in [0x80..=0x82] or [0xa3..=0xff] → single
351    //     non-imeta byte (user-input range); 1 byte unchanged
352    //   - codepoint > 0xff → multi-byte UTF-8 source char (e.g.
353    //     '━' = U+2501 = 0xe2 0x94 0x81). Metafy ONLY the bytes
354    //     that fall in 0x83..=0xa2; pass others through. For '━':
355    //     0xe2 stays, 0x94 → 0x83 0xb4, 0x81 stays.
356    let mut c_bytes: Vec<u8> = Vec::with_capacity(s.len());
357    let imeta = |b: u8| -> bool { b == 0 || (0x83..=0xa2).contains(&b) };
358    for ch in s.chars() {
359        let cu = ch as u32;
360        if cu < 0x80 {
361            // ASCII — single byte unchanged.
362            c_bytes.push(cu as u8);
363        } else if (0x83..=0xa2).contains(&cu) {
364            // Lex marker char (emitted by lex.add(Marker) post-metafy
365            // in C). Stored as single byte.
366            c_bytes.push(cu as u8);
367        } else {
368            // User-input char: encode UTF-8 then metafy imeta bytes.
369            // For chars 0x80..=0xff (like 'º' U+00BA), UTF-8 gives
370            // 2 bytes (e.g. `0xc2 0xba`) — zsh's lex reads these as
371            // raw bytes from input and metafy passes 0xc2 / 0xba
372            // through (both NOT imeta).
373            let mut tmp = [0u8; 4];
374            for &b in ch.encode_utf8(&mut tmp).as_bytes() {
375                if imeta(b) {
376                    c_bytes.push(0x83);
377                    c_bytes.push(b ^ 0x20);
378                } else {
379                    c_bytes.push(b);
380                }
381            }
382        }
383    }
384    // c:`has_token` (Src/utils.c:2282) → `itok(*s)` → `typtab[c] & ITOK`.
385    // ITOK is set for bytes `Pound..=Nularg` (0x84..=0xa1) per
386    // Src/utils.c:4198 (`for (t0=Pound; t0<=LAST_NORMAL_TOK; t0++)
387    // typtab[t0]|=ITOK`) plus :4200 (`for (t0=Snull; t0<=Nularg; t0++)
388    // typtab[t0]|=ITOK|IMETA|INULL`). Pound=0x84 Bang=0x9c (last normal),
389    // Snull=0x9d..Nularg=0xa1. Meta=0x83 has IMETA but NOT ITOK.
390    let t = c_bytes.iter().any(|&b| (0x84..=0xa1).contains(&b));
391    let l = c_bytes.len() + 1; // include NUL terminator
392    if l <= 4 {
393        // parse.c:436-445 — short-string inline pack. Uses raw C-bytes
394        // (NOT metafied — the inline packing stores 1 byte per slot).
395        let mut c: u32 = if t { 3 } else { 2 };
396        match l {
397            4 => {
398                c |= (c_bytes[2] as u32) << 19;
399                c |= (c_bytes[1] as u32) << 11;
400                c |= (c_bytes[0] as u32) << 3;
401            }
402            3 => {
403                c |= (c_bytes[1] as u32) << 11;
404                c |= (c_bytes[0] as u32) << 3;
405            }
406            2 => {
407                c |= (c_bytes[0] as u32) << 3;
408            }
409            1 => {
410                // parse.c:443 — empty string special case.
411                c = if t { 7 } else { 6 };
412            }
413            _ => {}
414        }
415        c
416    } else {
417        // parse.c:447-466 — long string. Port of C's eccstr BST walk
418        // exactly: walk the tree comparing nfunc, then hashval, then
419        // strcmp on bytes. Return offs on full match; insert new
420        // leaf otherwise. Matches C's exact dedup-hit pattern
421        // (which is content-dependent — hash collisions and the
422        // lazy short-circuit cmp chain make the tree shape determine
423        // whether matching nodes are reachable).
424        // hasher is byte-by-byte polynomial (hashtable.c:86); pass
425        // c_bytes via from_utf8_unchecked so non-UTF-8 zsh marker
426        // bytes feed straight in. SAFETY: hasher only iterates
427        // `.bytes()` — no UTF-8 validity assumed.
428        let val = crate::ported::hashtable::hasher(unsafe {
429            std::str::from_utf8_unchecked(&c_bytes)
430        });
431        let nfunc = ECNFUNC.get();
432        let found_offs = ECSTRS_TREE.with_borrow_mut(|root| {
433            // Walk the tree. At each node, if all 3 cmps == 0,
434            // return the node's offs. Otherwise descend left/right
435            // by the first non-zero cmp's sign.
436            let mut cur: &mut Option<Box<EccstrNode>> = root;
437            loop {
438                let p = match cur.as_mut() {
439                    Some(p) => p,
440                    None => break None,
441                };
442                // c:448 — `cmp = p->nfunc - ecnfunc`
443                let mut cmp = (p.nfunc as i64) - (nfunc as i64);
444                if cmp == 0 {
445                    // c:448 — `&& !(cmp = (long)p->hashval - (long)val)`
446                    // C does `(int)(p->hashval - val)` — unsigned 32-bit
447                    // subtraction wraps, then cast to int. Use
448                    // wrapping_sub + as i32 to match the bit pattern.
449                    cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
450                    if cmp == 0 {
451                        // c:448 — `&& !(cmp = strcmp(p->str, s))`
452                        cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
453                            std::cmp::Ordering::Less => -1,
454                            std::cmp::Ordering::Equal => 0,
455                            std::cmp::Ordering::Greater => 1,
456                        };
457                        if cmp == 0 {
458                            // c:450 — `return p->offs;`
459                            break Some(p.offs);
460                        }
461                    }
462                }
463                // c:452 — `pp = (cmp < 0 ? &p->left : &p->right);`
464                cur = if cmp < 0 { &mut p.left } else { &mut p.right };
465            }
466        });
467        if let Some(offs) = found_offs {
468            return offs;
469        }
470        // c:462 — `p->offs = ((ecsoffs - ecssub) << 2) | (t ? 1 : 0);`
471        let offs =
472            (((ECSOFFS.get() - ECSSUB.get()) as u32) << 2) | if t { 1 } else { 0 };
473        // c:463 — `p->aoffs = ecsoffs;` (absolute write position).
474        let aoffs = ECSOFFS.get() as u32;
475        // c:457-465 — insert new node at the NULL slot the walk
476        // terminated at. Encode the walk path as a Vec<bool> of
477        // left/right turns (true = right), then re-descend to
478        // insert. Borrow-checker friendly: a single mutable walk
479        // that either finds an existing node (descend) or fills
480        // the empty slot (return).
481        let stored = c_bytes.clone();
482        let stored_len = stored.len();
483        let new_node = Box::new(EccstrNode {
484            left: None,
485            right: None,
486            str: stored.clone(),
487            offs,
488            aoffs,
489            nfunc,
490            hashval: val,
491        });
492        ECSTRS_TREE.with_borrow_mut(|root| {
493            // Build the path first (immutable-walk; safe because we
494            // only ever go further down).
495            let mut path: Vec<bool> = Vec::new();
496            {
497                let mut cur: &Option<Box<EccstrNode>> = root;
498                while let Some(p) = cur.as_ref() {
499                    let mut cmp = (p.nfunc as i64) - (nfunc as i64);
500                    if cmp == 0 {
501                        // C does `(int)(p->hashval - val)` — unsigned 32-bit
502                    // subtraction wraps, then cast to int. Use
503                    // wrapping_sub + as i32 to match the bit pattern.
504                    cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
505                        if cmp == 0 {
506                            cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
507                                std::cmp::Ordering::Less => -1,
508                                std::cmp::Ordering::Equal => 0,
509                                std::cmp::Ordering::Greater => 1,
510                            };
511                        }
512                    }
513                    let go_right = cmp >= 0;
514                    path.push(go_right);
515                    cur = if go_right { &p.right } else { &p.left };
516                }
517            }
518            // Descend mutably along the recorded path and assign at
519            // the NULL leaf.
520            let mut cur: &mut Option<Box<EccstrNode>> = root;
521            for turn in path {
522                let p = cur.as_mut().expect("path matches walk");
523                cur = if turn { &mut p.right } else { &mut p.left };
524            }
525            *cur = Some(new_node);
526        });
527        // Also keep the existing reverse index (offs → bytes) for
528        // ecgetstr_wordcode and copy_ecstr — they read flat by offs.
529        ECSTRS_REVERSE.with_borrow_mut(|m| {
530            m.insert(offs, stored);
531        });
532        let _ = l;
533        ECSOFFS.set(ECSOFFS.get() + (stored_len + 1) as i32);
534        offs
535    }
536}
537
538/// Initialize parser status. Direct port of zsh/Src/parse.c:491
539/// `init_parse_status`. Clears the per-parse-call lexer flags
540/// so a fresh parse starts from cmd-position with no nesting
541/// state inherited from a prior parse.
542///
543/// Previously the Rust port omitted `inrepeat_ = 0` at c:501.
544/// `inrepeat_` is the `repeat N <body>` parse-state counter that
545/// the lexer toggles in 3 phases (1 → 2 → 3 → 0). Without the
546/// reset, a fresh parse called after an in-flight `repeat`
547/// command would inherit the stale counter and silently misread
548/// the next token as a body of an already-completed repeat.
549pub fn init_parse_status() {                                                  // c:491
550    // parse.c:500-502 — `incasepat = incond = inredir = infor =
551    // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
552    set_incasepat(0);                                                         // c:500
553    set_incond(0);                                                            // c:500
554    set_inredir(false);                                                       // c:500
555    set_infor(0);                                                             // c:500
556    set_intypeset(false);                                                     // c:500
557    crate::ported::lex::set_inrepeat(0);                                      // c:501 inrepeat_ = 0
558    set_incmdpos(true);                                                       // c:502
559}
560
561/// Initialize parser for a fresh parse. Direct port of
562/// zsh/Src/parse.c:509 `init_parse`. C source allocates a
563/// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
564/// per-parse-call counters, and calls init_parse_status. zshrs
565/// has no flat wordcode buffer (AST is built inline) so this
566/// function reduces to init_parse_status + recursion_depth/
567/// global_iterations clear.
568pub fn init_parse() {
569    // parse.c:513-520 — `ecbuf = (Wordcode) zalloc(EC_INIT_SIZE *
570    // sizeof(wordcode)); eclen = EC_INIT_SIZE; ecused = 0;
571    // ecnpats = 0; ecstrs = NULL; ecsoffs = ecnfunc = 0;
572    // ecssub = 0;`. P9b — initialize the per-evaluator wordcode
573    // buffer for this parse call. zshrs uses thread-local
574    // statics declared at file scope (parse.rs:25-50).
575    ECBUF.with_borrow_mut(|buf| {
576        buf.clear();
577        buf.resize(EC_INIT_SIZE as usize, 0);
578    });
579    ECLEN.set(EC_INIT_SIZE);
580    ECUSED.set(0);
581    ECNPATS.set(0);
582    ECSOFFS.set(0);
583    ECSSUB.set(0);
584    ECNFUNC.set(0);
585    ECSTRS_INDEX.with_borrow_mut(|m| m.clear());
586    ECSTRS_REVERSE.with_borrow_mut(|m| m.clear());
587    ECSTRS_TREE.with_borrow_mut(|t| *t = None);
588
589    // parse.c:522 — `init_parse_status();`
590    init_parse_status();
591}
592
593/// Port of `copy_ecstr(Eccstr s, char *p)` from `Src/parse.c:537`.
594/// Walks the BST and writes each entry to `p[s->aoffs..]` matching
595/// C's recursive in-order traversal exactly. The old impl used the
596/// `ECSTRS_REVERSE` HashMap keyed by `offs` (= ecssub-relative
597/// wordcode-encoded offset), which collides across funcdef scopes:
598/// a string at relative offs=0 inside funcdef A and another at
599/// relative offs=0 inside funcdef B share the same key, so one
600/// overwrites the other.
601pub fn copy_ecstr(_table: &std::collections::HashMap<u32, Vec<u8>>, p: &mut [u8]) {
602    // c:537-544 — walk eccstr BST recursively, writing each node's
603    // str at p[node->aoffs..node->aoffs + strlen + 1] (NUL-terminated).
604    ECSTRS_TREE.with_borrow(|root| {
605        copy_ecstr_walk(root, p);
606    });
607}
608
609/// Port of `bld_eprog(int heap)` from `Src/parse.c:547`. Finalizes
610/// the in-build `ECBUF`/`ECSTRS`/`ECNPATS` state into an `Eprog`.
611/// Resets the build state so a new parse can start.
612pub fn bld_eprog(heap: bool) -> crate::ported::zsh_h::eprog {
613    // c:547
614
615    // c:555 — emit WC_END opcode. `WCB_END` is `WC_END_DEFAULT` (0).
616    ecadd(0);
617
618    let ecused = ECUSED.with(|c| c.get()) as usize;
619    let ecnpats = ECNPATS.with(|c| c.get()) as usize;
620    let ecsoffs = ECSOFFS.with(|c| c.get()) as usize;
621
622    // c:557-559 — `ret->len = ((ecnpats * sizeof(Patprog)) +
623    //                            (ecused * sizeof(wordcode)) +
624    //                            ecsoffs);`
625    // sizeof(Patprog) = sizeof(struct patprog *) = pointer size.
626    // On 64-bit targets that's 8, on 32-bit that's 4. C's eprog
627    // ->len is the canonical value for parity tests, so we use
628    // the same arithmetic.
629    let prog_bytes = ecused * 4; // sizeof(wordcode) = 4
630    let len = (ecnpats * std::mem::size_of::<*const u8>()) + prog_bytes + ecsoffs;
631
632    // Snapshot the wordcode buffer + string table.
633    let prog_words: Vec<u32> = ECBUF.with(|c| c.borrow()[..ecused].to_vec());
634    let mut strs_bytes = vec![0u8; ecsoffs];
635    ECSTRS_REVERSE.with(|c| copy_ecstr(&c.borrow(), &mut strs_bytes));
636
637    // c:566 — store strs as raw bytes via from_utf8_unchecked so
638    // single-byte zsh markers (e.g. Dash 0x9b) survive intact.
639    // `String::from_utf8_lossy` would replace them with U+FFFD
640    // (`\xef\xbf\xbd`), breaking byte-for-byte parity with C's
641    // strs region. SAFETY: downstream consumers of `eprog.strs`
642    // index by byte offset (per the wordcode `(offs >> 2)` offset
643    // encoding) and call `.as_bytes()` — they never iterate as
644    // chars or rely on UTF-8 validity, so storing non-UTF-8 bytes
645    // in a String is safe in practice. C zsh's strs is `char *`
646    // with the same byte-not-char semantics.
647    let strs_string = unsafe { String::from_utf8_unchecked(strs_bytes) };
648    let ret = eprog {
649        flags: if heap { EF_HEAP } else { EF_REAL }, // c:570
650        len: len as i32,                             // c:559
651        npats: ecnpats as i32,                       // c:561
652        nref: if heap { -1 } else { 1 },             // c:562
653        pats: Vec::new(),                            // c:563 dummy_patprog
654        prog: prog_words,                            // c:565
655        strs: Some(strs_string),
656        shf: None,
657        dump: None,
658    };
659
660    // c:577 — free ecbuf so next parse starts fresh.
661    ECBUF.with(|c| c.borrow_mut().clear());
662    ECLEN.with(|c| c.set(0));
663    ECUSED.with(|c| c.set(0));
664    ECNPATS.with(|c| c.set(0));
665    ECSOFFS.with(|c| c.set(0));
666    ECSTRS_INDEX.with(|c| c.borrow_mut().clear());
667    ECSTRS_REVERSE.with(|c| c.borrow_mut().clear());
668    ECSTRS_TREE.with(|t| *t.borrow_mut() = None);
669
670    ret
671}
672
673/// Port of `int empty_eprog(Eprog p)` from `Src/parse.c:584`. C
674/// body: `return (!p || !p->prog || *p->prog == WCB_END());` —
675/// the eprog is empty when its prog buffer is missing or the
676/// first wordcode is the WC_END marker. Used by signal handlers
677/// (`Src/signals.c:712`) to short-circuit a trap that resolves to
678/// an empty program.
679pub fn empty_eprog(p: &crate::ported::zsh_h::eprog) -> bool {
680    p.prog.is_empty() || p.prog[0] == crate::ported::zsh_h::WCB_END()
681}
682
683/// Clear pending here-document list. Direct port of
684/// `clear_hdocs(void)` from `Src/parse.c:591`. The C version walks
685/// `hdocs` and frees each node; Rust drops the `Box<heredocs>`
686/// chain automatically when the head is replaced with None.
687pub fn clear_hdocs() {                                                            // c:591
688    // c:593-598 — for (p = hdocs; p; p = n) { n = p->next; zfree(p); }
689    // c:599 — hdocs = NULL;
690    HDOCS.with_borrow_mut(|h| *h = None);
691    // zshrs-only: also drop the parallel AST-glue Vec. No C
692    // analog — LEX_HEREDOCS is Rust-only working-set state.
693    crate::ported::lex::LEX_HEREDOCS.with_borrow_mut(|v| v.clear());
694}
695
696/// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
697/// 612-631 `parse_event`. Reads one event from the lexer (a
698/// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
699/// returns the resulting ZshProgram.
700///
701/// `endtok` is the token that terminates the event — usually
702/// ENDINPUT, but for command-style substitutions the closing
703/// `)` (zsh's CMD_SUBST_CLOSE).
704///
705/// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
706/// allocated wordcode program). zshrs returns a `ZshProgram`
707/// (AST root). Same role at the parse-output boundary.
708pub fn parse_event(endtok: lextok) -> Option<ZshProgram> {
709    // parse.c:616-619 — reset state and prime the lexer.
710    set_tok(ENDINPUT);
711    set_incmdpos(true);
712    zshlex();
713    // parse.c:620 — `init_parse();`
714    init_parse();
715
716    // parse.c:622-625 — drive par_event; on failure clear hdocs.
717    if !par_event(endtok) {
718        clear_hdocs();
719        return None;
720    }
721    // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
722    // parse for a substitution that doesn't need its own eprog.
723    // zshrs returns an empty program in that case (caller
724    // discards).
725    if endtok != ENDINPUT {
726        return Some(ZshProgram { lists: Vec::new() });
727    }
728    // parse.c:630 — `bld_eprog(1);` — build the final eprog.
729    // zshrs has already built the AST via parse_program_until,
730    // but parse_event uses par_event directly so we need to
731    // collect what par_event accumulated.
732    Some(parse_program_until(None))
733}
734
735/// Parse one event (sublist with optional separator). Direct
736/// port of zsh/Src/parse.c:635 `par_event`. Returns true if
737/// an event was successfully parsed, false on EOF / endtok.
738///
739/// zshrs port note: the C version emits wordcodes via ecadd/
740/// set_list_code; zshrs's parser builds AST nodes via
741/// par_sublist + par_list. Same flow, different output.
742pub fn par_event(endtok: lextok) -> bool {
743    // parse.c:639-643 — skip leading SEPERs.
744    while tok() == SEPER {
745        // parse.c:640-641 — at top-level (endtok == ENDINPUT),
746        // a SEPER on a fresh line ends the event.
747        if isnewlin() > 0 && endtok == ENDINPUT {
748            return false;
749        }
750        zshlex();
751    }
752    // parse.c:644-647 — terminate on EOF or matching close-token.
753    if tok() == ENDINPUT {
754        return false;
755    }
756    if tok() == endtok {
757        return true;
758    }
759    // parse.c:649-... — drive par_sublist + handle terminator.
760    // zshrs's par_sublist already builds the AST node directly.
761    match par_sublist() {
762        Some(_) => {
763            // parse.c:651-693 — terminator handling. zshrs's
764            // par_list wraps this; for parse_event we just
765            // confirm the sublist parsed.
766            true
767        }
768        None => false,
769    }
770}
771
772/// Port of `parse_list(void)` from `Src/parse.c:697`. C-shape entry
773/// point: drives `par_list` and finalizes via `bld_eprog`. Returns
774/// `None` on syntax error.
775pub fn parse_list() -> Option<eprog> {
776    // c:697
777    set_tok(ENDINPUT);
778    init_parse();
779    zshlex();
780    let _ = par_list();
781    if tok() != ENDINPUT {
782        clear_hdocs();
783        set_tok(LEXERR);
784        yyerror("syntax error");
785        return None;
786    }
787    Some(bld_eprog(true))
788}
789
790/// Port of `parse_cond(void)` from `Src/parse.c:722`. Only used by
791/// `bin_test`/`bin_bracket` for `/bin/test`/`[` compat — the
792/// `condlex` global must already point at `testlex` before entry.
793pub fn parse_cond() -> Option<eprog> {
794    // c:722
795    init_parse();
796    if par_cond().is_none() {
797        clear_hdocs();
798        return None;
799    }
800    Some(bld_eprog(true))
801}
802
803// ============================================================
804// Wordcode emission helpers (parse.c private helpers)
805//
806// Direct ports of zsh's wordcode-emission helpers in parse.c.
807// These write u32 opcodes into a flat `ecbuf` array thread-local
808// via ecadd / ecdel / ecispace / ecstrcode and friends. The
809// par_*_wordcode family at parse.rs:1700-3500 walks the lex
810// stream and emits a real wordcode buffer here.
811//
812// (The AST tree built by par_program / par_simple / etc. is a
813// separate path used by fusevm; see compile_zsh.rs for the AST
814// → fusevm-bytecode compiler.)
815// ============================================================
816
817/// Patch a list-placeholder wordcode with its actual opcode +
818/// jump distance. Direct port of zsh/Src/parse.c:738
819/// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
820/// par_sublist runs, then comes back through set_list_code to
821/// rewrite the slot with WCB_LIST(type, distance) once the
822/// sublist's final length is known.
823///
824/// Port of `set_list_code(int p, int type, int cmplx)` from
825/// `Src/parse.c:738`. Patches the WCB_LIST header at `p` based on
826/// whether the sublist body is simple (single command, no
827/// pipeline) and Z_SYNC/Z_END — emits the Z_SIMPLE-optimized
828/// header when possible, otherwise the plain WCB_LIST(type, 0).
829pub fn set_list_code(p: usize, type_code: i32, cmplx: bool) {
830    let _ = wc_bdata;
831    // c:740 — `if (!cmplx && (type == Z_SYNC || type == (Z_SYNC | Z_END))
832    // && WC_SUBLIST_TYPE(ecbuf[p+1]) == WC_SUBLIST_END)`
833    let sublist_code = ECBUF.with_borrow(|b| b.get(p + 1).copied().unwrap_or(0));
834    let z = type_code;
835    let qualifies = !cmplx
836        && (z == Z_SYNC || z == (Z_SYNC | Z_END))
837        && WC_SUBLIST_TYPE(sublist_code) == WC_SUBLIST_END;
838    if qualifies {
839        // c:742 — `int ispipe = !(WC_SUBLIST_FLAGS(ecbuf[p+1])
840        // & WC_SUBLIST_SIMPLE);`
841        let ispipe = (WC_SUBLIST_FLAGS(sublist_code) & WC_SUBLIST_SIMPLE) == 0;
842        // c:743 — `ecbuf[p] = WCB_LIST((type|Z_SIMPLE), ecused-2-p);`
843        let used = ECUSED.get() as usize;
844        let off = used.saturating_sub(2 + p);
845        ECBUF.with_borrow_mut(|b| {
846            if p < b.len() {
847                b[p] = WCB_LIST((z | Z_SIMPLE) as wordcode, off as wordcode);
848            }
849        });
850        // c:744 — `ecdel(p+1);`
851        ecdel(p + 1);
852        // c:745-746 — `if (ispipe) ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
853        if ispipe {
854            ECBUF.with_borrow_mut(|b| {
855                if p + 1 < b.len() {
856                    b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
857                }
858            });
859        }
860    } else {
861        // c:748 — `ecbuf[p] = WCB_LIST(type, 0);`
862        ECBUF.with_borrow_mut(|b| {
863            if p < b.len() {
864                b[p] = WCB_LIST(z as wordcode, 0);
865            }
866        });
867    }
868}
869
870/// Port of `set_sublist_code(int p, int type, int flags, int skip, int cmplx)`
871/// from `Src/parse.c:755`. Patches the WCB_SUBLIST header at `p`.
872/// When the sublist is non-complex (single command, no pipeline),
873/// sets WC_SUBLIST_SIMPLE and rewrites the following slot to
874/// `WC_PIPE_LINENO`.
875pub fn set_sublist_code(p: usize, type_code: i32, flags: i32, skip: i32, cmplx: bool) {
876    if cmplx {
877        // c:758 — `ecbuf[p] = WCB_SUBLIST(type, flags, skip);`
878        ECBUF.with_borrow_mut(|b| {
879            if p < b.len() {
880                b[p] = WCB_SUBLIST(type_code as wordcode, flags as wordcode, skip as wordcode);
881            }
882        });
883    } else {
884        // c:760 — `ecbuf[p] = WCB_SUBLIST(type, flags|WC_SUBLIST_SIMPLE, skip);`
885        ECBUF.with_borrow_mut(|b| {
886            if p < b.len() {
887                b[p] = WCB_SUBLIST(
888                    type_code as wordcode,
889                    (flags as wordcode) | WC_SUBLIST_SIMPLE,
890                    skip as wordcode,
891                );
892            }
893        });
894        // c:761 — `ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
895        ECBUF.with_borrow_mut(|b| {
896            if p + 1 < b.len() {
897                b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
898            }
899        });
900    }
901}
902
903/// Parse a list (sublist with optional & or ;).
904///
905/// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
906/// par_list1 wrapper at parse.c:807-817).
907///
908/// **Structural divergence**: zsh's parse.c emits flat wordcode
909/// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
910/// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
911/// builds an AST node `ZshList { sublist, flags }` instead. The
912/// async/sync/disown discrimination at parse.c:785-790 maps to
913/// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
914/// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
915/// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
916/// representation. This divergence is repository-wide: every
917/// `par_*` function emits wordcode in C, every `parse_*` builds
918/// AST in Rust. The compile_zsh module then traverses the AST to
919/// emit fusevm bytecode, which serves the same role as zsh's
920/// wordcode but with a different opcode set and execution model.
921fn par_list() -> Option<ZshList> {
922    let sublist = par_sublist()?;
923
924    let flags = match tok() {
925        AMPER => {
926            zshlex();
927            ListFlags {
928                async_: true,
929                disown: false,
930            }
931        }
932        AMPERBANG => {
933            zshlex();
934            ListFlags {
935                async_: true,
936                disown: true,
937            }
938        }
939        SEPER | SEMI | NEWLIN => {
940            zshlex();
941            ListFlags::default()
942        }
943        _ => ListFlags::default(),
944    };
945
946    Some(ZshList { sublist, flags })
947}
948
949/// Parse one list — non-recursing variant. Direct port of
950/// zsh/Src/parse.c:808 `par_list1`. Like par_list but
951/// doesn't recurse on the trailing-separator path; used by
952/// callers that only want one statement (e.g. each arm of a
953/// case body).
954pub fn par_list1() -> Option<ZshSublist> {
955    // parse.c:810-816 — body is a single par_sublist call wrapped
956    // in the eu/ecused tracking that zshrs doesn't need (no
957    // wordcode buffer).
958    par_sublist()
959}
960
961/// Parse a sublist (pipelines connected by && or ||).
962///
963/// Direct port of zsh/Src/parse.c:825 `par_sublist` and
964/// par_sublist2 at parse.c:869-892. par_sublist handles the
965/// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
966/// handles the leading `!` negation and `coproc` keyword.
967///
968/// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
969/// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
970/// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
971fn par_sublist() -> Option<ZshSublist> {
972    let mut flags = SublistFlags::default();
973
974    // Handle coproc and !
975    if tok() == COPROC {
976        flags.coproc = true;
977        zshlex();
978    } else if tok() == BANG_TOK {
979        flags.not = true;
980        zshlex();
981    }
982
983    let pipe = par_pline()?;
984
985    // Check for && or ||
986    let next = match tok() {
987        DAMPER => {
988            zshlex();
989            skip_separators();
990            par_sublist().map(|s| (SublistOp::And, Box::new(s)))
991        }
992        DBAR => {
993            zshlex();
994            skip_separators();
995            par_sublist().map(|s| (SublistOp::Or, Box::new(s)))
996        }
997        _ => None,
998    };
999
1000    Some(ZshSublist { pipe, next, flags })
1001}
1002
1003/// Port of `par_sublist2(int *cmplx)` from `Src/parse.c:869`.
1004/// Secondary-sublist arm: handles the `COPROC`/`Bang` prefix
1005/// in front of a pline. Returns the WC_SUBLIST flag word added.
1006pub fn par_sublist2(cmplx: &mut i32) -> Option<i32> {
1007    // c:870 — `int f = 0;`
1008    let mut f: i32 = 0;
1009    // c:873-880 — COPROC / BANG prefix flags.
1010    if tok() == COPROC {
1011        *cmplx = 1;
1012        f |= WC_SUBLIST_COPROC as i32;
1013        zshlex();
1014    } else if tok() == BANG_TOK {
1015        *cmplx = 1;
1016        f |= WC_SUBLIST_NOT as i32;
1017        zshlex();
1018    }
1019    // c:882-883 — `if (!par_pline(cmplx) && !f) return -1;`
1020    if !par_pipe_wordcode(cmplx) && f == 0 {
1021        return None;
1022    }
1023    // c:885 — `return f;`
1024    Some(f)
1025}
1026
1027/// Parse a pipeline
1028/// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1029/// zsh/Src/parse.c:894 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1030/// C emits WC_PIPE wordcodes per command; same flow.
1031fn par_pline() -> Option<ZshPipe> {
1032    let lineno = toklineno();
1033    let cmd = par_cmd()?;
1034
1035    // Check for | or |&
1036    let mut merge_stderr = false;
1037    let next = match tok() {
1038        BAR_TOK | BARAMP => {
1039            merge_stderr = tok() == BARAMP;
1040            zshlex();
1041            skip_separators();
1042            par_pline().map(Box::new)
1043        }
1044        _ => None,
1045    };
1046
1047    Some(ZshPipe {
1048        cmd,
1049        next,
1050        lineno,
1051        merge_stderr,
1052    })
1053}
1054
1055/// Parse a command
1056/// Parse a command — dispatches by leading token (FOR / CASE /
1057/// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1058/// Inpar subshell / Inbrace current-shell / TIME / NOCORRECT,
1059/// else simple). Direct port of zsh/Src/parse.c:958 `par_cmd`.
1060fn par_cmd() -> Option<ZshCommand> {
1061    // Parse leading redirections
1062    let mut redirs = Vec::new();
1063    while IS_REDIROP(tok()) {
1064        if let Some(redir) = par_redir() {
1065            redirs.push(redir);
1066        }
1067    }
1068
1069    let cmd = match tok() {
1070        FOR | FOREACH => par_for(),
1071        SELECT => parse_select(),
1072        CASE => par_case(),
1073        IF => par_if(),
1074        WHILE => par_while(false),
1075        UNTIL => par_while(true),
1076        REPEAT => par_repeat(),
1077        INPAR_TOK => par_subsh(),
1078        INOUTPAR => parse_anon_funcdef(),
1079        INBRACE_TOK => parse_cursh(),
1080        FUNC => par_funcdef(),
1081        DINBRACK => par_cond(),
1082        DINPAR => parse_arith(),
1083        TIME => par_time(),
1084        _ => par_simple(redirs),
1085    };
1086
1087    // Parse trailing redirections. For Simple commands the redirs were
1088    // already captured inside par_simple; for compound forms (Cursh,
1089    // Subsh, If, While, etc.) we collect them here and wrap in
1090    // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1091    if let Some(inner) = cmd {
1092        let mut trailing: Vec<ZshRedir> = Vec::new();
1093        while IS_REDIROP(tok()) {
1094            if let Some(redir) = par_redir() {
1095                trailing.push(redir);
1096            }
1097        }
1098        // c:1072-1075 — every par_cmd tail resets the lexer state
1099        // toggles so the NEXT command starts in cmd position with
1100        // case/cond/typeset off. par_simple/par_cond set `incmdpos=0`
1101        // during their bodies; without this reset the next iteration
1102        // of the outer par_list loop sees `if` / `done` / `select`
1103        // etc. as plain strings and the AST collapses.
1104        set_incmdpos(true);
1105        set_incasepat(0);
1106        set_incond(0);
1107        set_intypeset(false);
1108        if trailing.is_empty() {
1109            return Some(inner);
1110        }
1111        // Simple already absorbed its own redirs (compile path expects
1112        // them on ZshSimple), so don't double-wrap.
1113        if matches!(inner, ZshCommand::Simple(_)) {
1114            if let ZshCommand::Simple(mut s) = inner {
1115                s.redirs.extend(trailing);
1116                return Some(ZshCommand::Simple(s));
1117            }
1118            unreachable!()
1119        }
1120        return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1121    }
1122    // Same reset on the empty-cmd branch (mirror c:1072 unconditional
1123    // path — the C function only returns 0 above when the dispatch
1124    // produced no command, and falls through to the reset block).
1125    set_incmdpos(true);
1126    set_incasepat(0);
1127    set_incond(0);
1128    set_intypeset(false);
1129
1130    None
1131}
1132
1133/// Parse for/foreach loop
1134/// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1135/// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1136/// of zsh/Src/parse.c:1087 `par_for`. parse_for_cstyle is the
1137/// inner branch for the `((...))` arithmetic-header variant
1138/// (parse.c:1100-1140 inside par_for).
1139fn par_for() -> Option<ZshCommand> {
1140    let is_foreach = tok() == FOREACH;
1141    zshlex();
1142
1143    // Check for C-style: for (( init; cond; step ))
1144    if tok() == DINPAR {
1145        return parse_for_cstyle();
1146    }
1147
1148    // Get variable name(s). zsh parse.c par_for accepts multiple
1149    // identifier tokens before `in`/`(`/newline — `for k v in ...`
1150    // assigns each iteration's pair of values to k and v in turn.
1151    // We store the names space-joined since variable identifiers
1152    // can't contain whitespace.
1153    let mut names: Vec<String> = Vec::new();
1154    while tok() == STRING_LEX {
1155        let v = tokstr().unwrap_or_default();
1156        if v == "in" {
1157            break;
1158        }
1159        names.push(v);
1160        zshlex();
1161    }
1162    if names.is_empty() {
1163        crate::ported::utils::zerr("expected variable name in for");
1164        return None;
1165    }
1166    let var = names.join(" ");
1167
1168    // Skip newlines
1169    skip_separators();
1170
1171    // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1172    // single String token with the parens lexed-as-content
1173    // (`<Inpar>a b c<Outpar>`) instead of as separate Inpar/String/
1174    // Outpar tokens. Detect that shape and split it manually.
1175    let list = if tok() == STRING_LEX
1176        && tokstr()
1177            .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1178            .unwrap_or(false)
1179    {
1180        let raw = tokstr().unwrap_or_default();
1181        // Strip leading Inpar + trailing Outpar, then untokenize the
1182        // inner content and split on whitespace for the word list.
1183        let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1184            ..raw
1185                .char_indices()
1186                .last()
1187                .map(|(i, _)| i)
1188                .unwrap_or(raw.len())];
1189        let cleaned = super::lex::untokenize(inner);
1190        let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
1191        zshlex();
1192        ForList::Words(words)
1193    } else if tok() == STRING_LEX {
1194        let s = tokstr();
1195        if s.map(|s| s == "in").unwrap_or(false) {
1196            zshlex();
1197            let mut words = Vec::new();
1198            while tok() == STRING_LEX {
1199                let _ts_s = tokstr();
1200                if let Some(s) = _ts_s.as_deref() {
1201                    words.push(s.to_string());
1202                }
1203                zshlex();
1204            }
1205            ForList::Words(words)
1206        } else {
1207            ForList::Positional
1208        }
1209    } else if tok() == INPAR_TOK {
1210        // for var (...)
1211        zshlex();
1212        let mut words = Vec::new();
1213        while tok() == STRING_LEX || tok() == SEPER {
1214            if tok() == STRING_LEX {
1215                let _ts_s = tokstr();
1216                if let Some(s) = _ts_s.as_deref() {
1217                    words.push(s.to_string());
1218                }
1219            }
1220            zshlex();
1221        }
1222        if tok() == OUTPAR_TOK {
1223            // After the `)` of a for-list, the next token is the
1224            // body opener — `do`/`{`. zsh's lexer needs incmdpos
1225            // set so `{` lexes as Inbrace (not as a literal). C
1226            // analogue: parse.c::par_for sets `incmdpos = 1`
1227            // after consuming the Outpar before the body parse.
1228            set_incmdpos(true);
1229            zshlex();
1230        }
1231        ForList::Words(words)
1232    } else {
1233        ForList::Positional
1234    };
1235
1236    // Skip to body
1237    skip_separators();
1238
1239    // Parse body
1240    let body = parse_loop_body(is_foreach, false)?;
1241
1242    Some(ZshCommand::For(ZshFor {
1243        var,
1244        list,
1245        body: Box::new(body),
1246        is_select: false,
1247    }))
1248}
1249
/// Parse `case WORD in PATTERN) BODY ;; ... esac` (or the brace form
/// `case WORD { ... }`). Port of zsh/Src/parse.c:1209 `par_case`.
///
/// Each arm is stored as a `CaseArm { patterns, body, terminator }`,
/// where the terminator is `CaseTerm::Break` for `;;` (also used when
/// no terminator token is present), `CaseTerm::Continue` for `;&`,
/// and `CaseTerm::TestNext` for `;|`.
///
/// Returns `None`, after reporting through `zerr`, when the case word
/// is missing, when neither `in` nor `{` follows it, or when an arm's
/// pattern is not followed by `)`. Reaching `ENDINPUT` / `LEXERR`
/// inside the arm loop ends the loop and still returns the arms
/// collected so far. Arms whose pattern list is empty are dropped.
///
/// The statement order of the lexer-state toggles (`incmdpos`,
/// `incasepat`, `noaliases`, `nocorrect`) is significant; see the
/// inline notes.
fn par_case() -> Option<ZshCommand> {
    // C par_case (parse.c:1209-1241). Order of state toggles
    // matters — the lexer reads the case word in `incmdpos=0`
    // (so it's not promoted to a reswd), then the `in`/`{` in
    // `incmdpos=1, noaliases=1, nocorrect=1` (so the `in` literal
    // isn't alias-expanded or spell-corrected), then sets
    // `incasepat=1, incmdpos=0` before the first pattern.
    set_incmdpos(false);
    zshlex(); // skip 'case'

    // The match yields the case word together with the saved
    // noaliases/nocorrect values and a closure that restores them.
    let word = match tok() {
        STRING_LEX => {
            let w = tokstr().unwrap_or_default();
            // c:1222 — `incmdpos = 1;` before the next zshlex so the
            // `in` keyword is recognised. c:1223-1225 — save+force
            // noaliases / nocorrect.
            set_incmdpos(true);
            let ona = noaliases();
            let onc = nocorrect();
            set_noaliases(true);
            set_nocorrect(1);
            zshlex();
            // Restore noaliases/nocorrect after the `in`-or-`{` token
            // is in hand; both are unconditionally restored at c:1238-1239.
            let restore = |ona: bool, onc: i32| {
                set_noaliases(ona);
                set_nocorrect(onc);
            };
            (w, ona, onc, restore)
        }
        _ => {
            crate::ported::utils::zerr("expected word after case");
            return None;
        }
    };
    let (word, ona, onc, restore) = word;

    skip_separators();

    // Expect 'in' or {
    let use_brace = tok() == INBRACE_TOK;
    if tok() == STRING_LEX {
        let s = tokstr();
        if s.map(|s| s != "in").unwrap_or(true) {
            // c:1228-1232 — restore noaliases/nocorrect on error path.
            restore(ona, onc);
            crate::ported::utils::zerr("expected 'in' in case");
            return None;
        }
    } else if !use_brace {
        restore(ona, onc);
        crate::ported::utils::zerr("expected 'in' or '{' in case");
        return None;
    }
    // c:1236-1239 — `incasepat = 1; incmdpos = 0; noaliases = ona;
    // nocorrect = onc;` — set the case-pattern context AND restore
    // alias/correct state BEFORE the zshlex that consumes `in`/`{`.
    set_incasepat(1);
    set_incmdpos(false);
    restore(ona, onc);
    zshlex();

    let mut arms = Vec::new();
    // Upper bound on collected arms; exceeding it reports an error
    // and stops the loop.
    const MAX_ARMS: usize = 10_000;

    loop {
        if arms.len() > MAX_ARMS {
            crate::ported::utils::zerr("par_case: too many arms");
            break;
        }

        // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
        // This affects how [ and | are lexed
        set_incasepat(1);

        skip_separators();

        // Check for end
        // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
        let is_esac = tok() == ESAC
            || (tok() == STRING_LEX && tokstr().map(|s| s == "esac").unwrap_or(false));
        if (use_brace && tok() == OUTBRACE_TOK) || (!use_brace && is_esac) {
            set_incasepat(0);
            zshlex();
            break;
        }

        // Also break on EOF
        if tok() == ENDINPUT || tok() == LEXERR {
            set_incasepat(0);
            break;
        }

        // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
        // The leading `(` is paired with a matching `)` that closes
        // the pattern itself; the arm-close `)` follows separately.
        // Track whether we consumed it so we can skip the matching
        // `)` after pattern parsing — otherwise the arm-close would
        // be interpreted as the pattern-close and the actual body
        // would get the leftover `)`.
        let had_leading_paren = tok() == INPAR_TOK;
        if had_leading_paren {
            zshlex();
        }

        // incasepat is already set above
        // Collect the `|`-separated pattern alternatives of this arm.
        let mut patterns = Vec::new();
        loop {
            if tok() == STRING_LEX {
                let s = tokstr();
                if s.map(|s| s == "esac").unwrap_or(false) {
                    break;
                }
                patterns.push(tokstr().unwrap_or_default());
                // After first pattern token, set incasepat=2 so ( is treated as part of pattern
                set_incasepat(2);
                zshlex();
            } else if tok() != BAR_TOK {
                break;
            }

            if tok() == BAR_TOK {
                // Reset to 1 (start of next alternative pattern)
                set_incasepat(1);
                zshlex();
            } else {
                break;
            }
        }
        set_incasepat(0);

        // zsh's `(P)` form (parse.c:1320-1360 hack) treats the entire
        // parenthesized contents as ONE zsh pattern with internal `|`
        // as the literal alternation operator — NOT as multiple
        // case-arm alternatives. Without a leading `(`, the bare
        // `P1|P2)` form splits into multiple alts. Mirror that here:
        // when a leading `(` was consumed, fold the |-separated
        // pieces back into a single pattern string.
        if had_leading_paren && patterns.len() > 1 {
            let joined = patterns.join("|");
            patterns = vec![joined];
        }

        // Expect ).  Also handle the `(P))` wrapped-pattern form:
        // when a leading `(` was consumed, accept an extra `)` —
        // the inner `)` closes the optional-paren wrapper, the
        // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
        // (bare pattern, leading-paren is just the opt-marker, the
        // close is arm-close) and `(P)) BODY` (paren-wrapped
        // pattern, then arm-close). The first form is unambiguous
        // when the bare pattern was simple; the second is needed
        // when the body starts with `(`.
        if tok() != OUTPAR_TOK {
            crate::ported::utils::zerr("expected ')' in case pattern");
            return None;
        }
        // Port of Src/parse.c:1310-1313 — when the case pattern
        // closes with `)`, set `incmdpos = 1` BEFORE consuming
        // the token so the first word of the arm body is lexed
        // in command position. Without this, `case X in X) c1=v ;;`
        // lexes `c1=v` as a plain STRING rather than an assignment
        // word, and exec treats it as a command name (yielding
        // "command not found: c1=v"). Subsequent statements after
        // `;` parse correctly because the `;` separator restores
        // command position; only the FIRST body word was broken.
        set_incmdpos(true);
        zshlex();
        if had_leading_paren && tok() == OUTPAR_TOK {
            set_incmdpos(true);
            zshlex();
        }

        // Parse body
        let body = parse_program();

        // Get terminator. Set incasepat=1 BEFORE the zshlex
        // advance so the next token (the next arm's pattern, like
        // `[a-z]`) gets tokenized in pattern context. Without
        // this, a `[`-prefixed pattern after the FIRST arm became
        // Inbrack instead of String and the pattern-loop bailed
        // out with "expected ')' in case pattern".
        let terminator = match tok() {
            DSEMI => {
                set_incasepat(1);
                zshlex();
                CaseTerm::Break
            }
            SEMIAMP => {
                set_incasepat(1);
                zshlex();
                CaseTerm::Continue
            }
            SEMIBAR => {
                set_incasepat(1);
                zshlex();
                CaseTerm::TestNext
            }
            // No terminator token: nothing is consumed and the arm is
            // recorded as a plain break.
            _ => CaseTerm::Break,
        };

        // An arm without any pattern is not recorded.
        if !patterns.is_empty() {
            arms.push(CaseArm {
                patterns,
                body,
                terminator,
            });
        }
    }

    Some(ZshCommand::Case(ZshCase { word, arms }))
}
1466
1467/// Parse if statement
1468/// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
1469/// Direct port of zsh/Src/parse.c:1411 `par_if`. The C source
1470/// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
1471/// (cond, then_body) tuples plus an optional else_body.
1472fn par_if() -> Option<ZshCommand> {
1473    zshlex(); // skip 'if'
1474
1475    // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
1476    let cond = Box::new(parse_program_until(Some(&[THEN, INBRACE_TOK])));
1477
1478    skip_separators();
1479
1480    // Expect 'then' or {
1481    let use_brace = tok() == INBRACE_TOK;
1482    if tok() != THEN && !use_brace {
1483        crate::ported::utils::zerr("expected 'then' or '{' after if condition");
1484        return None;
1485    }
1486    zshlex();
1487
1488    // Parse then-body - stops at else/elif/fi, or } if using brace syntax
1489    let then = if use_brace {
1490        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1491        if tok() == OUTBRACE_TOK {
1492            zshlex();
1493        }
1494        Box::new(body)
1495    } else {
1496        Box::new(parse_program_until(Some(&[ELSE, ELIF, FI])))
1497    };
1498
1499    // Parse elif and else. zsh accepts the SAME elif/else
1500    // continuations for both classic `then/fi` AND the brace
1501    // form `{ ... } elif ... { ... } else { ... }`. Direct port
1502    // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
1503    // arms are checked AFTER the body close regardless of which
1504    // delimiter style opened the block. Without this, zinit's
1505    //   if [[ -z $sel ]] { ... } else { ... }
1506    // hung the parser — `else` was treated as an external
1507    // command following the if-statement, which the lexer state
1508    // mis-classified inside the still-open function body.
1509    //
1510    // For brace-form: skip the `fi` consumption at the end of
1511    // the loop (no `fi` after a brace block), and `else` may
1512    // arrive after a `}` close. Skip-separators between the
1513    // body close and the elif/else token.
1514    let mut elif = Vec::new();
1515    let mut else_ = None;
1516
1517    {
1518        loop {
1519            skip_separators();
1520
1521            match tok() {
1522                ELIF => {
1523                    zshlex();
1524                    // elif condition stops at 'then' or '{'
1525                    let econd = parse_program_until(Some(&[THEN, INBRACE_TOK]));
1526                    skip_separators();
1527
1528                    let elif_use_brace = tok() == INBRACE_TOK;
1529                    if tok() != THEN && !elif_use_brace {
1530                        crate::ported::utils::zerr("expected 'then' after elif");
1531                        return None;
1532                    }
1533                    zshlex();
1534
1535                    // elif body stops at else/elif/fi or } if using braces
1536                    let ebody = if elif_use_brace {
1537                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1538                        if tok() == OUTBRACE_TOK {
1539                            zshlex();
1540                        }
1541                        body
1542                    } else {
1543                        parse_program_until(Some(&[ELSE, ELIF, FI]))
1544                    };
1545
1546                    elif.push((econd, ebody));
1547                }
1548                ELSE => {
1549                    zshlex();
1550                    skip_separators();
1551
1552                    let else_use_brace = tok() == INBRACE_TOK;
1553                    if else_use_brace {
1554                        zshlex();
1555                    }
1556
1557                    // else body stops at 'fi' or '}'
1558                    else_ = Some(Box::new(if else_use_brace {
1559                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1560                        if tok() == OUTBRACE_TOK {
1561                            zshlex();
1562                        }
1563                        body
1564                    } else {
1565                        parse_program_until(Some(&[FI]))
1566                    }));
1567
1568                    // Consume the 'fi' if present (not for brace syntax)
1569                    if !else_use_brace && tok() == FI {
1570                        zshlex();
1571                    }
1572                    break;
1573                }
1574                FI => {
1575                    zshlex();
1576                    break;
1577                }
1578                _ => break,
1579            }
1580        }
1581    }
1582
1583    Some(ZshCommand::If(ZshIf {
1584        cond,
1585        then,
1586        elif,
1587        else_,
1588    }))
1589}
1590
1591/// Parse while/until loop
1592/// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
1593/// Direct port of zsh/Src/parse.c:1521 `par_while`. The
1594/// `until` variant is the same loop with the condition negated.
1595fn par_while(until: bool) -> Option<ZshCommand> {
1596    zshlex(); // skip while/until
1597
1598    let cond = Box::new(parse_program());
1599
1600    skip_separators();
1601    let body = parse_loop_body(false, false)?;
1602
1603    Some(ZshCommand::While(ZshWhile {
1604        cond,
1605        body: Box::new(body),
1606        until,
1607    }))
1608}
1609
1610/// Parse repeat loop
1611/// Parse `repeat N; do BODY; done`. Direct port of
1612/// zsh/Src/parse.c:1565 `par_repeat`. The C source supports
1613/// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
1614/// parser doesn't yet special-case that variant.
1615fn par_repeat() -> Option<ZshCommand> {
1616    zshlex(); // skip 'repeat'
1617
1618    let count = match tok() {
1619        STRING_LEX => {
1620            let c = tokstr().unwrap_or_default();
1621            zshlex();
1622            c
1623        }
1624        _ => {
1625            crate::ported::utils::zerr("expected count after repeat");
1626            return None;
1627        }
1628    };
1629
1630    skip_separators();
1631    // c:1600 — par_repeat's short-form gate is wider: it unlocks
1632    // when SHORTLOOPS OR SHORTREPEAT is set (vs SHORTLOOPS alone for
1633    // for/while). Pass `is_repeat=true` so parse_loop_body
1634    // applies that widened gate.
1635    let body = parse_loop_body(false, true)?;
1636
1637    Some(ZshCommand::Repeat(ZshRepeat {
1638        count,
1639        body: Box::new(body),
1640    }))
1641}
1642
1643/// Parse (...) subshell
1644/// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619
1645/// `par_subsh`. Body parses as a normal list; the subshell wrapper
1646/// fork-isolates execution in the executor.
1647fn par_subsh() -> Option<ZshCommand> {
1648    zshlex(); // skip (
1649    let prog = parse_program();
1650    if tok() == OUTPAR_TOK {
1651        zshlex();
1652    }
1653    Some(ZshCommand::Subsh(Box::new(prog)))
1654}
1655
1656/// Parse function definition
1657/// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
1658/// port of zsh/Src/parse.c:1672 `par_funcdef`. zsh handles
1659/// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
1660/// the optional `[fname1 fname2 ...]` for multi-name function defs,
1661/// and the `function FOO () { ... }` traditional/POSIX hybrid form.
1662fn par_funcdef() -> Option<ZshCommand> {
1663    zshlex(); // skip 'function'
1664
1665    let mut names = Vec::new();
1666    let mut tracing = false;
1667
1668    // Handle options like -T and function names. Two subtleties:
1669    //
1670    //   1. Flags: zsh's lexer encodes a leading `-` as
1671    //      `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside the String tokstr.
1672    //      The previous `s.starts_with('-')` check failed for
1673    //      `\u{9b}T`, so `function -T NAME { body }` slipped the
1674    //      `-T` token into `names` and the function got registered
1675    //      as `T` plus the intended `NAME`.
1676    //
1677    //   2. Body opener: zsh's lexer emits the opening `{` as a
1678    //      String (not INBRACE_TOK) when it follows the String
1679    //      NAME — the preceding name token resets incmdpos to
1680    //      false, and only `{` immediately followed by `}` (the
1681    //      empty-body case) gets promoted to Inbrace. The funcdef
1682    //      parser must recognise the bare-`{` String as the body
1683    //      opener; otherwise `function NAME { body }` falls through
1684    //      to `_ => break`, no body parses, and the FuncDef never
1685    //      lands in the AST. This is consistent with C zsh's
1686    //      par_funcdef which knows it's in funcdef-header context
1687    //      and accepts the brace either way.
1688    loop {
1689        match tok() {
1690            STRING_LEX => {
1691                let _ts_s = tokstr()?;
1692                let s = _ts_s.as_str();
1693                // c:1702 — `if ((*tokstr == Inbrace || *tokstr == '{') && !tokstr[1])`.
1694                // Body opener can be either the literal `{` (early-return
1695                // path at lex.c:1141-1144 / lex.rs LX2_INBRACE cmdpos
1696                // branch) or the Inbrace marker `\u{8f}` (lex.c:1420
1697                // post-switch add(c) where c was rewritten via lextok2).
1698                if s == "{" || s == "\u{8f}" {
1699                    break;
1700                }
1701                let first = s.chars().next();
1702                if matches!(first, Some('-') | Some('+')) || matches!(first, Some(c) if c == Dash) {
1703                    if s.contains('T') {
1704                        tracing = true;
1705                    }
1706                    zshlex();
1707                    continue;
1708                }
1709                names.push(s.to_string());
1710                zshlex();
1711            }
1712            INBRACE_TOK | INOUTPAR | SEPER | NEWLIN => break,
1713            _ => break,
1714        }
1715    }
1716
1717    // Optional ()
1718    let saw_paren = tok() == INOUTPAR;
1719    if saw_paren {
1720        zshlex();
1721    }
1722
1723    skip_separators();
1724
1725    // Body opener: real Inbrace OR a String containing the literal `{`
1726    // (early-return path) OR a String containing the Inbrace marker
1727    // `\u{8f}` (bct++ path post-switch add). C parse.c:1702 handles
1728    // both string forms via `*tokstr == Inbrace || *tokstr == '{'`.
1729    let body_opener_is_string_brace =
1730        tok() == STRING_LEX
1731            && tokstr().map(|s| s == "{" || s == "\u{8f}").unwrap_or(false);
1732    if tok() == INBRACE_TOK || body_opener_is_string_brace {
1733        // Capture body_start BEFORE the lexer advances past the
1734        // first body token. After the previous zshlex consumed
1735        // `{`, lexer.pos points just past `{` (which is where the
1736        // body source starts). The next `zshlex()` would advance
1737        // past the first token (`echo`), making body_start land
1738        // mid-body and lose the first word — `typeset -f f` would
1739        // print `a; echo b` for `{ echo a; echo b }`.
1740        let body_start = pos();
1741        zshlex();
1742        let body = parse_program();
1743        let body_end = if tok() == OUTBRACE_TOK {
1744            // Lexer has just consumed `}`; pos is past it. Body content
1745            // ends one byte before pos.
1746            pos().saturating_sub(1)
1747        } else {
1748            pos()
1749        };
1750        let body_source = input_slice(body_start, body_end)
1751            .map(|s| s.trim().to_string())
1752            .filter(|s| !s.is_empty());
1753        if tok() == OUTBRACE_TOK {
1754            zshlex();
1755        }
1756
1757        // Anonymous form `function () { body } a b c` (with `()`) or
1758        // `function { body } a b c` (zsh-only shorthand, no `()`). No
1759        // name was collected. Mirror parse_anon_funcdef: synthesize
1760        // `_zshrs_anon_N`, collect trailing args, set auto_call_args
1761        // so compile_funcdef registers + immediately calls the
1762        // function with the args as positional params.
1763        if names.is_empty() {
1764            let mut args = Vec::new();
1765            while tok() == STRING_LEX {
1766                if let Some(s) = tokstr() {
1767                    args.push(s);
1768                }
1769                zshlex();
1770            }
1771            static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
1772            let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
1773            let name = format!("_zshrs_anon_kw_{}", n);
1774            return Some(ZshCommand::FuncDef(ZshFuncDef {
1775                names: vec![name],
1776                body: Box::new(body),
1777                tracing,
1778                auto_call_args: Some(args),
1779                body_source,
1780            }));
1781        }
1782
1783        Some(ZshCommand::FuncDef(ZshFuncDef {
1784            names,
1785            body: Box::new(body),
1786            tracing,
1787            auto_call_args: None,
1788            body_source,
1789        }))
1790    } else {
1791        // Short form
1792        par_list().map(|list| {
1793            ZshCommand::FuncDef(ZshFuncDef {
1794                names,
1795                body: Box::new(ZshProgram { lists: vec![list] }),
1796                tracing,
1797                auto_call_args: None,
1798                body_source: None,
1799            })
1800        })
1801    }
1802}
1803
1804/// Parse time command
1805/// Parse `time CMD` (POSIX time keyword). Direct port of
1806/// zsh/Src/parse.c:1787 `par_time`. The `time` keyword
1807/// times the execution of the following pipeline / cmd.
1808fn par_time() -> Option<ZshCommand> {
1809    zshlex(); // skip 'time'
1810
1811    // Check if there's a pipeline to time
1812    if tok() == SEPER || tok() == NEWLIN || tok() == ENDINPUT {
1813        Some(ZshCommand::Time(None))
1814    } else {
1815        let sublist = par_sublist();
1816        Some(ZshCommand::Time(sublist.map(Box::new)))
1817    }
1818}
1819
1820/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Body
1821/// parser inside `[[ ... ]]` — calls `par_cond` to emit the
1822/// condition wordcode then advances past `]]`.
1823pub fn par_dinbrack() -> Option<()> {
1824    // c:1810
1825    set_incond(1); // c:1814
1826    set_incmdpos(false); // c:1815
1827    zshlex(); // c:1816
1828    let _ = par_cond(); // c:1817
1829    if tok() != DOUTBRACK {
1830        // c:1818
1831        yyerror("missing ]]");
1832        return None;
1833    }
1834    set_incond(0); // c:1820
1835    set_incmdpos(true); // c:1821
1836    zshlex(); // c:1822
1837    Some(())
1838}
1839
1840/// Parse a simple command
1841/// Parse a simple command (assignments + words + redirections).
1842/// Direct port of zsh/Src/parse.c:1836 `par_simple` —
1843/// the largest single function in parse.c. Handles ENVSTRING/
1844/// ENVARRAY assignments at command head, intermixed redirs,
1845/// typeset-style multi-assignment commands, and the trailing
1846/// inout-par `()` that converts a simple command into an inline
1847/// function definition.
1848fn par_simple(mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
1849    let mut assigns = Vec::new();
1850    let mut words = Vec::new();
1851
1852    // c:1934-1974 — `{var}>file` brace-FD detection is wired
1853    // INSIDE the words loop below (parse.rs:4940-4956) rather than
1854    // here at the head. The words-loop site sees the tok=STRING
1855    // `{varname}` followed by a REDIROP and routes into par_redir
1856    // with redir.varid populated. C does it inline at the start of
1857    // each STRING/TYPESET arm iteration; functionally equivalent.
1858
1859    // Parse leading assignments
1860    while tok() == ENVSTRING || tok() == ENVARRAY {
1861        if let Some(assign) = parse_assign() {
1862            assigns.push(assign);
1863        }
1864        zshlex();
1865    }
1866
1867    // Parse words and redirections
1868    loop {
1869        match tok() {
1870            ENVSTRING | ENVARRAY => {
1871                // Mid-command assignment-shape arg under typeset
1872                // / declare / local / etc. (intypeset gates the
1873                // lexer to emit Envstring/Envarray for `name=val`
1874                // and `name=()` past the command name). Parse the
1875                // assignment, then emit a synthetic word
1876                // `NAME=value` (scalar) or `NAME=( … )` (array)
1877                // string so typeset's builtin arg list sees the
1878                // assignment-shape arg. Avoids the inline-env
1879                // scope path that mistakenly treats it like a
1880                // pre-cmd `X=Y cmd` assignment.
1881                if let Some(assign) = parse_assign() {
1882                    let synthetic = match &assign.value {
1883                        ZshAssignValue::Scalar(v) => format!("{}={}", assign.name, v),
1884                        ZshAssignValue::Array(elems) => {
1885                            format!("{}=({})", assign.name, elems.join(" "))
1886                        }
1887                    };
1888                    words.push(synthetic);
1889                }
1890                zshlex();
1891            }
1892            STRING_LEX | TYPESET => {
1893                let s = tokstr();
1894                if let Some(s) = s {
1895                    words.push(s);
1896                }
1897                // c:1929 — `incmdpos = 0;` so the next zshlex() does
1898                // not re-promote `{`/`[[`/reserved words at the
1899                // continuation position. Without this, `echo {a,b}`
1900                // re-lexes `{` as INBRACE_TOK (current-shell block)
1901                // and the brace expansion never reaches par_simple.
1902                set_incmdpos(false);
1903                // c:1931-1932 — `if (tok == TYPESET) intypeset = is_typeset = 1;`
1904                // Multi-assign `typeset a=1 b=2` relies on the lexer
1905                // re-emitting `b=2` as ENVSTRING; that path is gated
1906                // on `intypeset`. Without this, follow-on assignment
1907                // words arrive as STRING and the typeset builtin's
1908                // multi-assign form silently degrades.
1909                if tok() == TYPESET {
1910                    set_intypeset(true);
1911                }
1912                zshlex();
1913                // Check for function definition foo() { ... }
1914                if words.len() == 1 && tok() == INOUTPAR {
1915                    return parse_inline_funcdef(words.pop().unwrap());
1916                }
1917                // `{name}>file` named-fd redirect: the lexer doesn't
1918                // recognize this shape, so the bare word `{name}`
1919                // arrives as a String. If it matches `{IDENT}` and
1920                // the NEXT token is a redirop, pop it off as the
1921                // varid for that redir.
1922                if !words.is_empty() && IS_REDIROP(tok()) {
1923                    let last = words.last().unwrap();
1924                    let untoked = super::lex::untokenize(last);
1925                    if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
1926                        let name = &untoked[1..untoked.len() - 1];
1927                        if !name.is_empty()
1928                            && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
1929                            && name
1930                                .chars()
1931                                .next()
1932                                .map(|c| c == '_' || c.is_ascii_alphabetic())
1933                                .unwrap_or(false)
1934                        {
1935                            let varid = name.to_string();
1936                            words.pop();
1937                            if let Some(mut redir) = par_redir() {
1938                                redir.varid = Some(varid);
1939                                redirs.push(redir);
1940                            }
1941                            continue;
1942                        }
1943                    }
1944                }
1945            }
1946            _ if IS_REDIROP(tok()) => {
1947                match par_redir() {
1948                    Some(redir) => redirs.push(redir),
1949                    None => break, // Error in redir parsing, stop
1950                }
1951            }
1952            INOUTPAR if !words.is_empty() => {
1953                // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1)
1954                // YYERROR(oecused);` — multi-name funcdef gate:
1955                // `f1 f2() { ... }` defines f1 AND f2 to the same
1956                // body, but only when MULTIFUNCDEF is set.
1957                if !isset(MULTIFUNCDEF) && words.len() > 1 {
1958                    crate::ported::utils::zerr(
1959                        "parse error: multiple names in function definition without MULTIFUNCDEF",
1960                    );
1961                    return None;
1962                }
1963                // c:2061-2068 — `if (isset(EXECOPT) && hasalias &&
1964                // !isset(ALIASFUNCDEF) && argc && hasalias !=
1965                // input_hasalias()) { zwarn(...); YYERROR(...); }`
1966                // Alias-as-funcdef warning. zshrs's parser doesn't
1967                // track `hasalias` (alias-expansion provenance
1968                // during parse) yet, so `had_alias` stays false —
1969                // the gate is wired here as a marker so the canonical
1970                // C predicate is visible. Once alias-provenance lands,
1971                // swap `false` for the actual provenance compare.
1972                let had_alias = false;
1973                if isset(EXECOPT) && had_alias && !isset(ALIASFUNCDEF) && !words.is_empty() {
1974                    crate::ported::utils::zwarn("defining function based on alias `(unknown)'");
1975                    return None;
1976                }
1977                // foo() { ... } style function
1978                return parse_inline_funcdef(words.pop().unwrap());
1979            }
1980            _ => break,
1981        }
1982    }
1983
1984    if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
1985        return None;
1986    }
1987
1988    Some(ZshCommand::Simple(ZshSimple {
1989        assigns,
1990        words,
1991        redirs,
1992    }))
1993}
1994
1995/// Parse a redirection
1996/// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
1997/// Direct port of zsh/Src/parse.c:2229 `par_redir`. Returns
1998/// a ZshRedir node carrying the operator type, fd, target word
1999/// (or here-doc body / pipe-redir command), and any `{var}` style
2000/// fd-binding parameter.
2001fn par_redir() -> Option<ZshRedir> {
2002    par_redir_with_id(None)
2003}
2004
2005/// Wire a here-document body onto the redirection token that
2006/// requested it. Direct port of zsh/Src/parse.c:2347
2007/// `setheredoc`. Called when a heredoc terminator has been
2008/// matched and the body is ready to be attached to the redir.
2009///
2010/// zshrs port note: zsh's setheredoc patches the wordcode
2011/// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
2012/// zshrs threads heredoc bodies through `HereDocInfo` structs
2013/// attached inline during the post-parse `fill_heredoc_bodies` walk.
2014/// This method is the AST-side equivalent: writes back to the
2015/// matching redir node by index.
2016/// Port of `setheredoc(int pc, int type, char *str, char *termstr,
2017/// char *munged_termstr)` from `Src/parse.c:2347-2355`. Patches the
2018/// pending heredoc redir at `pc` with its body string + raw and
2019/// munged terminator forms.
2020pub fn setheredoc(pc: usize, redir_type: i32, doc: &str, term: &str, munged_term: &str) {
2021    // zshrs-only guard: AST-path heredocs use `pc = -1 as usize`
2022    // (i.e. `usize::MAX`) as a sentinel meaning "no wordcode slot to
2023    // patch". C never passes a negative pc since the wordcode emitter
2024    // is always active. Skip silently for the AST-only case.
2025    if pc == usize::MAX {
2026        return;
2027    }
2028    // c:2350 — `int varid = WC_REDIR_VARID(ecbuf[pc]) ? REDIR_VARID_MASK : 0;`
2029    let cur = ECBUF.with_borrow(|b| b.get(pc).copied().unwrap_or(0));
2030    let varid = if WC_REDIR_VARID(cur) != 0 {
2031        REDIR_VARID_MASK
2032    } else {
2033        0
2034    };
2035    // c:2351 — `ecbuf[pc] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK | varid);`
2036    let new_header = WCB_REDIR((redir_type | REDIR_FROM_HEREDOC_MASK | varid) as wordcode);
2037    // c:2352 — `ecbuf[pc + 2] = ecstrcode(str);`
2038    let coded_str = ecstrcode(doc);
2039    // c:2353 — `ecbuf[pc + 3] = ecstrcode(termstr);`
2040    let coded_term = ecstrcode(term);
2041    // c:2354 — `ecbuf[pc + 4] = ecstrcode(munged_termstr);`
2042    let coded_munged = ecstrcode(munged_term);
2043    ECBUF.with_borrow_mut(|b| {
2044        b[pc] = new_header;
2045        b[pc + 2] = coded_str;
2046        b[pc + 3] = coded_term;
2047        b[pc + 4] = coded_munged;
2048    });
2049}
2050
2051/// Parse a wordlist for `for ... in WORDS;`. Direct port of
2052/// zsh/Src/parse.c:2362 `par_wordlist`. Reads STRING tokens
2053/// until the next SEPER / SEMI / NEWLIN.
2054pub fn par_wordlist() -> Vec<String> {
2055    let mut out = Vec::new();
2056    // parse.c:2362-2378 — collect STRINGs into the wordlist.
2057    while tok() == STRING_LEX {
2058        if let Some(text) = tokstr() {
2059            out.push(text);
2060        }
2061        zshlex();
2062    }
2063    out
2064}
2065
2066/// Parse a newline-separated wordlist. Direct port of
2067/// zsh/Src/parse.c:2379 `par_nl_wordlist`. Like
2068/// par_wordlist but tolerates leading/trailing newlines.
2069pub fn par_nl_wordlist() -> Vec<String> {
2070    // parse.c:2380-2381 — skip leading newlines.
2071    while tok() == NEWLIN {
2072        zshlex();
2073    }
2074    let out = par_wordlist();
2075    // parse.c:2395-2397 — skip trailing newlines.
2076    while tok() == NEWLIN {
2077        zshlex();
2078    }
2079    out
2080}
2081
2082/// `COND_SEP()` macro from `Src/parse.c:2433`. True when the current
2083/// token is a separator usable inside `[[ … ]]` (newline / semi /
2084/// `&`). C uses it to skip optional whitespace between cond terms.
2085#[inline]
2086pub fn COND_SEP() -> bool {
2087    matches!(tok(), NEWLIN | SEMI | AMPER)
2088}
2089
2090/// Parse [[ ... ]] conditional
2091/// Parse `[[ EXPR ]]` conditional expression. Direct port of
2092/// zsh/Src/parse.c:2409 `par_cond` (and helpers par_cond_1,
2093/// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2094/// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2095/// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2096///   <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2097fn par_cond() -> Option<ZshCommand> {
2098    // C par_dinbrack (parse.c:1810-1822) wraps the body parse with
2099    // `incond = 1; incmdpos = 0;` BEFORE the first zshlex past `[[`,
2100    // and resets to `incond = 0; incmdpos = 1;` after `]]`. Without
2101    // `incond = 1`, lex.c does not promote `]]` to DOUTBRACK and the
2102    // cond body bleeds past the close bracket — the parser then
2103    // sees `]]` as a separate STRING command. Every `if [[ ... ]]; then`
2104    // failed with `command not found: ]]` before this fix.
2105    set_incond(1);
2106    set_incmdpos(false);
2107    zshlex(); // skip [[
2108    // Empty cond `[[ ]]` is a parse error in zsh — emit the
2109    // diagnostic and return None so the caller produces a
2110    // non-zero exit. Without this, `[[ ]]` silently passed and
2111    // returned exit 0.
2112    if tok() == DOUTBRACK {
2113        crate::ported::utils::zerr("parse error near `]]'");
2114        set_incond(0);
2115        set_incmdpos(true);
2116        zshlex();
2117        return None;
2118    }
2119    let cond = parse_cond_expr();
2120
2121    if tok() == DOUTBRACK {
2122        set_incond(0);
2123        set_incmdpos(true);
2124        zshlex();
2125    } else {
2126        // Recover incond/incmdpos so subsequent parsing isn't stuck
2127        // in cond-mode if the close bracket is missing.
2128        set_incond(0);
2129        set_incmdpos(true);
2130    }
2131
2132    cond.map(ZshCommand::Cond)
2133}
2134
2135/// Port of `par_cond_1(void)` from `Src/parse.c:2434`. Parses one
2136/// `||`-separated cond expression. Emits `WCB_COND(COND_AND, …)`
2137/// when an `&&` is found and recurses.
2138pub fn par_cond_1() -> i32 {
2139    // c:2434
2140
2141    let p = ECUSED.with(|c| c.get()) as usize;
2142    let r = par_cond_2();
2143    while COND_SEP() {
2144        condlex();
2145    }
2146    if tok() == DAMPER {
2147        condlex();
2148        while COND_SEP() {
2149            condlex();
2150        }
2151        ecispace(p, 1);
2152        par_cond_1();
2153        let ecused = ECUSED.with(|c| c.get()) as usize;
2154        ECBUF.with(|c| {
2155            c.borrow_mut()[p] = WCB_COND(COND_AND as u32, (ecused - 1 - p) as u32);
2156        });
2157        return 1;
2158    }
2159    r
2160}
2161
2162/// Port of `par_cond_2(void)` from `Src/parse.c:2476`. The heavy
2163/// cond-term parser: handles `! cond`, `(cond)`, unary `[ -X arg ]`,
2164/// binary `[ A op B ]`, and `[ A op1 B op2 C … ]` n-ary chains.
2165pub fn par_cond_2() -> i32 {
2166    // c:2476
2167    // `n_testargs` only applies in `testlex` mode (=== /bin/test
2168    // compat). zshrs has no testlex yet, so always 0.
2169    let n_testargs: i32 = 0;
2170
2171    // c:2481 — handled inline; this Rust port skips the n_testargs
2172    // arm since zshrs invokes par_cond via [[ ... ]] only.
2173
2174    while COND_SEP() {
2175        condlex();
2176    }
2177    if tok() == BANG_TOK {
2178        // c:2522 — `[[ ! cond ]]`
2179        condlex();
2180        ecadd(WCB_COND(COND_NOT as u32, 0));
2181        return par_cond_2();
2182    }
2183    if tok() == INPAR_TOK {
2184        // c:2533 — `[[ (cond) ]]`
2185        condlex();
2186        while COND_SEP() {
2187            condlex();
2188        }
2189        let r = par_cond();
2190        while COND_SEP() {
2191            condlex();
2192        }
2193        if tok() != OUTPAR_TOK {
2194            yyerror("missing )");
2195            return 0;
2196        }
2197        condlex();
2198        return r.map_or(0, |_| 1);
2199    }
2200    let s1 = tokstr().unwrap_or_default();
2201    // c:2549 — `dble = (s1 && IS_DASH(*s1) && (!n_testargs ||
2202    // strspn(s1+1, "abcd...") == 1) && !s1[2]);` — IS_DASH covers
2203    // BOTH `-` and Dash (`\u{9b}`). The raw tokstr inside `[[ ... ]]`
2204    // carries Dash as a marker byte, so `starts_with('-')` alone
2205    // matches only ASCII dashes and misses every `-z`, `-d`, `-r`
2206    // etc. — every such cond emitted the AST-only `condition
2207    // expected` error from par_cond_double. Use IS_DASH and count
2208    // chars (Dash is a single code point) instead of bytes.
2209    let s1_chars: Vec<char> = s1.chars().collect();
2210    let dble = !s1_chars.is_empty()
2211        && IS_DASH(s1_chars[0])
2212        && s1_chars.len() == 2
2213        && "abcdefghknoprstuvwxzLONGS".contains(s1_chars[1]);
2214    if tok() != STRING_LEX {
2215        if !s1.is_empty() && tok() != LEXERR && (!dble || n_testargs != 0) {
2216            // c:2486-2497 — `if (n_testargs == 1)` block: under
2217            // POSIXBUILTINS-off, `[ -t ]` rewrites to `[ -t 1 ]`
2218            // (ksh behavior). The C gate is `unset(POSIXBUILTINS)
2219            // && check_cond(s1, "t")`. zshrs's parser has
2220            // n_testargs=0 (no testlex), so this rewrite path is
2221            // unreachable from zshrs's [[ ]] / [ ] entry points;
2222            // wired here as a marker for parity. When testlex is
2223            // ported the call below activates.
2224            if n_testargs == 1 && unset(POSIXBUILTINS) && check_cond(&s1, "t") {
2225                condlex();
2226                return par_cond_double(&s1, "1");
2227            }
2228            // c:2557 — `[[ STRING ]]` re-interpreted as `[[ -n STRING ]]`.
2229            condlex();
2230            while COND_SEP() {
2231                condlex();
2232            }
2233            return par_cond_double("-n", &s1);
2234        }
2235        yyerror("condition expected");
2236        return 0;
2237    }
2238    condlex();
2239    while COND_SEP() {
2240        condlex();
2241    }
2242    if tok() == INANG_TOK || tok() == OUTANG_TOK {
2243        // c:2576 — `<` / `>` string compare.
2244        let xtok = tok();
2245        condlex();
2246        while COND_SEP() {
2247            condlex();
2248        }
2249        if tok() != STRING_LEX {
2250            yyerror("string expected");
2251            return 0;
2252        }
2253        let s3 = tokstr().unwrap_or_default();
2254        condlex();
2255        while COND_SEP() {
2256            condlex();
2257        }
2258        let op = if xtok == INANG_TOK {
2259            COND_STRLT
2260        } else {
2261            COND_STRGTR
2262        };
2263        ecadd(WCB_COND(op as u32, 0));
2264        ecstr(&s1);
2265        ecstr(&s3);
2266        return 1;
2267    }
2268    if tok() != STRING_LEX {
2269        // c:2592 — only one operand seen → `[ -n s1 ]`.
2270        if tok() != LEXERR {
2271            if !dble || n_testargs != 0 {
2272                return par_cond_double("-n", &s1);
2273            }
2274            return par_cond_multi(&s1, &[]);
2275        }
2276        yyerror("syntax error");
2277        return 0;
2278    }
2279    let s2 = tokstr().unwrap_or_default();
2280    set_incond(incond() + 1);
2281    condlex();
2282    while COND_SEP() {
2283        condlex();
2284    }
2285    set_incond(incond() - 1);
2286    if tok() == STRING_LEX && !dble {
2287        let s3 = tokstr().unwrap_or_default();
2288        condlex();
2289        while COND_SEP() {
2290            condlex();
2291        }
2292        if tok() == STRING_LEX {
2293            // c:2615 — n-ary `[ A op B C D ... ]`.
2294            let mut l: Vec<String> = vec![s2, s3];
2295            while tok() == STRING_LEX {
2296                l.push(tokstr().unwrap_or_default());
2297                condlex();
2298                while COND_SEP() {
2299                    condlex();
2300                }
2301            }
2302            return par_cond_multi(&s1, &l);
2303        }
2304        return par_cond_triple(&s1, &s2, &s3);
2305    }
2306    par_cond_double(&s1, &s2)
2307}
2308
2309/// Port of `par_cond_double(char *a, char *b)` from `Src/parse.c:2626`.
2310/// Emits wordcode for unary cond `[ -X b ]` or modular `[ -mod b ]`.
2311pub fn par_cond_double(a: &str, b: &str) -> i32 {
2312    // c:2628 — `if (!IS_DASH(a[0]) || !a[1])` — char-based, since
2313    // Dash is a single code point (`\u{9b}`) and `a.len() < 2` on
2314    // BYTES would still pass for "-z" but fail for the marker form
2315    // `\u{9b}z` (2 bytes). Walk by chars.
2316    let ac: Vec<char> = a.chars().collect();
2317    if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2318        crate::ported::utils::zerr(&format!("parse error: condition expected: {}", a));
2319        return 1;
2320    }
2321    // c:2630 — `else if (!a[2] && strspn(a+1, "abcd...zhLONGS") == 1)`
2322    let unary_set = "abcdefgknoprstuvwxzhLONGS";
2323    if ac.len() == 2 && unary_set.contains(ac[1]) {
2324        // c:2631 — `ecadd(WCB_COND(a[1], 0));` uses the raw cond-op
2325        // letter byte as the opcode payload. Use the ASCII char's
2326        // code-point value directly — every letter in `unary_set`
2327        // fits in 7 bits.
2328        ecadd(WCB_COND(ac[1] as u32, 0));
2329        ecstr(b);
2330    } else {
2331        ecadd(WCB_COND(COND_MOD as u32, 1));
2332        ecstr(a);
2333        ecstr(b);
2334    }
2335    1
2336}
2337
/// Port of `get_cond_num(char *tst)` from `Src/parse.c:2643`.
///
/// Looks `tst` up in the table of binary cond operator names
/// (`nt`, `ot`, `ef`, `eq`, `ne`, `lt`, `gt`, `le`, `ge`) and returns
/// its zero-based position, or `-1` when `tst` is not in the table.
/// The match is exact and case-sensitive; no leading dash is expected.
pub fn get_cond_num(tst: &str) -> i32 {
    // c:2647 — table order defines the returned index.
    const CONDSTRS: [&str; 9] = ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"];
    CONDSTRS
        .iter()
        .position(|&name| name == tst) // c:2654
        .map_or(-1, |idx| idx as i32) // c:2656
}
2353
// Recursion guard for `par_time`. The documentation lives on the static
// itself: a `///` comment placed on the `thread_local!` invocation is not
// attached to any item.
thread_local! {
    /// Counterpart of par_time's `static int inpartime` (C parse.c:1038),
    /// which per the original port note prevents infinite recursion on
    /// `time time foo`. C uses a function-level static (per process);
    /// this port keeps one flag per thread, initialised to `false`.
    static PARSER_INPARTIME: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
}
2361
2362/// Port of `par_cond_triple(char *a, char *b, char *c)` from
2363/// `Src/parse.c:2659`. Emits wordcode for the binary forms
2364/// `[ A op B ]` — `=` / `==` / `!=` / `<` / `>` / `=~` / `-X`.
2365///
2366/// C does `(b[0] == Equals || b[0] == '=')` etc., matching BOTH the
2367/// raw ASCII operator char AND its tokenized marker form per
2368/// `Src/zsh.h:159-194`:
2369///   Equals = `\u{8d}`, Outang = `\u{95}`, Inang  = `\u{94}`,
2370///   Tilde  = `\u{98}`, Bang   = `\u{9c}`, Dash   = `\u{9b}`.
2371/// Inside `[[ ... ]]` the lexer emits the marker bytes — comparing
2372/// against literal-only `b"=="` misses every cond op.
2373/// (The previous Rust port had the doc comment values wrong:
2374/// Outang=0x8e was actually Bar; Inang=0x91 was Inbrack;
2375/// Tilde=0x96 was OutangProc; Bang=0x8b was Outparmath. The code
2376/// itself uses the correct const names, so this was a docs-only fix.)
2377pub fn par_cond_triple(a: &str, b: &str, c: &str) -> i32 {
2378    // c:2659
2379    let bc: Vec<char> = b.chars().collect();
2380    let is_eq = |ch: char| ch == '=' || ch == Equals;
2381    let is_gt = |ch: char| ch == '>' || ch == Outang;
2382    let is_lt = |ch: char| ch == '<' || ch == Inang;
2383    let is_tilde = |ch: char| ch == '~' || ch == Tilde;
2384    let is_bang = |ch: char| ch == '!' || ch == Bang;
2385
2386    // c:2663 — `(b[0] == Equals || b[0] == '=') && !b[1]` → `=` (single).
2387    if bc.len() == 1 && is_eq(bc[0]) {
2388        ecadd(WCB_COND(COND_STREQ as u32, 0));
2389        ecstr(a);
2390        ecstr(c);
2391        let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
2392        ecadd(np);
2393        return 1;
2394    }
2395    // c:2668-2673 — `(t0 = b[0]=='>' || Outang) || b[0]=='<' || Inang`.
2396    if bc.len() == 1 && (is_gt(bc[0]) || is_lt(bc[0])) {
2397        let op = if is_gt(bc[0]) { COND_STRGTR } else { COND_STRLT };
2398        ecadd(WCB_COND(op as u32, 0));
2399        ecstr(a);
2400        ecstr(c);
2401        let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
2402        ecadd(np);
2403        return 1;
2404    }
2405    // c:2674-2679 — `==` STRDEQ.
2406    if bc.len() == 2 && is_eq(bc[0]) && is_eq(bc[1]) {
2407        ecadd(WCB_COND(COND_STRDEQ as u32, 0));
2408        ecstr(a);
2409        ecstr(c);
2410        let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
2411        ecadd(np);
2412        return 1;
2413    }
2414    // c:2680-2684 — `!=` STRNEQ.
2415    if bc.len() == 2 && is_bang(bc[0]) && is_eq(bc[1]) {
2416        ecadd(WCB_COND(COND_STRNEQ as u32, 0));
2417        ecstr(a);
2418        ecstr(c);
2419        let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
2420        ecadd(np);
2421        return 1;
2422    }
2423    // c:2685-2691 — `=~` REGEX (no pattern slot — implicit COND_MODI).
2424    if bc.len() == 2 && is_eq(bc[0]) && is_tilde(bc[1]) {
2425        ecadd(WCB_COND(COND_REGEX as u32, 0));
2426        ecstr(a);
2427        ecstr(c);
2428        return 1;
2429    }
2430    // c:2692-2702 — `-OP` numeric-or-modular cond (e.g. `-eq`, `-nt`).
2431    if !bc.is_empty() && IS_DASH(bc[0]) {
2432        let rest: String = bc[1..].iter().collect();
2433        let t = get_cond_num(&rest);
2434        if t > -1 {
2435            ecadd(WCB_COND((t + COND_NT) as u32, 0));
2436            ecstr(a);
2437            ecstr(c);
2438            return 1;
2439        }
2440        ecadd(WCB_COND(COND_MODI as u32, 0));
2441        ecstr(b);
2442        ecstr(a);
2443        ecstr(c);
2444        return 1;
2445    }
2446    // c:2703-2707 — `-mod A B C` modular cond on `a`.
2447    let ac: Vec<char> = a.chars().collect();
2448    if !ac.is_empty() && IS_DASH(ac[0]) && ac.len() > 1 {
2449        ecadd(WCB_COND(COND_MOD as u32, 2));
2450        ecstr(a);
2451        ecstr(b);
2452        ecstr(c);
2453        return 1;
2454    }
2455    crate::ported::utils::zerr(&format!("condition expected: {}", b));
2456    1
2457}
2458
2459/// Port of `par_cond_multi(char *a, LinkList l)` from `Src/parse.c:2716`.
2460/// Emits wordcode for `[ -OP A B C … ]` n-ary cond (alternation).
2461pub fn par_cond_multi(a: &str, l: &[String]) -> i32 {
2462    // c:2716 — `if (!IS_DASH(a[0]) || !a[1])`; same Dash/`-` dual
2463    // matching as par_cond_double, char-walked because Dash is a
2464    // single code point.
2465    let ac: Vec<char> = a.chars().collect();
2466    if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2467        crate::ported::utils::zerr(&format!("condition expected: {}", a));
2468        return 1;
2469    }
2470    ecadd(WCB_COND(COND_MOD as u32, l.len() as u32));
2471    ecstr(a);
2472    for item in l {
2473        ecstr(item);
2474    }
2475    1
2476}
2477
2478/// Emit a parser-level error. Direct port of zsh/Src/parse.c
2479/// 2733-2766 `yyerror`. C version fills a per-event error buffer
2480/// and sets errflag. zshrs pushes onto errors which the
2481/// caller drains via parse()'s Result return.
2482/// WARNING: param-name divergence — Rust takes `&str message`, C takes
2483/// `int noerr`. The Rust callers pass user-meaningful messages
2484/// (`"missing ]]"`, `"condition expected"`); the C body collects the
2485/// offending token via `dupstring(zshlextext)` for the error string.
2486/// This Rust adapter:
2487///   1. Uses the caller-supplied message verbatim if non-empty.
2488///   2. Skips the `histdone & HISTFLAG_NOEXEC` and `errflag & ERRFLAG_INT`
2489///      gates per c:2746 (printing only when neither is set) — the
2490///      ERRFLAG_INT check is the load-bearing guard.
2491///   3. Sets ERRFLAG_ERROR per c:2753 (noerr=0 path always taken).
2492pub fn yyerror(msg: &str) {                                                  // c:2733
2493    let int_flagged = (crate::ported::utils::errflag.load(std::sync::atomic::Ordering::SeqCst)
2494        & crate::ported::zsh_h::ERRFLAG_INT) != 0;
2495    if !int_flagged {                                                        // c:2746
2496        let body = if msg.is_empty() { "parse error".to_string() }           // c:2751
2497                   else { format!("parse error: {msg}") };                   // c:2748
2498        crate::ported::utils::zwarnnam("zsh", &body);
2499    }
2500    // c:2753 — `if (!noerr && noerrs != 2) errflag |= ERRFLAG_ERROR;`
2501    crate::ported::utils::errflag.fetch_or(
2502        crate::ported::zsh_h::ERRFLAG_ERROR,
2503        std::sync::atomic::Ordering::SeqCst);
2504}
2505
// ============================================================
// Eprog runtime ops (parse.c:2767-2853)
//
// dupeprog / useeprog / freeeprog are zsh's reference-counting
// helpers for executable programs. This port keeps the `nref`
// bookkeeping: dupeprog builds a fresh copy (`nref` = -1 for heap
// copies, 1 otherwise), useeprog increments `nref` when it is
// non-negative, and freeeprog decrements a positive `nref` and
// clears the program's buffers once it reaches zero.
// ============================================================
2515
2516/// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2813
2517/// Port of `Eprog dupeprog(Eprog p, int heap)` from
2518/// `Src/parse.c:2767`. Deep-copies the wordcode array, string
2519/// table, and pattern-prog slots. `dummy_eprog` is returned
2520/// unchanged. `heap`-allocated copies get `nref = -1` (never
2521/// freed); real ones get `nref = 1`.
2522pub fn dupeprog(p: &crate::ported::zsh_h::eprog, heap: bool) -> crate::ported::zsh_h::eprog {
2523    // c:2774-2775 — `if (p == &dummy_eprog) return p;` — caller-
2524    // observable identity in C uses a pointer compare; Rust's
2525    // equivalent is "if it has the dummy's shape (single WCB_END
2526    // word and no strs), return a copy of the same shape".
2527    // c:2796-2797 — `for (i = r->npats; i--; pp++) *pp = dummy_patprog1;`
2528    // C uses `dummy_patprog1` as a placeholder; the Rust port has
2529    // `Vec<Patprog>` (Box<patprog>) — synthesize an equivalent zero-
2530    // initialized patprog for each slot (resolved later by
2531    // pattern.c::patcompile-on-first-use).
2532    let dummy_pat = || crate::ported::zsh_h::patprog {
2533        startoff: 0,
2534        size: 0,
2535        mustoff: 0,
2536        patmlen: 0,
2537        globflags: 0,
2538        globend: 0,
2539        flags: 0,
2540        patnpar: 0,
2541        patstartch: 0,
2542    };
2543    let r = crate::ported::zsh_h::eprog {
2544        // c:2778 — `flags = (heap ? EF_HEAP : EF_REAL) | (p->flags & EF_RUN);`
2545        flags: (if heap { EF_HEAP } else { EF_REAL }) | (p.flags & EF_RUN),
2546        len: p.len,
2547        npats: p.npats,
2548        // c:2787 — `nref = heap ? -1 : 1;`
2549        nref: if heap { -1 } else { 1 },
2550        prog: p.prog.clone(),
2551        strs: p.strs.clone(),
2552        pats: (0..p.npats).map(|_| Box::new(dummy_pat())).collect(),
2553        shf: None,
2554        dump: None,
2555    };
2556    r
2557}
2558
2559/// Port of `void useeprog(Eprog p)` from `Src/parse.c:2813`.
2560/// `if (p && p != &dummy_eprog && p->nref >= 0) p->nref++;` —
2561/// pin a real (non-heap, non-dummy) Eprog so it survives the
2562/// next `freeeprog`.
2563pub fn useeprog(p: &mut crate::ported::zsh_h::eprog) {
2564    // c:2815 — `if (p && p != &dummy_eprog && p->nref >= 0)`
2565    if p.nref >= 0 {
2566        p.nref += 1; // c:2816
2567    }
2568}
2569
2570/// Port of `void freeeprog(Eprog p)` from `Src/parse.c:2823`.
2571/// Refcount-decrement; when it hits zero, drops the pattern progs,
2572/// decrements the dump refcount if any, and releases the eprog.
2573/// `dummy_eprog` is never freed. Heap-eprogs (`nref < 0`) are
2574/// never freed either — they live as long as the heap arena.
2575pub fn freeeprog(p: &mut crate::ported::zsh_h::eprog) {
2576    // c:2829 — `if (p && p != &dummy_eprog) { ... }`
2577    if p.nref > 0 {
2578        p.nref -= 1; // c:2832
2579        if p.nref == 0 {
2580            // c:2833-2840 — drop pats, dump refcount, then the eprog.
2581            // Rust's Drop handles the per-field cleanup; we just
2582            // need to decrement the dump count first.
2583            if let Some(dump) = p.dump.take() {
2584                let dumped = (*dump).clone();
2585                decrdumpcount(&dumped); // c:2837
2586            }
2587            p.prog.clear();
2588            p.strs = None;
2589            p.pats.clear();
2590        }
2591    }
2592}
2593
// =============================================================================
// Wordcode read helpers — used by text.rs's `gettext2` and exec dispatch
// to walk a compiled Eprog without re-running the parser. The recursive-
// descent parser productions (par_event / par_list / par_cmd / par_*)
// are separate free fns at module scope in this file.
// =============================================================================
2601
2602/// Port of `ecgetstr(Estate s, int dup, int *tokflag)` from `Src/parse.c:2855`.
2603/// `s->pc` advances through the wordcode buffer; `s->strs` indexes the
2604/// string pool. Returns the interned string (or a 1-3-char literal
2605/// inlined directly into the wordcode word).
2606pub fn ecgetstr(s: &mut estate, dup: i32, tokflag: Option<&mut i32>) -> String {
2607    let prog = &s.prog.prog;
2608    if s.pc >= prog.len() {
2609        return String::new();
2610    }
2611    let c = prog[s.pc]; // c:2858 `wordcode c = *s->pc++;`
2612    s.pc += 1;
2613    if let Some(tf) = tokflag {
2614        *tf = i32::from((c & 1) != 0); // c:2880 `*tokflag = (c & 1);`
2615    }
2616    if c == 6 || c == 7 {
2617        // c:2861 `if (c == 6 || c == 7) r = "";`
2618        return String::new();
2619    }
2620    let r: String = if (c & 2) != 0 {
2621        // c:2862 — `else if (c & 2)`
2622        // c:2863-2868 — 3-byte inline string packed into the wordcode
2623        // word; followed by `buf[3] = '\0'; r = dupstring(buf);`.
2624        // C's `dupstring` uses `strlen(buf)` which TRUNCATES at the
2625        // first NUL byte — short strings of 1 or 2 chars get padded
2626        // with NULs and truncated cleanly. The previous Rust port
2627        // used `retain(|&x| x != 0)` which would silently SPLICE OUT
2628        // an interior NUL (e.g. `[a, 0, b]` → "ab"), diverging from
2629        // C's strlen-truncate (`[a, 0, b]` → "a"). Fix: truncate at
2630        // first NUL to match C exactly.
2631        let b0 = ((c >> 3)  & 0xff) as u8;
2632        let b1 = ((c >> 11) & 0xff) as u8;
2633        let b2 = ((c >> 19) & 0xff) as u8;
2634        let v  = [b0, b1, b2];
2635        let end = v.iter().position(|&x| x == 0).unwrap_or(v.len());        // c:2869 strlen(buf)
2636        String::from_utf8_lossy(&v[..end]).into_owned()
2637    } else {
2638        // c:2877 `else r = s->strs + (c >> 2);`
2639        let off = (c >> 2) as usize + s.strs_offset;
2640        let strs_bytes = s.strs.as_deref().unwrap_or("").as_bytes();
2641        if off >= strs_bytes.len() {
2642            String::new()
2643        } else {
2644            let tail = &strs_bytes[off..];
2645            let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
2646            String::from_utf8_lossy(&tail[..end]).into_owned()
2647        }
2648    };
2649    // c:2891 `return ((dup == EC_DUP || (dup && (c & 1))) ? dupstring(r) : r);`
2650    // Rust owns the String already; `dup` flag has no observable effect.
2651    let _ = (dup, EC_DUP, EC_NODUP);
2652    r
2653}
2654
2655// ============================================================
2656// Wordcode runtime getters (parse.c:2853-3060)
2657//
2658// Direct ports of the wordcode-read helpers (ecrawstr,
2659// ecgetstr, ecgetarr, ecgetredirs, ecgetlist, eccopyredirs).
2660// Read packed wordcode out of an Eprog at execution time.
2661// Used by exec_wordcode and the wordcode-walking dispatch in
2662// src/exec.rs.
2663// ============================================================
2664
2665/// Port of `ecrawstr(Eprog p, Wordcode pc, int *tokflag)` from
2666/// `Src/parse.c:2891`. Like `ecgetstr` but reads at the given pc
2667/// without advancing — caller steps `pc` separately.
2668pub fn ecrawstr(p: &eprog, pc: usize, tokflag: Option<&mut i32>) -> String {
2669    if pc >= p.prog.len() {
2670        return String::new();
2671    }
2672    let c = p.prog[pc]; // c:2894
2673    if let Some(tf) = tokflag {
2674        *tf = i32::from((c & 1) != 0); // c:2898/2906/2912
2675    }
2676    if c == 6 || c == 7 {
2677        // c:2897
2678        return String::new();
2679    }
2680    if (c & 2) != 0 {
2681        // c:2902-2906 — same 3-byte inline string as ecgetstr, then
2682        // `buf[3] = '\0'; return dupstring(buf);` — truncate at first
2683        // NUL via strlen (NOT splice out interior NULs).
2684        let b0 = ((c >> 3)  & 0xff) as u8;
2685        let b1 = ((c >> 11) & 0xff) as u8;
2686        let b2 = ((c >> 19) & 0xff) as u8;
2687        let v  = [b0, b1, b2];
2688        let end = v.iter().position(|&x| x == 0).unwrap_or(v.len());        // c:2906 strlen(buf)
2689        String::from_utf8_lossy(&v[..end]).into_owned()
2690    } else {
2691        // c:2911
2692        let off = (c >> 2) as usize;
2693        let strs_bytes = p.strs.as_deref().unwrap_or("").as_bytes();
2694        if off >= strs_bytes.len() {
2695            return String::new();
2696        }
2697        let tail = &strs_bytes[off..];
2698        let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
2699        String::from_utf8_lossy(&tail[..end]).into_owned()
2700    }
2701}
2702
2703/// Port of `ecgetarr(Estate s, int num, int dup, int *tokflag)` from
2704/// `Src/parse.c:2917`. Reads `num` strings from wordcode at `s->pc`
2705/// and OR-folds each entry's token flag into `*tokflag`.
2706pub fn ecgetarr(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
2707    let mut ret: Vec<String> = Vec::with_capacity(num); // c:2922
2708    let mut tf: i32 = 0;
2709    for _ in 0..num {
2710        // c:2924 `while (num--)`
2711        let mut tmp = 0;
2712        ret.push(ecgetstr(s, dup, Some(&mut tmp))); // c:2925
2713        tf |= tmp; // c:2926
2714    }
2715    if let Some(out) = tokflag {
2716        // c:2929
2717        *out = tf;
2718    }
2719    ret
2720}
2721
2722/// Port of `ecgetlist(Estate s, int num, int dup, int *tokflag)` from
2723/// `Src/parse.c:2937`. Same shape as `ecgetarr` but C returns
2724/// `LinkList`; zshrs uses `Vec<String>` for both.
2725pub fn ecgetlist(
2726    s: &mut crate::ported::zsh_h::estate,
2727    num: usize,
2728    dup: i32,
2729    tokflag: Option<&mut i32>,
2730) -> Vec<String> {
2731    if num == 0 {
2732        // c:2949-2952
2733        if let Some(tf) = tokflag {
2734            *tf = 0;
2735        }
2736        return Vec::new();
2737    }
2738    ecgetarr(s, num, dup, tokflag)
2739}
2740
2741/// Port of `ecgetredirs(Estate s)` from `Src/parse.c:2959`.
2742///
2743/// `strs` must be the same tail `ecgetstr` uses (`s->strs` / `estate.strs` from offset).
2744/// WARNING: param names don't match C — Rust=(prog, strs, pc) vs C=(s)
2745pub fn ecgetredirs(s: &mut estate) -> Vec<redir> {
2746    let mut ret: Vec<redir> = Vec::new(); // c:2959 `LinkList ret = newlinklist();`
2747    let prog_len = s.prog.prog.len();
2748    if s.pc >= prog_len {
2749        return ret;
2750    }
2751    let mut code = s.prog.prog[s.pc]; // c:2962 `wordcode code = *s->pc++;`
2752    s.pc += 1;
2753
2754    loop {
2755        if wc_code(code) != WC_REDIR {
2756            // c:2988-2989 `s->pc--` then break from while
2757            s.pc = s.pc.saturating_sub(1);
2758            break;
2759        }
2760
2761        let typ = WC_REDIR_TYPE(code); // c:2967 `r->type = WC_REDIR_TYPE(code);`
2762        if s.pc >= prog_len {
2763            break;
2764        }
2765        let fd1_w = s.prog.prog[s.pc]; // c:2968 `r->fd1 = *s->pc++;`
2766        s.pc += 1;
2767
2768        let name = ecgetstr(s, EC_DUP, None); // c:2969 `r->name = ecgetstr(...)`
2769
2770        let (flags, here_terminator, munged_here_terminator) = if WC_REDIR_FROM_HEREDOC(code) != 0 {
2771            // c:2970-2973
2772            let term = ecgetstr(s, EC_DUP, None);
2773            let munged = ecgetstr(s, EC_DUP, None);
2774            (REDIRF_FROM_HEREDOC, Some(term), Some(munged))
2775        } else {
2776            // c:2974-2977
2777            (0, None, None)
2778        };
2779
2780        let varid = if WC_REDIR_VARID(code) != 0 {
2781            // c:2979-2980
2782            Some(ecgetstr(s, EC_DUP, None))
2783        } else {
2784            None // c:2981-2982
2785        };
2786
2787        ret.push(redir {
2788            // c:2965-2982 fields + c:2984 `addlinknode`
2789            typ,
2790            flags,
2791            fd1: fd1_w as i32,
2792            fd2: 0,
2793            name: Some(name),
2794            varid,
2795            here_terminator,
2796            munged_here_terminator,
2797        });
2798
2799        if s.pc >= prog_len {
2800            break;
2801        }
2802        code = s.prog.prog[s.pc]; // c:2986 `code = *s->pc++;`
2803        s.pc += 1;
2804    }
2805
2806    ret // c:2990 `return ret`
2807}
2808
2809/// Port of `eccopyredirs(Estate s)` from `Src/parse.c:3003`. Reads
2810/// the WC_REDIR run at `s->pc`, counts the wordcodes needed,
2811/// reserves space in `ecbuf` via `ecispace`, then re-walks `s->pc`
2812/// re-emitting each redir's wordcodes into the reserved slot —
2813/// finally calls `bld_eprog(0)` to package the result as an Eprog.
2814pub fn eccopyredirs(s: &mut crate::ported::zsh_h::estate) -> Option<crate::ported::zsh_h::eprog> {
2815    let prog_len = s.prog.prog.len();
2816    if s.pc >= prog_len {
2817        return None;
2818    }
2819    // c:3007-3009 — `if (wc_code(*pc) != WC_REDIR) return NULL;`
2820    let first_code = s.prog.prog[s.pc];
2821    if wc_code(first_code) != WC_REDIR {
2822        return None;
2823    }
2824    // c:3011 — `init_parse();`
2825    init_parse();
2826
2827    // c:3013-3027 — count wordcodes the redir run will need.
2828    // Each WC_REDIR contributes `code + fd1 + name` = 3, plus
2829    // `+2` if WC_REDIR_FROM_HEREDOC (terminator + munged), plus
2830    // `+1` if WC_REDIR_VARID.
2831    let mut probe = s.pc;
2832    let mut ncodes = 0usize;
2833    loop {
2834        if probe >= prog_len {
2835            break;
2836        }
2837        let code = s.prog.prog[probe];
2838        if wc_code(code) != WC_REDIR {
2839            break;
2840        }
2841        let mut ncode = if WC_REDIR_FROM_HEREDOC(code) != 0 {
2842            5
2843        } else {
2844            3
2845        };
2846        if WC_REDIR_VARID(code) != 0 {
2847            ncode += 1;
2848        }
2849        probe += ncode;
2850        ncodes += ncode;
2851    }
2852
2853    // c:3028-3029 — `r = ecused; ecispace(r, ncodes);`
2854    let r0 = ECUSED.get() as usize;
2855    ecispace(r0, ncodes);
2856
2857    // c:3031-3053 — re-walk `s->pc` and write into ecbuf[r..].
2858    let mut r = r0;
2859    loop {
2860        if s.pc >= prog_len {
2861            break;
2862        }
2863        let code = s.prog.prog[s.pc];
2864        if wc_code(code) != WC_REDIR {
2865            break;
2866        }
2867        s.pc += 1;
2868        // c:3036 — `ecbuf[r++] = code;`
2869        ECBUF.with_borrow_mut(|buf| {
2870            if r >= buf.len() {
2871                buf.resize(r + 1, 0);
2872            }
2873            buf[r] = code;
2874        });
2875        r += 1;
2876        // c:3038 — `ecbuf[r++] = *s->pc++;` (the fd1 word)
2877        let fd1 = s.prog.prog[s.pc];
2878        s.pc += 1;
2879        ECBUF.with_borrow_mut(|buf| {
2880            if r >= buf.len() {
2881                buf.resize(r + 1, 0);
2882            }
2883            buf[r] = fd1;
2884        });
2885        r += 1;
2886        // c:3041 — `ecbuf[r++] = ecstrcode(ecgetstr(s, EC_NODUP, NULL));`
2887        let name = ecgetstr(s, EC_NODUP, None);
2888        let nc = ecstrcode(&name);
2889        ECBUF.with_borrow_mut(|buf| {
2890            if r >= buf.len() {
2891                buf.resize(r + 1, 0);
2892            }
2893            buf[r] = nc;
2894        });
2895        r += 1;
2896        // c:3042-3047 — heredoc terminators.
2897        if WC_REDIR_FROM_HEREDOC(code) != 0 {
2898            let term = ecgetstr(s, EC_NODUP, None);
2899            let tc = ecstrcode(&term);
2900            ECBUF.with_borrow_mut(|buf| {
2901                if r >= buf.len() {
2902                    buf.resize(r + 1, 0);
2903                }
2904                buf[r] = tc;
2905            });
2906            r += 1;
2907            let munged = ecgetstr(s, EC_NODUP, None);
2908            let mc = ecstrcode(&munged);
2909            ECBUF.with_borrow_mut(|buf| {
2910                if r >= buf.len() {
2911                    buf.resize(r + 1, 0);
2912                }
2913                buf[r] = mc;
2914            });
2915            r += 1;
2916        }
2917        // c:3048-3049 — varid.
2918        if WC_REDIR_VARID(code) != 0 {
2919            let varid = ecgetstr(s, EC_NODUP, None);
2920            let vc = ecstrcode(&varid);
2921            ECBUF.with_borrow_mut(|buf| {
2922                if r >= buf.len() {
2923                    buf.resize(r + 1, 0);
2924                }
2925                buf[r] = vc;
2926            });
2927            r += 1;
2928        }
2929    }
2930
2931    // c:3056 — `return bld_eprog(0);` — `bld_eprog` appends the
2932    // WC_END marker and packages ECBUF/ECSTRS into an Eprog.
2933    Some(bld_eprog(false))
2934}
2935
2936/// Port of `init_eprog(void)` from `Src/parse.c:3069`. Sets up
2937/// `dummy_eprog_code = WCB_END(); dummy_eprog.len = sizeof(wordcode);
2938/// dummy_eprog.prog = &dummy_eprog_code; dummy_eprog.strs = NULL;`.
2939/// Called once at shell startup (init_main → init_misc → init_eprog).
2940pub fn init_eprog() {
2941    let mut d = DUMMY_EPROG.lock().unwrap();
2942    d.prog = vec![crate::ported::zsh_h::WCB_END()]; // c:3071/3073
2943    d.len = std::mem::size_of::<wordcode>() as i32; // c:3072
2944    d.strs = None; // c:3074
2945    d.flags = 0;
2946    d.npats = 0;
2947    d.nref = 0;
2948}
2949
2950// =====================================================================
2951// `bin_zcompile` and wordcode-dump helpers — port of `Src/parse.c:3104+`.
2952//
2953// The wordcode dump format (`.zwc`) is a serialized parse tree zsh can
2954// `mmap()` and dispatch from without re-parsing on every shell start.
2955// File layout (one struct = `FD_PRELEN` `u32`s):
2956//   - `pre[0]` = magic word (FD_MAGIC native byte-order, FD_OMAGIC
2957//     opposite byte-order).
2958//   - `pre[1]` = packed `{flags(8) | other_offset(24)}` byte field.
2959//   - `pre[2..12]` = `ZSH_VERSION` C-string padded to 40 bytes.
2960//   - `pre[12]` = `fdheaderlen` (total prelude+header word count).
2961//   - Then a sequence of `struct fdhead` records, one per function,
2962//     each followed by its NUL-terminated name (padded to 4-byte).
2963//   - Then the wordcode bytes for every function back-to-back.
2964//
// On a little-endian host the dump is written twice: first with
// `FD_MAGIC` for native readers, then the body is re-walked byte-swapped
// and emitted as a second `FD_OMAGIC` copy so big-endian readers can
// mmap it too.
2968// =====================================================================
2969
2970// File-format constants — port of `Src/parse.c:3104-3150`.
2971
/// File-name extension of a wordcode dump
/// (`#define FD_EXT ".zwc"`, `Src/parse.c:3104`).
pub const FD_EXT: &str = ".zwc";

/// Minimum wordcode body size, in bytes, for `-M` (mmap) mode to be
/// used; smaller bodies are read with read(2) instead
/// (`#define FD_MINMAP 4096`, `Src/parse.c:3105`).
pub const FD_MINMAP: usize = 4 * 1024;

/// Length of the file prelude in u32 words: one magic word, one packed
/// flags word and ten version words
/// (`#define FD_PRELEN 12`, `Src/parse.c:3107`).
pub const FD_PRELEN: usize = 1 + 1 + 10;

/// Magic word marking a dump in native byte order
/// (`#define FD_MAGIC 0x04050607`, `Src/parse.c:3108`).
pub const FD_MAGIC: u32 = 0x0405_0607;

/// Magic word marking a dump in the opposite byte order; it is
/// `FD_MAGIC` with its bytes reversed
/// (`#define FD_OMAGIC 0x07060504`, `Src/parse.c:3109`).
pub const FD_OMAGIC: u32 = 0x0706_0504;

/// Dump flag: the dump should be `mmap()`-ed (`-M`) rather than read
/// normally (`-R`) (`#define FDF_MAP 1`, `Src/parse.c:3111`).
pub const FDF_MAP: u32 = 1 << 0;

/// Dump flag: an opposite-byte-order copy exists at `fdother(f)`
/// (`#define FDF_OTHER 2`, `Src/parse.c:3112`).
pub const FDF_OTHER: u32 = 1 << 1;
2999
/// Port of `struct fdhead` from `Src/parse.c:3116`. One per function
/// inside a wordcode dump. All fields are `wordcode` (u32).
///
/// `write_dump` serializes the six fields in declaration order,
/// followed by the function name, a NUL byte and padding up to the
/// next 4-byte boundary.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy)]
pub struct fdhead {
    /// Offset (in u32 words) to the start of this function's
    /// wordcode body inside the dump. `write_dump` assigns a running
    /// offset that starts at the header length and grows by each
    /// body's word count.
    pub start: u32, // c:3117
    /// Wordcode-byte length of the body (excludes pattern-prog slots).
    /// `write_dump` stores `body.len() * 4`.
    pub len: u32, // c:3118
    /// Number of compiled patterns the body references.
    /// `write_dump` currently always writes 0 here.
    pub npats: u32, // c:3119
    /// Offset of the string table inside `prog->prog`.
    /// `write_dump` currently always writes 0 here.
    pub strs: u32, // c:3120
    /// Header-record length in u32 words (record + name):
    /// `FDHEAD_WORDS + (name.len() + 4) / 4`.
    pub hlen: u32, // c:3121
    /// Packed `{ kshload_bits(2) | name_tail_offset(30) }` field,
    /// produced by `fdhbldflags(load_flags, tail)` where `tail` is the
    /// byte index just past the last `/` in the name.
    /// NOTE(review): exact bit positions live in `fdhbldflags` /
    /// `fdhtail`, which are outside this section — confirm there.
    pub flags: u32, // c:3122
}
3019
/// `#define FDHF_KSHLOAD 1` from `Src/parse.c:3149`. Function-header
/// flag word — `-k` ksh-style autoload marker. `bin_zcompile` selects
/// it when `-k` is given.
pub const FDHF_KSHLOAD: u32 = 1;

/// `#define FDHF_ZSHLOAD 2` from `Src/parse.c:3150`. `-z` zsh-style
/// autoload marker. `bin_zcompile` selects it when `-z` is given (and
/// `-k` is not); `cur_add_func` always records it.
pub const FDHF_ZSHLOAD: u32 = 2;
3027
/// Port of `struct wcfunc` from `Src/parse.c:3158`. Build-time
/// per-function aggregate before write_dump emits it. Where the C
/// struct holds an `Eprog`, this port keeps the body as a plain
/// `Vec<u32>` of wordcode because the `Eprog` ↔ `parse_string` chain
/// isn't fully wired through this layer yet.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone)]
pub struct wcfunc {
    /// Function name as written into the dump header; `write_dump`
    /// derives the basename offset from the last `/` in it.
    pub name: String, // c:3159
    /// Autoload-style flag (`FDHF_KSHLOAD` / `FDHF_ZSHLOAD` / 0),
    /// passed to `fdhbldflags` by `write_dump`.
    pub flags: u32,   // c:3161
    /// Compiled body wordcode (one `u32` array per fn). Empty until
    /// the eprog emit-side lands; `write_dump` then walks each entry.
    pub body: Vec<u32>,
}
3042
3043/// Port of `dump_find_func(Wordcode h, char *name)` from
3044/// `Src/parse.c:3167`. Walks the header table inside a loaded
3045/// dump for a function with the given basename; returns true on hit.
3046pub fn dump_find_func(h: &[u32], name: &str) -> bool {
3047    // c:3167
3048    let header_words = fdheaderlen(h) as usize;
3049    let end = header_words; // walking u32 offsets, end-exclusive
3050    let mut cur = firstfdhead_offset();
3051    while cur < end {
3052        if let Some(fh) = read_fdhead(h, cur) {
3053            let full = fdname(h, cur);
3054            let tail = fdhtail(&fh) as usize;
3055            let basename = if tail <= full.len() {
3056                &full[tail..]
3057            } else {
3058                ""
3059            };
3060            if basename == name {
3061                return true;
3062            }
3063            cur = nextfdhead_offset(h, cur);
3064        } else {
3065            break;
3066        }
3067    }
3068    false
3069}
3070
3071/// Port of `bin_zcompile(char *nam, char **args, Options ops, UNUSED(int func))`
3072/// from `Src/parse.c:3180`. Validates the option set, then dispatches
3073/// to one of: `-t` (test/list), `-c`/`-a` (dump current functions),
3074/// or the default (compile source files to `.zwc`).
3075pub fn bin_zcompile(
3076    nam: &str, // c:3180
3077    args: &[String],
3078    ops: &crate::ported::zsh_h::options,
3079    _func: i32,
3080) -> i32 {
3081    // c:3185-3192 — illegal-combination guard.
3082    if (OPT_ISSET(ops, b'k') && OPT_ISSET(ops, b'z'))
3083        || (OPT_ISSET(ops, b'R') && OPT_ISSET(ops, b'M'))
3084        || (OPT_ISSET(ops, b'c')
3085            && (OPT_ISSET(ops, b'U') || OPT_ISSET(ops, b'k') || OPT_ISSET(ops, b'z')))
3086        || (!(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && OPT_ISSET(ops, b'm'))
3087    {
3088        zwarnnam(nam, "illegal combination of options"); // c:3192
3089        return 1;
3090    }
3091
3092    // c:3194 — `-c`/`-a` + KSHAUTOLOAD warning.
3093    if (OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && isset(crate::ported::zsh_h::KSHAUTOLOAD) {
3094        zwarnnam(nam, "functions will use zsh style autoloading"); // c:3195
3095    }
3096
3097    // c:3196-3197 — flag word from `-k` / `-z`.
3098    let flags: u32 = if OPT_ISSET(ops, b'k') {
3099        FDHF_KSHLOAD
3100    } else if OPT_ISSET(ops, b'z') {
3101        FDHF_ZSHLOAD
3102    } else {
3103        0
3104    };
3105
3106    // c:3199 — `-t` test/list mode.
3107    if OPT_ISSET(ops, b't') {
3108        // c:3199
3109        if args.is_empty() {
3110            zwarnnam(nam, "too few arguments"); // c:3202
3111            return 1;
3112        }
3113        let dump_name = if args[0].ends_with(FD_EXT) {
3114            args[0].clone()
3115        } else {
3116            format!("{}{}", args[0], FD_EXT)
3117        };
3118        let f = match load_dump_header(nam, &dump_name, 1) {
3119            // c:3206
3120            Some(buf) => buf,
3121            None => return 1,
3122        };
3123        // c:3209 — per-function check.
3124        if args.len() > 1 {
3125            for name in &args[1..] {
3126                // c:3210
3127                if !dump_find_func(&f, name) {
3128                    // c:3212
3129                    return 1;
3130                }
3131            }
3132            return 0;
3133        }
3134        // c:3215-3221 — listing arm. Walk every fdhead, print
3135        // each function's full name. C uses `fdname(h)` which
3136        // includes the path prefix; matches our `fdname()` impl.
3137        let mapped = if (fdflags(&f) & FDF_MAP) != 0 {
3138            "mapped"
3139        } else {
3140            "read"
3141        };
3142        println!("zwc file ({}) for zsh-{}", mapped, fdversion(&f));
3143        let header_words = fdheaderlen(&f) as usize;
3144        let mut cur = firstfdhead_offset();
3145        while cur < header_words {
3146            if read_fdhead(&f, cur).is_none() {
3147                break;
3148            }
3149            println!("{}", fdname(&f, cur));
3150            cur = nextfdhead_offset(&f, cur);
3151        }
3152        return 0;
3153    }
3154
3155    if args.is_empty() {
3156        zwarnnam(nam, "too few arguments"); // c:3226
3157        return 1;
3158    }
3159
3160    // c:3228 — map mode discriminant.
3161    let map: i32 = if OPT_ISSET(ops, b'M') {
3162        2
3163    } else if OPT_ISSET(ops, b'R') {
3164        0
3165    } else {
3166        1
3167    };
3168
3169    // c:3230-3236 — single-file default-mode short path.
3170    if args.len() == 1 && !(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) {
3171        let dump = format!("{}{}", args[0], FD_EXT);
3172        return build_dump(nam, &dump, args, OPT_ISSET(ops, b'U') as i32, map, flags);
3173    }
3174
3175    // c:3239-3247 — multi-file or `-c`/`-a` mode.
3176    let dump = if args[0].ends_with(FD_EXT) {
3177        args[0].clone()
3178    } else {
3179        format!("{}{}", args[0], FD_EXT)
3180    };
3181    let rest = &args[1..];
3182    if OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a') {
3183        let what =
3184            (if OPT_ISSET(ops, b'c') { 1 } else { 0 }) | (if OPT_ISSET(ops, b'a') { 2 } else { 0 });
3185        build_cur_dump(nam, &dump, rest, OPT_ISSET(ops, b'm') as i32, map, what)
3186    } else {
3187        build_dump(nam, &dump, rest, OPT_ISSET(ops, b'U') as i32, map, flags)
3188    }
3189}
3190
3191/// Port of `load_dump_header(char *nam, char *name, int err)` from
3192/// `Src/parse.c:3258`. Opens the file, reads + validates the magic
3193/// and version, then slurps the full header table into memory.
3194/// Returns the header u32-array on success or None on any failure
3195/// (emitting C-shaped warnings when `err != 0`).
3196pub fn load_dump_header(nam: &str, name: &str, err: i32) -> Option<Vec<u32>> {
3197    // c:3258
3198
3199    let mut f = match File::open(name) {
3200        // c:3263
3201        Ok(h) => h,
3202        Err(_) => {
3203            if err != 0 {
3204                zwarnnam(nam, &format!("can't open zwc file: {}", name)); // c:3265
3205            }
3206            return None;
3207        }
3208    };
3209
3210    // Read FD_PRELEN+1 u32 words = 52 bytes.
3211    let mut buf_bytes = vec![0u8; (FD_PRELEN + 1) * 4];
3212    if f.read_exact(&mut buf_bytes).is_err() {
3213        if err != 0 {
3214            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3215        }
3216        return None;
3217    }
3218    let mut buf: Vec<u32> = buf_bytes
3219        .chunks_exact(4)
3220        .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3221        .collect();
3222
3223    // c:3270 — magic + version check. `ZSH_VERSION` (the C-side
3224    // global) — zshrs reports "5.9" in `--zsh` mode (Src/init.c parity).
3225    let magic_ok = fdmagic(&buf) == FD_MAGIC || fdmagic(&buf) == FD_OMAGIC;
3226    let v_ok = fdversion(&buf) == "5.9";
3227    if !magic_ok {
3228        if err != 0 {
3229            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3230        }
3231        return None;
3232    }
3233    if !v_ok {
3234        if err != 0 {
3235            zwarnnam(
3236                nam,
3237                &format!(
3238                    "zwc file has wrong version (zsh-{}): {}", // c:3274
3239                    fdversion(&buf),
3240                    name
3241                ),
3242            );
3243        }
3244        return None;
3245    }
3246
3247    // c:3285 — if magic matches host byte order, head len is `pre[FD_PRELEN]`.
3248    // Else seek to `fdother(buf)` and re-read.
3249    if fdmagic(&buf) != FD_MAGIC {
3250        let other = fdother(&buf) as u64; // c:3290
3251        if f.seek(SeekFrom::Start(other)).is_err() || f.read_exact(&mut buf_bytes).is_err() {
3252            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3295
3253            return None;
3254        }
3255        buf = buf_bytes
3256            .chunks_exact(4)
3257            .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3258            .collect();
3259    }
3260
3261    let total_words = fdheaderlen(&buf) as usize; // c:3286/3299
3262    if total_words < FD_PRELEN + 1 {
3263        zwarnnam(nam, &format!("invalid zwc file: {}", name));
3264        return None;
3265    }
3266
3267    // Read the remaining header words.
3268    let mut head: Vec<u32> = Vec::with_capacity(total_words);
3269    head.extend_from_slice(&buf);
3270    let remaining_words = total_words - (FD_PRELEN + 1);
3271    if remaining_words > 0 {
3272        let mut rest_bytes = vec![0u8; remaining_words * 4]; // c:3305
3273        if f.read_exact(&mut rest_bytes).is_err() {
3274            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3307
3275            return None;
3276        }
3277        for c in rest_bytes.chunks_exact(4) {
3278            head.push(u32::from_le_bytes([c[0], c[1], c[2], c[3]]));
3279        }
3280    }
3281    Some(head) // c:3311
3282}
3283
/// Port of `fdswap(Wordcode p, int n)` from `Src/parse.c:3318`.
/// Reverses the byte order of every word of `p` in place; the slice
/// length takes the place of the C `n` argument. Applying it twice
/// restores the input.
pub fn fdswap(p: &mut [u32]) {
    // c:3318
    p.iter_mut().for_each(|word| {
        let mut bytes = word.to_ne_bytes();
        bytes.reverse();
        *word = u32::from_ne_bytes(bytes);
    });
}
3293
3294/// Port of `write_dump(int dfd, LinkList progs, int map, int hlen, int tlen)`
3295/// from `Src/parse.c:3334`. Writes the prelude + header records +
3296/// body wordcode bytes to the dump file descriptor.
3297///
3298/// Two passes: first native-byte-order (`FD_MAGIC`), then opposite-
3299/// byte-order (`FD_OMAGIC`) so big-endian readers can mmap the
3300/// same file. Bodies are byte-swapped via `fdswap` on the second pass.
3301pub fn write_dump(
3302    dfd: &mut std::fs::File, // c:3334
3303    progs: &[wcfunc],
3304    mut map: i32,
3305    hlen: i32,
3306    tlen: i32,
3307) -> std::io::Result<()> {
3308    if map == 1 && (tlen as usize) >= FD_MINMAP {
3309        // c:3344
3310        map = 1;
3311    } else if map == 1 {
3312        map = 0;
3313    }
3314
3315    let mut other = 0u32; // c:3338
3316    let ohlen = hlen;
3317    let mut cur_hlen = hlen;
3318
3319    loop {
3320        cur_hlen = ohlen;
3321        // c:3347 — build the prelude.
3322        let mut pre = vec![0u32; FD_PRELEN];
3323        pre[0] = if other != 0 { FD_OMAGIC } else { FD_MAGIC }; // c:3350
3324        let flags = (if map != 0 { FDF_MAP } else { 0 }) | other;
3325        fdsetflags(&mut pre, flags as u8); // c:3351
3326        fdsetother(&mut pre, tlen as u32); // c:3352
3327                                           // c:3353 — copy ZSH_VERSION C-string into pre[2..].
3328        let ver = b"5.9";
3329        for (i, &b) in ver.iter().enumerate() {
3330            let word = 2 + i / 4;
3331            let shift = (i % 4) * 8;
3332            pre[word] |= (b as u32) << shift;
3333        }
3334        // Write prelude.
3335        for w in &pre {
3336            dfd.write_all(&w.to_le_bytes())?;
3337        }
3338        // c:3356 — per-fn header records.
3339        for wcf in progs {
3340            let n = &wcf.name;
3341            let prog = &wcf.body;
3342            let mut head = fdhead {
3343                start: cur_hlen as u32,                                     // c:3360
3344                len: (prog.len() * 4) as u32,                               // c:3363
3345                npats: 0, // c:3364 (npats not tracked yet)
3346                strs: 0,  // c:3365
3347                hlen: ((FDHEAD_WORDS as u32) + ((n.len() as u32 + 4) / 4)), // c:3366
3348                flags: 0,
3349            };
3350            cur_hlen += prog.len() as i32; // c:3361
3351                                           // c:3368 — name tail offset from path basename.
3352            let tail = n.rfind('/').map(|p| p + 1).unwrap_or(0);
3353            head.flags = fdhbldflags(wcf.flags, tail as u32); // c:3372
3354                                                              // c:3373 — opposite-byte-order swap on second pass.
3355            let mut head_words: Vec<u32> = vec![
3356                head.start, head.len, head.npats, head.strs, head.hlen, head.flags,
3357            ];
3358            if other != 0 {
3359                fdswap(&mut head_words);
3360            }
3361            for w in &head_words {
3362                dfd.write_all(&w.to_le_bytes())?;
3363            }
3364            // c:3376 — write the name + NUL + pad-to-4.
3365            dfd.write_all(n.as_bytes())?;
3366            dfd.write_all(&[0u8])?;
3367            let pad = (4 - ((n.len() + 1) & 3)) & 3;
3368            if pad > 0 {
3369                dfd.write_all(&vec![0u8; pad])?;
3370            }
3371        }
3372        // c:3381 — per-fn body words.
3373        for wcf in progs {
3374            let mut body = wcf.body.clone();
3375            if other != 0 {
3376                fdswap(&mut body);
3377            }
3378            for w in &body {
3379                dfd.write_all(&w.to_le_bytes())?;
3380            }
3381        }
3382        if other != 0 {
3383            // c:3389
3384            break;
3385        }
3386        other = FDF_OTHER; // c:3391
3387    }
3388    Ok(())
3389}
3390
3391/// Port of `build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)`
3392/// from `Src/parse.c:3397`. Source-file → wordcode dump compiler.
3393///
3394/// Status: scaffolded but the wordcode-emit step depends on
3395/// `parse_string` returning a fully-wired `Eprog` with `prog/strs/
3396/// npats` fields populated. The current `parse_string`/`parse` shape
3397/// emits an AST (`ZshProgram`) but not yet the wordcode array C
3398/// expects in this dump format. Until that lands, this returns 1
3399/// with a clear "wordcode emit not yet ported" message so callers
3400/// (autoload from `.zwc`, `zcompile path/to/file`) fail loud.
3401pub fn build_dump(
3402    nam: &str, // c:3397
3403    dump: &str,
3404    _files: &[String],
3405    _ali: i32,
3406    _map: i32,
3407    _flags: u32,
3408) -> i32 {
3409    crate::ported::utils::zwarnnam(nam, &format!("{}: wordcode dump emit not yet ported", dump));
3410    1
3411}
3412
3413/// Port of `cur_add_func(char *nam, Shfunc shf, LinkList names, LinkList progs, int *hlen, int *tlen, int what)`
3414/// from `Src/parse.c:3489`. Adds a shfunc to the in-build dump
3415/// progs+names lists. Stub: `Eprog` for the function body isn't
3416/// yet wired through `shfunc.funcdef` to be serializable here.
3417pub fn cur_add_func(
3418    nam: &str, // c:3489
3419    shf_name: &str,
3420    shf_flags: i32,
3421    names: &mut Vec<String>,
3422    progs: &mut Vec<wcfunc>,
3423    hlen: &mut i32,
3424    tlen: &mut i32,
3425    what: i32,
3426) -> i32 {
3427    let is_undef = (shf_flags as u32 & PM_UNDEFINED) != 0;
3428    if is_undef {
3429        if (what & 2) == 0 {
3430            // c:3498
3431            zwarnnam(nam, &format!("function is not loaded: {}", shf_name));
3432            return 1;
3433        }
3434        // c:3503 — would call `getfpfunc` to load body for dump.
3435        zwarnnam(nam, &format!("can't load function: {}", shf_name));
3436        return 1;
3437    } else if (what & 1) == 0 {
3438        zwarnnam(nam, &format!("function is already loaded: {}", shf_name)); // c:3514
3439        return 1;
3440    }
3441    // c:3517 — would `dupeprog(shf->funcdef)`. Stub: empty body.
3442    let wcf = wcfunc {
3443        name: shf_name.to_string(),
3444        flags: FDHF_ZSHLOAD,
3445        body: Vec::new(),
3446    };
3447    progs.push(wcf);
3448    names.push(shf_name.to_string());
3449
3450    // c:3526 — bump hlen / tlen.
3451    let name_words = (shf_name.len() as i32 + 4) / 4;
3452    *hlen += (FDHEAD_WORDS as i32) + name_words;
3453    *tlen += 0; // body is empty in stub; real path adds prog->len in words.
3454
3455    0
3456}
3457
3458/// Port of `build_cur_dump(char *nam, char *dump, char **names, int match, int map, int what)`
3459/// from `Src/parse.c:3536`. Compiles currently-loaded functions
3460/// (`-c` for functions, `-a` for aliases) into a `.zwc` dump.
3461/// Same wordcode-emit dependency as `build_dump`.
3462pub fn build_cur_dump(
3463    nam: &str, // c:3536
3464    dump: &str,
3465    _names: &[String],
3466    _match_: i32,
3467    _map: i32,
3468    _what: i32,
3469) -> i32 {
3470    crate::ported::utils::zwarnnam(
3471        nam,
3472        &format!("{}: wordcode dump-current emit not yet ported", dump),
3473    );
3474    1
3475}
3476
/// Port of `zwcstat(char *filename, struct stat *buf)` from
/// `Src/parse.c:3656`. Returns the metadata of `filename`; if that
/// cannot be obtained, the metadata of `<filename>.old` is tried
/// instead. `None` means neither path could be stat'ed.
pub fn zwcstat(filename: &str) -> Option<std::fs::Metadata> {
    // c:3656
    std::fs::metadata(filename)
        .or_else(|_| std::fs::metadata(format!("{}.old", filename)))
        .ok()
}
3490
/// Port of `load_dump_file(char *dump, struct stat *sbuf, int other, int len)`
/// from `Src/parse.c:3675`. Reads a `.zwc` file into memory as
/// little-endian u32 words.
///
/// A non-zero `other` is used as the byte offset at which reading
/// starts; with 0 the whole file is read. Trailing bytes that do not
/// fill a complete word are dropped. `_sbuf` and `_len` are accepted
/// for signature parity and not used. Returns `None` if the file
/// cannot be opened, positioned or read.
pub fn load_dump_file(
    dump: &str, // c:3675
    _sbuf: &std::fs::Metadata,
    other: i32,
    _len: usize,
) -> Option<Vec<u32>> {
    let mut file = File::open(dump).ok()?;
    if other != 0 {
        file.seek(SeekFrom::Start(other as u64)).ok()?;
    }

    let mut raw = Vec::new();
    file.read_to_end(&mut raw).ok()?;

    let mut words = Vec::with_capacity(raw.len() / 4);
    for quad in raw.chunks_exact(4) {
        let mut le = [0u8; 4];
        le.copy_from_slice(quad);
        words.push(u32::from_le_bytes(le));
    }
    Some(words)
}
3513
/// Port of `try_dump_file(char *path, char *name, char *file, int *ksh, int test_only)`
/// from `Src/parse.c:3746`. Tries to load a function from a `.zwc`
/// in the given fpath directory. The intended result is
/// `Some((found, ksh_load))`.
///
/// Stub: all arguments are ignored and the function always returns
/// `None` until the dump-cache port (`FuncDump`) lands.
pub fn try_dump_file(
    _path: &str,
    _name: &str,
    _file: &str, // c:3746
    _test_only: bool,
) -> Option<(bool, bool)> {
    None
}
3526
/// Port of `try_source_file(char *file)` from `Src/parse.c:3795`.
/// Intended to resolve `file` for `source`, considering a compiled
/// `<file>.zwc` alongside it, and to return the resolved path on hit.
/// NOTE(review): the exact lookup order is defined by the C original
/// and is not visible here — confirm before implementing.
///
/// Stub: the argument is ignored and the function always returns
/// `None` until the dump-cache port lands.
pub fn try_source_file(_file: &str) -> Option<String> {
    // c:3795
    None
}
3535
3536/// Port of `Eprog check_dump_file(char *file, struct stat *sbuf,
3537/// char *name, int *ksh, int test_only)` from `Src/parse.c:3833`.
3538/// Walks the `dumps` mmap list looking for `(dev, ino)` matching
3539/// `sbuf`; on miss, calls `load_dump_header` to read the .zwc
3540/// header. Then `dump_find_func(d, name)` locates the function
3541/// table entry. Returns the wordcode slice + ksh-load flag.
3542///
3543/// ```c
3544/// Eprog
3545/// check_dump_file(char *file, struct stat *sbuf, char *name,
3546///                 int *ksh, int test_only)
3547/// {
3548///     int isrec = 0;
3549///     Wordcode d;
3550///     FDHead h;
3551///     FuncDump f;
3552///     struct stat lsbuf;
3553///     if (!sbuf) {
3554///         if (zwcstat(file, &lsbuf)) return NULL;
3555///         sbuf = &lsbuf;
3556///     }
3557///   rec:
3558///     d = NULL;
3559///     for (f = dumps; f; f = f->next)
3560///         if (f->dev == sbuf->st_dev && f->ino == sbuf->st_ino)
3561///             { d = f->map; break; }
3562///     if (!f && (isrec || !(d = load_dump_header(NULL, file, 0))))
3563///         return NULL;
3564///     if ((h = dump_find_func(d, name))) {
3565///         if (test_only) return &dummy_eprog;
3566///         /* allocate Eprog from f->map at h offset, incrdumpcount,
3567///            return prog */
3568///     }
3569///     return NULL;
3570/// }
3571/// ```
3572/// Rust port returns `Option<(Vec<u32>, bool)>` instead of the C
3573/// `Eprog` pointer + `*ksh` out-param: tuple element 0 is the
3574/// wordcode slice, element 1 is true if the function was a ksh-
3575/// loaded entry.
3576pub fn check_dump_file(                                                      // c:3833
3577    file: &str,
3578    sbuf: &std::fs::Metadata,
3579    name: &str,
3580    test_only: bool,
3581) -> Option<(Vec<u32>, bool)> {
3582    use std::os::unix::fs::MetadataExt;
3583
3584    // c:3842-3846 — `if (!sbuf) { zwcstat(file, &lsbuf); sbuf = &lsbuf; }`
3585    // Rust takes sbuf by &Metadata — never null.
3586    let dev = sbuf.dev();                                                    // c:3859
3587    let ino = sbuf.ino();                                                    // c:3859
3588
3589    // c:3854 — `d = NULL;`
3590    let mut d: Option<Vec<u32>> = None;
3591    let mut found_mmap = false;                                              // c:3858 `for (f = dumps; f; ...)`
3592
3593    // c:3858-3862 — walk DUMPS for matching dev/ino.
3594    {
3595        let dumps_guard = DUMPS.lock().expect("dumps poisoned");
3596        for f in dumps_guard.iter() {                                        // c:3858
3597            if f.dev == dev && f.ino == ino {                                // c:3859
3598                d = Some(f.map.clone());                                     // c:3860
3599                found_mmap = true;
3600                break;                                                       // c:3861
3601            }
3602        }
3603    }
3604
3605    // c:3870-3871 — `if (!f && (isrec || !(d = load_dump_header(NULL, file, 0)))) return NULL;`
3606    if !found_mmap {                                                         // c:3870
3607        match load_dump_header("", file, 0) {                                // c:3870 load_dump_header
3608            Some(loaded) => d = Some(loaded),
3609            None => return None,                                             // c:3871
3610        }
3611    }
3612
3613    // c:3873 — `if ((h = dump_find_func(d, name)))`
3614    let dump = d?;
3615    if !dump_find_func(&dump, name) {                                        // c:3873
3616        return None;
3617    }
3618
3619    // c:3876-3879 — `if (test_only) return &dummy_eprog;`
3620    if test_only {                                                           // c:3876
3621        return Some((Vec::new(), false));                                    // c:3879 dummy
3622    }
3623
3624    // c:3884-3953 — allocate Eprog from the mmap area + ksh detection.
3625    // The C source builds an `Eprog` struct wrapping the wordcode
3626    // slice at h's offset; the Rust port returns the slice directly
3627    // since Eprog construction lives at the call site (load_dump_file).
3628    // ksh-load detection reads the FDHF_KSHLOAD flag on the FDHead.
3629    // !!! STUB: FDHead parsing not yet wired through dump_find_func.
3630    let is_ksh_load = false;                                                 // c:3905 fdhflags(h) & FDHF_KSHLOAD
3631
3632    // c:3950 — incrdumpcount(f). The Rust incrdumpcount takes a
3633    // funcdump ref; look up the matching entry by dev/ino again.
3634    if found_mmap {
3635        let dumps_guard = DUMPS.lock().expect("dumps poisoned");
3636        if let Some(f) = dumps_guard.iter().find(|f| f.dev == dev && f.ino == ino) {
3637            incrdumpcount(f);                                                // c:3899
3638        }
3639    }
3640
3641    Some((dump, is_ksh_load))                                                // c:3953
3642}
3643
3644/// Port of `incrdumpcount(FuncDump f)` from `Src/parse.c:3970/4021`.
3645/// `f->count++;` — refcount-up a loaded dump entry. The Rust port
3646/// keys lookup by `filename` because Rust can't raw-pointer-compare
3647/// funcdump values inside a `Mutex<Vec<...>>`; same observable
3648/// effect (the count of the matching entry increments).
3649pub fn incrdumpcount(f: &crate::ported::zsh_h::funcdump) {
3650    // c:3970 — `f->count++;`
3651    if let Some(d) = DUMPS.lock().unwrap().iter_mut().find(|d| d.filename.as_deref() == f.filename.as_deref()) {
3652        d.count += 1; // c:3973
3653    }
3654}
3655
3656/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. Public
3657/// helper for the rare external caller; locks the dumps mutex and
3658/// drops the entry with the given filename.
3659pub fn freedump(f: &crate::ported::zsh_h::funcdump) {
3660    // c:3976
3661    let mut g = DUMPS.lock().unwrap();
3662    if let Some(name) = f.filename.as_deref() {
3663        freedump_locked(&mut g, name);
3664    }
3665}
3666
3667/// Port of `decrdumpcount(FuncDump f)` from `Src/parse.c:3988/4026`.
3668/// `f->count--; if (!f->count) { unlink from dumps; freedump(f); }`.
3669pub fn decrdumpcount(f: &crate::ported::zsh_h::funcdump) {
3670    // c:3988
3671    let key = f.filename.clone();
3672    let mut g = DUMPS.lock().unwrap();
3673    let mut hit_zero: Option<String> = None;
3674    for d in g.iter_mut() {
3675        if d.filename == key {
3676            d.count -= 1; // c:3991
3677            if d.count == 0 {
3678                // c:3992
3679                hit_zero = d.filename.clone();
3680            }
3681            break;
3682        }
3683    }
3684    if let Some(name) = hit_zero {
3685        // c:3994-4001
3686        freedump_locked(&mut g, &name);
3687    }
3688}
3689
3690/// Port of `closedumps(void)` from `Src/parse.c:4008/4033`. Walks
3691/// `dumps` freeing every entry. Called on shell exit (exec.c:522).
3692pub fn closedumps() {
3693    // c:4008
3694    let mut g = DUMPS.lock().unwrap();
3695    g.clear(); // c:4011-4014 `while (dumps) { ... freedump(...); ... }`
3696}
3697
3698/// Port of `dump_autoload(char *nam, char *file, int on, Options ops, int func)`
3699/// from `Src/parse.c:4042`. Registers every function in a `.zwc`
3700/// for autoload via `shfunctab`. Stub: returns 1 (error) until the
3701/// dump-cache port lands.
3702pub fn dump_autoload(
3703    nam: &str,
3704    file: &str, // c:4042
3705    _on: i32,
3706    _ops: &crate::ported::zsh_h::options,
3707    _func: i32,
3708) -> i32 {
3709    zwarnnam(nam, &format!("{}: zwc-based autoload not yet ported", file));
3710    1
3711}
3712
/// Port of C `struct eccstr` (zsh.h:836) — the long-string dedup BST
/// node. The dedup-walk and cmp logic in `ecstrcode` is faithful to
/// parse.c:447-453 including the conditional cmp chain
/// (nfunc → hashval → strcmp), so for any input the Rust walk hits or
/// misses an existing node exactly where C's eccstr BST walk does.
struct EccstrNode {
    /// Left child subtree of this BST node, if any.
    left: Option<Box<EccstrNode>>,
    /// Right child subtree of this BST node, if any.
    right: Option<Box<EccstrNode>>,
    /// C-byte form of the string (single byte per char ≤ 0xff).
    /// Owned because Rust doesn't have C zsh's "stable pointers into
    /// the lexer's tokstr arena" — every tokstr lives as a fresh
    /// Rust String allocation.
    str: Vec<u8>,
    /// Wordcode-encoded offset: `(byte_offset << 2) | token_bit`.
    /// Same shape as `Eccstr::offs` (parse.c:459).
    offs: u32,
    /// Absolute byte offset in the final strs region (= `ecsoffs` at
    /// insert time). C `Eccstr::aoffs` (parse.c:464). copy_ecstr uses
    /// THIS for the write position — distinct from `offs` which is
    /// ecssub-relative and collides across funcdef scopes.
    aoffs: u32,
    /// `nfunc` snapshot at insert time. Per-function namespace key
    /// — top-level scripts use 0; each funcdef bumps it.
    nfunc: i32,
    /// Hash of `str` computed via zsh's `hasher` (hashtable.c:86).
    hashval: u32,
}
3740// === end AST relocation ===
3741
3742// Parser state lives in file-scope thread_locals:
3743//   - LEX_* (lexer side, matching Src/lex.c file-statics)
3744//   - ECBUF / ECLEN / ECUSED / ECNPATS / ECSOFFS / ECSSUB / ECNFUNC /
3745//     ECSTRS_INDEX / ECSTRS_REVERSE (wordcode-emission state, matching
3746//     Src/parse.c file-statics)
3747//
3748// Callers use the free-fn entry points directly:
3749//   crate::ported::parse::parse_init(input);
3750//   let prog = crate::ported::parse::parse();
3751
/// Recursion-depth limit used by the parser.
/// NOTE(review): presumably the point at which nested productions
/// are rejected instead of overflowing the stack — the check itself
/// is outside this section; confirm at the use site.
const MAX_RECURSION_DEPTH: usize = 500;
3753
/// Direct port of `struct parse_stack` at `Src/zsh.h:3099-3109`.
/// Used by `parse_context_save` / `parse_context_restore`
/// (parse.c:295-355) to snapshot per-parse-call state so a nested
/// parse (e.g. inside command substitution) doesn't clobber the
/// outer parse.
///
/// A second port of `struct parse_stack` exists at
/// `crate::ported::zsh_h::parse_stack` (zsh.h:1066) using canonical
/// Wordcode / Eccstr / `struct heredocs` types — that port is unused
/// today and will become authoritative when Phase 9b (PORT_PLAN.md)
/// wires wordcode emission. This local version uses the working-set
/// shapes (`Vec<HereDoc>`, stubbed wordcode fields) suited to zshrs's
/// pre-wordcode AST architecture; the consolidation happens in P9b.
///
/// NOTE(review): the zsh.h line citations in this comment
/// (3099-3109, 1066) and on the fields below (up to 3114) do not
/// agree with each other — confirm against the header.
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Clone)]
pub struct parse_stack {
    // ── Direct port of struct parse_stack at zsh.h:3099-3109 ──
    /// Pending heredocs awaiting body collection (canonical C
    /// linked-list shape). C: `struct heredocs *hdocs` (zsh.h:3100).
    /// Mirrors `parse::HDOCS` thread_local across nested parses.
    pub hdocs: Option<Box<crate::ported::zsh_h::heredocs>>,
    /// !!! WARNING: NOT IN PARSE_STACK — Rust-only AST-glue !!!
    /// Snapshot of `lex::LEX_HEREDOCS` (the parallel Rust-only Vec
    /// carrying terminator / strip_tabs / quoted metadata).
    /// Saved/restored alongside the canonical `hdocs` so nested
    /// parses get a clean AST view. C's parse_stack has no analog
    /// because C tracks terminator metadata implicitly via tokstr.
    pub lex_heredocs: Vec<HereDoc>,
    /// C: `int incmdpos` (zsh.h:3102).
    pub incmdpos: bool,
    /// C: `int aliasspaceflag` (zsh.h:3103).
    pub aliasspaceflag: i32,
    /// C: `int incond` (zsh.h:3104).
    pub incond: i32,
    /// C: `int inredir` (zsh.h:3105).
    pub inredir: bool,
    /// C: `int incasepat` (zsh.h:3106).
    pub incasepat: i32,
    /// C: `int isnewlin` (zsh.h:3107).
    pub isnewlin: i32,
    /// C: `int infor` (zsh.h:3108).
    pub infor: i32,
    /// C: `int inrepeat_` (zsh.h:3109).
    pub inrepeat_: i32,
    /// C: `int intypeset` (zsh.h:3110).
    pub intypeset: bool,
    // ── Wordcode-buffer state — STUB until Phase 9b ──
    // C `Wordcode ecbuf` (zsh.h:3112) + `Eccstr ecstrs` (zsh.h:3113) +
    // `int eclen/ecused/ecnpats/ecsoffs/ecssub/ecnfunc` (zsh.h:3112-3114).
    // zshrs hasn't emitted wordcode yet — these fields exist to
    // preserve the C shape but read/write nothing until P9b lands.
    /// C: `int eclen`. Placeholder; see the stub note above.
    pub eclen: i32,
    /// C: `int ecused`. Placeholder; see the stub note above.
    pub ecused: i32,
    /// C: `int ecnpats`. Placeholder; see the stub note above.
    pub ecnpats: i32,
    /// C: `Wordcode ecbuf`, held here as an optional owned word vector.
    pub ecbuf: Option<Vec<u32>>,
    /// C: `Eccstr ecstrs`, held here as optional raw bytes rather
    /// than the C tree type.
    pub ecstrs: Option<Vec<u8>>,
    /// C: `int ecsoffs`. Placeholder; see the stub note above.
    pub ecsoffs: i32,
    /// C: `int ecssub`. Placeholder; see the stub note above.
    pub ecssub: i32,
    /// C: `int ecnfunc`. Placeholder; see the stub note above.
    pub ecnfunc: i32,
}
3814
/// Compatibility alias for the former uppercase, Rust-only
/// `ParseStack` type, which is gone. Keeps existing call sites
/// (context.rs) resolving until the rename to `parse_stack` ripples
/// through.
#[allow(non_camel_case_types)]
pub type ParseStack = parse_stack;
3820
/// `mod_export struct eprog dummy_eprog;` from `Src/parse.c:3066`.
/// Placeholder Eprog used by `shf->funcdef = &dummy_eprog;` in
/// builtin.c when clearing a stale autoload stub. Held in a Mutex
/// so `init_eprog` can set it once at shell startup.
///
/// The initial value is an all-empty program: zeroed flags and
/// counters, empty `prog` / `pats` vectors and no `strs`, `shf` or
/// `dump` attached.
pub static DUMMY_EPROG: std::sync::Mutex<crate::ported::zsh_h::eprog> =
    std::sync::Mutex::new(crate::ported::zsh_h::eprog {
        flags: 0,
        len: 0,
        npats: 0,
        nref: 0,
        prog: Vec::new(),
        strs: None,
        pats: Vec::new(),
        shf: None,
        dump: None,
    });
3837
3838/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
3839/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
3840/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
3841/// during scanning (in source order).
3842fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
3843    for list in &mut prog.lists {
3844        fill_in_sublist(&mut list.sublist, bodies);
3845    }
3846}
3847
3848fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
3849    fill_in_pipe(&mut sub.pipe, bodies);
3850    if let Some(next) = &mut sub.next {
3851        fill_in_sublist(&mut next.1, bodies);
3852    }
3853}
3854
3855fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
3856    fill_in_command(&mut pipe.cmd, bodies);
3857    if let Some(next) = &mut pipe.next {
3858        fill_in_pipe(next, bodies);
3859    }
3860}
3861
3862fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
3863    match cmd {
3864        ZshCommand::Simple(s) => {
3865            for r in &mut s.redirs {
3866                if let Some(idx) = r.heredoc_idx {
3867                    if let Some(info) = bodies.get(idx) {
3868                        r.heredoc = Some(info.clone());
3869                    }
3870                }
3871            }
3872        }
3873        ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
3874        ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
3875        ZshCommand::If(i) => {
3876            fill_heredoc_bodies(&mut i.cond, bodies);
3877            fill_heredoc_bodies(&mut i.then, bodies);
3878            for (c, b) in &mut i.elif {
3879                fill_heredoc_bodies(c, bodies);
3880                fill_heredoc_bodies(b, bodies);
3881            }
3882            if let Some(e) = &mut i.else_ {
3883                fill_heredoc_bodies(e, bodies);
3884            }
3885        }
3886        ZshCommand::While(w) | ZshCommand::Until(w) => {
3887            fill_heredoc_bodies(&mut w.cond, bodies);
3888            fill_heredoc_bodies(&mut w.body, bodies);
3889        }
3890        ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
3891        ZshCommand::Case(c) => {
3892            for arm in &mut c.arms {
3893                fill_heredoc_bodies(&mut arm.body, bodies);
3894            }
3895        }
3896        ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
3897        ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
3898        ZshCommand::Try(t) => {
3899            fill_heredoc_bodies(&mut t.try_block, bodies);
3900            fill_heredoc_bodies(&mut t.always, bodies);
3901        }
3902        ZshCommand::Redirected(inner, redirs) => {
3903            for r in redirs {
3904                if let Some(idx) = r.heredoc_idx {
3905                    if let Some(info) = bodies.get(idx) {
3906                        r.heredoc = Some(info.clone());
3907                    }
3908                }
3909            }
3910            fill_in_command(inner, bodies);
3911        }
3912        ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
3913    }
3914}
3915
3916/// If `list` is a Simple containing one word that ends in the
3917/// `<Inpar><Outpar>` token pair (the lexer-port encoding of `()`),
3918/// return the bare name. Used by `parse_program_until` to detect
3919/// `name() {body}` style function definitions where the lexer
3920/// hasn't split the `()` from the name.
3921/// Detect the `name() …` shape inside a Simple. Returns the function
3922/// name and (when the body was already inlined into the same Simple,
3923/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
3924/// Returns None for non-funcdef shapes.
3925fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
3926    if list.flags.async_ || list.sublist.next.is_some() {
3927        return None;
3928    }
3929    let pipe = &list.sublist.pipe;
3930    if pipe.next.is_some() {
3931        return None;
3932    }
3933    let simple = match &pipe.cmd {
3934        ZshCommand::Simple(s) => s,
3935        _ => return None,
3936    };
3937    if simple.words.is_empty() || !simple.assigns.is_empty() {
3938        return None;
3939    }
3940    let suffix = "\u{88}\u{8a}"; // Inpar + Outpar
3941                                 // Find the FIRST word ending in `()`. zsh accepts the
3942                                 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
3943                                 // par_funcdef wordlist) — words[0..i-1] are extra names,
3944                                 // words[i] is `lastname()`. Words after are the body argv
3945                                 // (one-line shorthand, `name() cmd args`).
3946    let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
3947    let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
3948    for w in &simple.words[..par_idx] {
3949        // Earlier names must be bare identifiers, NOT contain
3950        // tokens that imply they're not function names (no `()`,
3951        // no quotes, no expansions). zsh's lexer enforces this
3952        // at the wordlist level; we approximate by requiring the
3953        // word be an identifier-shaped token after untokenize.
3954        let bare = super::lex::untokenize(w);
3955        let valid = !bare.is_empty()
3956            && bare
3957                .chars()
3958                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
3959        if !valid {
3960            return None;
3961        }
3962        names.push(bare);
3963    }
3964    let last = &simple.words[par_idx];
3965    let bare = &last[..last.len() - suffix.len()];
3966    if bare.is_empty() {
3967        return None;
3968    }
3969    names.push(super::lex::untokenize(bare));
3970    let rest = simple.words[par_idx + 1..].to_vec();
3971    Some((names, rest))
3972}
3973
3974/// Initialize parser state for a fresh parse of `input`.
3975/// Free-fn entry point — resets parser thread_locals and loads input.
3976pub fn parse_init(input: &str) {
3977    // Seed the option defaults the parser/lexer inspect. Real zsh
3978    // installs these via `install_emulation_defaults` (options.c:172)
3979    // at shell startup; zshrs's parse-only test entry path bypasses
3980    // init_main, so we mirror the `zsh` emulation defaults here.
3981    // Only seeds when unset so a script that explicitly disabled an
3982    // option stays so.
3983    for (name, default) in [
3984        ("shortloops", true),
3985        ("shortrepeat", false),
3986        ("multifuncdef", true),
3987        ("aliasfuncdef", false),
3988        ("ignorebraces", false),
3989        ("cshjunkieloops", false),
3990        ("posixbuiltins", false),
3991        ("execopt", true),
3992        ("kshautoload", false),
3993        ("aliases", true),
3994    ] {
3995        if crate::ported::options::opt_state_get(name).is_none() {
3996            crate::ported::options::opt_state_set(name, default);
3997        }
3998    }
3999    lex_init(input);
4000}
4001
4002/// P9b decoder (wordcode-pipeline variant): direct port of
4003/// `ecgetstr(Estate s, int dup, int *tokflag)` from
4004/// `Src/parse.c:2855-2890`. Reads a wordcode at `pc`, decodes the
4005/// encoded string back to owned String. Returns (string,
4006/// pc_after_consumed). Distinct from the existing `ecgetstr` (which
4007/// takes a separate strs buffer for text.rs) — this variant uses
4008/// the live ECSTRS_REVERSE HashMap populated at ecstrcode time.
4009pub fn ecgetstr_wordcode(buf: &[u32], pc: usize) -> (String, usize) {
4010    if pc >= buf.len() {
4011        return (String::new(), pc);
4012    }
4013    let c = buf[pc];
4014    let next = pc + 1;
4015    // parse.c:2862-2863 — empty-string sentinels.
4016    if c == 6 || c == 7 {
4017        return (String::new(), next);
4018    }
4019    // parse.c:2864-2871 — inline-packed short string.
4020    if (c & 2) != 0 {
4021        let b0 = ((c >> 3) & 0xff) as u8;
4022        let b1 = ((c >> 11) & 0xff) as u8;
4023        let b2 = ((c >> 19) & 0xff) as u8;
4024        let mut bytes: Vec<u8> = Vec::new();
4025        for b in [b0, b1, b2] {
4026            if b == 0 {
4027                break;
4028            }
4029            bytes.push(b);
4030        }
4031        return (String::from_utf8_lossy(&bytes).into_owned(), next);
4032    }
4033    // parse.c:2872-2873 — long string via offs lookup. Map value is
4034    // metafied Vec<u8>; convert back to display String. Unmetafy is
4035    // the caller's job (the wordcode-parity dumper does it; other
4036    // callers may want raw bytes).
4037    let s = ECSTRS_REVERSE
4038        .with_borrow(|m| m.get(&c).cloned())
4039        .map(|v| String::from_utf8_lossy(&v).into_owned())
4040        .unwrap_or_default();
4041    (s, next)
4042}
4043
4044/// Parse the complete input. Direct port of `parse_event` /
4045/// `par_list` from `Src/parse.c:614-720`. On syntax error,
4046/// sets `errflag |= ERRFLAG_ERROR` (via `zerr`) and returns the
4047/// partial program — callers check `errflag` to detect failure,
4048/// matching C's `Eprog parse_event(...)` + `if (errflag) {...}`.
4049pub fn parse() -> ZshProgram {
4050    zshlex();
4051
4052    let mut program = parse_program_until(None);
4053
4054    // Post-pass: wire heredoc bodies (collected by the inline NEWLIN
4055    // walk in zshlex into LEX_HEREDOCS) back into ZshRedir.heredoc
4056    // fields via heredoc_idx. No C analog — LEX_HEREDOCS is the
4057    // Rust-only AST-glue Vec.
4058    let bodies: Vec<HereDocInfo> = crate::ported::lex::LEX_HEREDOCS
4059        .with_borrow(|v| v.clone())
4060        .into_iter()
4061        .map(|h| HereDocInfo {
4062            content: h.content,
4063            terminator: h.terminator,
4064            quoted: h.quoted,
4065        })
4066        .collect();
4067    if !bodies.is_empty() {
4068        fill_heredoc_bodies(&mut program, &bodies);
4069    }
4070
4071    program
4072}
4073
4074/// Wordcode-emission top-level driver. Closest C analog is
4075/// `parse_list(void)` at `Src/parse.c:697-712`: init_parse +
4076/// zshlex + par_list(&c) + bld_eprog. This entry omits init_parse
4077/// and bld_eprog (caller responsibilities) and inlines a guard
4078/// loop around par_list_wordcode for cases where the lexer leaves
4079/// a non-ENDINPUT terminator (LEXERR, missing close-token, etc.).
4080pub fn par_event_wordcode() -> usize {
4081    let start = ECUSED.get() as usize;
4082    // C `parse_list` (parse.c:697-712) calls par_list ONCE — par_list's
4083    // own goto-rec loop handles all SEPER-separated sublists. The
4084    // outer loop here exists for safety against early-return cases
4085    // (LEXERR, missing terminator) but normally par_list_wordcode
4086    // consumes everything in one call.
4087    let mut cmplx: i32 = 0;
4088    while tok() != ENDINPUT && tok() != LEXERR {
4089        par_list_wordcode(&mut cmplx);
4090        match tok() {
4091            SEMI | NEWLIN | AMPER | AMPERBANG | SEPER => {
4092                zshlex();
4093            }
4094            _ => break,
4095        }
4096    }
4097    // parse.c:712 — `ecadd(WCB_END());`
4098    ecadd(crate::ported::zsh_h::WCB_END());
4099    start
4100}
4101
4102/// Port of `par_list(int *cmplx)` from `Src/parse.c:769-803`.
4103/// `list : { SEPER } [ sublist [ { SEPER | AMPER | AMPERBANG } list ] ]`.
4104/// True line-by-line port: takes `cmplx: &mut i32` matching C's
4105/// `int *cmplx` out-parameter, uses stack-local `c` per iteration
4106/// like C (so inner sublist cmplx is independent of outer).
4107pub fn par_list_wordcode(cmplx: &mut i32) {
4108    // c:773 — `int p, lp = -1, c;`
4109    let mut p: usize;
4110    let mut lp: i32 = -1;
4111    let mut c: i32;
4112    loop {
4113        // c:775 `rec:` — c:777-778 `while (tok == SEPER) zshlex();`
4114        while tok() == SEPER {
4115            zshlex();
4116        }
4117        // c:780 — `p = ecadd(0);`
4118        p = ecadd(0);
4119        // c:781 — `c = 0;`
4120        c = 0;
4121        // c:783 — `if (par_sublist(&c)) { ... }`
4122        if par_sublist_wordcode(&mut c) {
4123            // c:784 — `*cmplx |= c;`
4124            *cmplx |= c;
4125            // c:785 — `if (tok == SEPER || tok == AMPER || tok == AMPERBANG)`
4126            let t = tok();
4127            if t == SEPER || t == AMPER || t == AMPERBANG {
4128                // c:786-787 — `if (tok != SEPER) *cmplx = 1;`
4129                if t != SEPER {
4130                    *cmplx = 1;
4131                }
4132                // c:788-790 — `set_list_code(p, ..., c);`
4133                let z = if t == SEPER {
4134                    Z_SYNC
4135                } else if t == AMPER {
4136                    Z_ASYNC
4137                } else {
4138                    Z_ASYNC | Z_DISOWN
4139                };
4140                set_list_code(p, z, c != 0);
4141                // c:791 — `incmdpos = 1;`
4142                set_incmdpos(true);
4143                // c:792-794 — `do { zshlex(); } while (tok == SEPER);`
4144                loop {
4145                    zshlex();
4146                    if tok() != SEPER {
4147                        break;
4148                    }
4149                }
4150                // c:795 — `lp = p;` c:796 — `goto rec;`
4151                lp = p as i32;
4152                continue;
4153            } else {
4154                // c:798 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4155                set_list_code(p, Z_SYNC | Z_END, c != 0);
4156            }
4157        } else {
4158            // c:800-802 — `ecused--; if (lp >= 0) ecbuf[lp] |= wc_bdata(Z_END);`
4159            ECUSED.set((ECUSED.get() - 1).max(0));
4160            if lp >= 0 {
4161                ECBUF.with_borrow_mut(|b| {
4162                    if (lp as usize) < b.len() {
4163                        b[lp as usize] |= wc_bdata(Z_END as wordcode);
4164                    }
4165                });
4166            }
4167        }
4168        break;
4169    }
4170}
4171
4172/// Port of `par_list1(int *cmplx)` from `Src/parse.c:806-817`.
4173/// Single-sublist variant used by funcdef bodies and the short
4174/// `for`/`while`/`repeat` forms — exactly one sublist with
4175/// `Z_SYNC|Z_END`, no chain.
4176pub fn par_list1_wordcode(cmplx: &mut i32) {
4177    // c:810 — `int p = ecadd(0), c = 0;`
4178    let p = ecadd(0);
4179    let mut c: i32 = 0;
4180    // c:812 — `if (par_sublist(&c)) { ... }`
4181    if par_sublist_wordcode(&mut c) {
4182        // c:813 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4183        set_list_code(p, Z_SYNC | Z_END, c != 0);
4184        // c:814 — `*cmplx |= c;`
4185        *cmplx |= c;
4186    } else {
4187        // c:816 — `ecused--;`
4188        ECUSED.set((ECUSED.get() - 1).max(0));
4189    }
4190}
4191
4192/// Port of `par_save_list(C)` macro from `Src/parse.c:475-480`.
4193///   do { int eu = ecused; par_list(C); if (eu == ecused) ecadd(WCB_END()); } while (0)
4194pub fn par_save_list_wordcode(cmplx: &mut i32) {
4195    let eu = ECUSED.get();
4196    par_list_wordcode(cmplx);
4197    if ECUSED.get() == eu {
4198        ecadd(WCB_END());
4199    }
4200}
4201
4202/// Port of `par_save_list1(C)` macro from `Src/parse.c:481-486`.
4203pub fn par_save_list1_wordcode(cmplx: &mut i32) {
4204    let eu = ECUSED.get();
4205    par_list1_wordcode(cmplx);
4206    if ECUSED.get() == eu {
4207        ecadd(WCB_END());
4208    }
4209}
4210
4211/// Port of `par_sublist(int *cmplx)` from `Src/parse.c:823-865`.
4212/// `sublist : sublist2 [ ( DBAR | DAMPER ) { SEPER } sublist ]`.
4213/// Emits a WCB_SUBLIST header, recurses into par_sublist2 for
4214/// the !/coproc prefix + pipeline, then chains via DBAR (`||`)
4215/// or DAMPER (`&&`) recursively. Returns true if at least one
4216/// pipeline was emitted.
4217pub fn par_sublist_wordcode(cmplx: &mut i32) -> bool {
4218    // c:827 — `int f, p, c = 0;`
4219    let mut c: i32 = 0;
4220    // c:829 — `p = ecadd(0);`
4221    let p = ecadd(0);
4222    // c:831 — `if ((f = par_sublist2(&c)) != -1) { ... }`
4223    match par_sublist2(&mut c) {
4224        Some(f) => {
4225            // c:832 — `int e = ecused;`
4226            let e = ECUSED.get() as usize;
4227            // c:834 — `*cmplx |= c;`
4228            *cmplx |= c;
4229            if tok() == DBAR || tok() == DAMPER {
4230                // c:836 — `enum lextok qtok = tok;`
4231                let qtok = tok();
4232                // c:839 — `cmdpush(tok == DBAR ? CS_CMDOR : CS_CMDAND);`
4233                cmdpush(if qtok == DBAR {
4234                    CS_CMDOR as u8
4235                } else {
4236                    CS_CMDAND as u8
4237                });
4238                // c:840 — `zshlex();`
4239                zshlex();
4240                // c:841-842 — `while (tok == SEPER) zshlex();`
4241                while tok() == SEPER {
4242                    zshlex();
4243                }
4244                // c:843 — `sl = par_sublist(cmplx);`
4245                let sl = par_sublist_wordcode(cmplx);
4246                // c:844-847 — `set_sublist_code(p, (sl ? ... : WC_SUBLIST_END),
4247                // f, (e - 1 - p), c);`
4248                let st = if sl {
4249                    if qtok == DBAR {
4250                        WC_SUBLIST_OR
4251                    } else {
4252                        WC_SUBLIST_AND
4253                    }
4254                } else {
4255                    WC_SUBLIST_END
4256                };
4257                set_sublist_code(p, st as i32, f, (e - 1 - p) as i32, c != 0);
4258                // c:848 — `cmdpop();`
4259                cmdpop();
4260            } else {
4261                // c:850-853 — `if (tok == AMPER || tok == AMPERBANG)
4262                // { c = 1; *cmplx |= c; }`
4263                if tok() == AMPER || tok() == AMPERBANG {
4264                    c = 1;
4265                    *cmplx |= c;
4266                }
4267                // c:854 — `set_sublist_code(p, WC_SUBLIST_END, f,
4268                // (e - 1 - p), c);`
4269                set_sublist_code(p, WC_SUBLIST_END as i32, f, (e - 1 - p) as i32, c != 0);
4270            }
4271            // c:856 — `return 1;`
4272            true
4273        }
4274        None => {
4275            // c:858-859 — `ecused--; return 0;`
4276            ECUSED.set((ECUSED.get() - 1).max(0));
4277            false
4278        }
4279    }
4280}
4281
4282/// Port of `par_pline(int *cmplx)` from `Src/parse.c:894-955`.
4283/// `pline : cmd [ ( BAR | BARAMP ) { SEPER } pline ]`. Emits a
4284/// WCB_PIPE header (mid for chain links, end for the last cmd)
4285/// plus the optional BARAMP `2>&1` synthetic redir.
4286/// Port of `par_pline(int *cmplx)` from `Src/parse.c:893-947`.
4287/// (Named `par_pipe_wordcode` to disambiguate from the AST
4288/// `par_pline` at parse.rs:3744 — semantically the same `pline`
4289/// production.)
4290pub fn par_pipe_wordcode(cmplx: &mut i32) -> bool {
4291    // c:897 — `zlong line = toklineno;`
4292    let line = toklineno() as i64;
4293    // c:899 — `p = ecadd(0);`
4294    let p = ecadd(0);
4295    // c:901-904 — `if (!par_cmd(cmplx, 0)) { ecused--; return 0; }`
4296    if !par_cmd_wordcode(cmplx, 0) {
4297        ECUSED.set((ECUSED.get() - 1).max(0));
4298        return false;
4299    }
4300    if tok() == BAR_TOK {
4301        // c:906 — `*cmplx = 1;`
4302        *cmplx = 1;
4303        // c:907 — `cmdpush(CS_PIPE);`
4304        cmdpush(CS_PIPE as u8);
4305        // c:908 — `zshlex();`
4306        zshlex();
4307        // c:909-910 — `while (tok == SEPER) zshlex();`
4308        while tok() == SEPER {
4309            zshlex();
4310        }
4311        // c:911 — `ecbuf[p] = WCB_PIPE(WC_PIPE_MID, line>=0 ? line+1 : 0);`
4312        ECBUF.with_borrow_mut(|b| {
4313            if p < b.len() {
4314                b[p] = WCB_PIPE(
4315                    WC_PIPE_MID,
4316                    if line >= 0 { (line + 1) as wordcode } else { 0 },
4317                );
4318            }
4319        });
4320        // c:912 — `ecispace(p+1, 1);`
4321        ecispace(p + 1, 1);
4322        // c:913 — `ecbuf[p+1] = ecused - 1 - p;`
4323        let used = ECUSED.get() as usize;
4324        ECBUF.with_borrow_mut(|b| {
4325            if p + 1 < b.len() {
4326                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
4327            }
4328        });
4329        // c:914-916 — `if (!par_pline(cmplx)) { tok = LEXERR; }`
4330        if !par_pipe_wordcode(cmplx) {
4331            set_tok(LEXERR);
4332        }
4333        // c:917 — `cmdpop();`
4334        cmdpop();
4335        true
4336    } else if tok() == BARAMP {
4337        // c:920-923 — walk past inline WC_REDIR to find r.
4338        let mut r = p + 1;
4339        loop {
4340            let code = ECBUF.with_borrow(|b| b.get(r).copied().unwrap_or(0));
4341            if wc_code(code) != WC_REDIR {
4342                break;
4343            }
4344            r += WC_REDIR_WORDS(code) as usize;
4345        }
4346        // c:925-928 — `ecispace(r, 3);` + synthetic `2>&1` redir
4347        ecispace(r, 3);
4348        ECBUF.with_borrow_mut(|b| {
4349            if r + 2 < b.len() {
4350                b[r] = WCB_REDIR(REDIR_MERGEOUT as wordcode);
4351                b[r + 1] = 2;
4352                b[r + 2] = ecstrcode("1");
4353            }
4354        });
4355        // c:930 — `*cmplx = 1;`
4356        *cmplx = 1;
4357        cmdpush(CS_ERRPIPE as u8);
4358        zshlex();
4359        while tok() == SEPER {
4360            zshlex();
4361        }
4362        ECBUF.with_borrow_mut(|b| {
4363            if p < b.len() {
4364                b[p] = WCB_PIPE(
4365                    WC_PIPE_MID,
4366                    if line >= 0 { (line + 1) as wordcode } else { 0 },
4367                );
4368            }
4369        });
4370        ecispace(p + 1, 1);
4371        let used = ECUSED.get() as usize;
4372        ECBUF.with_borrow_mut(|b| {
4373            if p + 1 < b.len() {
4374                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
4375            }
4376        });
4377        if !par_pipe_wordcode(cmplx) {
4378            set_tok(LEXERR);
4379        }
4380        cmdpop();
4381        true
4382    } else {
4383        // c:944 — `ecbuf[p] = WCB_PIPE(WC_PIPE_END, line>=0 ? line+1 : 0);`
4384        ECBUF.with_borrow_mut(|b| {
4385            if p < b.len() {
4386                b[p] = WCB_PIPE(
4387                    WC_PIPE_END,
4388                    if line >= 0 { (line + 1) as wordcode } else { 0 },
4389                );
4390            }
4391        });
4392        true
4393    }
4394}
4395
4396/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
4397/// `Src/parse.c:958-1085`. Parses leading + trailing redirs and
4398/// dispatches on the current token to the right par_* builder.
4399/// Returns false only when no command was emitted (no redirs +
4400/// par_simple returned 0).
4401/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
4402/// `Src/parse.c:957-1077`.
4403pub fn par_cmd_wordcode(cmplx: &mut i32, zsh_construct: i32) -> bool {
4404    // c:960 — `int r, nr = 0;`
4405    let mut nr: i32 = 0;
4406    // c:962 — `r = ecused;`
4407    let mut r: usize = ECUSED.get() as usize;
4408    // c:964-968 — leading redirs.
4409    if IS_REDIROP(tok()) {
4410        // c:965 — `*cmplx = 1;`
4411        *cmplx = 1;
4412        // c:966-968 — `while (IS_REDIROP(tok)) { nr += par_redir(&r, NULL); }`
4413        while IS_REDIROP(tok()) {
4414            nr += par_redir_wordcode(&mut r);
4415        }
4416    }
4417    // c:970-1066 — token-dispatch switch.
4418    match tok() {
4419        FOR => {
4420            cmdpush(CS_FOR as u8);
4421            par_for_wordcode(cmplx);
4422            cmdpop();
4423        }
4424        FOREACH => {
4425            cmdpush(CS_FOREACH as u8);
4426            par_for_wordcode(cmplx);
4427            cmdpop();
4428        }
4429        SELECT => {
4430            // c:982 — `*cmplx = 1;`
4431            *cmplx = 1;
4432            cmdpush(CS_SELECT as u8);
4433            par_for_wordcode(cmplx);
4434            cmdpop();
4435        }
4436        CASE => {
4437            cmdpush(CS_CASE as u8);
4438            par_case_wordcode(cmplx);
4439            cmdpop();
4440        }
4441        IF => {
4442            par_if_wordcode(cmplx);
4443        }
4444        WHILE => {
4445            cmdpush(CS_WHILE as u8);
4446            par_while_wordcode(cmplx);
4447            cmdpop();
4448        }
4449        UNTIL => {
4450            cmdpush(CS_UNTIL as u8);
4451            par_while_wordcode(cmplx);
4452            cmdpop();
4453        }
4454        REPEAT => {
4455            cmdpush(CS_REPEAT as u8);
4456            par_repeat_wordcode(cmplx);
4457            cmdpop();
4458        }
4459        INPAR_TOK => {
4460            // c:1011 — `*cmplx = 1;`
4461            *cmplx = 1;
4462            cmdpush(CS_SUBSH as u8);
4463            par_subsh_wordcode_impl(cmplx, zsh_construct);
4464            cmdpop();
4465        }
4466        INBRACE_TOK => {
4467            cmdpush(CS_CURSH as u8);
4468            par_subsh_wordcode_impl(cmplx, zsh_construct);
4469            cmdpop();
4470        }
4471        FUNC => {
4472            cmdpush(CS_FUNCDEF as u8);
4473            par_funcdef_wordcode(cmplx);
4474            cmdpop();
4475        }
4476        DINBRACK => {
4477            cmdpush(CS_COND as u8);
4478            par_cond_wordcode();
4479            cmdpop();
4480        }
4481        DINPAR => {
4482            par_arith_wordcode();
4483        }
4484        TIME => {
4485            // c:1037-1050 — `static int inpartime` guard so
4486            // `time time foo` doesn't recurse infinitely.
4487            if !PARSER_INPARTIME.with(|c| c.get()) {
4488                // c:1041 — `*cmplx = 1;`
4489                *cmplx = 1;
4490                PARSER_INPARTIME.with(|c| c.set(true));
4491                par_time_wordcode();
4492                PARSER_INPARTIME.with(|c| c.set(false));
4493            } else {
4494                set_tok(STRING_LEX);
4495                let sr = par_simple_wordcode_impl(cmplx, nr);
4496                if sr == 0 && nr == 0 {
4497                    return false;
4498                }
4499                if sr > 1 {
4500                    *cmplx = 1;
4501                    r += (sr - 1) as usize;
4502                }
4503            }
4504        }
4505        _ => {
4506            // c:1054 — `if (!(sr = par_simple(cmplx, nr)))`
4507            let sr = par_simple_wordcode_impl(cmplx, nr);
4508            if sr == 0 {
4509                if nr == 0 {
4510                    return false;
4511                }
4512            } else if sr > 1 {
4513                // c:1060-1061 — `*cmplx = 1; r += sr - 1;`
4514                *cmplx = 1;
4515                r += (sr - 1) as usize;
4516            }
4517        }
4518    }
4519    // c:1067-1071 — trailing redirs.
4520    // c:1067 — `if (IS_REDIROP(tok)) { *cmplx = 1; while (...) (void)par_redir(&r, NULL); }`
4521    if IS_REDIROP(tok()) {
4522        *cmplx = 1;
4523        while IS_REDIROP(tok()) {
4524            let _ = par_redir_wordcode(&mut r);
4525        }
4526    }
4527    // c:1072-1075 — `incmdpos=1; incasepat=0; incond=0; intypeset=0;`
4528    set_incmdpos(true);
4529    set_incasepat(0);
4530    set_incond(0);
4531    set_intypeset(false);
4532    let _ = r;
4533    // c:1076 — `return 1;`
4534    true
4535}
4536
4537/// Port of `par_for(int *cmplx)` from `Src/parse.c:1086-1198`.
4538pub fn par_for_wordcode(cmplx: &mut i32) {
4539    // c:1089 — `int oecused = ecused, csh = (tok == FOREACH), p, sel = (tok == SELECT);`
4540    let _oecused = ECUSED.get() as usize;
4541    let csh = tok() == FOREACH;
4542    let sel = tok() == SELECT;
4543    let p: usize;
4544    // c:1090 — `int type;`
4545    let r#type: wordcode;
4546
4547    // c:1092 — `p = ecadd(0);`
4548    p = ecadd(0);
4549
4550    // c:1094 — `incmdpos = 0;`
4551    set_incmdpos(false);
4552    // c:1095 — `infor = tok == FOR ? 2 : 0;`
4553    set_infor(if tok() == FOR { 2 } else { 0 });
4554    // c:1096 — `zshlex();`
4555    zshlex();
4556    // c:1097 — `if (tok == DINPAR) {`
4557    if tok() == DINPAR {
4558        // c:1098 — `zshlex();`
4559        zshlex();
4560        // c:1099-1100 — `if (tok != DINPAR) YYERRORV(oecused);`
4561        if tok() != DINPAR {
4562            crate::ported::utils::zerr("par_for: expected init");
4563            return;
4564        }
4565        // c:1101 — `ecstr(tokstr);`
4566        ecstr(&tokstr().unwrap_or_default());
4567        // c:1102 — `zshlex();`
4568        zshlex();
4569        // c:1103-1104
4570        if tok() != DINPAR {
4571            crate::ported::utils::zerr("par_for: expected cond");
4572            return;
4573        }
4574        // c:1105
4575        ecstr(&tokstr().unwrap_or_default());
4576        // c:1106
4577        zshlex();
4578        // c:1107-1108
4579        if tok() != DOUTPAR {
4580            crate::ported::utils::zerr("par_for: expected ))");
4581            return;
4582        }
4583        // c:1109
4584        ecstr(&tokstr().unwrap_or_default());
4585        // c:1110 — `infor = 0;`
4586        set_infor(0);
4587        // c:1111 — `incmdpos = 1;`
4588        set_incmdpos(true);
4589        // c:1112 — `zshlex();`
4590        zshlex();
4591        // c:1113 — `type = WC_FOR_COND;`
4592        r#type = WC_FOR_COND;
4593    } else {
4594        // c:1115 — `int np = 0, n, posix_in, ona = noaliases, onc = nocorrect;`
4595        let mut np: usize = 0;
4596        let mut n: u32;
4597        let posix_in: bool;
4598        let ona = noaliases();
4599        let onc = nocorrect();
4600        // c:1116 — `infor = 0;`
4601        set_infor(0);
4602        // c:1117-1118 — `if (tok != STRING || !isident(tokstr)) YYERRORV(oecused);`
4603        if tok() != STRING_LEX
4604            || !crate::ported::utils::isident(&tokstr().unwrap_or_default())
4605        {
4606            crate::ported::utils::zerr("par_for: expected identifier");
4607            return;
4608        }
4609        // c:1119-1120 — `if (!sel) np = ecadd(0);`
4610        if !sel {
4611            np = ecadd(0);
4612        }
4613        // c:1121 — `n = 0;`
4614        n = 0;
4615        // c:1122 — `incmdpos = 1;`
4616        set_incmdpos(true);
4617        // c:1123 — `noaliases = nocorrect = 1;`
4618        set_noaliases(true);
4619        set_nocorrect(1);
4620        // c:1124 — `for (;;) {`
4621        loop {
4622            // c:1125 — `n++;`
4623            n += 1;
4624            // c:1126 — `ecstr(tokstr);`
4625            ecstr(&tokstr().unwrap_or_default());
4626            // c:1127 — `zshlex();`
4627            zshlex();
4628            // c:1128-1129 — `if (tok != STRING || !strcmp(tokstr, "in") || sel) break;`
4629            if tok() != STRING_LEX
4630                || tokstr().as_deref() == Some("in")
4631                || sel
4632            {
4633                break;
4634            }
4635            // c:1130-1135 — `if (!isident(tokstr) || errflag) { ... YYERRORV; }`
4636            if !crate::ported::utils::isident(&tokstr().unwrap_or_default())
4637                || (crate::ported::utils::errflag.load(std::sync::atomic::Ordering::Relaxed) & 1) != 0
4638            {
4639                set_noaliases(ona);
4640                set_nocorrect(onc);
4641                crate::ported::utils::zerr("par_for: expected identifier in name list");
4642                return;
4643            }
4644        }
4645        // c:1137-1138 — `noaliases = ona; nocorrect = onc;`
4646        set_noaliases(ona);
4647        set_nocorrect(onc);
4648        // c:1139-1140 — `if (!sel) ecbuf[np] = n;`
4649        if !sel {
4650            ECBUF.with_borrow_mut(|b| {
4651                b[np] = n;
4652            });
4653        }
4654        // c:1141 — `posix_in = isnewlin;`
4655        posix_in = isnewlin() != 0;
4656        // c:1142-1143 — `while (isnewlin) zshlex();`
4657        while isnewlin() != 0 {
4658            zshlex();
4659        }
4660        // c:1144 — `if (tok == STRING && !strcmp(tokstr, "in")) {`
4661        if tok() == STRING_LEX && tokstr().as_deref() == Some("in") {
4662            // c:1145 — `incmdpos = 0;`
4663            set_incmdpos(false);
4664            // c:1146 — `zshlex();`
4665            zshlex();
4666            // c:1147 — `np = ecadd(0);`
4667            np = ecadd(0);
4668            // c:1148 — `n = par_wordlist();`
4669            let n2 = par_wordlist_wordcode();
4670            // c:1149-1150 — `if (tok != SEPER) YYERRORV(oecused);`
4671            if tok() != SEPER {
4672                crate::ported::utils::zerr("par_for: expected separator after `in`");
4673                return;
4674            }
4675            // c:1151 — `ecbuf[np] = n;`
4676            ECBUF.with_borrow_mut(|b| {
4677                b[np] = n2 as wordcode;
4678            });
4679            // c:1152 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
4680            r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
4681        } else if !posix_in && tok() == INPAR_TOK {
4682            // c:1153-1154 — `else if (!posix_in && tok == INPAR)`
4683            // c:1154 — `incmdpos = 0;`
4684            set_incmdpos(false);
4685            // c:1155 — `zshlex();`
4686            zshlex();
4687            // c:1156 — `np = ecadd(0);`
4688            np = ecadd(0);
4689            // c:1157 — `n = par_nl_wordlist();`
4690            let n2 = par_nl_wordlist_wordcode();
4691            // c:1158-1159 — `if (tok != OUTPAR) YYERRORV(oecused);`
4692            if tok() != OUTPAR_TOK {
4693                crate::ported::utils::zerr("par_for: expected `)`");
4694                return;
4695            }
4696            // c:1160 — `ecbuf[np] = n;`
4697            ECBUF.with_borrow_mut(|b| {
4698                b[np] = n2 as wordcode;
4699            });
4700            // c:1161 — `incmdpos = 1;`
4701            set_incmdpos(true);
4702            // c:1162 — `zshlex();`
4703            zshlex();
4704            // c:1163 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
4705            r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
4706        } else {
4707            // c:1165 — `type = (sel ? WC_SELECT_PPARAM : WC_FOR_PPARAM);`
4708            r#type = if sel { WC_SELECT_PPARAM } else { WC_FOR_PPARAM };
4709        }
4710        let _ = np;
4711    }
4712    // c:1167 — `incmdpos = 1;`
4713    set_incmdpos(true);
4714    // c:1168-1169 — `while (tok == SEPER) zshlex();`
4715    while tok() == SEPER {
4716        zshlex();
4717    }
4718    // c:1170-1193 — body dispatch (inline in C, factored here for
4719    // reuse by par_while/par_repeat — same control flow, same calls).
4720    par_loop_body_wordcode(cmplx, csh);
4721    // c:1195-1197 — `ecbuf[p] = (sel ? WCB_SELECT(...) : WCB_FOR(...));`
4722    let used = ECUSED.get() as usize;
4723    let off = used.saturating_sub(1 + p) as wordcode;
4724    ECBUF.with_borrow_mut(|b| {
4725        b[p] = if sel {
4726            WCB_SELECT(r#type, off)
4727        } else {
4728            WCB_FOR(r#type, off)
4729        };
4730    });
4731}
4732
4733/// Port of `par_wordlist(void)` from `Src/parse.c:2361-2371` —
4734/// emits wordcode form. Returns the number of strings emitted.
4735fn par_wordlist_wordcode() -> u32 {
4736    // c:2364 — `int num = 0;`
4737    let mut num: u32 = 0;
4738    // c:2365 — `while (tok == STRING) {`
4739    while tok() == STRING_LEX {
4740        // c:2366 — `ecstr(tokstr);`
4741        ecstr(&tokstr().unwrap_or_default());
4742        // c:2367 — `num++;`
4743        num += 1;
4744        // c:2368 — `zshlex();`
4745        zshlex();
4746    }
4747    // c:2370 — `return num;`
4748    num
4749}
4750
4751/// Port of `par_nl_wordlist(void)` from `Src/parse.c:2378-2390` —
4752/// emits wordcode form. Like par_wordlist but tolerates SEPER
4753/// between words.
4754fn par_nl_wordlist_wordcode() -> u32 {
4755    // c:2381 — `int num = 0;`
4756    let mut num: u32 = 0;
4757    // c:2383 — `while (tok == STRING || tok == SEPER) {`
4758    while tok() == STRING_LEX || tok() == SEPER || tok() == NEWLIN {
4759        // c:2384-2387 — `if (tok != SEPER) { ecstr(tokstr); num++; }`
4760        if tok() == STRING_LEX {
4761            ecstr(&tokstr().unwrap_or_default());
4762            num += 1;
4763        }
4764        // c:2388 — `zshlex();`
4765        zshlex();
4766    }
4767    // c:2390 — `return num;`
4768    num
4769}
4770
4771/// Body dispatch shared by par_for / par_while / par_repeat.
4772/// Direct port of `Src/parse.c:1170-1194`.
4773fn par_loop_body_wordcode(cmplx: &mut i32, csh: bool) {
4774    if tok() == DOLOOP {
4775        zshlex();
4776        // c:1172 — `par_save_list(cmplx);`
4777        par_save_list_wordcode(cmplx);
4778        if tok() != DONE {
4779            crate::ported::utils::zerr("missing `done`");
4780            return;
4781        }
4782        set_incmdpos(false);
4783        zshlex();
4784    } else if tok() == INBRACE_TOK {
4785        zshlex();
4786        // c:1179 — `par_save_list(cmplx);`
4787        par_save_list_wordcode(cmplx);
4788        if tok() != OUTBRACE_TOK {
4789            crate::ported::utils::zerr("missing `}`");
4790            return;
4791        }
4792        set_incmdpos(false);
4793        zshlex();
4794    } else if csh || isset(CSHJUNKIELOOPS) {
4795        // c:1185 — `par_save_list(cmplx);`
4796        par_save_list_wordcode(cmplx);
4797        if tok() != ZEND {
4798            crate::ported::utils::zerr("missing `end`");
4799            return;
4800        }
4801        set_incmdpos(false);
4802        zshlex();
4803    } else if unset(SHORTLOOPS) {
4804        crate::ported::utils::zerr("short loop form requires SHORTLOOPS");
4805    } else {
4806        // c:1193 — `par_save_list1(cmplx);`
4807        par_save_list1_wordcode(cmplx);
4808    }
4809}
4810
/// `select` shares par_for body (c:983-985 routes SELECT to par_for).
///
/// Thin entry point that forwards `cmplx` unchanged. `par_for_wordcode`
/// itself checks whether the current token is SELECT, so this must be
/// called while the `select` keyword is still the current token.
pub fn par_select_wordcode(cmplx: &mut i32) {
    par_for_wordcode(cmplx);
}
4815
4816/// Port of `par_case(int *cmplx)` from `Src/parse.c:1208-1400`.
4817pub fn par_case_wordcode(_cmplx: &mut i32) {
4818    // c:1211 — `int oecused = ecused, brflag, p, pp, palts, type, nalts;`
4819    let _oecused = ECUSED.get() as usize;
4820    let brflag: bool;
4821    let p: usize;
4822    let mut pp: usize;
4823    let mut palts: usize;
4824    let mut r#type: wordcode;
4825    let mut nalts: u32;
4826    // c:1212 — `int ona, onc;`
4827    let ona: bool;
4828    let onc: i32;
4829
4830    // c:1214 — `p = ecadd(0);`
4831    p = ecadd(0);
4832
4833    // c:1216 — `incmdpos = 0;`
4834    set_incmdpos(false);
4835    // c:1217 — `zshlex();`
4836    zshlex();
4837    // c:1218-1219 — `if (tok != STRING) YYERRORV(oecused);`
4838    if tok() != STRING_LEX {
4839        crate::ported::utils::zerr("par_case: expected scrutinee");
4840        return;
4841    }
4842    // c:1220 — `ecstr(tokstr);`
4843    ecstr(&tokstr().unwrap_or_default());
4844
4845    // c:1222 — `incmdpos = 1;`
4846    set_incmdpos(true);
4847    // c:1223-1224 — `ona = noaliases; onc = nocorrect;`
4848    ona = noaliases();
4849    onc = nocorrect();
4850    // c:1225 — `noaliases = nocorrect = 1;`
4851    set_noaliases(true);
4852    set_nocorrect(1);
4853    // c:1226 — `zshlex();`
4854    zshlex();
4855    // c:1227-1228 — `while (tok == SEPER) zshlex();`
4856    while tok() == SEPER {
4857        zshlex();
4858    }
4859    // c:1229 — `if (!(tok == STRING && !strcmp(tokstr, "in")) && tok != INBRACE)`
4860    if !(tok() == STRING_LEX && tokstr().as_deref() == Some("in")) && tok() != INBRACE_TOK {
4861        // c:1231-1233 — restore noaliases/nocorrect + ERROR
4862        set_noaliases(ona);
4863        set_nocorrect(onc);
4864        crate::ported::utils::zerr("par_case: expected `in` or `{`");
4865        return;
4866    }
4867    // c:1235 — `brflag = (tok == INBRACE);`
4868    brflag = tok() == INBRACE_TOK;
4869    // c:1236 — `incasepat = 1;`
4870    set_incasepat(1);
4871    // c:1237 — `incmdpos = 0;`
4872    set_incmdpos(false);
4873    // c:1238-1239 — `noaliases = ona; nocorrect = onc;`
4874    set_noaliases(ona);
4875    set_nocorrect(onc);
4876    // c:1240 — `zshlex();`
4877    zshlex();
4878
4879    // c:1242 — `for (;;) {`
4880    'arms: loop {
4881        // c:1243 — `char *str;`
4882        let mut str: String;
4883        // c:1244 — `int skip_zshlex;`
4884        let skip_zshlex: bool;
4885
4886        // c:1246-1247 — `while (tok == SEPER) zshlex();`
4887        while tok() == SEPER {
4888            zshlex();
4889        }
4890        // c:1248-1249 — `if (tok == OUTBRACE) break;`
4891        if tok() == OUTBRACE_TOK {
4892            break 'arms;
4893        }
4894        // c:1250-1251 — `if (tok == INPAR) zshlex();`
4895        if tok() == INPAR_TOK {
4896            zshlex();
4897        }
4898        // c:1252-1254 — `if (tok == BAR) { str = ""; skip_zshlex = 1; }`
4899        if tok() == BAR_TOK {
4900            str = String::new();
4901            skip_zshlex = true;
4902        } else {
4903            // c:1256-1257 — `if (tok != STRING) YYERRORV(oecused);`
4904            if tok() != STRING_LEX {
4905                crate::ported::utils::zerr("par_case: expected pattern");
4906                return;
4907            }
4908            // c:1258-1259 — `if (!strcmp(tokstr, "esac")) break;`
4909            if tokstr().as_deref() == Some("esac") {
4910                break 'arms;
4911            }
4912            // c:1260 — `str = dupstring(tokstr);`
4913            str = tokstr().unwrap_or_default();
4914            // c:1261 — `skip_zshlex = 0;`
4915            skip_zshlex = false;
4916        }
4917        // c:1263 — `type = WC_CASE_OR;`
4918        r#type = WC_CASE_OR;
4919        // c:1264-1266 — `pp = ecadd(0); palts = ecadd(0); nalts = 0;`
4920        pp = ecadd(0);
4921        palts = ecadd(0);
4922        nalts = 0;
4923        // c:1300 — `incasepat = -1;`
4924        set_incasepat(-1);
4925        // c:1301 — `incmdpos = 1;`
4926        set_incmdpos(true);
4927        // c:1302-1303 — `if (!skip_zshlex) zshlex();`
4928        if !skip_zshlex {
4929            zshlex();
4930        }
4931        // c:1304 — `for (;;) {`
4932        loop {
4933            // c:1305-1313 — `if (tok == OUTPAR) { ecstr(str);
4934            //   ecadd(ecnpats++); nalts++; incasepat = 0;
4935            //   incmdpos = 1; zshlex(); break; }`
4936            if tok() == OUTPAR_TOK {
4937                ecstr(&str);
4938                let np = ECNPATS.with(|cc| {
4939                    let v = cc.get();
4940                    cc.set(v + 1);
4941                    v
4942                }) as u32;
4943                ecadd(np);
4944                nalts += 1;
4945                set_incasepat(0);
4946                set_incmdpos(true);
4947                zshlex();
4948                break;
4949            }
4950            // c:1314-1320 — `else if (tok == BAR) { ecstr(str);
4951            //   ecadd(ecnpats++); nalts++; incasepat = 1;
4952            //   incmdpos = 0; }`
4953            else if tok() == BAR_TOK {
4954                ecstr(&str);
4955                let np = ECNPATS.with(|cc| {
4956                    let v = cc.get();
4957                    cc.set(v + 1);
4958                    v
4959                }) as u32;
4960                ecadd(np);
4961                nalts += 1;
4962                set_incasepat(1);
4963                set_incmdpos(false);
4964            }
4965            // c:1321-1357 — else { ... `(...)` whole-pattern hack
4966            // (Inpar at str[0]); else YYERRORV. Not yet ported —
4967            // err out on unexpected. }
4968            else {
4969                crate::ported::utils::zerr("par_case: expected `)` or `|`");
4970                return;
4971            }
4972
4973            // c:1359 — `zshlex();`
4974            zshlex();
4975            // c:1360-1377 — switch on next tok.
4976            match tok() {
4977                STRING_LEX => {
4978                    // c:1361-1365
4979                    str = tokstr().unwrap_or_default();
4980                    zshlex();
4981                }
4982                OUTPAR_TOK | BAR_TOK => {
4983                    // c:1367-1371 — empty string
4984                    str = String::new();
4985                }
4986                _ => {
4987                    // c:1374-1376 — `YYERRORV(oecused);`
4988                    crate::ported::utils::zerr("par_case: expected pattern, `)` or `|`");
4989                    return;
4990                }
4991            }
4992        }
4993        // c:1379 — `incasepat = 0;`
4994        set_incasepat(0);
4995        // c:1380 — `par_save_list(cmplx);`
4996        par_save_list_wordcode(_cmplx);
4997        // c:1381-1384 — terminator → arm type
4998        if tok() == SEMIAMP {
4999            r#type = WC_CASE_AND;
5000        } else if tok() == SEMIBAR {
5001            r#type = WC_CASE_TESTAND;
5002        }
5003        // c:1385 — `ecbuf[pp] = WCB_CASE(type, ecused - 1 - pp);`
5004        let used = ECUSED.get() as usize;
5005        ECBUF.with_borrow_mut(|b| {
5006            b[pp] = WCB_CASE(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5007        });
5008        // c:1386 — `ecbuf[palts] = nalts;`
5009        ECBUF.with_borrow_mut(|b| {
5010            b[palts] = nalts;
5011        });
5012        // c:1387-1388 — terminator (ESAC w/o brace OR OUTBRACE w/ brace) → break
5013        if (tok() == ESAC && !brflag) || (tok() == OUTBRACE_TOK && brflag) {
5014            break 'arms;
5015        }
5016        // c:1389-1390 — `if (tok != DSEMI && tok != SEMIAMP && tok != SEMIBAR) YYERRORV;`
5017        if tok() != DSEMI && tok() != SEMIAMP && tok() != SEMIBAR {
5018            crate::ported::utils::zerr("par_case: expected `;;`, `;&`, or `;|`");
5019            return;
5020        }
5021        // c:1391 — `incasepat = 1;`
5022        set_incasepat(1);
5023        // c:1392 — `incmdpos = 0;`
5024        set_incmdpos(false);
5025        // c:1393 — `zshlex();`
5026        zshlex();
5027    }
5028    // c:1395 — `incmdpos = 1;`
5029    set_incmdpos(true);
5030    // c:1396 — `incasepat = 0;`
5031    set_incasepat(0);
5032    // c:1397 — `zshlex();`
5033    zshlex();
5034
5035    // c:1399 — `ecbuf[p] = WCB_CASE(WC_CASE_HEAD, ecused - 1 - p);`
5036    let used = ECUSED.get() as usize;
5037    ECBUF.with_borrow_mut(|b| {
5038        b[p] = WCB_CASE(WC_CASE_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5039    });
5040}
5041
5042/// Port of `par_if(int *cmplx)` from `Src/parse.c:1410-1512`.
5043pub fn par_if_wordcode(cmplx: &mut i32) {
5044    // c:1413 — `int oecused = ecused, p, pp, type, usebrace = 0;`
5045    let _oecused = ECUSED.get() as usize;
5046    let p: usize;
5047    let mut pp: usize = 0;
5048    let mut r#type: wordcode = WC_IF_IF;
5049    let mut usebrace: i32 = 0;
5050    // c:1414 — `enum lextok xtok;`
5051    let mut xtok: lextok;
5052    // c:1415 — `unsigned char nc;`
5053    let nc: u8;
5054    let _ = nc;
5055
5056    // c:1417 — `p = ecadd(0);`
5057    p = ecadd(0);
5058
5059    // c:1419 — `for (;;) {`
5060    loop {
5061        // c:1420 — `xtok = tok;`
5062        xtok = tok();
5063        // c:1421 — `cmdpush(xtok == IF ? CS_IF : CS_ELIF);`
5064        cmdpush(if xtok == IF { CS_IF as u8 } else { CS_ELIF as u8 });
5065        // c:1422-1426 — `if (xtok == FI) { incmdpos = 0; zshlex(); break; }`
5066        if xtok == FI {
5067            set_incmdpos(false);
5068            zshlex();
5069            break;
5070        }
5071        // c:1427 — `zshlex();`
5072        zshlex();
5073        // c:1428-1429 — `if (xtok == ELSE) break;`
5074        if xtok == ELSE {
5075            break;
5076        }
5077        // c:1430-1431 — `while (tok == SEPER) zshlex();`
5078        while tok() == SEPER {
5079            zshlex();
5080        }
5081        // c:1432-1435 — `if (!(xtok == IF || xtok == ELIF)) { cmdpop(); YYERRORV; }`
5082        if !(xtok == IF || xtok == ELIF) {
5083            cmdpop();
5084            crate::ported::utils::zerr("par_if: expected `if` or `elif`");
5085            return;
5086        }
5087        // c:1436 — `pp = ecadd(0);`
5088        pp = ecadd(0);
5089        // c:1437 — `type = (xtok == IF ? WC_IF_IF : WC_IF_ELIF);`
5090        r#type = if xtok == IF { WC_IF_IF } else { WC_IF_ELIF };
5091        // c:1438 — `par_save_list(cmplx);` — condition body
5092        par_save_list_wordcode(cmplx);
5093        // c:1439 — `incmdpos = 1;`
5094        set_incmdpos(true);
5095        // c:1440-1443 — `if (tok == ENDINPUT) { cmdpop(); YYERRORV; }`
5096        if tok() == ENDINPUT {
5097            cmdpop();
5098            crate::ported::utils::zerr("par_if: unexpected end-of-input after condition");
5099            return;
5100        }
5101        // c:1444-1445 — `while (tok == SEPER) zshlex();`
5102        while tok() == SEPER {
5103            zshlex();
5104        }
5105        // c:1446 — `xtok = FI;` — pre-set so the post-loop check works
5106        xtok = FI;
5107        // c:1447 — `nc = cmdstack[cmdsp - 1] == CS_IF ? CS_IFTHEN : CS_ELIFTHEN;`
5108        // (Not tracked separately in zshrs cmdstack — derive from cur top
5109        // by reading CMDSTACK; for safety use CS_IFTHEN as default.)
5110        // We don't have a way to read top easily — match by tracking
5111        // whether we just pushed CS_IF or CS_ELIF.
5112        // For wordcode emission this only affects cmdstack debug output;
5113        // not the emitted wordcode. Use CS_IFTHEN.
5114        let nc_local: u8 = CS_IFTHEN as u8;
5115        if tok() == THEN {
5116            // c:1448-1456 — THEN branch
5117            // c:1449 — `usebrace = 0;`
5118            usebrace = 0;
5119            // c:1450 — `cmdpop();`
5120            cmdpop();
5121            // c:1451 — `cmdpush(nc);`
5122            cmdpush(nc_local);
5123            // c:1452 — `zshlex();`
5124            zshlex();
5125            // c:1453 — `par_save_list(cmplx);` — then body
5126            par_save_list_wordcode(cmplx);
5127            // c:1454 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5128            let used = ECUSED.get() as usize;
5129            ECBUF.with_borrow_mut(|b| {
5130                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5131            });
5132            // c:1455 — `incmdpos = 1;`
5133            set_incmdpos(true);
5134            // c:1456 — `cmdpop();`
5135            cmdpop();
5136        } else if tok() == INBRACE_TOK {
5137            // c:1457-1473 — INBRACE branch
5138            // c:1458 — `usebrace = 1;`
5139            usebrace = 1;
5140            // c:1459 — `cmdpop();`
5141            cmdpop();
5142            // c:1460 — `cmdpush(nc);`
5143            cmdpush(nc_local);
5144            // c:1461 — `zshlex();`
5145            zshlex();
5146            // c:1462 — `par_save_list(cmplx);`
5147            par_save_list_wordcode(cmplx);
5148            // c:1463-1466 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5149            if tok() != OUTBRACE_TOK {
5150                cmdpop();
5151                crate::ported::utils::zerr("par_if: expected `}`");
5152                return;
5153            }
5154            // c:1467 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5155            let used = ECUSED.get() as usize;
5156            ECBUF.with_borrow_mut(|b| {
5157                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5158            });
5159            // c:1469 — `zshlex();`
5160            zshlex();
5161            // c:1470 — `incmdpos = 1;`
5162            set_incmdpos(true);
5163            // c:1471-1472 — `if (tok == SEPER) break;`
5164            if tok() == SEPER {
5165                break;
5166            }
5167            // c:1473 — `cmdpop();`
5168            cmdpop();
5169        } else if unset(SHORTLOOPS) {
5170            // c:1474-1476 — `cmdpop(); YYERRORV;`
5171            cmdpop();
5172            crate::ported::utils::zerr("par_if: short body requires SHORTLOOPS");
5173            return;
5174        } else {
5175            // c:1477-1484 — short loop form
5176            // c:1478 — `cmdpop();`
5177            cmdpop();
5178            // c:1479 — `cmdpush(nc);`
5179            cmdpush(nc_local);
5180            // c:1480 — `par_save_list1(cmplx);`
5181            par_save_list1_wordcode(cmplx);
5182            // c:1481 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5183            let used = ECUSED.get() as usize;
5184            ECBUF.with_borrow_mut(|b| {
5185                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5186            });
5187            // c:1482 — `incmdpos = 1;`
5188            set_incmdpos(true);
5189            // c:1483 — `break;`
5190            break;
5191        }
5192    }
5193    // c:1486 — `cmdpop();`
5194    cmdpop();
5195    // c:1487 — `if (xtok == ELSE || tok == ELSE) {`
5196    if xtok == ELSE || tok() == ELSE {
5197        // c:1488 — `pp = ecadd(0);`
5198        pp = ecadd(0);
5199        // c:1489 — `cmdpush(CS_ELSE);`
5200        cmdpush(CS_ELSE as u8);
5201        // c:1490-1491 — `while (tok == SEPER) zshlex();`
5202        while tok() == SEPER {
5203            zshlex();
5204        }
5205        // c:1492-1498 — `if (tok == INBRACE && usebrace) { ... } else { ... }`
5206        if tok() == INBRACE_TOK && usebrace != 0 {
5207            // c:1493 — `zshlex();`
5208            zshlex();
5209            // c:1494 — `par_save_list(cmplx);`
5210            par_save_list_wordcode(cmplx);
5211            // c:1495-1498 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5212            if tok() != OUTBRACE_TOK {
5213                cmdpop();
5214                crate::ported::utils::zerr("par_if: else expected `}`");
5215                return;
5216            }
5217        } else {
5218            // c:1500 — `par_save_list(cmplx);`
5219            par_save_list_wordcode(cmplx);
5220            // c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`
5221            if tok() != FI {
5222                cmdpop();
5223                crate::ported::utils::zerr("par_if: else expected `fi`");
5224                return;
5225            }
5226        }
5227        // c:1506 — `incmdpos = 0;`
5228        set_incmdpos(false);
5229        // c:1507 — `ecbuf[pp] = WCB_IF(WC_IF_ELSE, ecused - 1 - pp);`
5230        let used = ECUSED.get() as usize;
5231        ECBUF.with_borrow_mut(|b| {
5232            b[pp] = WCB_IF(WC_IF_ELSE, (used.saturating_sub(1 + pp)) as wordcode);
5233        });
5234        // c:1508 — `zshlex();`
5235        zshlex();
5236        // c:1509 — `cmdpop();`
5237        cmdpop();
5238    }
5239    // c:1511 — `ecbuf[p] = WCB_IF(WC_IF_HEAD, ecused - 1 - p);`
5240    let used = ECUSED.get() as usize;
5241    ECBUF.with_borrow_mut(|b| {
5242        b[p] = WCB_IF(WC_IF_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5243    });
5244}
5245
5246/// Port of `par_while(int *cmplx)` from `Src/parse.c:1520-1557`.
5247pub fn par_while_wordcode(cmplx: &mut i32) {
5248    // c:1523 — `int oecused = ecused, p;`
5249    let _oecused = ECUSED.get() as usize;
5250    let p: usize;
5251    // c:1524 — `int type = (tok == UNTIL ? WC_WHILE_UNTIL : WC_WHILE_WHILE);`
5252    let r#type: wordcode = if tok() == UNTIL {
5253        WC_WHILE_UNTIL
5254    } else {
5255        WC_WHILE_WHILE
5256    };
5257
5258    // c:1526 — `p = ecadd(0);`
5259    p = ecadd(0);
5260    // c:1527 — `zshlex();`
5261    zshlex();
5262    // c:1528 — `par_save_list(cmplx);` — condition.
5263    par_save_list_wordcode(cmplx);
5264    // c:1529 — `incmdpos = 1;`
5265    set_incmdpos(true);
5266    // c:1530-1531 — `while (tok == SEPER) zshlex();`
5267    while tok() == SEPER {
5268        zshlex();
5269    }
5270    // c:1532-1545 — body dispatch (inlined in C; we factor via
5271    // par_loop_body_wordcode since for/while/repeat share this
5272    // identical block).
5273    if tok() == DOLOOP {
5274        // c:1533 — `zshlex();`
5275        zshlex();
5276        // c:1534 — `par_save_list(cmplx);`
5277        par_save_list_wordcode(cmplx);
5278        // c:1535-1536 — `if (tok != DONE) YYERRORV(oecused);`
5279        if tok() != DONE {
5280            crate::ported::utils::zerr("par_while: expected `done`");
5281            return;
5282        }
5283        // c:1537 — `incmdpos = 0;`
5284        set_incmdpos(false);
5285        // c:1538 — `zshlex();`
5286        zshlex();
5287    } else if tok() == INBRACE_TOK {
5288        // c:1540 — `zshlex();`
5289        zshlex();
5290        // c:1541 — `par_save_list(cmplx);`
5291        par_save_list_wordcode(cmplx);
5292        // c:1542-1543 — `if (tok != OUTBRACE) YYERRORV(oecused);`
5293        if tok() != OUTBRACE_TOK {
5294            crate::ported::utils::zerr("par_while: expected `}`");
5295            return;
5296        }
5297        // c:1544 — `incmdpos = 0;`
5298        set_incmdpos(false);
5299        // c:1545 — `zshlex();`
5300        zshlex();
5301    } else if isset(CSHJUNKIELOOPS) {
5302        // c:1546-1550
5303        par_save_list_wordcode(cmplx);
5304        if tok() != ZEND {
5305            crate::ported::utils::zerr("par_while: expected `end`");
5306            return;
5307        }
5308        zshlex();
5309    } else if unset(SHORTLOOPS) {
5310        // c:1551-1552 — `YYERRORV(oecused);`
5311        crate::ported::utils::zerr("par_while: short body requires SHORTLOOPS");
5312        return;
5313    } else {
5314        // c:1554 — `par_save_list1(cmplx);`
5315        par_save_list1_wordcode(cmplx);
5316    }
5317
5318    // c:1556 — `ecbuf[p] = WCB_WHILE(type, ecused - 1 - p);`
5319    let used = ECUSED.get() as usize;
5320    ECBUF.with_borrow_mut(|b| {
5321        b[p] = WCB_WHILE(r#type, (used.saturating_sub(1 + p)) as wordcode);
5322    });
5323}
5324
/// `until` shares par_while body — tok==UNTIL flips the type.
///
/// Thin entry point that forwards `cmplx` unchanged. `par_while_wordcode`
/// reads the current token on entry to choose between the `until` and
/// `while` wordcode types, so this must be called while the `until` keyword
/// is still the current token.
pub fn par_until_wordcode(cmplx: &mut i32) {
    par_while_wordcode(cmplx);
}
5329
5330/// Port of `par_repeat(int *cmplx)` from `Src/parse.c:1564-1606`.
5331pub fn par_repeat_wordcode(cmplx: &mut i32) {
5332    // c:1567 — `/* ### what to do about inrepeat_ here? */`
5333    // c:1568 — `int oecused = ecused, p;`
5334    let _oecused = ECUSED.get() as usize;
5335    let p: usize;
5336
5337    // c:1570 — `p = ecadd(0);`
5338    p = ecadd(0);
5339
5340    // c:1572 — `incmdpos = 0;`
5341    set_incmdpos(false);
5342    // c:1573 — `zshlex();`
5343    zshlex();
5344    // c:1574-1575 — `if (tok != STRING) YYERRORV(oecused);`
5345    if tok() != STRING_LEX {
5346        crate::ported::utils::zerr("par_repeat: expected count");
5347        return;
5348    }
5349    // c:1576 — `ecstr(tokstr);`
5350    ecstr(&tokstr().unwrap_or_default());
5351    // c:1577 — `incmdpos = 1;`
5352    set_incmdpos(true);
5353    // c:1578 — `zshlex();`
5354    zshlex();
5355    // c:1579-1580 — `while (tok == SEPER) zshlex();`
5356    while tok() == SEPER {
5357        zshlex();
5358    }
5359    // c:1581-1604 — body dispatch (inlined here matching C exactly).
5360    if tok() == DOLOOP {
5361        // c:1582-1587
5362        zshlex();
5363        par_save_list_wordcode(cmplx);
5364        if tok() != DONE {
5365            crate::ported::utils::zerr("par_repeat: expected `done`");
5366            return;
5367        }
5368        set_incmdpos(false);
5369        zshlex();
5370    } else if tok() == INBRACE_TOK {
5371        // c:1589-1594
5372        zshlex();
5373        par_save_list_wordcode(cmplx);
5374        if tok() != OUTBRACE_TOK {
5375            crate::ported::utils::zerr("par_repeat: expected `}`");
5376            return;
5377        }
5378        set_incmdpos(false);
5379        zshlex();
5380    } else if isset(CSHJUNKIELOOPS) {
5381        // c:1596-1599
5382        par_save_list_wordcode(cmplx);
5383        if tok() != ZEND {
5384            crate::ported::utils::zerr("par_repeat: expected `end`");
5385            return;
5386        }
5387        zshlex();
5388    } else if unset(SHORTLOOPS) && unset(SHORTREPEAT) {
5389        // c:1601-1602 — par_repeat needs BOTH SHORTLOOPS and SHORTREPEAT
5390        // unset to refuse short form (more permissive than par_while).
5391        crate::ported::utils::zerr("par_repeat: short body requires SHORTLOOPS or SHORTREPEAT");
5392        return;
5393    } else {
5394        // c:1604 — `par_save_list1(cmplx);`
5395        par_save_list1_wordcode(cmplx);
5396    }
5397
5398    // c:1606 — `ecbuf[p] = WCB_REPEAT(ecused - 1 - p);`
5399    let used = ECUSED.get() as usize;
5400    ECBUF.with_borrow_mut(|b| {
5401        b[p] = WCB_REPEAT((used.saturating_sub(1 + p)) as wordcode);
5402    });
5403}
5404
5405/// Port of `par_funcdef(int *cmplx)` from `Src/parse.c:1672-1779`.
5406///
5407/// The `function NAME { ... }` form. Emits a WCB_FUNCDEF header
5408/// followed by a names-count slot, the names themselves, four
5409/// metadata slots (string-area start, string-area length, npats,
5410/// do_tracing), then the body wordcode, then WCB_END.
5411///
5412/// Critical: saves/resets `ecnpats` + `ecssub` + `ecsoffs` around
5413/// the body parse so per-function pattern counts don't leak into
5414/// the enclosing scope's `ecnpats` accumulator (parse.c:1723-1758).
5415pub fn par_funcdef_wordcode(cmplx: &mut i32) {
5416    // c:1674 — `int oecused = ecused, num = 0, onp, p, c = 0;`
5417    let _oecused = ECUSED.get() as usize;
5418    let mut num: i32 = 0;
5419    let onp: i32;
5420    let p: usize;
5421    let mut c: i32 = 0;
5422    // c:1675 — `int so, oecssub = ecssub;`
5423    let so: i32;
5424    let oecssub = ECSSUB.get();
5425    // c:1676 — `zlong oldlineno = lineno;`
5426    let oldlineno = lineno();
5427    // c:1677 — `int do_tracing = 0;`
5428    let mut do_tracing: i32 = 0;
5429
5430    // c:1679 — `lineno = 0;`
5431    set_lineno(0);
5432    // c:1680 — `nocorrect = 1;`
5433    set_nocorrect(1);
5434    // c:1681 — `incmdpos = 0;`
5435    set_incmdpos(false);
5436    // c:1682 — `zshlex();`
5437    zshlex();
5438
5439    // c:1684 — `p = ecadd(0);`
5440    p = ecadd(0);
5441    // c:1685 — `ecadd(0); /* p + 1 */`
5442    let p1 = ecadd(0);
5443
5444    // c:1687-1699 — `Consume an initial (-T), (--), or (-T --).`
5445    // c:1690 — `if (tok == STRING && tokstr[0] == Dash) {`
5446    if tok() == STRING_LEX {
5447        let s = tokstr().unwrap_or_default();
5448        let bytes = s.as_bytes();
5449        // C: `tokstr[0] == Dash` (Dash = 0x9b = 0xc2 0x9b in UTF-8).
5450        // First byte of UTF-8 `\u{9b}` is 0xc2; the char `'-'` is 0x2d.
5451        // Match either form.
5452        let first_is_dash = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b)
5453            || (bytes.len() >= 1 && bytes[0] == b'-');
5454        if first_is_dash {
5455            // c:1691-1694 — `if (tokstr[1] == 'T' && !tokstr[2]) { ++do_tracing; zshlex(); }`
5456            // After the leading dash byte(s), check remaining bytes.
5457            let after_dash = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b {
5458                &bytes[2..]
5459            } else {
5460                &bytes[1..]
5461            };
5462            if after_dash.len() == 1 && after_dash[0] == b'T' {
5463                do_tracing += 1;
5464                zshlex();
5465            }
5466            // c:1695-1698 — `if (tok == STRING && tokstr[0] == Dash &&
5467            //                  tokstr[1] == Dash && !tokstr[2]) zshlex();`
5468            if tok() == STRING_LEX {
5469                let s2 = tokstr().unwrap_or_default();
5470                let b2 = s2.as_bytes();
5471                let mut idx = 0;
5472                let mut dashes = 0;
5473                while idx < b2.len() && dashes < 2 {
5474                    if b2[idx] == 0xc2 && idx + 1 < b2.len() && b2[idx + 1] == 0x9b {
5475                        idx += 2;
5476                        dashes += 1;
5477                    } else if b2[idx] == b'-' {
5478                        idx += 1;
5479                        dashes += 1;
5480                    } else {
5481                        break;
5482                    }
5483                }
5484                if dashes == 2 && idx == b2.len() {
5485                    zshlex();
5486                }
5487            }
5488        }
5489    }
5490
5491    // c:1701-1709 — names loop.
5492    // `while (tok == STRING) { if ((*tokstr == Inbrace || *tokstr == '{')
5493    //   && !tokstr[1]) { tok = INBRACE; break; } ecstr(tokstr); num++; zshlex(); }`
5494    while tok() == STRING_LEX {
5495        let s = tokstr().unwrap_or_default();
5496        let bytes = s.as_bytes();
5497        // First byte tests for Inbrace marker (0x8f → UTF-8 `0xc2 0x8f`) or `{`,
5498        // and length-1 check (`!tokstr[1]`).
5499        let is_inbrace_only = (bytes.len() == 1 && bytes[0] == b'{')
5500            || (bytes.len() == 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f);
5501        if is_inbrace_only {
5502            set_tok(INBRACE_TOK);
5503            break;
5504        }
5505        ecstr(&s);
5506        num += 1;
5507        zshlex();
5508    }
5509
5510    // c:1711-1714 — four metadata placeholder slots.
5511    let m2 = ecadd(0);
5512    let m3 = ecadd(0);
5513    let m4 = ecadd(0);
5514    let m5 = ecadd(0);
5515
5516    // c:1716 — `nocorrect = 0;`
5517    set_nocorrect(0);
5518    // c:1717 — `incmdpos = 1;`
5519    set_incmdpos(true);
5520    // c:1718-1719 — `if (tok == INOUTPAR) zshlex();`
5521    if tok() == INOUTPAR {
5522        zshlex();
5523    }
5524    // c:1720-1721 — `while (tok == SEPER) zshlex();`
5525    while tok() == SEPER {
5526        zshlex();
5527    }
5528
5529    // c:1723 — `ecnfunc++;`
5530    ECNFUNC.set(ECNFUNC.get() + 1);
5531    // c:1724 — `ecssub = so = ecsoffs;`
5532    so = ECSOFFS.get();
5533    ECSSUB.set(so);
5534    // c:1725 — `onp = ecnpats;`
5535    onp = ECNPATS.with(|cc| cc.get());
5536    // c:1726 — `ecnpats = 0;`
5537    ECNPATS.with(|cc| cc.set(0));
5538
5539    // c:1728 — `if (tok == INBRACE) {`
5540    if tok() == INBRACE_TOK {
5541        // c:1729 — `zshlex();`
5542        zshlex();
5543        // c:1730 — `par_list(&c);`
5544        par_list_wordcode(&mut c);
5545        // c:1731-1736 — `if (tok != OUTBRACE) { lineno += oldlineno; ... }`
5546        if tok() != OUTBRACE_TOK {
5547            set_lineno(lineno() + oldlineno);
5548            ECNPATS.with(|cc| cc.set(onp));
5549            ECSSUB.set(oecssub);
5550            crate::ported::utils::zerr("par_funcdef: expected `}`");
5551            return;
5552        }
5553        // c:1737-1740 — `if (num == 0) { incmdpos = 0; }`
5554        if num == 0 {
5555            set_incmdpos(false);
5556        }
5557        // c:1741 — `zshlex();`
5558        zshlex();
5559    } else if unset(SHORTLOOPS) {
5560        // c:1742-1746 — `lineno += oldlineno; ecnpats = onp; ecssub = oecssub; YYERRORV`
5561        set_lineno(lineno() + oldlineno);
5562        ECNPATS.with(|cc| cc.set(onp));
5563        ECSSUB.set(oecssub);
5564        crate::ported::utils::zerr("par_funcdef: short body requires SHORTLOOPS");
5565        return;
5566    } else {
5567        // c:1748 — `par_list1(&c);`
5568        par_list1_wordcode(&mut c);
5569    }
5570
5571    // c:1750 — `ecadd(WCB_END());`
5572    ecadd(WCB_END());
5573    // c:1751-1754 — fill the 4 metadata slots
5574    let cur_sofs = ECSOFFS.get();
5575    let body_npats = ECNPATS.with(|cc| cc.get());
5576    ECBUF.with_borrow_mut(|b| {
5577        b[m2] = (so - oecssub) as wordcode;
5578        b[m3] = (cur_sofs - so) as wordcode;
5579        b[m4] = body_npats as wordcode;
5580        b[m5] = do_tracing as wordcode;
5581    });
5582    // c:1755 — `ecbuf[p + 1] = num;`
5583    ECBUF.with_borrow_mut(|b| {
5584        b[p1] = num as wordcode;
5585    });
5586
5587    // c:1757 — `ecnpats = onp;`
5588    ECNPATS.with(|cc| cc.set(onp));
5589    // c:1758 — `ecssub = oecssub;`
5590    ECSSUB.set(oecssub);
5591    // c:1759 — `ecnfunc++;`
5592    ECNFUNC.set(ECNFUNC.get() + 1);
5593
5594    // c:1761 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
5595    let used = ECUSED.get() as usize;
5596    ECBUF.with_borrow_mut(|b| {
5597        b[p] = WCB_FUNCDEF((used.saturating_sub(1 + p)) as wordcode);
5598    });
5599
5600    // c:1763-1777 — anonymous-function trailing args (num == 0 case).
5601    if num == 0 {
5602        // c:1766 — `int parg = ecadd(0);`
5603        let parg = ecadd(0);
5604        // c:1767 — `ecadd(0);`
5605        ecadd(0);
5606        // c:1768-1772 — `while (tok == STRING) { ecstr(tokstr); num++; zshlex(); }`
5607        while tok() == STRING_LEX {
5608            ecstr(&tokstr().unwrap_or_default());
5609            num += 1;
5610            zshlex();
5611        }
5612        // c:1773-1774 — `if (num > 0) *cmplx = 1;`
5613        if num > 0 {
5614            *cmplx = 1;
5615        }
5616        // c:1775 — `ecbuf[parg] = ecused - parg;`
5617        // c:1776 — `ecbuf[parg+1] = num;`
5618        let used2 = ECUSED.get() as usize;
5619        ECBUF.with_borrow_mut(|b| {
5620            b[parg] = (used2 - parg) as wordcode;
5621            b[parg + 1] = num as wordcode;
5622        });
5623    }
5624    // c:1778 — `lineno += oldlineno;`
5625    set_lineno(lineno() + oldlineno);
5626}
5627
5628/// Size of `struct fdhead` in `wordcode` (u32) units. Used by all
5629/// the header-walk macros below.
5630pub const FDHEAD_WORDS: usize = std::mem::size_of::<fdhead>() / 4;
5631
5632/// `Src/parse.c:1619-1665`. Handles both `(...)` subshell and
5633/// `{...}` brace group (cursh) plus optional `always { ... }`
5634/// trailing block. C uses a single function with `zsh_construct=1`
5635/// for `{...}` and 0 for `(...)`.
5636pub fn par_subsh_wordcode_impl(cmplx: &mut i32, zsh_construct: i32) {
5637    // c:1621 — `enum lextok otok = tok;`
5638    let otok = tok();
5639    // c:1622 — `int oecused = ecused, p, pp;`
5640    let _oecused = ECUSED.get() as usize;
5641    let p: usize;
5642    let pp: usize;
5643
5644    // c:1624 — `p = ecadd(0);`
5645    p = ecadd(0);
5646    // c:1625 — `/* Extra word only needed for always block */`
5647    // c:1626 — `pp = ecadd(0);`
5648    pp = ecadd(0);
5649    // c:1627 — `zshlex();`
5650    zshlex();
5651    // c:1628 — `par_list(cmplx);`
5652    par_list_wordcode(cmplx);
5653    // c:1629 — `ecadd(WCB_END());`
5654    ecadd(WCB_END());
5655    // c:1630-1631 — `if (tok != ((otok == INPAR) ? OUTPAR : OUTBRACE))
5656    // YYERRORV(oecused);`
5657    if tok() != (if otok == INPAR_TOK { OUTPAR_TOK } else { OUTBRACE_TOK }) {
5658        crate::ported::utils::zerr("par_subsh: missing closing token");
5659        return;
5660    }
5661    // c:1632 — `incmdpos = !zsh_construct;`
5662    set_incmdpos(zsh_construct == 0);
5663    // c:1633 — `zshlex();`
5664    zshlex();
5665
5666    // c:1635 — `/* Optional always block. No intervening SEPERs allowed. */`
5667    // c:1636 — `if (otok == INBRACE && tok == STRING && !strcmp(tokstr, "always")) {`
5668    if otok == INBRACE_TOK && tok() == STRING_LEX && tokstr().as_deref() == Some("always") {
5669        // c:1637 — `ecbuf[pp] = WCB_TRY(ecused - 1 - pp);`
5670        let used = ECUSED.get() as usize;
5671        ECBUF.with_borrow_mut(|b| {
5672            b[pp] = WCB_TRY((used.saturating_sub(1 + pp)) as wordcode);
5673        });
5674        // c:1638 — `incmdpos = 1;`
5675        set_incmdpos(true);
5676        // c:1639-1641 — `do { zshlex(); } while (tok == SEPER);`
5677        loop {
5678            zshlex();
5679            if tok() != SEPER {
5680                break;
5681            }
5682        }
5683
5684        // c:1643-1644 — `if (tok != INBRACE) YYERRORV(oecused);`
5685        if tok() != INBRACE_TOK {
5686            crate::ported::utils::zerr("par_subsh: 'always' expects `{`");
5687            return;
5688        }
5689        // c:1645 — `cmdpop();`
5690        cmdpop();
5691        // c:1646 — `cmdpush(CS_ALWAYS);`
5692        cmdpush(CS_ALWAYS as u8);
5693
5694        // c:1648 — `zshlex();`
5695        zshlex();
5696        // c:1649 — `par_save_list(cmplx);`
5697        par_save_list_wordcode(cmplx);
5698        // c:1650-1651 — `while (tok == SEPER) zshlex();`
5699        while tok() == SEPER {
5700            zshlex();
5701        }
5702
5703        // c:1653 — `incmdpos = 1;`
5704        set_incmdpos(true);
5705
5706        // c:1655-1656 — `if (tok != OUTBRACE) YYERRORV(oecused);`
5707        if tok() != OUTBRACE_TOK {
5708            crate::ported::utils::zerr("par_subsh: 'always' block missing `}`");
5709            return;
5710        }
5711        // c:1657 — `zshlex();`
5712        zshlex();
5713        // c:1658 — `ecbuf[p] = WCB_TRY(ecused - 1 - p);`
5714        let used = ECUSED.get() as usize;
5715        ECBUF.with_borrow_mut(|b| {
5716            b[p] = WCB_TRY((used.saturating_sub(1 + p)) as wordcode);
5717        });
5718    } else {
5719        // c:1660-1661 — `ecbuf[p] = (otok == INPAR ? WCB_SUBSH(...) : WCB_CURSH(...));`
5720        let used = ECUSED.get() as usize;
5721        let off = used.saturating_sub(1 + p);
5722        ECBUF.with_borrow_mut(|b| {
5723            b[p] = if otok == INPAR_TOK {
5724                WCB_SUBSH(off as wordcode)
5725            } else {
5726                WCB_CURSH(off as wordcode)
5727            };
5728        });
5729    }
5730}
5731
5732/// Wrapper for `(...)` subshell — calls `par_subsh_wordcode_impl(0)`.
5733pub fn par_subsh_wordcode(cmplx: &mut i32) {
5734    par_subsh_wordcode_impl(cmplx, 0);
5735}
5736
5737/// Wrapper for `{...}` brace group (cursh) — calls
5738/// `par_subsh_wordcode_impl(1)`. C uses the same `par_subsh`
5739/// function with `zsh_construct=1`; the Rust split exists because
5740/// the par_cmd dispatch at parse.rs:1446 already named them
5741/// separately.
5742pub fn par_cursh_wordcode(cmplx: &mut i32) {
5743    par_subsh_wordcode_impl(cmplx, 1);
5744}
5745
5746/// Port of `par_time(void)` from `Src/parse.c:1787`. `time PIPE`
5747/// emits WCB_TIMED(WC_TIMED_PIPE) + the sublist code; bare `time`
5748/// with no pipeline emits WCB_TIMED(WC_TIMED_EMPTY).
5749pub fn par_time_wordcode() {
5750    // c:1791 — `zshlex();`
5751    zshlex();
5752    // c:1793-1794 — `p = ecadd(0); ecadd(0);`
5753    let p = ecadd(0);
5754    ecadd(0);
5755    // c:1795 — `if ((f = par_sublist2(&c)) < 0)`
5756    let mut c = 0i32;
5757    let f = par_sublist2(&mut c);
5758    match f {
5759        Some(flags) => {
5760            // c:1799 — `ecbuf[p] = WCB_TIMED(WC_TIMED_PIPE);`
5761            ECBUF.with_borrow_mut(|b| {
5762                if p < b.len() {
5763                    b[p] = WCB_TIMED(WC_TIMED_PIPE);
5764                }
5765            });
5766            // c:1800 — `set_sublist_code(p+1, WC_SUBLIST_END, f,
5767            // ecused-2-p, c);`
5768            let used = ECUSED.get() as usize;
5769            let skip = used.saturating_sub(2 + p) as i32;
5770            set_sublist_code(p + 1, WC_SUBLIST_END as i32, flags, skip, c != 0);
5771        }
5772        None => {
5773            // c:1796-1798 — `ecused--; ecbuf[p] = WCB_TIMED(WC_TIMED_EMPTY);`
5774            ECUSED.set((ECUSED.get() - 1).max(0));
5775            ECBUF.with_borrow_mut(|b| {
5776                if p < b.len() {
5777                    b[p] = WCB_TIMED(WC_TIMED_EMPTY);
5778                }
5779            });
5780        }
5781    }
5782}
5783
5784/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Wraps
5785/// `par_cond` (the cond-expression emitter at parse.c:2409) with
5786/// the `[[ ... ]]` framing: incond/incmdpos toggles + DOUTBRACK
5787/// expectation.
5788pub fn par_cond_wordcode() {
5789    let oecused = ECUSED.get();
5790    // c:1814 — `incond = 1;`
5791    set_incond(1);
5792    // c:1815 — `incmdpos = 0;`
5793    set_incmdpos(false);
5794    // c:1816 — `zshlex();` past `[[`.
5795    zshlex();
5796    // c:1817 — `par_cond();` — call the no-skip cond-expression
5797    // entry that EMITS WORDCODE (par_cond_top → par_cond_1 →
5798    // par_cond_2 → par_cond_double/triple/multi). NOT the AST
5799    // `par_cond` at parse.rs:4644 which is a misnamed `par_dinbrack`
5800    // that skips `[[` AND `]]` and returns a ZshCommand AST node
5801    // instead of pushing WC_COND opcodes. NOT `parse_cond_expr`
5802    // either — that's also AST-only, returning ZshCond. With
5803    // `parse_cond_expr` here, every `[[ ... ]]` test produced ZERO
5804    // wordcode payload and parity dropped ~148 words on /etc/zshrc.
5805    let _ = par_cond_top();
5806    // c:1818-1819 — `if (tok != DOUTBRACK) YYERRORV(oecused);`
5807    if tok() != DOUTBRACK {
5808        let _ = oecused;
5809        crate::ported::utils::zerr("missing ]]");
5810        return;
5811    }
5812    // c:1820 — `incond = 0;`
5813    set_incond(0);
5814    // c:1821 — `incmdpos = 1;`
5815    set_incmdpos(true);
5816    // c:1822 — `zshlex();` past `]]`.
5817    zshlex();
5818}
5819
5820/// Port of the `case DINPAR:` arm of `par_cmd` from
5821/// `Src/parse.c:1031-1034`:
5822/// ```c
5823/// ecadd(WCB_ARITH());
5824/// ecstr(tokstr);
5825/// zshlex();
5826/// ```
5827/// `(( EXPR ))` arithmetic at command position — emits the ARITH
5828/// opcode followed by the interned EXPR string, then advances past
5829/// the DINPAR token (which already carries the body text).
5830pub fn par_arith_wordcode() {
5831    // c:1032 — `ecadd(WCB_ARITH());`
5832    ecadd(WCB_ARITH());
5833    // c:1033 — `ecstr(tokstr);` — interns the expression string and
5834    // appends its strcode index to the wordcode buffer.
5835    let expr = tokstr().unwrap_or_default();
5836    ecstr(&expr);
5837    // c:1034 — `zshlex();`
5838    zshlex();
5839}
5840
5841/// Port of `par_simple(int *cmplx, int nr)` from
5842/// `Src/parse.c:1836-2227`. Emits WC_SIMPLE + word count +
5843/// interned string offsets. Returns `0` when nothing was emitted,
5844/// otherwise `1 + (number of code words consumed by redirections)`.
5845/// The full C body handles assignments (ENVSTRING/ENVARRAY),
5846/// inline `{var}>file` brace-FDs, prefix modifiers (NOCORRECT etc),
5847/// and `name() { body }` funcdef detection — those paths are
5848/// progressively wired into the AST parser; this wordcode-emitter
5849/// covers the simple `cmd args...` case + interleaved redirs.
5850pub fn par_simple_wordcode_impl(cmplx: &mut i32, mut nr: i32) -> i32 {
5851    // c:1838-1841 — `int oecused = ecused, isnull = 1, r, argc = 0,
5852    //   p, isfunc = 0, sr = 0;`
5853    //   `int c = *cmplx, nrediradd, assignments = 0, ppost = 0,
5854    //   is_typeset = 0;`
5855    // c is the SAVED initial cmplx so INOUTPAR can restore via
5856    // `*cmplx = c;` at c:2070.
5857    let _oecused = ECUSED.get() as usize;
5858    let c_saved = *cmplx;
5859    let mut isnull = true;
5860    let mut argc: u32 = 0;
5861    let mut sr: i32 = 0;
5862    let mut assignments = false;
5863    let mut isfunc = false;
5864
5865    // c:1843 — `r = ecused;` — saves the offset where redirs get
5866    // INSERTED (via ecispace). Each redir shifts later words DOWN
5867    // by ncodes, so the SIMPLE placeholder at `p` (set later) must
5868    // also bump by ncodes when a redir lands. C uses `&r` to pass
5869    // the cursor by reference; Rust uses a mutable local + manual
5870    // bumps after each par_redir_wordcode call.
5871    let mut r: usize = ECUSED.get() as usize;
5872
5873    // c:1844-1919 — pre-cmd loop: NOCORRECT, ENVSTRING (scalar
5874    // assigns), ENVARRAY (array assigns), IS_REDIROP. Loops until
5875    // a non-assignment token is seen.
5876    loop {
5877        match tok() {
5878            NOCORRECT => {
5879                // c:1846-1849
5880                *cmplx = 1;
5881                set_nocorrect(1);
5882            }
5883            ENVSTRING => {
5884                // c:1848-1898 — scalar assignment `name=value` or
5885                // `name+=value`. Emits WCB_ASSIGN(SCALAR, NEW|INC, 0)
5886                // followed by ecstr(name), ecstr(value).
5887                let raw = tokstr().unwrap_or_default();
5888                // Find first of Inbrack / '=' / '+' (the C scan at
5889                // c:1851-1853). Inside Inbrack we skipparens — i.e.
5890                // skip `name[...]` index, then continue.
5891                let bytes: Vec<char> = raw.chars().collect();
5892                let mut idx = 0usize;
5893                while idx < bytes.len() {
5894                    let ch = bytes[idx];
5895                    if ch == '\u{91}' /* Inbrack */ {
5896                        // Skip matched Inbrack…Outbrack pair.
5897                        let mut depth = 1;
5898                        idx += 1;
5899                        while idx < bytes.len() && depth > 0 {
5900                            match bytes[idx] {
5901                                '\u{91}' => depth += 1,
5902                                '\u{92}' => depth -= 1,
5903                                _ => {}
5904                            }
5905                            idx += 1;
5906                        }
5907                        continue;
5908                    }
5909                    // c:1851-1853 — `*ptr != '=' && *ptr != '+'` —
5910                    // C scan stops on either literal `=` / `+` OR the
5911                    // Equals marker (`\u{8d}`) the lexer emits for
5912                    // unquoted `=`. Without the marker check, the
5913                    // ENVSTRING split scans past the `=` (since it's
5914                    // already tokenised) and the whole `name=value`
5915                    // ends up in one ecstr.
5916                    if ch == '=' || ch == '+' || ch == '\u{8d}' /* Equals */ {
5917                        break;
5918                    }
5919                    idx += 1;
5920                }
5921                let is_inc = idx < bytes.len() && bytes[idx] == '+';
5922                // c:1856-1858 — `if (*ptr == '+') { *ptr++ = '\0';
5923                // ecadd(WCB_ASSIGN(SCALAR, INC, 0)); } else WCB_NEW`
5924                // C nulls the `+` AT THAT POSITION then advances ptr.
5925                // `name` is bytes BEFORE the `+`, NOT including it.
5926                let name_end = idx;
5927                if is_inc {
5928                    idx += 1;
5929                }
5930                let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
5931                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, flag, 0));
5932                // c:1860 — `if (*ptr == '=') { *ptr = '\0'; str = ptr + 1; }
5933                //          else equalsplit(tokstr, &str);`
5934                let name: String = bytes[..name_end].iter().collect();
5935                let str_off = if idx < bytes.len()
5936                    && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
5937                {
5938                    idx + 1
5939                } else {
5940                    idx
5941                };
5942                let value: String = bytes[str_off..].iter().collect();
5943                // c:1866-1877 — scan value for `=(`/`<(`/`>(` (proc
5944                // subst); if found, bump cmplx (suppresses Z_SIMPLE).
5945                let vbytes: Vec<char> = value.chars().collect();
5946                for (i, ch) in vbytes.iter().enumerate() {
5947                    if i + 1 < vbytes.len() && vbytes[i + 1] == '\u{88}' /* Inpar */ {
5948                        if *ch == '\u{8d}' /* Equals */
5949                            || *ch == '\u{94}' /* Inang */
5950                            || *ch == '\u{96}' /* OutangProc */
5951                        {
5952                            *cmplx = 1;
5953                            break;
5954                        }
5955                    }
5956                }
5957                ecstr(&name);
5958                ecstr(&value);
5959                isnull = false;
5960                assignments = true;
5961            }
5962            ENVARRAY => {
5963                // c:1883-1908 — array assignment `name=( ... )` in the
5964                // pre-cmd loop (no `typeset`-style typeset_force flag).
5965                // c:1884 — `int oldcmdpos = incmdpos, n, type2;`
5966                let oldcmdpos = incmdpos();
5967                let n: u32;
5968                let type2: wordcode;
5969                let p: usize;
5970
5971                // c:1886-1889 — `array setting is cmplx because it can
5972                //   contain process substitutions`
5973                // c:1890 — `*cmplx = c = 1;`
5974                *cmplx = 1;
5975                // c:1891 — `p = ecadd(0);`
5976                p = ecadd(0);
5977                // c:1892 — `incmdpos = 0;`
5978                set_incmdpos(false);
5979                // c:1893-1897 — `+=` detection: if tokstr ends in `+`,
5980                // strip the `+` and use WC_ASSIGN_INC; else WC_ASSIGN_NEW.
5981                let raw = tokstr().unwrap_or_default();
5982                let (name, t2) = if raw.ends_with('+') {
5983                    (raw[..raw.len() - 1].to_string(), WC_ASSIGN_INC)
5984                } else {
5985                    (raw.clone(), WC_ASSIGN_NEW)
5986                };
5987                type2 = t2;
5988                // c:1898 — `ecstr(tokstr);` (tokstr now NUL-trimmed)
5989                ecstr(&name);
5990                // c:1899 — `cmdpush(CS_ARRAY);`
5991                cmdpush(CS_ARRAY as u8);
5992                // c:1900 — `zshlex();`
5993                zshlex();
5994                // c:1901 — `n = par_nl_wordlist();`
5995                n = par_nl_wordlist_wordcode();
5996                // c:1902 — `ecbuf[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);`
5997                ECBUF.with_borrow_mut(|b| {
5998                    b[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);
5999                });
6000                // c:1903 — `cmdpop();`
6001                cmdpop();
6002                // c:1904-1905 — `if (tok != OUTPAR) YYERROR(oecused);`
6003                if tok() != OUTPAR_TOK {
6004                    crate::ported::utils::zerr("par_simple: expected `)' after array assignment");
6005                    return 0;
6006                }
6007                // c:1906 — `incmdpos = oldcmdpos;`
6008                set_incmdpos(oldcmdpos);
6009                // c:1907 — `isnull = 0;`
6010                isnull = false;
6011                // c:1908 — `assignments = 1;`
6012                assignments = true;
6013            }
6014            t if IS_REDIROP(t) => {
6015                // c:1900-1904 — `*cmplx = c = 1; nr += par_redir(&r,
6016                // NULL); continue;`. The wordcode-emitting redir is
6017                // distinct from the AST par_redir — it INSERTS
6018                // WCB_REDIR + fd + ecstrcode(name) at offset `r`
6019                // via ecispace, shifting any later words down.
6020                *cmplx = 1;
6021                let added = par_redir_wordcode(&mut r);
6022                if added == 0 {
6023                    break;
6024                }
6025                nr += added;
6026                continue;
6027            }
6028            _ => break,
6029        }
6030        zshlex(); // c:1907 `zshlex();`
6031    }
6032
6033    // c:1920-1921 — `if (tok == AMPER || tok == AMPERBANG) YYERROR;`
6034    if tok() == AMPER || tok() == AMPERBANG {
6035        crate::ported::utils::zerr("par_simple: unexpected &");
6036        return 0;
6037    }
6038
6039    // c:1923 — `p = ecadd(WCB_SIMPLE(0));`
6040    let mut p = ecadd(WCB_SIMPLE(0));
6041
6042    // c:1924-2105 — main words loop. is_typeset tracks whether the
6043    // outer command was `typeset`/`export`/etc. so the final
6044    // placeholder gets WCB_TYPESET instead of WCB_SIMPLE.
6045    let mut is_typeset = false;
6046    let mut postassigns: u32 = 0;
6047    let mut ppost: usize = 0;
6048    loop {
6049        match tok() {
6050            STRING_LEX | TYPESET => {
6051                // c:1926 — `int redir_var = 0;`
6052                let mut redir_var = false;
6053                // c:1928-1929 — `*cmplx = 1; incmdpos = 0;`
6054                *cmplx = 1;
6055                set_incmdpos(false);
6056                // c:1931-1932 — TYPESET → intypeset = is_typeset = 1.
6057                if tok() == TYPESET {
6058                    set_intypeset(true);
6059                    is_typeset = true;
6060                }
6061                let s = tokstr().unwrap_or_default();
6062                // c:1934-1974 — `{var}>file` brace-FD detection.
6063                // `if (!isset(IGNOREBRACES) && *tokstr == Inbrace)`
6064                let bytes = s.as_bytes();
6065                let first_is_inbrace = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f)
6066                    || (bytes.len() >= 1 && bytes[0] == b'{');
6067                if !isset(IGNOREBRACES) && first_is_inbrace {
6068                    // c:1937-1938 — `char *eptr = tokstr + strlen(tokstr) - 1;`
6069                    //                `char *ptr = eptr;`
6070                    // C tests `*eptr == Outbrace` (0x90 marker or `}`) AND
6071                    // there's content between `{` and `}` (`ptr > tokstr + 1`).
6072                    let last_two_outbrace = bytes.len() >= 2
6073                        && (bytes.ends_with(&[0xc2, 0x90]) || bytes.last() == Some(&b'}'));
6074                    let opener_len = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f {
6075                        2
6076                    } else {
6077                        1
6078                    };
6079                    let closer_len = if bytes.len() >= 2 && bytes.ends_with(&[0xc2, 0x90]) {
6080                        2
6081                    } else if bytes.last() == Some(&b'}') {
6082                        1
6083                    } else {
6084                        0
6085                    };
6086                    if last_two_outbrace && bytes.len() > opener_len + closer_len {
6087                        // c:1944 — `if (itype_end(tokstr+1, IIDENT, 0) >= ptr)`
6088                        // Inner content is the identifier between `{` and `}`.
6089                        let inner_start = opener_len;
6090                        let inner_end = bytes.len() - closer_len;
6091                        let inner = &s[inner_start..inner_end];
6092                        if !inner.is_empty()
6093                            && crate::ported::utils::isident(inner)
6094                        {
6095                            // c:1946-1948 — `char *idstring = dupstrpfx(...);`
6096                            //                `redir_var = 1; zshlex();`
6097                            let idstring = inner.to_string();
6098                            redir_var = true;
6099                            zshlex();
6100                            // c:1953-1958 — `if (IS_REDIROP(tok) && tokfd == -1)
6101                            //   { *cmplx = c = 1; nrediradd = par_redir(&r, id);
6102                            //     p += nrediradd; sr += nrediradd; }`
6103                            if IS_REDIROP(tok()) && tokfd() == -1 {
6104                                *cmplx = 1;
6105                                let nrediradd = par_redir_wordcode_inner(&mut r, Some(&idstring));
6106                                p += nrediradd as usize;
6107                                sr += nrediradd;
6108                            } else if postassigns > 0 {
6109                                // c:1959-1966 — postassigns path: emit
6110                                // WCB_ASSIGN(SCALAR, INC, 0) + name + ""
6111                                postassigns += 1;
6112                                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6113                                ecstr(&s);
6114                                ecstr("");
6115                            } else {
6116                                // c:1968-1972 — `else { ecstr(toksave); argc++; }`
6117                                ecstr(&s);
6118                                argc += 1;
6119                            }
6120                        }
6121                    }
6122                }
6123                if !redir_var {
6124                    // c:1977-1996 — normal (non-redir-var) STRING/TYPESET.
6125                    if postassigns > 0 {
6126                        // c:1979-1989 — typeset with bare-name arg → INC
6127                        postassigns += 1;
6128                        ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6129                        ecstr(&s);
6130                        ecstr("");
6131                    } else {
6132                        ecstr(&s);
6133                        argc += 1;
6134                    }
6135                    zshlex();
6136                }
6137                isnull = false;
6138            }
6139            ENVSTRING => {
6140                // c:2005-2026 — mid-cmd ENVSTRING (under intypeset
6141                // context). Emits WCB_ASSIGN(SCALAR, NEW, 0) then
6142                // ecstr(name) + ecstr(value), tracking the first
6143                // postassign offset in `ppost` (which the trailing
6144                // WCB_TYPESET header points to).
6145                if postassigns == 0 {
6146                    ppost = ecadd(0);
6147                }
6148                postassigns += 1;
6149                let raw = tokstr().unwrap_or_default();
6150                let bytes: Vec<char> = raw.chars().collect();
6151                let mut idx = 0usize;
6152                while idx < bytes.len() {
6153                    let ch = bytes[idx];
6154                    if ch == '\u{91}' /* Inbrack */ {
6155                        let mut depth = 1;
6156                        idx += 1;
6157                        while idx < bytes.len() && depth > 0 {
6158                            match bytes[idx] {
6159                                '\u{91}' => depth += 1,
6160                                '\u{92}' => depth -= 1,
6161                                _ => {}
6162                            }
6163                            idx += 1;
6164                        }
6165                        continue;
6166                    }
6167                    if ch == '=' || ch == '+' || ch == '\u{8d}' /* Equals */ {
6168                        break;
6169                    }
6170                    idx += 1;
6171                }
6172                let name: String = bytes[..idx].iter().collect();
6173                let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}') {
6174                    idx + 1
6175                } else {
6176                    idx
6177                };
6178                let value: String = bytes[str_off..].iter().collect();
6179                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_NEW, 0));
6180                ecstr(&name);
6181                ecstr(&value);
6182                isnull = false;
6183                zshlex();
6184            }
6185            ENVARRAY => {
6186                // c:2027-2050 — mid-cmd ENVARRAY (typeset N=(…) form).
6187                // C tracks postassigns + ppost the same as ENVSTRING,
6188                // but the inner emit is WCB_ASSIGN(ARRAY, NEW, n)
6189                // with `n` patched in after par_nl_wordlist consumes
6190                // the elements. C also toggles intypeset=0 around the
6191                // wordlist so the lexer doesn't try to re-emit
6192                // assignments inside the array.
6193                *cmplx = 1;
6194                if postassigns == 0 {
6195                    ppost = ecadd(0);
6196                }
6197                postassigns += 1;
6198                let parr = ecadd(0);
6199                let raw = tokstr().unwrap_or_default();
6200                let is_inc = raw.ends_with('+');
6201                let name = if is_inc { &raw[..raw.len() - 1] } else { raw.as_str() };
6202                let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6203                ecstr(name);
6204                cmdpush(CS_ARRAY as u8);
6205                set_intypeset(false);
6206                zshlex();
6207                // c:2044 — `n = par_nl_wordlist();` (parse.c:2379-2391).
6208                // SEPER + NEWLIN both allowed between elements.
6209                let mut nelem = 0u32;
6210                loop {
6211                    let t = tok();
6212                    if t != STRING_LEX && t != SEPER && t != NEWLIN {
6213                        break;
6214                    }
6215                    if t == STRING_LEX {
6216                        ecstr(&tokstr().unwrap_or_default());
6217                        nelem += 1;
6218                    }
6219                    zshlex();
6220                }
6221                ECBUF.with_borrow_mut(|b| {
6222                    if parr < b.len() {
6223                        b[parr] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, nelem);
6224                    }
6225                });
6226                cmdpop();
6227                set_intypeset(true);
6228                if tok() != OUTPAR_TOK {
6229                    crate::ported::utils::zerr("expected `)' after array assignment");
6230                    return 0;
6231                }
6232                isnull = false;
6233                zshlex();
6234            }
6235            t if IS_REDIROP(t) => {
6236                // c:1999-2010 — `nrediradd = par_redir(&r, NULL);
6237                // p += nrediradd; if (ppost) ppost += nrediradd;
6238                // sr += nrediradd;`
6239                *cmplx = 1;
6240                let added = par_redir_wordcode(&mut r);
6241                if added == 0 {
6242                    break;
6243                }
6244                p += added as usize;
6245                if ppost != 0 {
6246                    ppost += added as usize;
6247                }
6248                sr += added;
6249            }
6250            INOUTPAR => {
6251                // c:2051 — `} else if (tok == INOUTPAR) {`
6252                // c:2052 — `zlong oldlineno = lineno;`
6253                let oldlineno = lineno();
6254                // c:2053 — `int onp, so, oecssub = ecssub;`
6255                let oecssub = ECSSUB.get();
6256                // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1) YYERROR;`
6257                if !isset(MULTIFUNCDEF) && argc > 1 {
6258                    crate::ported::utils::zerr("par_simple: too many function names for funcdef");
6259                    return 0;
6260                }
6261                // c:2058-2060 — `if (assignments || postassigns) YYERROR;`
6262                if assignments || postassigns > 0 {
6263                    crate::ported::utils::zerr("par_simple: assignments before funcdef");
6264                    return 0;
6265                }
6266                // c:2061-2068 — hasalias check + zwarn — skipped (no
6267                // alias tracking on the wordcode path).
6268
6269                // c:2070 — `*cmplx = c;`
6270                *cmplx = c_saved;
6271                // c:2071 — `lineno = 0;`
6272                set_lineno(0);
6273                // c:2072 — `incmdpos = 1;`
6274                set_incmdpos(true);
6275                // c:2073 — `cmdpush(CS_FUNCDEF);`
6276                cmdpush(CS_FUNCDEF as u8);
6277                // c:2074 — `zshlex();`
6278                zshlex();
6279                // c:2075-2076 — `while (tok == SEPER) zshlex();`
6280                while tok() == SEPER {
6281                    zshlex();
6282                }
6283                // c:2079 — `ecispace(p + 1, 1); ecbuf[p+1] = argc;
6284                // ecadd(0)*4`. Insert the argc word at p+1, then
6285                // append 4 placeholder words.
6286                ecispace(p + 1, 1);
6287                ECBUF.with_borrow_mut(|b| {
6288                    if p + 1 < b.len() {
6289                        b[p + 1] = argc;
6290                    }
6291                });
6292                // c:2080-2083 — four metadata placeholder slots.
6293                ecadd(0);
6294                ecadd(0);
6295                ecadd(0);
6296                ecadd(0);
6297
6298                // c:2085 — `ecnfunc++;`
6299                ECNFUNC.set(ECNFUNC.get() + 1);
6300                // c:2086 — `ecssub = so = ecsoffs;`
6301                let so = ECSOFFS.get();
6302                ECSSUB.set(so);
6303                // c:2087 — `onp = ecnpats;`
6304                let onp = ECNPATS.with(|cc| cc.get());
6305                // c:2088 — `ecnpats = 0;`
6306                ECNPATS.with(|cc| cc.set(0));
6307
6308                // c:2091 — `int c = 0;` — INNER cmplx for the body
6309                // parse. Local to each branch; C's enclosing *cmplx
6310                // is NOT modified by the body.
6311                let mut body_c: i32 = 0;
6312                // c:2090 — `if (tok == INBRACE) {`
6313                if tok() == INBRACE_TOK {
6314                    // c:2093 — `zshlex();`
6315                    zshlex();
6316                    // c:2094 — `par_list(&c);`
6317                    par_list_wordcode(&mut body_c);
6318                    // c:2095-2101 — `if (tok != OUTBRACE) { cmdpop();
6319                    //   lineno += oldlineno; ecnpats = onp;
6320                    //   ecssub = oecssub; YYERROR; }`
6321                    if tok() != OUTBRACE_TOK {
6322                        cmdpop();
6323                        set_lineno(lineno() + oldlineno);
6324                        ECNPATS.with(|cc| cc.set(onp));
6325                        ECSSUB.set(oecssub);
6326                        crate::ported::utils::zerr("par_simple: funcdef expected `}`");
6327                        return 0;
6328                    }
6329                    // c:2102-2105 — `if (argc == 0) incmdpos = 0;`
6330                    if argc == 0 {
6331                        set_incmdpos(false);
6332                    }
6333                    // c:2106 — `zshlex();`
6334                    zshlex();
6335                } else {
6336                    // c:2107-2132 — short-body funcdef form: `f() cmd`
6337                    // or `() cmd`. Wraps single par_cmd result in a
6338                    // synthetic WC_LIST / WC_SUBLIST /
6339                    // WC_PIPE(WC_PIPE_END, 0) header trio.
6340                    let ll = ecadd(0);
6341                    let sl = ecadd(0);
6342                    ecadd(WCB_PIPE(WC_PIPE_END, 0));
6343                    let ok = par_cmd_wordcode(&mut body_c, if argc == 0 { 1 } else { 0 });
6344                    if !ok {
6345                        cmdpop();
6346                        crate::ported::utils::zerr("par_simple: funcdef short-body: missing command");
6347                        return 0;
6348                    }
6349                    if argc == 0 {
6350                        // c:2118-2127 — anonymous funcdef may take args
6351                        // after the body; first one already read.
6352                        set_incmdpos(false);
6353                    }
6354                    // c:2130-2131 — inner sublist/list use inner cmplx.
6355                    let used = ECUSED.get() as usize;
6356                    set_sublist_code(
6357                        sl,
6358                        WC_SUBLIST_END as i32,
6359                        0,
6360                        (used.saturating_sub(1 + sl)) as i32,
6361                        body_c != 0,
6362                    );
6363                    set_list_code(ll, Z_SYNC | Z_END, body_c != 0);
6364                }
6365                let _ = body_c;
6366                // c:2133 — `cmdpop();`
6367                cmdpop();
6368
6369                // c:2135 — `ecadd(WCB_END());`
6370                ecadd(WCB_END());
6371                // c:2136-2139 — fill 4 metadata slots at p+argc+2..5
6372                let p_argc = (p + (argc as usize) + 2) as usize;
6373                let cur_so = ECSOFFS.get();
6374                let np_now = ECNPATS.with(|cc| cc.get());
6375                ECBUF.with_borrow_mut(|b| {
6376                    b[p_argc] = (so - oecssub) as wordcode;
6377                    b[p_argc + 1] = (cur_so - so) as wordcode;
6378                    b[p_argc + 2] = np_now as wordcode;
6379                    b[p_argc + 3] = 0;
6380                });
6381
6382                // c:2141-2143 — `ecnpats = onp; ecssub = oecssub; ecnfunc++;`
6383                ECNPATS.with(|cc| cc.set(onp));
6384                ECSSUB.set(oecssub);
6385                ECNFUNC.set(ECNFUNC.get() + 1);
6386
6387                // c:2145 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
6388                let used = ECUSED.get() as usize;
6389                let header_off = used.saturating_sub(1 + p) as wordcode;
6390                ECBUF.with_borrow_mut(|b| {
6391                    b[p] = WCB_FUNCDEF(header_off);
6392                });
6393
6394                // c:2147-2172 — `if (argc == 0) { /* anonymous fn args */ }`
6395                if argc == 0 {
6396                    // c:2150 — `int parg = ecadd(0);`
6397                    let mut parg = ecadd(0);
6398                    // c:2151 — `ecadd(0);`
6399                    ecadd(0);
6400                    // c:2152 — `while (tok == STRING || IS_REDIROP(tok)) {`
6401                    while tok() == STRING_LEX || IS_REDIROP(tok()) {
6402                        if tok() == STRING_LEX {
6403                            // c:2155-2157
6404                            ecstr(&tokstr().unwrap_or_default());
6405                            argc += 1;
6406                            zshlex();
6407                        } else {
6408                            // c:2159-2165 — *cmplx=c=1; nrediradd=par_redir;
6409                            // p += nrediradd; ppost += nrediradd if ppost;
6410                            // sr += nrediradd; parg += nrediradd;
6411                            *cmplx = 1;
6412                            let added = par_redir_wordcode(&mut r);
6413                            if added == 0 {
6414                                break;
6415                            }
6416                            p += added as usize;
6417                            if ppost != 0 {
6418                                ppost += added as usize;
6419                            }
6420                            sr += added;
6421                            parg += added as usize;
6422                        }
6423                    }
6424                    // c:2168-2169 — `if (argc > 0) *cmplx = 1;`
6425                    if argc > 0 {
6426                        *cmplx = 1;
6427                    }
6428                    // c:2170 — `ecbuf[parg] = ecused - parg;`
6429                    // c:2171 — `ecbuf[parg+1] = argc;`
6430                    let used2 = ECUSED.get() as usize;
6431                    ECBUF.with_borrow_mut(|b| {
6432                        b[parg] = (used2 - parg) as wordcode;
6433                        b[parg + 1] = argc;
6434                    });
6435                }
6436                // c:2173 — `lineno += oldlineno;`
6437                set_lineno(lineno() + oldlineno);
6438
6439                // c:2175-2177 — `isfunc = 1; isnull = 0; break;`
6440                isfunc = true;
6441                isnull = false;
6442                break;
6443            }
6444            _ => break,
6445        }
6446    }
6447
6448    // c:2173-2176 — `if (isnull && !(sr + nr)) { ecused = oecused;
6449    // return 0; }` — undo everything including pre-cmd assignments
6450    // if no actual command word emerged.
6451    if isnull && sr + nr == 0 && !assignments {
6452        ECUSED.set(p as i32);
6453        return 0;
6454    }
6455    // c:2186-2187 — `incmdpos = 1; intypeset = 0;` — reset before
6456    // the placeholder patch so the next-token lex doesn't carry
6457    // typeset/incond state.
6458    set_incmdpos(true);
6459    set_intypeset(false);
6460    // c:2189-2199 — `if (!isfunc) { if (is_typeset) ecbuf[p] =
6461    // WCB_TYPESET(argc); else ecbuf[p] = WCB_SIMPLE(argc); }`.
6462    // When isfunc=true the INOUTPAR branch already wrote WCB_FUNCDEF
6463    // at p; do NOT clobber it.
6464    if !isfunc {
6465        let header = if is_typeset {
6466            if postassigns > 0 {
6467                ECBUF.with_borrow_mut(|b| {
6468                    if ppost < b.len() {
6469                        b[ppost] = postassigns;
6470                    }
6471                });
6472            } else {
6473                ecadd(0);
6474            }
6475            WCB_TYPESET(argc)
6476        } else {
6477            WCB_SIMPLE(argc)
6478        };
6479        ECBUF.with_borrow_mut(|b| {
6480            if p < b.len() {
6481                b[p] = header;
6482            }
6483        });
6484    }
6485    1 + sr
6486}
6487
6488/// Wrapper for callers without a cmplx accumulator. Allocates a
6489/// local cmplx and ignores it — only used by legacy dispatch sites.
6490pub fn par_simple_wordcode() {
6491    let mut cmplx: i32 = 0;
6492    par_simple_wordcode_impl(&mut cmplx, 0);
6493}
6494
6495/// Port of `par_redir(int *rp, char *idstring)` from
6496/// `Src/parse.c:2229-2345` — the wordcode-emitting variant that
6497/// pushes WCB_REDIR + fd + ecstrcode(name) into ECBUF. Distinct
6498/// from the AST `par_redir` (parse.rs:3771) which builds a
6499/// ZshRedir struct for the AST executor pipeline.
6500///
6501/// Returns the number of wordcodes added (3 for the basic shape,
6502/// 4 with idstring, 5 for HEREDOC[DASH] which carries the
6503/// terminator strings inline). Returns 0 on parse error.
6504fn par_redir_wordcode(rp: &mut usize) -> i32 {
6505    par_redir_wordcode_inner(rp, None)
6506}
6507
6508/// par_redir variant taking the `idstring` parameter for the
6509/// `{var}>file` shape. C signature `par_redir(int *rp, char *idstring)`
6510/// passes NULL when there's no var-id. Rust uses Option<&str>.
6511fn par_redir_wordcode_inner(rp: &mut usize, idstring: Option<&str>) -> i32 {
6512    // c:2231 — `int r = *rp, type, fd1, oldcmdpos, oldnc, ncodes;`
6513    let r: usize = *rp;
6514    let mut r#type: i32;
6515    let fd1: i32;
6516    let oldcmdpos: bool;
6517    let oldnc: i32;
6518    let mut ncodes: usize;
6519    // c:2232 — `char *name;`
6520    let name: String;
6521
6522    // c:2234 — `oldcmdpos = incmdpos;`
6523    oldcmdpos = incmdpos();
6524    // c:2235 — `incmdpos = 0;`
6525    set_incmdpos(false);
6526    // c:2236 — `oldnc = nocorrect;`
6527    oldnc = nocorrect();
6528    // c:2237-2238 — `if (tok != INANG && tok != INOUTANG) nocorrect = 1;`
6529    if tok() != INANG_TOK && tok() != INOUTANG {
6530        set_nocorrect(1);
6531    }
6532    // c:2239 — `type = redirtab[tok - OUTANG];`
6533    // Map current redirop token to redirtab index — matches order of
6534    // C `enum { OUTANG, OUTANGBANG, DOUTANG, DOUTANGBANG, INANG,
6535    // INOUTANG, DINANG, DINANGDASH, INANGAMP, OUTANGAMP, AMPOUTANG,
6536    // OUTANGAMPBANG, DOUTANGAMP, DOUTANGAMPBANG, TRINANG }`.
6537    r#type = match tok() {
6538        OUTANG_TOK => REDIR_WRITE,
6539        OUTANGBANG => REDIR_WRITENOW,
6540        DOUTANG => REDIR_APP,
6541        DOUTANGBANG => REDIR_APPNOW,
6542        INANG_TOK => REDIR_READ,
6543        INOUTANG => REDIR_READWRITE,
6544        DINANG => REDIR_HEREDOC,
6545        DINANGDASH => REDIR_HEREDOCDASH,
6546        INANGAMP => REDIR_MERGEIN,
6547        OUTANGAMP => REDIR_MERGEOUT,
6548        AMPOUTANG => REDIR_ERRWRITE,
6549        OUTANGAMPBANG => REDIR_ERRWRITENOW,
6550        DOUTANGAMP => REDIR_ERRAPP,
6551        DOUTANGAMPBANG => REDIR_ERRAPPNOW,
6552        TRINANG => REDIR_HERESTR,
6553        _ => {
6554            set_incmdpos(oldcmdpos);
6555            set_nocorrect(oldnc);
6556            return 0;
6557        }
6558    };
6559    // c:2240 — `fd1 = tokfd;`
6560    fd1 = tokfd();
6561    // c:2241 — `zshlex();`
6562    zshlex();
6563    // c:2242-2243 — `if (tok != STRING && tok != ENVSTRING) YYERROR(ecused);`
6564    if tok() != STRING_LEX && tok() != ENVSTRING {
6565        set_incmdpos(oldcmdpos);
6566        set_nocorrect(oldnc);
6567        crate::ported::utils::zerr("expected word after redirection");
6568        return 0;
6569    }
6570    // c:2244 — `incmdpos = oldcmdpos;`
6571    set_incmdpos(oldcmdpos);
6572    // c:2245 — `nocorrect = oldnc;`
6573    set_nocorrect(oldnc);
6574
6575    // c:2248-2249 — `if (fd1 == -1) fd1 = IS_READFD(type) ? 0 : 1;`
6576    let fd1 = if fd1 == -1 {
6577        if is_readfd(r#type) {
6578            0
6579        } else {
6580            1
6581        }
6582    } else {
6583        fd1
6584    };
6585
6586    // c:2251 — `name = tokstr;`
6587    name = tokstr().unwrap_or_default();
6588
6589    // c:2253-2321 — switch on type:
6590    match r#type {
6591        // c:2254-2300 — REDIR_HEREDOC / REDIR_HEREDOCDASH
6592        x if x == REDIR_HEREDOC || x == REDIR_HEREDOCDASH => {
6593            // c:2257 — `struct heredocs **hd;`
6594            // c:2258 — `int htype = type;`
6595            let htype = r#type;
6596            // c:2260-2261 — `if (strchr(tokstr, '\n')) YYERROR(ecused);`
6597            if name.contains('\n') {
6598                crate::ported::utils::zerr("here-doc terminator contains newline");
6599                return 0;
6600            }
6601            // c:2263-2273 — `ncodes = 5; if (idstring) { type |= MASK; ncodes = 6; }`
6602            if idstring.is_some() {
6603                r#type |= REDIR_VARID_MASK;
6604                ncodes = 6;
6605            } else {
6606                ncodes = 5;
6607            }
6608            // c:2277 — `ecispace(r, ncodes);`
6609            ecispace(r, ncodes);
6610            // c:2278 — `*rp = r + ncodes;`
6611            *rp = r + ncodes;
6612            // c:2279 — `ecbuf[r] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK);`
6613            ECBUF.with_borrow_mut(|b| {
6614                b[r] = WCB_REDIR((r#type | REDIR_FROM_HEREDOC_MASK) as wordcode);
6615                // c:2280 — `ecbuf[r + 1] = fd1;`
6616                b[r + 1] = fd1 as wordcode;
6617            });
6618            // c:2282-2286 — r+2..4 are filled later by setheredoc.
6619            // c:2287-2288 — `if (idstring) ecbuf[r + 5] = ecstrcode(idstring);`
6620            if let Some(id) = idstring {
6621                let coded = ecstrcode(id);
6622                ECBUF.with_borrow_mut(|b| {
6623                    b[r + 5] = coded;
6624                });
6625            }
6626            // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
6627            //                 *hd = zalloc(sizeof(struct heredocs));
6628            //                 (*hd)->next = NULL;
6629            //                 (*hd)->type = htype;
6630            //                 (*hd)->pc = r;
6631            //                 (*hd)->str = tokstr;`
6632            HDOCS.with_borrow_mut(|head| {
6633                let mut cur = head;
6634                while cur.is_some() {
6635                    cur = &mut cur.as_mut().unwrap().next;                        // c:2290
6636                }
6637                *cur = Some(Box::new(crate::ported::zsh_h::heredocs {             // c:2292-2296
6638                    next: None,
6639                    typ: htype,
6640                    pc: r as i32,
6641                    str: Some(name.clone()),
6642                }));
6643            });
6644            // c:2298 — `zshlex();`
6645            zshlex();
6646            // c:2299 — `return ncodes;`
6647            return ncodes as i32;
6648        }
6649        // c:2301-2308 — REDIR_WRITE / REDIR_WRITENOW
6650        x if x == REDIR_WRITE || x == REDIR_WRITENOW => {
6651            // c:2303-2305 — `if (tokstr[0] == OutangProc && tokstr[1] == Inpar)
6652            //                  type = REDIR_OUTPIPE;`
6653            let nb: Vec<char> = name.chars().collect();
6654            if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
6655                r#type = REDIR_OUTPIPE;
6656            } else if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
6657                // c:2306-2307 — `else if (tokstr[0] == Inang && tokstr[1] == Inpar) YYERROR;`
6658                crate::ported::utils::zerr("par_redir: < before >");
6659                return 0;
6660            }
6661        }
6662        // c:2309-2315 — REDIR_READ
6663        x if x == REDIR_READ => {
6664            let nb: Vec<char> = name.chars().collect();
6665            if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
6666                r#type = REDIR_INPIPE;
6667            } else if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
6668                crate::ported::utils::zerr("par_redir: > before <");
6669                return 0;
6670            }
6671        }
6672        // c:2316-2320 — REDIR_READWRITE
6673        x if x == REDIR_READWRITE => {
6674            let nb: Vec<char> = name.chars().collect();
6675            if nb.len() >= 2
6676                && (nb[0] == '\u{94}' || nb[0] == '\u{96}')
6677                && nb[1] == '\u{88}'
6678            {
6679                r#type = if nb[0] == '\u{94}' {
6680                    REDIR_INPIPE
6681                } else {
6682                    REDIR_OUTPIPE
6683                };
6684            }
6685        }
6686        _ => {}
6687    }
6688    // c:2322 — `zshlex();`
6689    zshlex();
6690
6691    // c:2326-2333 — `if (idstring) { type |= MASK; ncodes = 4; } else ncodes = 3;`
6692    if idstring.is_some() {
6693        r#type |= REDIR_VARID_MASK;
6694        ncodes = 4;
6695    } else {
6696        ncodes = 3;
6697    }
6698
6699    // c:2334 — `ecispace(r, ncodes);`
6700    ecispace(r, ncodes);
6701    // c:2335 — `*rp = r + ncodes;`
6702    *rp = r + ncodes;
6703    // c:2336 — `ecbuf[r] = WCB_REDIR(type);`
6704    let coded_name = ecstrcode(&name);
6705    ECBUF.with_borrow_mut(|b| {
6706        b[r] = WCB_REDIR(r#type as wordcode);
6707        // c:2337 — `ecbuf[r + 1] = fd1;`
6708        b[r + 1] = fd1 as wordcode;
6709        // c:2338 — `ecbuf[r + 2] = ecstrcode(name);`
6710        b[r + 2] = coded_name;
6711    });
6712    // c:2339-2340 — `if (idstring) ecbuf[r + 3] = ecstrcode(idstring);`
6713    if let Some(id) = idstring {
6714        let coded_id = ecstrcode(id);
6715        ECBUF.with_borrow_mut(|b| {
6716            b[r + 3] = coded_id;
6717        });
6718    }
6719    // c:2342 — `return ncodes;`
6720    ncodes as i32
6721}
6722
6723/// Port of `IS_READFD(type)` macro from `Src/zsh.h` — determines
6724/// default fd (0 for read-ish, 1 for write-ish) when none specified.
6725fn is_readfd(t: i32) -> bool {
6726    matches!(
6727        t,
6728        x if x == REDIR_READ
6729            || x == REDIR_READWRITE
6730            || x == REDIR_MERGEIN
6731            || x == REDIR_HEREDOC
6732            || x == REDIR_HEREDOCDASH
6733            || x == REDIR_HERESTR
6734    )
6735}
6736
6737/// Parse a program (list of lists)
6738/// Parse a complete program (top-level entry). Calls
6739/// parse_program_until with no end-token sentinel. Direct port of
6740/// zsh/Src/parse.c:614-720 `parse_event` / `par_list` /
6741/// `par_event` flow. C distinguishes COND_EVENT (single command
6742/// for here-string) from full event parse; zshrs's parse_program
6743/// is the full-event entry.
6744fn parse_program() -> ZshProgram {
6745    parse_program_until(None)
6746}
6747
6748/// Parse a program until we hit an end token
6749/// Parse a program until one of `end_tokens` is seen (or EOF).
6750/// Drives par_list in a loop. C equivalent: the body of par_event
6751/// (parse.c:635-695) iterating par_list against the lexer.
6752fn parse_program_until(end_tokens: Option<&[lextok]>) -> ZshProgram {
6753    let mut lists = Vec::new();
6754
6755    loop {
6756        // Skip separators
6757        while tok() == SEPER || tok() == NEWLIN {
6758            zshlex();
6759        }
6760
6761        if tok() == ENDINPUT || tok() == LEXERR {
6762            break;
6763        }
6764
6765        // Check for end tokens
6766        if let Some(end_toks) = end_tokens {
6767            if end_toks.contains(&tok()) {
6768                break;
6769            }
6770        }
6771
6772        // Also stop at these tokens when not explicitly looking for them
6773        // Note: Else/Elif/Then are NOT here - they're handled by par_if
6774        // to allow nested if statements inside case arms, loops, etc.
6775        match tok() {
6776            OUTBRACE_TOK | DSEMI | SEMIAMP | SEMIBAR | DONE | FI | ESAC | ZEND => break,
6777            _ => {}
6778        }
6779
6780        match par_list() {
6781            Some(list) => {
6782                let detected = simple_name_with_inoutpar(&list);
6783                lists.push(list);
6784                // Synthesize a FuncDef for the `name() { body }` shape
6785                // at parse time so body_source is captured while the
6786                // lexer still has the input. The lexer port emits
6787                // `name(` as a single Word ending in `<Inpar><Outpar>`,
6788                // so the Simple list is followed by an Inbrace once
6789                // separators are skipped. For `name() cmd args` the
6790                // body has already been swallowed into the same
6791                // Simple's words tail — synthesize directly from there.
6792                if let Some((names, body_argv)) = detected {
6793                    if !body_argv.is_empty() {
6794                        // One-line body already in the Simple. Build
6795                        // a Simple from body_argv as the function body.
6796                        lists.pop();
6797                        let body_simple = ZshCommand::Simple(ZshSimple {
6798                            assigns: Vec::new(),
6799                            words: body_argv,
6800                            redirs: Vec::new(),
6801                        });
6802                        let body_list = ZshList {
6803                            sublist: ZshSublist {
6804                                pipe: ZshPipe {
6805                                    cmd: body_simple,
6806                                    next: None,
6807                                    lineno: lineno(),
6808                                    merge_stderr: false,
6809                                },
6810                                next: None,
6811                                flags: SublistFlags::default(),
6812                            },
6813                            flags: ListFlags::default(),
6814                        };
6815                        let funcdef = ZshCommand::FuncDef(ZshFuncDef {
6816                            names,
6817                            body: Box::new(ZshProgram {
6818                                lists: vec![body_list],
6819                            }),
6820                            tracing: false,
6821                            auto_call_args: None,
6822                            body_source: None,
6823                        });
6824                        let synthetic = ZshList {
6825                            sublist: ZshSublist {
6826                                pipe: ZshPipe {
6827                                    cmd: funcdef,
6828                                    next: None,
6829                                    lineno: lineno(),
6830                                    merge_stderr: false,
6831                                },
6832                                next: None,
6833                                flags: SublistFlags::default(),
6834                            },
6835                            flags: ListFlags::default(),
6836                        };
6837                        lists.push(synthetic);
6838                        continue;
6839                    }
6840                    // Else: words.len() == 1 (only the trailing `name()`
6841                    // word), brace body follows. `names` may carry
6842                    // multiple identifiers from the `fna fnb fnc()`
6843                    // shorthand — all share the same brace body per
6844                    // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
6845                    // Skip separators on the real lexer; safe because
6846                    // parse_program's next iteration would also skip them.
6847                    while tok() == SEPER || tok() == NEWLIN {
6848                        zshlex();
6849                    }
6850                    if tok() == INBRACE_TOK {
6851                        // Capture body_start BEFORE the lexer
6852                        // advances past the first body token. The
6853                        // outer zshlex() consumed `{`; lexer.pos
6854                        // is now right after `{`. The next
6855                        // `zshlex()` would advance past `echo`,
6856                        // making body_start land mid-body and
6857                        // lose the first word — `typeset -f f`
6858                        // printed `a; echo b` instead of
6859                        // `echo a; echo b` for `f() { echo a;
6860                        // echo b }`.
6861                        let body_start = pos();
6862                        zshlex();
6863                        let body = parse_program();
6864                        let body_end = if tok() == OUTBRACE_TOK {
6865                            pos().saturating_sub(1)
6866                        } else {
6867                            pos()
6868                        };
6869                        let body_source = input_slice(body_start, body_end)
6870                            .map(|s| s.trim().to_string())
6871                            .filter(|s| !s.is_empty());
6872                        if tok() == OUTBRACE_TOK {
6873                            zshlex();
6874                        }
6875                        // Replace the Simple list with a FuncDef list.
6876                        lists.pop();
6877                        let funcdef = ZshCommand::FuncDef(ZshFuncDef {
6878                            names,
6879                            body: Box::new(body),
6880                            tracing: false,
6881                            auto_call_args: None,
6882                            body_source,
6883                        });
6884                        let synthetic = ZshList {
6885                            sublist: ZshSublist {
6886                                pipe: ZshPipe {
6887                                    cmd: funcdef,
6888                                    next: None,
6889                                    lineno: lineno(),
6890                                    merge_stderr: false,
6891                                },
6892                                next: None,
6893                                flags: SublistFlags::default(),
6894                            },
6895                            flags: ListFlags::default(),
6896                        };
6897                        lists.push(synthetic);
6898                    } else if !matches!(tok(), ENDINPUT | OUTBRACE_TOK | SEPER | NEWLIN) {
6899                        // No-brace one-line body: `foo() echo hello`.
6900                        // Parse a single command for the body.
6901                        let body_cmd = par_cmd();
6902                        if let Some(cmd) = body_cmd {
6903                            let body_list = ZshList {
6904                                sublist: ZshSublist {
6905                                    pipe: ZshPipe {
6906                                        cmd,
6907                                        next: None,
6908                                        lineno: lineno(),
6909                                        merge_stderr: false,
6910                                    },
6911                                    next: None,
6912                                    flags: SublistFlags::default(),
6913                                },
6914                                flags: ListFlags::default(),
6915                            };
6916                            lists.pop();
6917                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
6918                                names: names.clone(),
6919                                body: Box::new(ZshProgram {
6920                                    lists: vec![body_list],
6921                                }),
6922                                tracing: false,
6923                                auto_call_args: None,
6924                                body_source: None,
6925                            });
6926                            let synthetic = ZshList {
6927                                sublist: ZshSublist {
6928                                    pipe: ZshPipe {
6929                                        cmd: funcdef,
6930                                        next: None,
6931                                        lineno: lineno(),
6932                                        merge_stderr: false,
6933                                    },
6934                                    next: None,
6935                                    flags: SublistFlags::default(),
6936                                },
6937                                flags: ListFlags::default(),
6938                            };
6939                            lists.push(synthetic);
6940                        }
6941                    }
6942                }
6943            }
6944            None => break,
6945        }
6946    }
6947
6948    ZshProgram { lists }
6949}
6950
6951/// Parse an assignment
6952/// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
6953/// Sub-routine of par_simple. The C source handles assignments
6954/// inline in par_simple via the ENVSTRING/ENVARRAY token paths
6955/// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
6956/// helper for clarity.
6957fn parse_assign() -> Option<ZshAssign> {
6958    // Helper: locate the Equals-marker that delimits NAME from
6959    // VALUE in an assignment-shaped tokstr. The lexer META-encodes
6960    // EVERY `=` (including those inside `${var%%=foo}` strip
6961    // patterns or `[idx]=...` subscripts), so a naive
6962    // `tokstr.find(Equals)` would split at the first inner `=`
6963    // and break the whole assignment. Walk the string skipping
6964    // brace and bracket depth so the assignment's `=` (the one
6965    // after the last `]` of the LHS subscript / or after the
6966    // bare name) is the one we land on.
6967    fn find_assign_equals(s: &str) -> Option<usize> {
6968        let target = crate::ported::zsh_h::Equals;
6969        let mut brace = 0i32;
6970        let mut bracket = 0i32;
6971        let mut paren = 0i32;
6972        for (i, c) in s.char_indices() {
6973            match c {
6974                    '{' | '\u{8f}' /* Inbrace */ => brace += 1,
6975                    '}' | '\u{90}' /* Outbrace */ => {
6976                        if brace > 0 {
6977                            brace -= 1;
6978                        }
6979                    }
6980                    '[' | '\u{91}' /* Inbrack */ => bracket += 1,
6981                    ']' | '\u{92}' /* Outbrack */ => {
6982                        if bracket > 0 {
6983                            bracket -= 1;
6984                        }
6985                    }
6986                    '(' | '\u{88}' /* Inpar */ => paren += 1,
6987                    ')' | '\u{8a}' /* Outpar */ => {
6988                        if paren > 0 {
6989                            paren -= 1;
6990                        }
6991                    }
6992                    _ if c == target && brace == 0 && bracket == 0 && paren == 0 => {
6993                        return Some(i);
6994                    }
6995                    _ => {}
6996                }
6997        }
6998        None
6999    }
7000
7001    let _ts_tokstr = tokstr()?;
7002    let tokstr = _ts_tokstr.as_str();
7003
7004    // Parse name=value or name+=value.
7005    let (name, value_str, append) = if tok() == ENVARRAY {
7006        let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
7007            (stripped, true)
7008        } else {
7009            (tokstr, false)
7010        };
7011        (name.to_string(), String::new(), append)
7012    } else if let Some(pos) = find_assign_equals(tokstr) {
7013        let name_part = &tokstr[..pos];
7014        let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7015            (stripped, true)
7016        } else {
7017            (name_part, false)
7018        };
7019        (
7020            name.to_string(),
7021            tokstr[pos + Equals.len_utf8()..].to_string(),
7022            append,
7023        )
7024    } else if let Some(pos) = tokstr.find('=') {
7025        // Fallback to literal '=' for compatibility
7026        let name_part = &tokstr[..pos];
7027        let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7028            (stripped, true)
7029        } else {
7030            (name_part, false)
7031        };
7032        (name.to_string(), tokstr[pos + 1..].to_string(), append)
7033    } else {
7034        return None;
7035    };
7036
7037    let value = if tok() == ENVARRAY {
7038        // Array assignment: name=(...)
7039        let mut elements = Vec::new();
7040        zshlex(); // skip past token
7041
7042        let mut arr_iters = 0;
7043        const MAX_ARRAY_ELEMENTS: usize = 10_000;
7044        while matches!(tok(), STRING_LEX | SEPER | NEWLIN) {
7045            arr_iters += 1;
7046            if arr_iters > MAX_ARRAY_ELEMENTS {
7047                crate::ported::utils::zerr("array assignment exceeded maximum elements");
7048                break;
7049            }
7050            if tok() == STRING_LEX {
7051                let _ts_s = crate::ported::lex::tokstr();
7052                if let Some(s) = _ts_s.as_deref() {
7053                    elements.push(s.to_string());
7054                }
7055            }
7056            zshlex();
7057        }
7058
7059        // The closing Outpar is consumed here. The outer par_simple
7060        // loop will then `zshlex()` past whatever follows (typically
7061        // a separator or the next word) — calling zshlex twice in
7062        // tandem (here AND in par_simple) over-advances and merges
7063        // a following `name() { … }` funcdef into the same Simple.
7064        // We only consume Outpar; let the caller handle the rest.
7065        // Without this guard `g=(o1); f() { :; }` parsed as one
7066        // Simple with assigns=[g] and words=["f()"] (one token).
7067        if tok() == OUTPAR_TOK {
7068            // Note: do NOT zshlex() here. par_simple's `lexer
7069            // .zshlex()` after `parse_assign` returns advances past
7070            // the Outpar onto the next significant token.
7071            //
7072            // Force `incmdpos=true` so the next zshlex() recognizes
7073            // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
7074            // The lexer flips incmdpos to false on bare Outpar (which
7075            // is correct for subshell-close context), but for an
7076            // array-assignment close more assigns/words may follow.
7077            set_incmdpos(true);
7078        }
7079
7080        ZshAssignValue::Array(elements)
7081    } else {
7082        ZshAssignValue::Scalar(value_str)
7083    };
7084
7085    Some(ZshAssign {
7086        name,
7087        value,
7088        append,
7089    })
7090}
7091
7092/// AST `par_redir` variant accepting an idstring for the
7093/// `{var}>file` brace-FD shape. C signature
7094/// `par_redir(int *rp, char *idstring)` (parse.c:2229). The
7095/// idstring is stored in the resulting ZshRedir.varid for the
7096/// executor to bind the named variable to the chosen fd.
7097fn par_redir_with_id(idstring: Option<&str>) -> Option<ZshRedir> {
7098    let varid: Option<String> = idstring.map(|s| s.to_string());
7099    let rtype = match tok() {
7100        OUTANG_TOK => REDIR_WRITE,
7101        OUTANGBANG => REDIR_WRITENOW,
7102        DOUTANG => REDIR_APP,
7103        DOUTANGBANG => REDIR_APPNOW,
7104        INANG_TOK => REDIR_READ,
7105        INOUTANG => REDIR_READWRITE,
7106        DINANG => REDIR_HEREDOC,
7107        DINANGDASH => REDIR_HEREDOCDASH,
7108        TRINANG => REDIR_HERESTR,
7109        INANGAMP => REDIR_MERGEIN,
7110        OUTANGAMP => REDIR_MERGEOUT,
7111        AMPOUTANG => REDIR_ERRWRITE,
7112        OUTANGAMPBANG => REDIR_ERRWRITENOW,
7113        DOUTANGAMP => REDIR_ERRAPP,
7114        DOUTANGAMPBANG => REDIR_ERRAPPNOW,
7115        _ => return None,
7116    };
7117
7118    let fd = if tokfd() >= 0 {
7119        tokfd()
7120    } else if matches!(
7121        rtype,
7122        REDIR_READ
7123            | REDIR_READWRITE
7124            | REDIR_MERGEIN
7125            | REDIR_HEREDOC
7126            | REDIR_HEREDOCDASH
7127            | REDIR_HERESTR
7128    ) {
7129        0
7130    } else {
7131        1
7132    };
7133
7134    // c:2234-2245 — save/restore incmdpos and nocorrect around the
7135    // zshlex that consumes the redir target word:
7136    //   oldcmdpos = incmdpos; incmdpos = 0;
7137    //   oldnc = nocorrect;
7138    //   if (tok != INANG && tok != INOUTANG) nocorrect = 1;
7139    //   ... zshlex; check tok; ...
7140    //   incmdpos = oldcmdpos; nocorrect = oldnc;
7141    // Without this, a redir target lexes in the parent's incmdpos
7142    // (re-promoting `{` / reswords) AND with parent nocorrect (so
7143    // spelling-correction wrongly runs inside `> $(cmd)` etc.).
7144    let oldcmdpos = incmdpos();
7145    set_incmdpos(false);
7146    let oldnc = nocorrect();
7147    let cur = tok();
7148    if cur != INANG_TOK && cur != INOUTANG {
7149        set_nocorrect(1);
7150    }
7151    zshlex();
7152
7153    let name = match tok() {
7154        STRING_LEX | ENVSTRING => {
7155            let n = tokstr().unwrap_or_default();
7156            // Restore BEFORE the next zshlex so trailing tokens lex
7157            // in the original parent context (mirrors C ordering at
7158            // parse.c:2244-2245 — restore right after the word is
7159            // confirmed, before any downstream advance).
7160            set_incmdpos(oldcmdpos);
7161            set_nocorrect(oldnc);
7162            zshlex();
7163            n
7164        }
7165        _ => {
7166            set_incmdpos(oldcmdpos);
7167            set_nocorrect(oldnc);
7168            crate::ported::utils::zerr("expected word after redirection");
7169            return None;
7170        }
7171    };
7172
7173    // Heredoc terminator capture. C parse.c:2254-2317 par_redir builds
7174    // a `struct heredocs` entry here for REDIR_HEREDOC[DASH]. zshrs
7175    // pushes onto HDOCS (canonical C linked list, c:2290-2296) AND
7176    // onto LEX_HEREDOCS (Rust-only AST-glue Vec carrying parsed-out
7177    // terminator/strip_tabs/quoted metadata for downstream AST
7178    // consumers). Quoted terminators (`<<'EOF'` / `<<"EOF"` / `<<\EOF`)
7179    // disable expansion in the body — Snull `\u{9d}` marks single-quote,
7180    // Dnull `\u{9e}` marks double-quote, Bnull `\u{9f}` marks
7181    // backslash-escaped chars.
7182    let heredoc_idx = if matches!(rtype, REDIR_HEREDOC | REDIR_HEREDOCDASH) {
7183        let strip_tabs = rtype == REDIR_HEREDOCDASH;
7184        let quoted = name.contains('\u{9d}')
7185            || name.contains('\u{9e}')
7186            || name.contains('\u{9f}')
7187            || name.starts_with('\'')
7188            || name.starts_with('"');
7189        let term = name
7190            .chars()
7191            .filter(|c| {
7192                *c != '\'' && *c != '"' && *c != '\u{9d}' && *c != '\u{9e}' && *c != '\u{9f}'
7193            })
7194            .collect::<String>();
7195        // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
7196        //                 *hd = zalloc(sizeof(struct heredocs));
7197        //                 (*hd)->next = NULL;
7198        //                 (*hd)->type = htype;
7199        //                 (*hd)->pc = r;
7200        //                 (*hd)->str = tokstr;`
7201        // AST path has no wordcode pc to patch; use -1 sentinel so the
7202        // inline NEWLIN walk in `zshlex()` skips the setheredoc call.
7203        HDOCS.with_borrow_mut(|head| {
7204            let mut cur = head;
7205            while cur.is_some() {
7206                cur = &mut cur.as_mut().unwrap().next;                            // c:2290
7207            }
7208            *cur = Some(Box::new(crate::ported::zsh_h::heredocs {                 // c:2292-2296
7209                next: None,
7210                typ: rtype,
7211                pc: -1,
7212                str: Some(name.clone()),
7213            }));
7214        });
7215        // zshrs-only: push parallel AST-glue entry onto LEX_HEREDOCS.
7216        let idx = crate::ported::lex::LEX_HEREDOCS.with_borrow_mut(|v| {
7217            v.push(crate::ported::lex::HereDoc {
7218                terminator: term,
7219                strip_tabs,
7220                content: String::new(),
7221                quoted,
7222                processed: false,
7223            });
7224            v.len() - 1
7225        });
7226        Some(idx)
7227    } else {
7228        None
7229    };
7230
7231    Some(ZshRedir {
7232        rtype,
7233        fd,
7234        name,
7235        heredoc: None,
7236        varid,
7237        heredoc_idx,
7238    })
7239}
7240
7241/// Parse C-style for loop: for (( init; cond; step ))
7242/// Parse the c-style `for ((init; cond; incr)) do BODY done`.
7243/// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
7244/// Recognized when the token after FOR is DINPAR (the `((`
7245/// detected by gettok via dbparens setup).
7246fn parse_for_cstyle() -> Option<ZshCommand> {
7247    // We're at (( (Dinpar None) - the opening ((
7248    // Lexer returns:
7249    //   Dinpar None     - opening ((
7250    //   Dinpar "init"   - init expression, semicolon consumed
7251    //   Dinpar "cond"   - cond expression, semicolon consumed
7252    //   Doutpar "step"  - step expression, closing )) consumed
7253
7254    zshlex(); // Get init: Dinpar "i=0"
7255
7256    if tok() != DINPAR {
7257        crate::ported::utils::zerr("expected init expression in for ((");
7258        return None;
7259    }
7260    let init = tokstr().unwrap_or_default();
7261
7262    zshlex(); // Get cond: Dinpar "i<10"
7263
7264    if tok() != DINPAR {
7265        crate::ported::utils::zerr("expected condition in for ((");
7266        return None;
7267    }
7268    let cond = tokstr().unwrap_or_default();
7269
7270    zshlex(); // Get step: Doutpar "i++"
7271
7272    if tok() != DOUTPAR {
7273        crate::ported::utils::zerr("expected )) in for");
7274        return None;
7275    }
7276    let step = tokstr().unwrap_or_default();
7277
7278    zshlex(); // Move past ))
7279
7280    skip_separators();
7281    let body = parse_loop_body(false, false)?;
7282
7283    Some(ZshCommand::For(ZshFor {
7284        var: String::new(),
7285        list: ForList::CStyle { init, cond, step },
7286        body: Box::new(body),
7287        is_select: false,
7288    }))
7289}
7290
7291/// Parse select loop (same syntax as for)
7292/// Parse `select NAME in WORDS; do BODY; done`. Same shape as
7293/// `for NAME in WORDS; do ...` but with menu-prompt semantics in
7294/// the executor. C equivalent: the SELECT case in par_for at
7295/// parse.c:1087-1207 (selects share parser flow with foreach).
7296fn parse_select() -> Option<ZshCommand> {
7297    // `select` shares par_for's grammar (var, words, body) but the
7298    // compile path is different (interactive prompt loop).
7299    match par_for()? {
7300        ZshCommand::For(mut f) => {
7301            f.is_select = true;
7302            Some(ZshCommand::For(f))
7303        }
7304        other => Some(other),
7305    }
7306}
7307
7308/// Parse loop body (do...done, {...}, or shortloop)
7309/// Parse the `do BODY done` body of a for/while/until/select/
7310/// repeat loop. Direct equivalent of zsh's parse.c handling
7311/// inside the loop builders — they all consume DOLOOP, parse a
7312/// list until DONE, and return the list. The `foreach_style`
7313/// flag signals foreach (where short-form `for NAME in WORDS;
7314/// CMD` may skip do/done) vs c-style (which always requires
7315/// do/done).
7316///
7317/// `is_repeat` widens the SHORTLOOPS gate so `SHORTREPEAT` also
7318/// unlocks the short form for `repeat N CMD` (per c:1600
7319/// `unset(SHORTLOOPS) && unset(SHORTREPEAT)`).
7320fn parse_loop_body(foreach_style: bool, is_repeat: bool) -> Option<ZshProgram> {
7321    // c:1180-1194 — body dispatch order per par_for:
7322    //   `do ... done` (DOLOOP) — primary form.
7323    //   `{ ... }`   (INBRACE) — alternate.
7324    //   csh/CSHJUNKIELOOPS — terminator is `end`.
7325    //   else if (unset(SHORTLOOPS)) — YYERROR.
7326    //   else — short form (single command).
7327    if tok() == DOLOOP {
7328        zshlex();
7329        let body = parse_program();
7330        if tok() == DONE {
7331            zshlex();
7332        }
7333        Some(body)
7334    } else if tok() == INBRACE_TOK {
7335        zshlex();
7336        let body = parse_program();
7337        if tok() == OUTBRACE_TOK {
7338            zshlex();
7339        }
7340        Some(body)
7341    } else if foreach_style || isset(CSHJUNKIELOOPS) {
7342        // c:1184 / 1546 / 1595 — `else if (csh || isset(CSHJUNKIELOOPS))`.
7343        let body = parse_program();
7344        if tok() == ZEND {
7345            zshlex();
7346        }
7347        Some(body)
7348    } else {
7349        // c:1190 / 1474 / 1551 / 1600 — short-form gate. C bails
7350        // with YYERROR when `unset(SHORTLOOPS) && (!is_repeat ||
7351        // unset(SHORTREPEAT))`. zshrs's option machinery isn't
7352        // initialised at parse-test time (no `init_main` →
7353        // `install_emulation_defaults`), so a strict port here
7354        // body. parse_init seeds SHORTLOOPS=on mirroring C
7355        // `install_emulation_defaults`, so this fires only when a
7356        // script explicitly disabled the option.
7357        if unset(SHORTLOOPS) && (!is_repeat || unset(SHORTREPEAT)) {
7358            crate::ported::utils::zerr("parse error: short loop form requires SHORTLOOPS option");
7359            return None;
7360        }
7361        // c:1192-1193 — short form: single command body.
7362        par_list().map(|list| ZshProgram { lists: vec![list] })
7363    }
7364}
7365
7366/// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
7367/// function named `_zshrs_anon_N`, invokes it with the args, and the
7368/// body runs with positional params set. Implemented as the desugared
7369/// pair (FuncDef + Simple call) so the compile path doesn't need new
7370/// machinery.
7371/// Parse an anonymous function definition `() { BODY }` followed
7372/// by call args. zsh treats `() { echo hi; } a b c` as defining
7373/// and immediately calling an anon fn with args a/b/c. C
7374/// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
7375/// triggers an anon-funcdef path.
7376fn parse_anon_funcdef() -> Option<ZshCommand> {
7377    zshlex(); // skip ()
7378    skip_separators();
7379    // No `{` after `()` → bare empty subshell shape `()`. Fall back
7380    // to a Subsh with an empty program so the status is 0 (matches
7381    // zsh's `()` no-op behavior).
7382    if tok() != INBRACE_TOK {
7383        return Some(ZshCommand::Subsh(Box::new(ZshProgram {
7384            lists: Vec::new(),
7385        })));
7386    }
7387    zshlex(); // skip {
7388    let body = parse_program();
7389    if tok() == OUTBRACE_TOK {
7390        zshlex();
7391    }
7392    // Collect any trailing args until a separator. zsh's anon-fn form
7393    // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
7394    let mut args = Vec::new();
7395    while tok() == STRING_LEX {
7396        if let Some(s) = tokstr() {
7397            args.push(s);
7398        }
7399        zshlex();
7400    }
7401
7402    // Generate a unique name. Module-level static would be cleaner but
7403    // a thread-local atomic is enough — anonymous functions are
7404    // ephemeral and the name isn't user-visible.
7405    static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
7406    let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
7407    let name = format!("_zshrs_anon_{}", n);
7408    Some(ZshCommand::FuncDef(ZshFuncDef {
7409        names: vec![name],
7410        body: Box::new(body),
7411        tracing: false,
7412        auto_call_args: Some(args),
7413        body_source: None,
7414    }))
7415}
7416
7417/// Parse {...} cursh
7418/// Parse a current-shell brace block `{ BODY }`. C source
7419/// par_cmd at parse.c:958-1085 handles Inbrace → emit WC_CURSH
7420/// and recurses into the list. zshrs's parse_cursh extracts that
7421/// arm into a dedicated method.
7422fn parse_cursh() -> Option<ZshCommand> {
7423    zshlex(); // skip {
7424    let prog = parse_program();
7425
7426    // Check for { ... } always { ... }. Direct port of zsh's
7427    // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
7428    // forces (parse.c:1632, 1637): after consuming the closing
7429    // Outbrace AND after matching the `always` keyword, the parser
7430    // explicitly resets command position so the next `{` lexes as
7431    // Inbrace. Without these resets the lexer's String-clears-cmdpos
7432    // rule (lex.rs:976-983) leaves the second `{` in word position,
7433    // turning `always { ... }` into a Simple `{` `echo` … and the
7434    // try/always pairing is silently lost.
7435    if tok() == OUTBRACE_TOK {
7436        set_incmdpos(true); // parse.c:1632 incmdpos = !zsh_construct
7437        zshlex();
7438
7439        // Check for 'always'
7440        if tok() == STRING_LEX {
7441            let s = tokstr();
7442            if s.map(|s| s == "always").unwrap_or(false) {
7443                set_incmdpos(true); // parse.c:1637 incmdpos = 1
7444                zshlex();
7445                skip_separators();
7446
7447                if tok() == INBRACE_TOK {
7448                    zshlex();
7449                    let always = parse_program();
7450                    if tok() == OUTBRACE_TOK {
7451                        zshlex();
7452                    }
7453                    return Some(ZshCommand::Try(ZshTry {
7454                        try_block: Box::new(prog),
7455                        always: Box::new(always),
7456                    }));
7457                }
7458            }
7459        }
7460    }
7461
7462    Some(ZshCommand::Cursh(Box::new(prog)))
7463}
7464
7465/// Parse inline function definition: name() { ... }
7466/// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
7467/// without the `function` keyword). The name has already been
7468/// consumed and pushed by par_simple before this method fires.
7469/// C source: handled inline in par_simple's INOUTPAR-after-name
7470/// arm (parse.c:1836-2228).
7471fn parse_inline_funcdef(name: String) -> Option<ZshCommand> {
7472    // par_simple's STRING loop left `incmdpos = 0`; the funcdef body
7473    // `{ ... }` requires `incmdpos = 1` so the lexer recognises `{`
7474    // as INBRACE_TOK (current-shell block opener) instead of a
7475    // literal `{` STRING. Without this, `myfunc() { echo body }`
7476    // parsed the body as the single STRING `"{"`, then `echo body`
7477    // fell out at top level. Mirrors the C path where par_cmd's
7478    // dispatcher (parse.c:958) is called with `incmdpos = 1` for
7479    // the funcdef body.
7480    set_incmdpos(true);
7481    // Skip ()
7482    if tok() == INOUTPAR {
7483        zshlex();
7484    }
7485
7486    skip_separators();
7487
7488    // Parse body
7489    if tok() == INBRACE_TOK {
7490        // Same body_start-before-zshlex fix as par_funcdef.
7491        let body_start = pos();
7492        zshlex();
7493        let body = parse_program();
7494        let body_end = if tok() == OUTBRACE_TOK {
7495            pos().saturating_sub(1)
7496        } else {
7497            pos()
7498        };
7499        let body_source = input_slice(body_start, body_end)
7500            .map(|s| s.trim().to_string())
7501            .filter(|s| !s.is_empty());
7502        if tok() == OUTBRACE_TOK {
7503            zshlex();
7504        }
7505        Some(ZshCommand::FuncDef(ZshFuncDef {
7506            names: vec![name],
7507            body: Box::new(body),
7508            tracing: false,
7509            auto_call_args: None,
7510            body_source,
7511        }))
7512    } else if unset(SHORTLOOPS) {
7513        // c:1742 — `else if (unset(SHORTLOOPS)) YYERRORV(oecused);` —
7514        // funcdef short body (`name() cmd` without `{...}`) only
7515        // accepted when SHORTLOOPS is set. parse_init seeds
7516        // SHORTLOOPS=on so this fires only when a script
7517        // explicitly disabled the option.
7518        crate::ported::utils::zerr("parse error: short function body form requires SHORTLOOPS option");
7519        None
7520    } else {
7521        match par_cmd() {
7522            Some(cmd) => {
7523                let list = ZshList {
7524                    sublist: ZshSublist {
7525                        pipe: ZshPipe {
7526                            cmd,
7527                            next: None,
7528                            lineno: lineno(),
7529                            merge_stderr: false,
7530                        },
7531                        next: None,
7532                        flags: SublistFlags::default(),
7533                    },
7534                    flags: ListFlags::default(),
7535                };
7536                Some(ZshCommand::FuncDef(ZshFuncDef {
7537                    names: vec![name],
7538                    body: Box::new(ZshProgram { lists: vec![list] }),
7539                    tracing: false,
7540                    auto_call_args: None,
7541                    body_source: None,
7542                }))
7543            }
7544            None => None,
7545        }
7546    }
7547}
7548
/// Parse a `[[ ]]` conditional expression.
///
/// Entry point of the recursive descent (or → and → not → primary):
/// it simply delegates to `parse_cond_or`, the lowest-precedence
/// level, and returns whatever that returns. C counterpart: par_cond_1
/// at parse.c:2434-2475.
fn parse_cond_expr() -> Option<ZshCond> {
    parse_cond_or()
}
7556
7557/// Cond-expression `||` level. C: inside par_cond_1 at
7558/// parse.c:2434-2475 (the `cond_or` ladder).
7559fn parse_cond_or() -> Option<ZshCond> {
7560    let left = parse_cond_and()?;
7561    skip_cond_separators();
7562
7563    if tok() == DBAR {
7564        zshlex();
7565        skip_cond_separators();
7566        parse_cond_or().map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
7567    } else {
7568        Some(left)
7569    }
7570}
7571
7572/// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
7573fn parse_cond_and() -> Option<ZshCond> {
7574    let left = parse_cond_not()?;
7575    skip_cond_separators();
7576
7577    if tok() == DAMPER {
7578        zshlex();
7579        skip_cond_separators();
7580        parse_cond_and().map(|right| ZshCond::And(Box::new(left), Box::new(right)))
7581    } else {
7582        Some(left)
7583    }
7584}
7585
/// `static FuncDump dumps;` from `Src/parse.c:3652` — head of the
/// loaded-`.zwc` linked list. C walks `dumps`/`p->next` directly;
/// the Rust port keeps the entries in a `Mutex<Vec<funcdump>>`
/// instead, so refcount ops can find an entry without raw-pointer
/// compare.
///
/// The list starts out empty.
// NOTE(review): entries are presumably looked up by filename — confirm
// against the code that reads and updates DUMPS.
pub static DUMPS: std::sync::Mutex<Vec<crate::ported::zsh_h::funcdump>> =
    std::sync::Mutex::new(Vec::new());
7592
7593/// Cond-expression `!` negation level. C: handled inside
7594/// par_cond_2 at parse.c:2476-2625 via the Bang token check.
7595fn parse_cond_not() -> Option<ZshCond> {
7596    skip_cond_separators();
7597
7598    // ! can be either BANG_TOK or String "!"
7599    let is_not =
7600        tok() == BANG_TOK || (tok() == STRING_LEX && tokstr().map(|s| s == "!").unwrap_or(false));
7601    if is_not {
7602        zshlex();
7603        let inner = parse_cond_not()?;
7604        return Some(ZshCond::Not(Box::new(inner)));
7605    }
7606
7607    if tok() == INPAR_TOK {
7608        zshlex();
7609        skip_cond_separators();
7610        let inner = parse_cond_expr()?;
7611        skip_cond_separators();
7612        if tok() == OUTPAR_TOK {
7613            zshlex();
7614        }
7615        return Some(inner);
7616    }
7617
7618    parse_cond_primary()
7619}
7620
7621/// Cond-expression primary: unary tests (-f, -d, ...), binary
7622/// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
7623/// sub-expressions. Direct port of par_cond_double / par_cond_triple
7624/// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
7625fn parse_cond_primary() -> Option<ZshCond> {
7626    let s1 = match tok() {
7627        STRING_LEX => {
7628            let s = tokstr().unwrap_or_default();
7629            zshlex();
7630            s
7631        }
7632        _ => return None,
7633    };
7634
7635    skip_cond_separators();
7636
7637    // Check for unary operator. zsh's lexer tokenizes leading `-` as
7638    // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside gettokstr (lex.c:1390-1400
7639    // LX2_DASH — `-` always becomes Dash, untokenized later). Match
7640    // either form here, and use char-count not byte-count since Dash
7641    // is 2 UTF-8 bytes (`\xc2\x9b`).
7642    let s1_chars: Vec<char> = s1.chars().collect();
7643    if s1_chars.len() == 2 && IS_DASH(s1_chars[0]) {
7644        let s2 = match tok() {
7645            STRING_LEX => {
7646                let s = tokstr().unwrap_or_default();
7647                zshlex();
7648                s
7649            }
7650            _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
7651        };
7652        return Some(ZshCond::Unary(s1, s2));
7653    }
7654
7655    // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
7656    //   incond++;  /* parentheses do globbing */
7657    //   do condlex(); while (COND_SEP());
7658    //   incond--;  /* parentheses do grouping */
7659    // The bump makes the lexer treat `(` as a literal character inside
7660    // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning Inpar
7661    // and splitting the regex into multiple tokens.
7662    let op = match tok() {
7663        STRING_LEX => {
7664            let s = tokstr().unwrap_or_default();
7665            set_incond(incond() + 1);
7666            zshlex();
7667            set_incond(incond() - 1);
7668            s
7669        }
7670        INANG_TOK => {
7671            set_incond(incond() + 1);
7672            zshlex();
7673            set_incond(incond() - 1);
7674            "<".to_string()
7675        }
7676        OUTANG_TOK => {
7677            set_incond(incond() + 1);
7678            zshlex();
7679            set_incond(incond() - 1);
7680            ">".to_string()
7681        }
7682        _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
7683    };
7684
7685    skip_cond_separators();
7686
7687    let s2 = match tok() {
7688        STRING_LEX => {
7689            let s = tokstr().unwrap_or_default();
7690            zshlex();
7691            s
7692        }
7693        _ => return Some(ZshCond::Binary(s1, op, String::new())),
7694    };
7695
7696    if op == "=~" {
7697        Some(ZshCond::Regex(s1, s2))
7698    } else {
7699        Some(ZshCond::Binary(s1, op, s2))
7700    }
7701}
7702
7703fn skip_cond_separators() {
7704    while tok() == SEPER && {
7705        let s = tokstr();
7706        s.map(|s| !s.contains(';')).unwrap_or(true)
7707    } {
7708        zshlex();
7709    }
7710}
7711
7712/// Parse (( ... )) arithmetic command
7713/// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
7714/// `par_dinbrack` (despite the name; the function actually handles
7715/// DINPAR `(( ))` blocks too).
7716fn parse_arith() -> Option<ZshCommand> {
7717    let expr = tokstr().unwrap_or_default();
7718    zshlex();
7719    Some(ZshCommand::Arith(expr))
7720}
7721
7722/// Skip separator tokens
7723fn skip_separators() {
7724    while tok() == SEPER || tok() == NEWLIN {
7725        zshlex();
7726    }
7727}
7728
7729// `fdheaderlen` / `fdmagic` / `fdflags` / etc. macros from
7730// `Src/parse.c:3125-3152`. C uses raw pointer arithmetic on a
7731// `Wordcode` (= `u32 *`); the Rust port takes a slice and indexes.
7732
/// Port of `fdheaderlen(f)` macro (`Src/parse.c:3125`) — header
/// length in u32 words (read from prelude word `FD_PRELEN`).
///
/// `f` is the dump buffer viewed as u32 words, starting at the
/// prelude. Panics if `f` has `FD_PRELEN` words or fewer.
#[inline]
pub fn fdheaderlen(f: &[u32]) -> u32 {
    f[FD_PRELEN]
}
7739
/// Port of `fdmagic(f)` macro (`Src/parse.c:3127`) — first prelude
/// word, either `FD_MAGIC` or `FD_OMAGIC`.
///
/// Returns word 0 of `f` unchanged; the caller compares it against
/// the magic constants. Panics if `f` is empty.
#[inline]
pub fn fdmagic(f: &[u32]) -> u32 {
    f[0]
}
7746
/// Port of `fdflags(f)` macro (`Src/parse.c:3131`) — the flags byte
/// of the packed `pre[1]` word.
///
/// The flags occupy the least-significant byte of word 1; the result
/// is that byte widened to `u32`. Panics if `f` has fewer than two
/// words.
#[inline]
pub fn fdflags(f: &[u32]) -> u32 {
    // Little-endian byte 0 is the least-significant byte of the word.
    let [flags, ..] = f[1].to_le_bytes();
    u32::from(flags)
}
7754
/// Port of `fdsetflags(f, v)` macro (`Src/parse.c:3132`) — overwrite
/// the flags byte (least-significant byte) of `pre[1]` with `v`.
///
/// The upper 24 bits of the word are preserved. Panics if `f` has
/// fewer than two words.
#[inline]
pub fn fdsetflags(f: &mut [u32], v: u8) {
    let mut packed = f[1].to_le_bytes();
    packed[0] = v;
    f[1] = u32::from_le_bytes(packed);
}
7761
/// Port of `fdother(f)` macro (`Src/parse.c:3133`) — high 24 bits
/// of `pre[1]`, holds the byte-offset to the opposite-byte-order
/// dump copy.
///
/// The flags byte (least-significant byte) is discarded and the three
/// remaining bytes are returned as a 24-bit value. Panics if `f` has
/// fewer than two words.
#[inline]
pub fn fdother(f: &[u32]) -> u32 {
    let [_flags, lo, mid, hi] = f[1].to_le_bytes();
    u32::from_le_bytes([lo, mid, hi, 0])
}
7769
/// Port of `fdsetother(f, o)` macro (`Src/parse.c:3134`) — store the
/// low 24 bits of `o` in the high 24 bits of `pre[1]`.
///
/// The flags byte (least-significant byte of the word) is preserved;
/// the top byte of `o` is dropped. Panics if `f` has fewer than two
/// words.
#[inline]
pub fn fdsetother(f: &mut [u32], o: u32) {
    let [flags, ..] = f[1].to_le_bytes();
    let [lo, mid, hi, _dropped] = o.to_le_bytes();
    f[1] = u32::from_le_bytes([flags, lo, mid, hi]);
}
7775
/// Port of `fdversion(f)` macro (`Src/parse.c:3140`) — read the
/// `ZSH_VERSION` C-string from `pre[2..]`.
///
/// At most 10 words (40 bytes) starting at word 2 are examined; the
/// string ends at the first NUL byte, or after those 40 bytes when no
/// NUL is present. Bytes are taken from each word in little-endian
/// order and decoded lossily as UTF-8.
///
/// A prelude shorter than two words yields an empty string rather
/// than panicking, so truncated dump files can be rejected by the
/// caller's normal version comparison (same defensive stance as
/// `read_fdhead`).
///
/// NOTE(review): the C macro views the words in native byte order;
/// this matches only on little-endian hosts — confirm if big-endian
/// targets matter.
pub fn fdversion(f: &[u32]) -> String {
    let bytes: Vec<u8> = f
        .get(2..)
        .unwrap_or(&[])
        .iter()
        .take(10)
        .flat_map(|w| w.to_le_bytes())
        .take_while(|&b| b != 0)
        .collect();
    String::from_utf8_lossy(&bytes).into_owned()
}
7787
/// Port of `firstfdhead(f)` macro (`Src/parse.c:3142`) — pointer
/// to the first `struct fdhead` past the prelude.
///
/// The Rust port returns a u32-word offset into the dump buffer
/// instead of a pointer: the first header starts right after the
/// `FD_PRELEN`-word prelude.
#[inline]
pub fn firstfdhead_offset() -> usize {
    FD_PRELEN
}
7794
/// Port of `nextfdhead(f)` macro (`Src/parse.c:3143`) — advance to
/// the next header by reading the current `hlen` slot.
///
/// `cur` is the u32-word offset of the current header in `f`; the
/// result is `cur` plus that header's `hlen` (word 4 of the record,
/// the same slot `read_fdhead` decodes as `hlen`). Panics if
/// `cur + 4` is outside `f`.
#[inline]
pub fn nextfdhead_offset(f: &[u32], cur: usize) -> usize {
    // `.hlen` is field 4 of fdhead.
    let hlen = f[cur + 4] as usize;
    cur + hlen
}
7801
/// Port of `fdhflags(f)` macro (`Src/parse.c:3145`) — low 2 bits
/// of the header's `flags` field (the kshload/zshload marker).
///
/// Counterpart of `fdhtail`, which returns the remaining high bits,
/// and inverse of `fdhbldflags` for the `flags` half.
#[inline]
pub fn fdhflags(h: &fdhead) -> u32 {
    h.flags & 0x3
}
7808
/// Port of `fdhtail(f)` macro (`Src/parse.c:3146`) — high 30 bits
/// of `flags`, byte offset from the name start to its basename.
///
/// Counterpart of `fdhflags`, which returns the low 2 bits, and
/// inverse of `fdhbldflags` for the `tail` half.
#[inline]
pub fn fdhtail(h: &fdhead) -> u32 {
    h.flags >> 2
}
7815
/// Port of `fdhbldflags(f, t)` macro (`Src/parse.c:3147`) — pack
/// `(flags, tail)` into one u32 (low 2 bits = flags, high 30 = tail).
///
/// `flags` is OR-ed in unmasked, so callers must pass a value that
/// fits in 2 bits; the top 2 bits of `tail` are shifted out.
#[inline]
pub fn fdhbldflags(flags: u32, tail: u32) -> u32 {
    let tail_bits = tail << 2;
    tail_bits | flags
}
7822
7823/// Port of `fdname(f)` macro (`Src/parse.c:3152`) — name string
7824/// follows the fdhead record immediately. Reads bytes from the
7825/// dump buffer until NUL.
7826pub fn fdname(buf: &[u32], header_offset: usize) -> String {
7827    let name_word_off = header_offset + FDHEAD_WORDS;
7828    let bytes: Vec<u8> = buf[name_word_off..]
7829        .iter()
7830        .flat_map(|w| w.to_le_bytes().into_iter())
7831        .take_while(|&b| b != 0)
7832        .collect();
7833    String::from_utf8_lossy(&bytes).into_owned()
7834}
7835
7836/// Decode a `fdhead` record at the given u32-word offset in the
7837/// dump buffer. Used by the header-walk loops in `bin_zcompile -t`.
7838pub fn read_fdhead(buf: &[u32], offset: usize) -> Option<fdhead> {
7839    if offset + FDHEAD_WORDS > buf.len() {
7840        return None;
7841    }
7842    Some(fdhead {
7843        start: buf[offset],
7844        len: buf[offset + 1],
7845        npats: buf[offset + 2],
7846        strs: buf[offset + 3],
7847        hlen: buf[offset + 4],
7848        flags: buf[offset + 5],
7849    })
7850}
7851
7852/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. C
7853/// `munmap`s, `zclose`s the fd, and frees the struct. The Rust
7854/// port relies on Drop for the `funcdump` (no mmap held in this
7855/// port — `addr`/`map` are byte-offset placeholders), so the
7856/// equivalent is removing the entry from the dumps list. Called
7857/// by `decrdumpcount` when the refcount hits zero (c:3988) and
7858/// by `closedumps` when shutting down (c:4008).
7859fn freedump_locked(
7860    g: &mut std::sync::MutexGuard<'_, Vec<crate::ported::zsh_h::funcdump>>,
7861    filename: &str,
7862) {
7863    // c:3976
7864    g.retain(|d| d.filename.as_deref() != Some(filename));
7865}
7866
7867// =====================================================================
7868// Remaining `Src/parse.c` ports (this section finishes the file).
7869//
7870// Several of these emit into the C-wordcode buffer (`ECBUF`/etc.) and
7871// are kept for completeness — the live zshrs runtime uses the
7872// `ZshProgram` AST path instead, but `bin_zcompile` (`-c`/`-a` modes)
7873// and any future `.zwc`-emit pipeline both call into these.
7874// =====================================================================
7875
7876/// `ecstr(s)` helper — `ecadd(ecstrcode(s))`. Mirrors the C macro at
7877/// `Src/parse.c:482` used everywhere by the par_* emitters.
7878#[inline]
7879pub fn ecstr(s: &str) {
7880    let code = ecstrcode(s);
7881    ecadd(code);
7882}
7883
/// Port of `condlex` function-pointer global from `Src/parse.c`. C
/// flips this between `zshlex` and `testlex` depending on whether
/// we're inside `[[ ]]` vs `/bin/test` builtin. zshrs has no
/// separate `testlex` yet, so this just defers to `zshlex`.
///
/// Kept as its own function so the cond emitters (`par_cond_top`
/// and friends) read like the C source and have a single place to
/// switch lexers if `testlex` is ever ported.
#[inline]
pub fn condlex() {
    // Advance the lexer by one token.
    zshlex();
}
7892
7893fn copy_ecstr_walk(node: &Option<Box<EccstrNode>>, p: &mut [u8]) {
7894    let mut cur = node.as_ref();
7895    while let Some(n) = cur {
7896        // c:540 — `memcpy(p + s->aoffs, s->str, strlen(s->str) + 1);`
7897        let off = n.aoffs as usize;
7898        let need = off + n.str.len() + 1;
7899        if need <= p.len() {
7900            p[off..off + n.str.len()].copy_from_slice(&n.str);
7901            p[off + n.str.len()] = 0;
7902        }
7903        // c:541 — `copy_ecstr(s->left, p);`
7904        copy_ecstr_walk(&n.left, p);
7905        // c:542 — `s = s->right;`
7906        cur = n.right.as_ref();
7907    }
7908}
7909
7910/// Port of `par_cond(void)` from `Src/parse.c:2409`. Top-level cond
7911/// OR-chain — drives `par_cond_1` and stitches `||`-separated terms
7912/// with `WCB_COND(COND_OR, …)`. This is the missing top of the
7913/// wordcode cond chain: `par_cond_wordcode` (the par_dinbrack port)
7914/// must call into HERE so that `[[ a || b ]]` and friends land
7915/// real WC_COND opcodes in `ecbuf`. Without this, the wordcode
7916/// emitter for `[[ ... ]]` produced zero words and parity dropped
7917/// 148 words on `/etc/zshrc` alone.
7918pub fn par_cond_top() -> i32 {
7919    // c:2411 — `int p = ecused, r;`
7920    let p = ECUSED.with(|c| c.get()) as usize;
7921    let r = par_cond_1();
7922    while COND_SEP() {
7923        condlex();
7924    }
7925    if tok() == DBAR {
7926        // c:2417 — `condlex(); while (COND_SEP()) condlex();`
7927        condlex();
7928        while COND_SEP() {
7929            condlex();
7930        }
7931        // c:2420-2422 — `ecispace(p, 1); par_cond(); ecbuf[p] =
7932        // WCB_COND(COND_OR, ecused-1-p);`
7933        ecispace(p, 1);
7934        par_cond_top();
7935        let ecused = ECUSED.with(|c| c.get()) as usize;
7936        ECBUF.with(|c| {
7937            c.borrow_mut()[p] = WCB_COND(COND_OR as u32, (ecused - 1 - p) as u32);
7938        });
7939        return 1;
7940    }
7941    r
7942}
7943
7944/// Port of `static int check_cond(const char *input, const char *cond)`
7945/// from `Src/parse.c:2459`. True iff `input` is the two-char `-X`
7946/// form whose `X` matches `cond` — used by par_cond_2 to detect
7947/// `-a` / `-o` n-ary chain operators and by build_dump for `-k` /
7948/// `-z`. C: `return !IS_DASH(input[0]) ? 0 : !strcmp(input+1, cond);`.
7949fn check_cond(input: &str, cond: &str) -> bool {
7950    let mut chars = input.chars();
7951    match chars.next() {
7952        Some(c) if IS_DASH(c) => chars.as_str() == cond,
7953        _ => false,
7954    }
7955}
7956
7957#[cfg(test)]
7958mod tests {
7959    use super::*;
7960    use crate::utils::{errflag, ERRFLAG_ERROR};
7961    use std::fs;
7962    use std::path::Path;
7963    use std::sync::atomic::Ordering;
7964    use std::sync::mpsc;
7965    use std::thread;
7966    use std::time::{Duration, Instant};
7967
7968    /// Test helper. Mirrors zsh's `errflag` save/clear/check pattern
7969    /// around a parse — see `Src/init.c:loop` which clears errflag
7970    /// before parse_event() and tests it after. Returns `Err` if the
7971    /// parse set `ERRFLAG_ERROR`; otherwise `Ok(program)`.
7972    fn parse(input: &str) -> Result<ZshProgram, String> {
7973        let saved = errflag.load(Ordering::Relaxed);
7974        errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
7975        crate::ported::parse::parse_init(input);
7976        let prog = crate::ported::parse::parse();
7977        let had_err = (errflag.load(Ordering::Relaxed) & ERRFLAG_ERROR) != 0;
7978        // Restore prior error bits; don't carry our new error into the
7979        // outer test runner.
7980        errflag.store(saved, Ordering::Relaxed);
7981        if had_err {
7982            Err("parse error".to_string())
7983        } else {
7984            Ok(prog)
7985        }
7986    }
7987
7988    #[test]
7989    fn test_simple_command() {
7990        let prog = parse("echo hello world").unwrap();
7991        assert_eq!(prog.lists.len(), 1);
7992        match &prog.lists[0].sublist.pipe.cmd {
7993            ZshCommand::Simple(s) => {
7994                assert_eq!(s.words, vec!["echo", "hello", "world"]);
7995            }
7996            _ => panic!("expected simple command"),
7997        }
7998    }
7999
8000    #[test]
8001    fn test_pipeline() {
8002        let prog = parse("ls | grep foo | wc -l").unwrap();
8003        assert_eq!(prog.lists.len(), 1);
8004
8005        let pipe = &prog.lists[0].sublist.pipe;
8006        assert!(pipe.next.is_some());
8007
8008        let pipe2 = pipe.next.as_ref().unwrap();
8009        assert!(pipe2.next.is_some());
8010    }
8011
8012    #[test]
8013    fn test_and_or() {
8014        let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
8015        let sublist = &prog.lists[0].sublist;
8016
8017        assert!(sublist.next.is_some());
8018        let (op, _) = sublist.next.as_ref().unwrap();
8019        assert_eq!(*op, SublistOp::And);
8020    }
8021
8022    #[test]
8023    fn test_if_then() {
8024        let prog = parse("if test -f foo; then echo yes; fi").unwrap();
8025        match &prog.lists[0].sublist.pipe.cmd {
8026            ZshCommand::If(_) => {}
8027            _ => panic!("expected if command"),
8028        }
8029    }
8030
8031    #[test]
8032    fn test_for_loop() {
8033        let prog = parse("for i in a b c; do echo $i; done").unwrap();
8034        match &prog.lists[0].sublist.pipe.cmd {
8035            ZshCommand::For(f) => {
8036                assert_eq!(f.var, "i");
8037                match &f.list {
8038                    ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
8039                    _ => panic!("expected word list"),
8040                }
8041            }
8042            _ => panic!("expected for command"),
8043        }
8044    }
8045
8046    #[test]
8047    fn test_case() {
8048        let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
8049        match &prog.lists[0].sublist.pipe.cmd {
8050            ZshCommand::Case(c) => {
8051                assert_eq!(c.arms.len(), 2);
8052            }
8053            _ => panic!("expected case command"),
8054        }
8055    }
8056
8057    #[test]
8058    fn test_function() {
8059        // First test just parsing "function foo" to see what happens
8060        let prog = parse("function foo { }").unwrap();
8061        match &prog.lists[0].sublist.pipe.cmd {
8062            ZshCommand::FuncDef(f) => {
8063                assert_eq!(f.names, vec!["foo"]);
8064            }
8065            _ => panic!(
8066                "expected function, got {:?}",
8067                prog.lists[0].sublist.pipe.cmd
8068            ),
8069        }
8070    }
8071
8072    #[test]
8073    fn test_redirection() {
8074        let prog = parse("echo hello > file.txt").unwrap();
8075        match &prog.lists[0].sublist.pipe.cmd {
8076            ZshCommand::Simple(s) => {
8077                assert_eq!(s.redirs.len(), 1);
8078                assert_eq!(s.redirs[0].rtype, REDIR_WRITE);
8079            }
8080            _ => panic!("expected simple command"),
8081        }
8082    }
8083
8084    #[test]
8085    fn test_assignment() {
8086        let prog = parse("FOO=bar echo $FOO").unwrap();
8087        match &prog.lists[0].sublist.pipe.cmd {
8088            ZshCommand::Simple(s) => {
8089                assert_eq!(s.assigns.len(), 1);
8090                assert_eq!(s.assigns[0].name, "FOO");
8091            }
8092            _ => panic!("expected simple command"),
8093        }
8094    }
8095
8096    #[test]
8097    fn test_parse_completion_function() {
8098        let input = r#"_2to3_fixes() {
8099  local -a fixes
8100  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
8101  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
8102}"#;
8103        let result = parse(input);
8104        assert!(
8105            result.is_ok(),
8106            "Failed to parse completion function: {:?}",
8107            result.err()
8108        );
8109        let prog = result.unwrap();
8110        assert!(
8111            !prog.lists.is_empty(),
8112            "Expected at least one list in program"
8113        );
8114    }
8115
8116    #[test]
8117    fn test_parse_array_with_complex_elements() {
8118        let input = r#"arguments=(
8119  '(- * :)'{-h,--help}'[show this help message and exit]'
8120  {-d,--doctests_only}'[fix up doctests only]'
8121  '*:filename:_files'
8122)"#;
8123        let result = parse(input);
8124        assert!(
8125            result.is_ok(),
8126            "Failed to parse array assignment: {:?}",
8127            result.err()
8128        );
8129    }
8130
8131    #[test]
8132    fn test_parse_full_completion_file() {
8133        let input = r##"#compdef 2to3
8134
8135# zsh completions for '2to3'
8136
8137_2to3_fixes() {
8138  local -a fixes
8139  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
8140  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
8141}
8142
8143local -a arguments
8144
8145arguments=(
8146  '(- * :)'{-h,--help}'[show this help message and exit]'
8147  {-d,--doctests_only}'[fix up doctests only]'
8148  {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
8149  {-j,--processes}'[run 2to3 concurrently]:number: '
8150  {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
8151  {-l,--list-fixes}'[list available transformations]'
8152  {-p,--print-function}'[modify the grammar so that print() is a function]'
8153  {-v,--verbose}'[more verbose logging]'
8154  '--no-diffs[do not show diffs of the refactoring]'
8155  {-w,--write}'[write back modified files]'
8156  {-n,--nobackups}'[do not write backups for modified files]'
8157  {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
8158  {-W,--write-unchanged-files}'[also write files even if no changes were required]'
8159  '--add-suffix[append this string to all output filenames]:suffix: '
8160  '*:filename:_files'
8161)
8162
8163_arguments -s -S $arguments
8164"##;
8165        let result = parse(input);
8166        assert!(
8167            result.is_ok(),
8168            "Failed to parse full completion file: {:?}",
8169            result.err()
8170        );
8171        let prog = result.unwrap();
8172        // Should have parsed successfully with at least one statement
8173        assert!(!prog.lists.is_empty(), "Expected at least one list");
8174    }
8175
8176    #[test]
8177    fn test_parse_logs_sh() {
8178        let input = r#"#!/usr/bin/env bash
8179shopt -s globstar
8180
8181if [[ $(uname) == Darwin ]]; then
8182    tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
8183else
8184    if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
8185        tail -f /var/log/**/*.log | lolcat
8186    else
8187        printf "Unsupported...\n" >&2
8188    fi
8189fi
8190"#;
8191        let result = parse(input);
8192        assert!(
8193            result.is_ok(),
8194            "Failed to parse logs.sh: {:?}",
8195            result.err()
8196        );
8197    }
8198
8199    #[test]
8200    fn test_parse_case_with_glob() {
8201        let input = r#"case "$ZPWR_OS_TYPE" in
8202    darwin*)  open_cmd='open'
8203      ;;
8204    cygwin*)  open_cmd='cygstart'
8205      ;;
8206    linux*)
8207        open_cmd='xdg-open'
8208      ;;
8209esac"#;
8210        let result = parse(input);
8211        assert!(
8212            result.is_ok(),
8213            "Failed to parse case with glob: {:?}",
8214            result.err()
8215        );
8216    }
8217
8218    #[test]
8219    fn test_parse_case_with_nested_if() {
8220        // Test case with nested if and glob patterns
8221        let input = r##"function zpwrGetOpenCommand(){
8222    local open_cmd
8223    case "$ZPWR_OS_TYPE" in
8224        darwin*)  open_cmd='open' ;;
8225        cygwin*)  open_cmd='cygstart' ;;
8226        linux*)
8227            if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
8228                open_cmd='nohup xdg-open'
8229            fi
8230            ;;
8231    esac
8232}"##;
8233        let result = parse(input);
8234        assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
8235    }
8236
8237    #[test]
8238    fn test_parse_zpwr_scripts() {
8239        let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
8240        if !scripts_dir.exists() {
8241            eprintln!("Skipping test: scripts directory not found");
8242            return;
8243        }
8244
8245        let mut total = 0;
8246        let mut passed = 0;
8247        let mut failed_files = Vec::new();
8248        let mut timeout_files = Vec::new();
8249
8250        for ext in &["sh", "zsh"] {
8251            let pattern = scripts_dir.join(format!("*.{}", ext));
8252            if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
8253                for entry in entries.flatten() {
8254                    total += 1;
8255                    let file_path = entry.display().to_string();
8256                    let content = match fs::read_to_string(&entry) {
8257                        Ok(c) => c,
8258                        Err(e) => {
8259                            failed_files.push((file_path, format!("read error: {}", e)));
8260                            continue;
8261                        }
8262                    };
8263
8264                    // Parse with timeout
8265                    let content_clone = content.clone();
8266                    let (tx, rx) = mpsc::channel();
8267                    let handle = thread::spawn(move || {
8268                        let result = parse(&content_clone);
8269                        let _ = tx.send(result);
8270                    });
8271
8272                    match rx.recv_timeout(Duration::from_secs(2)) {
8273                        Ok(Ok(_)) => passed += 1,
8274                        Ok(Err(err)) => {
8275                            failed_files.push((file_path, err));
8276                        }
8277                        Err(_) => {
8278                            timeout_files.push(file_path);
8279                            // Thread will be abandoned
8280                        }
8281                    }
8282                }
8283            }
8284        }
8285
8286        eprintln!("\n=== ZPWR Scripts Parse Results ===");
8287        eprintln!("Passed: {}/{}", passed, total);
8288
8289        if !timeout_files.is_empty() {
8290            eprintln!("\nTimeout files (>2s):");
8291            for file in &timeout_files {
8292                eprintln!("  {}", file);
8293            }
8294        }
8295
8296        if !failed_files.is_empty() {
8297            eprintln!("\nFailed files:");
8298            for (file, err) in &failed_files {
8299                eprintln!("  {} - {}", file, err);
8300            }
8301        }
8302
8303        // Allow some failures initially, but track progress
8304        let pass_rate = if total > 0 {
8305            (passed as f64 / total as f64) * 100.0
8306        } else {
8307            0.0
8308        };
8309        eprintln!("Pass rate: {:.1}%", pass_rate);
8310
8311        // Require at least 50% pass rate for now
8312        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
8313    }
8314
8315    #[test]
8316    #[ignore] // Uses threads that can't be killed on timeout; use integration test instead
8317    fn test_parse_zsh_stdlib_functions() {
8318        let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
8319        if !functions_dir.exists() {
8320            eprintln!(
8321                "Skipping test: zsh_functions directory not found at {:?}",
8322                functions_dir
8323            );
8324            return;
8325        }
8326
8327        let mut total = 0;
8328        let mut passed = 0;
8329        let mut failed_files = Vec::new();
8330        let mut timeout_files = Vec::new();
8331
8332        if let Ok(entries) = fs::read_dir(&functions_dir) {
8333            for entry in entries.flatten() {
8334                let path = entry.path();
8335                if !path.is_file() {
8336                    continue;
8337                }
8338
8339                total += 1;
8340                let file_path = path.display().to_string();
8341                let content = match fs::read_to_string(&path) {
8342                    Ok(c) => c,
8343                    Err(e) => {
8344                        failed_files.push((file_path, format!("read error: {}", e)));
8345                        continue;
8346                    }
8347                };
8348
8349                // Parse with timeout
8350                let content_clone = content.clone();
8351                let (tx, rx) = mpsc::channel();
8352                thread::spawn(move || {
8353                    let result = parse(&content_clone);
8354                    let _ = tx.send(result);
8355                });
8356
8357                match rx.recv_timeout(Duration::from_secs(2)) {
8358                    Ok(Ok(_)) => passed += 1,
8359                    Ok(Err(err)) => {
8360                        failed_files.push((file_path, err));
8361                    }
8362                    Err(_) => {
8363                        timeout_files.push(file_path);
8364                    }
8365                }
8366            }
8367        }
8368
8369        eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
8370        eprintln!("Passed: {}/{}", passed, total);
8371
8372        if !timeout_files.is_empty() {
8373            eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
8374            for file in timeout_files.iter().take(10) {
8375                eprintln!("  {}", file);
8376            }
8377            if timeout_files.len() > 10 {
8378                eprintln!("  ... and {} more", timeout_files.len() - 10);
8379            }
8380        }
8381
8382        if !failed_files.is_empty() {
8383            eprintln!("\nFailed files: {}", failed_files.len());
8384            for (file, err) in failed_files.iter().take(20) {
8385                let filename = Path::new(file)
8386                    .file_name()
8387                    .unwrap_or_default()
8388                    .to_string_lossy();
8389                eprintln!("  {} - {}", filename, err);
8390            }
8391            if failed_files.len() > 20 {
8392                eprintln!("  ... and {} more", failed_files.len() - 20);
8393            }
8394        }
8395
8396        let pass_rate = if total > 0 {
8397            (passed as f64 / total as f64) * 100.0
8398        } else {
8399            0.0
8400        };
8401        eprintln!("Pass rate: {:.1}%", pass_rate);
8402
8403        // Require at least 50% pass rate
8404        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
8405    }
8406
8407    /// c:2643 — `get_cond_num` returns 0..=8 for the canonical binary
8408    /// test operators in order `nt ot ef eq ne lt gt le ge`. The
8409    /// index IS the wordcode opcode dispatch key; flipping any entry
8410    /// would silently mis-dispatch `[[ a -eq b ]]` to a different op.
8411    #[test]
8412    fn get_cond_num_canonical_order_matches_dispatch_table() {
8413        assert_eq!(get_cond_num("nt"), 0);
8414        assert_eq!(get_cond_num("ot"), 1);
8415        assert_eq!(get_cond_num("ef"), 2);
8416        assert_eq!(get_cond_num("eq"), 3);
8417        assert_eq!(get_cond_num("ne"), 4);
8418        assert_eq!(get_cond_num("lt"), 5);
8419        assert_eq!(get_cond_num("gt"), 6);
8420        assert_eq!(get_cond_num("le"), 7);
8421        assert_eq!(get_cond_num("ge"), 8);
8422    }
8423
8424    /// c:2643 — unknown operator returns -1 (sentinel for "not in the
8425    /// binary set"). Regression returning 0 silently would alias
8426    /// every unknown op to `-nt`, dispatching to the wrong handler.
8427    #[test]
8428    fn get_cond_num_unknown_operator_returns_minus_one() {
8429        assert_eq!(get_cond_num("xx"),     -1);
8430        assert_eq!(get_cond_num(""),       -1);
8431        assert_eq!(get_cond_num("eqnt"),   -1, "exact-match required");
8432        assert_eq!(get_cond_num("NT"),     -1, "case-sensitive — uppercase rejected");
8433    }
8434
8435    /// c:2628 — `par_cond_double` requires arg `a` to start with `-`
8436    /// AND have at least one more char. Empty string OR single `-`
8437    /// must error (return 1 via zerr). Regression accepting empty
8438    /// would dispatch `[[ "" string ]]` as a unary test.
8439    #[test]
8440    fn par_cond_double_rejects_short_or_non_dash_first_arg() {
8441        // empty
8442        let _ = par_cond_double("", "b");
8443        // not-dash
8444        let _ = par_cond_double("foo", "b");
8445        // bare dash
8446        let _ = par_cond_double("-", "b");
8447        // All three must NOT crash + return 1 (error path).
8448    }
8449
8450    /// c:2647 CONDSTRS table — exhaustive iteration: every entry's
8451    /// index round-trips through get_cond_num. A regression that
8452    /// drops an entry would let `[[ a -ef b ]]` silently mis-dispatch.
8453    #[test]
8454    fn get_cond_num_round_trips_for_every_table_entry() {
8455        for (i, op) in ["nt","ot","ef","eq","ne","lt","gt","le","ge"].iter().enumerate() {
8456            assert_eq!(get_cond_num(op) as usize, i,
8457                "{op} must map to index {i}");
8458        }
8459    }
8460
8461    /// c:2643 — `get_cond_num` is byte-exact: a partial-prefix string
8462    /// must NOT match. `e` (one char) is not `eq`. Catches a
8463    /// regression using `starts_with` instead of equality.
8464    #[test]
8465    fn get_cond_num_partial_prefix_does_not_match() {
8466        assert_eq!(get_cond_num("e"),  -1);
8467        assert_eq!(get_cond_num("eq2"), -1);
8468        assert_eq!(get_cond_num("n"),  -1);
8469    }
8470
/// c:2628 — the leading-dash check in `par_cond_double` goes through
/// `IS_DASH(ac[0])`, so the lexer's Dash sentinel `\u{9b}` (emitted
/// inside `[[ ... ]]`) has to be accepted just like ASCII `-`. If the
/// sentinel form stopped being recognised, every cond expression
/// would break once it had been lexed.
#[test]
fn par_cond_double_accepts_lexed_dash_sentinel() {
    // Same unary operator spelled with the ASCII dash and with the
    // lexed sentinel. Wordcode emission is not inspected here; the
    // only requirement is that both calls return without panicking.
    for unary_op in ["-z", "\u{9b}z"] {
        let _ = par_cond_double(unary_op, "foo");
    }
}
8485
/// c:2643 — operator lookup is case-sensitive: only the lowercase
/// spelling `eq` is recognised, so `[[ a -EQ b ]]` must not resolve
/// to an operator. A case-insensitive lookup would silently accept
/// the uppercase and mixed-case forms below.
#[test]
fn get_cond_num_is_case_sensitive() {
    for wrong_case in ["EQ", "Eq", "eQ"] {
        assert_eq!(
            get_cond_num(wrong_case),
            -1,
            "{wrong_case:?} must not match the lowercase operator"
        );
    }

    // Positive control: the canonical lowercase name still resolves.
    assert_eq!(get_cond_num("eq"), 3);
}
8498
/// `Src/parse.c:2862-2868` — in the inline-string case `ecgetstr`
/// unpacks up to three characters from bits 3-26 of the wordcode
/// word; the C code then does `buf[3] = '\0'; r = dupstring(buf);`.
/// Because `dupstring` relies on `strlen`, the result ends at the
/// FIRST NUL byte: one- and two-character strings lose their
/// NUL-padded tail.
///
/// An earlier Rust port filtered with `retain(|&x| x != 0)`, which
/// removes interior NULs instead of stopping at them (`[a, 0, b]`
/// became "ab" where C gives "a"). The cases exercised:
///   * ('a', 0, 0)     → "a"    (tail dropped)
///   * ('a', 'b', 0)   → "ab"   (tail dropped)
///   * ('a', 'b', 'c') → "abc"  (all three bytes kept)
///   * ('a', 0, 'b')   → "a"    (NOT "ab")
#[test]
fn ecgetstr_inline_string_truncates_at_first_nul_like_c_strlen() {
    use crate::ported::zsh_h::{eprog, estate};

    /// Packs three bytes into one inline-string wordcode word using
    /// the c:2862 layout: bit 0 (tokflag) clear, bit 1 (inline) set,
    /// and the bytes at bit offsets 3, 11 and 19.
    fn inline_word(bytes: [u8; 3]) -> u32 {
        bytes
            .iter()
            .zip([3u32, 11, 19])
            .fold(2u32, |word, (&byte, shift)| word | (u32::from(byte) << shift))
    }

    /// Wraps a single wordcode word in a one-word program with the
    /// program counter at its start and no string table.
    fn one_word_state(word: u32) -> estate {
        let program = eprog {
            flags: 0,
            len: 1,
            npats: 0,
            nref: 0,
            pats: Vec::new(),
            prog: vec![word],
            strs: None,
            shf: None,
            dump: None,
        };
        estate {
            prog: Box::new(program),
            pc: 0,
            strs: None,
            strs_offset: 0,
        }
    }

    // (packed bytes, expected decoded string, failure message)
    let cases: [([u8; 3], &str, &str); 4] = [
        (
            [b'a', 0, 0],
            "a",
            "c:2869 strlen truncates 1-char inline at the NUL tail",
        ),
        (
            [b'a', b'b', 0],
            "ab",
            "c:2869 strlen truncates 2-char inline at the NUL tail",
        ),
        (
            [b'a', b'b', b'c'],
            "abc",
            "c:2869 full 3-byte inline preserved",
        ),
        (
            // Interior NUL: the trailing 'b' must not leak through.
            [b'a', 0, b'b'],
            "a",
            "c:2869 strlen STOPS at first NUL; must not splice 'b' through",
        ),
    ];

    for (bytes, expected, why) in cases {
        let mut st = one_word_state(inline_word(bytes));
        assert_eq!(ecgetstr(&mut st, 0, None), expected, "{why}");
    }
}
8562
/// Pin: `init_parse_status` must restore all seven lexer/parser
/// flags exercised here to their defaults, following
/// `Src/parse.c:500-502`. The `inrepeat_ = 0` reset at c:501 was once
/// absent from the Rust port; asserting every flag individually
/// means dropping any one of the resets fails this test.
#[test]
fn init_parse_status_resets_all_lexer_parser_flags() {
    use crate::ported::lex::{
        incasepat, incmdpos, incond, infor, inredir, inrepeat, intypeset, set_incasepat,
        set_incmdpos, set_incond, set_infor, set_inredir, set_inrepeat, set_intypeset,
    };

    // Arrange: push every flag away from its default.
    set_incasepat(5);
    set_incond(7);
    set_inredir(true);
    set_infor(3);
    set_intypeset(true);
    set_inrepeat(2);
    set_incmdpos(false);

    // Act.
    init_parse_status();

    // Snapshot the state once, then check each flag (c:500-502).
    let (casepat, cond, redir, for_state, typeset, repeat, cmdpos) = (
        incasepat(),
        incond(),
        inredir(),
        infor(),
        intypeset(),
        inrepeat(),
        incmdpos(),
    );
    assert_eq!(casepat, 0, "c:500 — incasepat = 0");
    assert_eq!(cond, 0, "c:500 — incond = 0");
    assert!(!redir, "c:500 — inredir = 0");
    assert_eq!(for_state, 0, "c:500 — infor = 0");
    assert!(!typeset, "c:500 — intypeset = 0");
    assert_eq!(repeat, 0, "c:501 — inrepeat_ = 0 (was previously missing)");
    assert!(cmdpos, "c:502 — incmdpos = 1");
}
8593}
zsh/ported/parse.rs

zsh/ported/
parse.rs